import os
import random
import shutil
import sys


def _write_create_sql(table_dir, tables):
    """Write create.sql: `tables` chained tables, each FK-referencing the previous one's y."""
    with open(os.path.join(table_dir, "create.sql"), "w") as f:
        for i in range(tables):
            if i == 0:
                f.write(f"create table table{i} (x int primary key, y int, z int, key y_idx(y));\n")
            else:
                f.write(f"create table table{i} (x int primary key, y int, z int, key y_idx(y), foreign key (y) references table{i - 1}(y));\n")


def _write_table_csvs(table_dir, tables, rows, ys):
    """Write one CSV of `rows` seed rows per table; x and z are the row index, y is shuffled."""
    for j in range(tables):
        with open(os.path.join(table_dir, f"table{j}.csv"), "w") as f:
            f.write("x,y,z\n")
            for i in range(rows):
                f.write(f"{i},{ys[i]},{i}\n")


def _write_insert_sql(path, tables, rows, adds, ys, y_offset):
    """Write an insert script adding `adds` rows to each table.

    y values come from the unused tail of `ys`, shifted by `y_offset`; two
    scripts written with different offsets insert the same primary keys with
    different y values, so merging the resulting branches conflicts on every
    added row.
    """
    with open(path, "w") as f:
        for i in range(tables):
            # The inserts intentionally violate the FK chain ordering, so
            # checks are disabled for the load.
            f.write("set foreign_key_checks = 0;\n")
            f.write("set unique_checks = 0;\n")
            f.write(f"insert into table{i} values\n")
            for j, k in enumerate(ys[rows:rows + adds]):
                f.write(" " if j == 0 else ", ")
                f.write(f"({rows + j},{k + y_offset},{rows + j})")
            f.write(";\n")


def generate(table_dir, tables, rows, adds):
    """Generate the merge-benchmark fixture files under `table_dir`.

    Produces create.sql, one seed CSV per table, and two divergent insert
    scripts (branch.sql and diverge_main.sql) whose y values differ by 1 on
    every added row.
    """
    # Start from a clean directory: remove any stale run, then recreate.
    # (The original checked `not os.path.exists` before rmtree, which skipped
    # the reset exactly when the directory already existed.)
    shutil.rmtree(table_dir, ignore_errors=True)
    os.makedirs(table_dir)

    # Unique y values covering the seed rows plus the rows added per branch.
    ys = list(range(rows + adds))
    random.shuffle(ys)

    _write_create_sql(table_dir, tables)
    _write_table_csvs(table_dir, tables, rows, ys)
    _write_insert_sql(os.path.join(table_dir, "branch.sql"), tables, rows, adds, ys, 0)
    _write_insert_sql(os.path.join(table_dir, "diverge_main.sql"), tables, rows, adds, ys, 1)


def main(argv):
    """CLI entry point; returns a process exit code."""
    if len(argv) != 5:
        print("usage: python3 data.py <table_dir> <tables> <rows> <adds>")
        return 1
    generate(argv[1], int(argv[2]), int(argv[3]), int(argv[4]))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
#!/bin/bash
#
# Build a dolt repo with two divergent branches (main and feature) from the
# fixture files produced by data.py, so a later `dolt merge main` on the
# feature branch conflicts on every row added by the divergent inserts.
#
# usage: setup.sh <dolt_dir> <data_dir>

# Abort on the first failed command so CI doesn't benchmark a half-built repo.
set -eo pipefail

if [ "$#" -ne 2 ]; then
    echo "usage: setup.sh <dolt_dir> <data_dir>"
    exit 1
fi

DIR=$1
DATA=$2

# Start from a clean repo directory.
rm -rf "$DIR"
mkdir "$DIR"
cd "$DIR"

dolt init

dolt sql < "$DATA/create.sql"

# Import each tableN.csv into its matching table. FK checks are disabled
# because files are imported in name order, not FK-dependency order.
i=0
for t in "$DATA"/table*; do
    echo "$t"
    dolt table import --disable-fk-checks -u "table${i}" "$t"
    # not ((i++)): that returns status 1 when i is 0, which aborts under set -e
    i=$((i + 1))
done

dolt commit -Am "add tables"

dolt sql < "$DATA/diverge_main.sql"

dolt commit -Am "add rows to conflict"

# Branch from the pre-divergence commit and apply the conflicting inserts.
dolt checkout -b feature
dolt reset --hard head~1

dolt sql < "$DATA/branch.sql"

dolt commit -Am "new branch"
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/ci-orm-tests.yaml b/.github/workflows/ci-orm-tests.yaml index a7cd1deb2d..c93c6e5a46 100644 --- a/.github/workflows/ci-orm-tests.yaml +++ b/.github/workflows/ci-orm-tests.yaml @@ -19,7 +19,7 @@ jobs: uses: ./.github/actions/orm-tests - name: Configure AWS Credentials if: ${{ failure() }} - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/email-report.yaml b/.github/workflows/email-report.yaml index e6898bd293..8c5285ff54 100644 --- a/.github/workflows/email-report.yaml +++ b/.github/workflows/email-report.yaml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/import-perf.yaml b/.github/workflows/import-perf.yaml index 9fb902b72b..3416d2c26a 100644 --- a/.github/workflows/import-perf.yaml +++ b/.github/workflows/import-perf.yaml @@ -134,7 +134,7 @@ jobs: - name: Configure AWS Credentials if: ${{ github.event.client_payload.email_recipient }} != "" - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/k8s-benchmark-latency.yaml b/.github/workflows/k8s-benchmark-latency.yaml index 59db7f9a52..01bcec9531 100644 --- 
a/.github/workflows/k8s-benchmark-latency.yaml +++ b/.github/workflows/k8s-benchmark-latency.yaml @@ -24,7 +24,7 @@ jobs: sudo cp ./aws-iam-authenticator /usr/local/bin/aws-iam-authenticator aws-iam-authenticator version - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/k8s-fuzzer.yaml b/.github/workflows/k8s-fuzzer.yaml index cd6e26cbd4..2c80d34107 100644 --- a/.github/workflows/k8s-fuzzer.yaml +++ b/.github/workflows/k8s-fuzzer.yaml @@ -18,7 +18,7 @@ jobs: with: version: 'v1.23.6' - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/k8s-sql-correctness.yaml b/.github/workflows/k8s-sql-correctness.yaml index c8c9dd1c46..1430245f67 100644 --- a/.github/workflows/k8s-sql-correctness.yaml +++ b/.github/workflows/k8s-sql-correctness.yaml @@ -17,7 +17,7 @@ jobs: with: version: 'v1.23.6' - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/merge-perf-pr.yaml b/.github/workflows/merge-perf-pr.yaml new file mode 100644 index 0000000000..a2a722abf4 --- /dev/null +++ b/.github/workflows/merge-perf-pr.yaml @@ -0,0 +1,70 @@ +name: Run Merge Benchmark on Pull Requests + +on: + pull_request: + types: [ opened ] + issue_comment: + types: [ created ] + +jobs: + 
validate-commentor: + runs-on: ubuntu-22.04 + outputs: + valid: ${{ steps.set_valid.outputs.valid }} + steps: + - uses: actions/checkout@v3 + - name: Validate Commentor + id: set_valid + run: ./.github/scripts/performance-benchmarking/validate-commentor.sh "$ACTOR" + env: + ACTOR: ${{ github.actor }} + + check-comments: + runs-on: ubuntu-22.04 + needs: validate-commentor + if: ${{ needs.validate-commentor.outputs.valid == 'true' }} + outputs: + benchmark: ${{ steps.set_benchmark.outputs.benchmark }} + comment-body: ${{ steps.set_body.outputs.body }} + steps: + - name: Check for Deploy Trigger + uses: dolthub/pull-request-comment-trigger@master + id: check + with: + trigger: '#merge-benchmark' + reaction: rocket + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Set Benchmark + if: ${{ steps.check.outputs.triggered == 'true' }} + id: set_benchmark + run: | + echo "benchmark=true" >> $GITHUB_OUTPUT + + performance: + runs-on: ubuntu-22.04 + needs: [validate-commentor, check-comments] + if: ${{ needs.check-comments.outputs.benchmark == 'true' }} + name: Trigger Benchmark Merge Workflow + steps: + - uses: dolthub/pull-request-comment-branch@v3 + id: comment-branch + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + - name: Get pull number + uses: actions/github-script@v6 + id: get_pull_number + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: core.setOutput("pull_number", JSON.stringify(context.issue.number)); + - uses: peter-evans/repository-dispatch@v2.0.0 + with: + token: ${{ secrets.REPO_ACCESS_TOKEN }} + event-type: benchmark-merge + client-payload: | + { + "version": "${{ steps.comment-branch.outputs.head_sha }}", + "commit_to_branch": "${{ steps.comment-branch.outputs.head_sha }}", + "actor": "${{ github.actor }}", + "issue_id": "${{ steps.get_pull_number.outputs.pull_number }}" + } diff --git a/.github/workflows/merge-perf.yaml b/.github/workflows/merge-perf.yaml new file mode 100644 index 0000000000..a1e4268dbe --- /dev/null +++ 
b/.github/workflows/merge-perf.yaml @@ -0,0 +1,183 @@ +name: Merge Benchmarks +on: + repository_dispatch: + types: [ benchmark-merge ] +env: + SCRIPT_DIR: '.github/scripts/merge-perf' + RESULT_TABLE_NAME: 'merge_perf_results' + DOLTHUB_DB: 'import-perf/merge-perf' +jobs: + bench: + name: Benchmark + defaults: + run: + shell: bash + strategy: + fail-fast: true + runs-on: ubuntu-latest + steps: + - name: Set up Go 1.x + id: go + uses: actions/setup-go@v3 + with: + go-version: ^1.19 + + - name: Setup Python 3.x + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Dolt version + id: version + run: | + version=${{ github.event.client_payload.version }} + + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.client_payload.version }} + + - name: Install dolt + working-directory: ./go + run: go install ./cmd/dolt + + - name: Run bench + id: bench + run: | + dolt config --global --add user.email "merge-perf@dolthub.com" + dolt config --global --add user.name "merge-perf" + + gw=$GITHUB_WORKSPACE + DATADIR=$gw/data + TABLE_NUM=2 + ROW_NUM=1000000 + ADD_NUM=60000 + python ${{ env.SCRIPT_DIR }}/data.py $DATADIR $TABLE_NUM $ROW_NUM $ADD_NUM + + TMPDIR=$gw/tmp + ./${{ env.SCRIPT_DIR}}/setup.sh $TMPDIR $DATADIR + + TIMES=$gw/time.log + cd $TMPDIR + latency=$(python3 -c "import time, subprocess; start = time.time(); res=subprocess.run(['dolt', 'merge', 'main'], capture_output=True); output = res.stdout + res.stderr if res.returncode != 0 else time.time() -start; print(output); exit(res.returncode)") + + RESULTS=$gw/results.sql + echo "CREATE TABLE ${{env.RESULT_TABLE_NAME }} (name varchar(50) primary key, table_cnt int, run_cnt int, add_cnt int, conflict_cnt int, fks bool, latency float);" >> $RESULTS + echo "INSERT INTO ${{ env.RESULT_TABLE_NAME }} values ('1m rows, 100k conflicts', 2, $ROW_NUM, $ADD_NUM, $ADD_NUM, true, $latency);" >> $RESULTS + + echo "::set-output name=result_path::$RESULTS" + + - name: Report + id: report + run: | + 
gw=$GITHUB_WORKSPACE + in="${{ steps.bench.outputs.result_path }}" + query="select name, round(latency, 2) as latency from ${{ env.RESULT_TABLE_NAME }}" + summaryq="select round(avg(latency), 2) as avg from ${{ env.RESULT_TABLE_NAME }}" + + out="$gw/results.csv" + dolt_dir="$gw/merge-perf" + + dolt config --global --add user.email "merge-perf@dolthub.com" + dolt config --global --add user.name "merge-perf" + + echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' | dolt creds import + dolt clone ${{ env.DOLTHUB_DB }} "$dolt_dir" + + cd "$dolt_dir" + + branch="${{ github.event.client_payload.commit_to_branch }}" + # checkout branch + if [ -z $(dolt sql -q "select 1 from dolt_branches where name = '$branch';") ]; then + dolt checkout -b $branch + else + dolt checkout $branch + fi + + dolt sql -q "drop table if exists ${{ env.RESULT_TABLE_NAME }}" + + # load results + dolt sql < "$in" + + # push results to dolthub + dolt add ${{ env.RESULT_TABLE_NAME }} + dolt commit -m "CI commit" + dolt push -f origin $branch + + # generate report + dolt sql -r csv -q "$query" > "$out" + + cat "$out" + echo "::set-output name=report_path::$out" + + avg=$(dolt sql -r csv -q "$summaryq" | tail -1) + echo "::set-output name=avg::$avg" + + - name: Format Results + id: html + if: ${{ github.event.client_payload.email_recipient }} != "" + run: | + gw="$GITHUB_WORKSPACE" + in="${{ steps.report.outputs.report_path }}" + out="$gw/results.html" + + echo "" > "$out" + print_header=true + while read line; do + if "$print_header"; then + echo " " >> "$out" + print_header=false + continue + fi + echo " " >> "$out" + done < "$in" + echo "
${line//,/}
${line//,/}
" >> "$out" + + avg="${{ steps.report.outputs.avg }}" + echo "
Average
$avg
" >> "$out" + + cat "$out" + echo "::set-output name=html::$(echo $out)" + + - name: Configure AWS Credentials + if: ${{ github.event.client_payload.email_recipient }} != "" + uses: aws-actions/configure-aws-credentials@v1-node16 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Send Email + uses: ./.github/actions/ses-email-action + if: ${{ github.event.client_payload.email_recipient }} != "" + with: + region: us-west-2 + toAddresses: '["${{ github.event.client_payload.email_recipient }}"]' + subject: 'System Table Performance Benchmarks: ${{ github.event.client_payload.version }}' + bodyPath: ${{ steps.html.outputs.html }} + template: 'SysbenchTemplate' + + - name: Read CSV + if: ${{ github.event.client_payload.issue_id }} != "" + id: csv + uses: juliangruber/read-file-action@v1 + with: + path: "${{ steps.report.outputs.report_path }}" + + - name: Create MD + if: ${{ github.event.client_payload.issue_id }} != "" + uses: petems/csv-to-md-table-action@master + id: md + with: + csvinput: ${{ steps.csv.outputs.content }} + + - uses: mshick/add-pr-comment@v2 + if: ${{ github.event.client_payload.issue_id }} != "" + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + issue: ${{ github.event.client_payload.issue_id }} + message-failure: merge benchmark failed + message-cancelled: merge benchmark cancelled + allow-repeats: true + message: | + @${{ github.event.client_payload.actor }} __DOLT__ + ${{ steps.md.outputs.markdown-table }} diff --git a/.github/workflows/nightly-performance-benchmarks-email-report.yaml b/.github/workflows/nightly-performance-benchmarks-email-report.yaml index 1322f37ff1..ef0b6b9532 100644 --- a/.github/workflows/nightly-performance-benchmarks-email-report.yaml +++ b/.github/workflows/nightly-performance-benchmarks-email-report.yaml @@ -52,3 +52,14 @@ jobs: "commit_to_branch": "nightly", "actor": "${{ github.actor }}" } + - uses: 
peter-evans/repository-dispatch@v2.0.0 + with: + token: ${{ secrets.REPO_ACCESS_TOKEN }} + event-type: benchmark-merge + client-payload: | + { + "email_recipient": "${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}", + "version": "${{ github.sha }}", + "commit_to_branch": "nightly", + "actor": "${{ github.actor }}" + } diff --git a/.github/workflows/pull-report.yaml b/.github/workflows/pull-report.yaml index f3e60ded99..cb4903f21e 100644 --- a/.github/workflows/pull-report.yaml +++ b/.github/workflows/pull-report.yaml @@ -13,7 +13,7 @@ jobs: - name: Checkout uses: actions/checkout@v3 - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/sysbench-perf.yaml b/.github/workflows/sysbench-perf.yaml index 095c7d4496..c9a38272df 100644 --- a/.github/workflows/sysbench-perf.yaml +++ b/.github/workflows/sysbench-perf.yaml @@ -135,7 +135,7 @@ jobs: - name: Configure AWS Credentials if: ${{ github.event.client_payload.email_recipient }} != "" - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/README.md b/README.md index 98235c2bb5..ca92757cc3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - + # Dolt is Git for Data! @@ -526,11 +526,11 @@ Hate the command line? Let's use [Tableplus](https://tableplus.com/) to make som Now, to connect you must select MySQL as the connection type. Then enter a name for your connection, `getting_started` as your database, and `root` as your user. 
-![Tableplus Connection](./docs/getting-started-tp-connect.png) +![Tableplus Connection](./images/getting-started-tp-connect.png) Click connect and you'll be presented with a familiar database workbench GUI. -![Tableplus](./docs/getting-started-tp.png) +![Tableplus](./images/getting-started-tp.png) ## Make changes on a branch @@ -549,7 +549,7 @@ call dolt_commit('-am', 'Modifications on a branch'); Here's the result in Tableplus. -![New Updates](./docs/getting-started-new-updates.png) +![New Updates](./images/getting-started-new-updates.png) Back in my terminal, I cannot see the table modifications made in Tableplus because they happened on a different branch than the one I have checked out in my session. diff --git a/docs/deploy-server.sh b/docs/deploy-server.sh deleted file mode 100644 index 0d5790125d..0000000000 --- a/docs/deploy-server.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/bin/bash - -# This script installs starts a dolt server on your Unix compatible computer. - -if test -z "$BASH_VERSION"; then - echo "Please run this script using bash, not sh or any other shell. It should be run as root." >&2 - exit 1 -fi - -_() { - -install_dolt() { - # Install Dolt if it already doesn't exist - echo "Installing Dolt..." - - if ! command -v dolt &> /dev/null - then - sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | bash' - fi -} - -setup_configs() { - # Set up the dolt user along with core dolt configurations - echo "Setting up Configurations..." - - # Check if the user "dolt" already exists. If it exists double check that it is okay to continue - if id -u "dolt" &> /dev/null; then - echo "The user dolt already exists" - read -r -p "Do you want to continue adding privileges to the existing user dolt? " response - - response=${response,,} # tolower - if ! 
([[ $response =~ ^(yes|y| ) ]] || [[ -z $response ]]); then - exit 1 - fi - - else - # add the user if `dolt` doesn't exist - useradd -r -m -d /var/lib/doltdb dolt - fi - - cd /var/lib/doltdb - - read -e -p "Enter an email associated with your user: " -i "dolt-user@dolt.com" email - read -e -p "Enter a username associated with your user: " -i "Dolt Server Account" username - - sudo -u dolt dolt config --global --add user.email $email - sudo -u dolt dolt config --global --add user.name $username -} - -# Database creation -database_configuration() { - echo "Setting up the dolt database..." - - read -e -p "Input the name of your database: " -i "mydb" db_name - local db_dir="databases/$db_name" - - cd /var/lib/doltdb - sudo -u dolt mkdir -p $db_dir - - cd $db_dir - sudo -u dolt dolt init -} - -# Setup and Start daemon -start_server() { - echo "Starting the server" - - cd ~ - cat > dolt_config.yaml< doltdb.service< \ No newline at end of file diff --git a/docs/dolt_logo.svg b/docs/dolt_logo.svg deleted file mode 100644 index 3f07ce8aee..0000000000 --- a/docs/dolt_logo.svg +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/docs/dolthub-clone.png b/docs/dolthub-clone.png deleted file mode 100755 index 45a743f726..0000000000 Binary files a/docs/dolthub-clone.png and /dev/null differ diff --git a/docs/dolthub-fork.png b/docs/dolthub-fork.png deleted file mode 100755 index fae88cfb76..0000000000 Binary files a/docs/dolthub-fork.png and /dev/null differ diff --git a/docs/dolthub-pr-1.png b/docs/dolthub-pr-1.png deleted file mode 100755 index 686e58ff70..0000000000 Binary files a/docs/dolthub-pr-1.png and /dev/null differ diff --git a/docs/dolthub-pr-2.png b/docs/dolthub-pr-2.png deleted file mode 100755 index a453e5907e..0000000000 Binary files a/docs/dolthub-pr-2.png and /dev/null differ diff --git a/docs/faq.md b/docs/faq.md deleted file mode 100644 index 70eb7d5143..0000000000 --- a/docs/faq.md +++ /dev/null @@ -1,85 +0,0 @@ -# Dolt FAQ - -## Why is it called 
Dolt? Are you calling me dumb? - -It's named `dolt` to pay homage to [how Linus Torvalds named -git](https://en.wikipedia.org/wiki/Git#Naming): - -> Torvalds sarcastically quipped about the name git (which means -> "unpleasant person" in British English slang): "I'm an egotistical -> bastard, and I name all my projects after myself. First 'Linux', -> now 'git'." - -We wanted a word meaning "idiot", starting with D for Data, -short enough to type on the command line, and -not taken in the standard command line lexicon. So, -`dolt`. - -## The MySQL shell gives me an error: `Can't connect to local MySQL server through socket '/tmp/mysql.sock'` - -The MySQL shell will try to connect through a socket file on many OSes. -To force it to use TCP instead, give it the loopback address like this: - -```bash -% mysql --host 127.0.0.1 ... -``` - -## What does `@@autocommit` do? - -This is a SQL variable that you can turn on for your SQL session like so: - -`SET @@autocommit = 1` - -It's on by default in the MySQL shell, as well as in most clients. But -some clients (notably the Python MySQL connector) turn it off by -default. - -You must commit your changes for them to persist after your session -ends, either by setting `@@autocommit` to on, or by issuing `COMMIT` -statements manually. - -## What's the difference between `COMMIT` and `DOLT_COMMIT()`? - -`COMMIT` is a standard SQL statement that commits a transaction. In -dolt, it just flushes any pending changes in the current SQL session -to disk, updating the working set. HEAD stays the same, but your -working set changes. This means your edits will persist after this -session ends. - -`DOLT_COMMIT()` commits the current SQL transaction, then creates a -new dolt commit on the current branch. It's the same as if you run -`dolt commit` from the command line. - -## I want each of my connected SQL users to get their own branch to make changes on, then merge them back into `main` when they're done making edits. How do I do that? 
- -We are glad you asked! This is a common use case, and giving each user -their own branch is something we've spent a lot of time getting -right. For more details on how to use this pattern effectively, see -[using branches](https://docs.dolthub.com/reference/sql/branches). - -## Does Dolt support transactions? - -Yes, it should exactly work the same as MySQL, but with fewer locks -for competing writes. - -It's also possible for different sessions to connect to different -branches on the same server. See [using -branches](https://docs.dolthub.com/reference/sql/branches) for details. - -## What SQL features / syntax are supported? - -Most of them! Check out [the docs for the full list of supported -features](https://docs.dolthub.com/reference/sql/support). - -You can check out what we're working on next on our -[roadmap](roadmap.md). Paying customers get their feature requests -bumped to the front of the line. - -## Does Dolt support my favorite SQL workbench / tool? - -Probably! Have you tried it? If you try it and it doesn't work, [let -us know with an issue](https://github.com/dolthub/dolt/issues) or in -[our Discord](https://discord.com/invite/RFwfYpu) and we'll see what -we can do. A lot of times we can fix small compatibility issues really -quick, like the same week. And even if we can't, we want to know about -it! Our goal is to be a 100% drop-in replacement for MySQL. diff --git a/docs/quickstart.md b/docs/quickstart.md deleted file mode 100644 index c3d32095da..0000000000 --- a/docs/quickstart.md +++ /dev/null @@ -1,182 +0,0 @@ -# Dolt quickstart guide - -This is a one-page guide to getting you started with Dolt as quickly -as possible. If you're trying to participate in a -[data bounty](https://www.dolthub.com/bounties), this will get you -up and running. We think bounties are the most engaging way to get -started using Dolt and DoltHub and understand how it all works. 
- -This guide is intended for new data bounty participants, and is geared -to that use case. You can find more complete documentation on how to -use Dolt in the [README](../README.md) and in the [DoltHub -documentation](https://docs.dolthub.com/introduction/installation). - -## Install Dolt - -```sh -% sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | bash' -``` - -For windows installation, see [here](windows.md). - -## Configure dolt - -```sh -% dolt config --global --add user.email YOU@DOMAIN.COM -% dolt config --global --add user.name "YOUR NAME" -``` - -## Fork the data bounty - -Forking a database makes a private copy for you to edit. Find the -database you want to edit, then click the "Fork" button on the top -left. - -![Forking a repository](dolthub-fork.png) - -## Clone your fork - -Cloning your fork of the database downloads it to your local computer -so you can make changes to it. Click -"Clone" to find the command to copy and paste into your terminal. This -clone command will be different for every fork, so you can't just copy -and paste the command in the text below. - -![Cloning a repository](dolthub-clone.png) - -Run the command, then cd into the database directory. - -```sh -% dolt clone dolthub/hospital-price-transparency -% cd hospital-price-transparency -``` - -## Inspect the data - -Get familiar with the tables and their columns. The easiest way to do -this is by using SQL commands. `show tables` and `describe ` are good commands to use when exploring a new database. - -```sql -% dolt sql -# Welcome to the DoltSQL shell. -# Statements must be terminated with ';'. -# "exit" or "quit" (or Ctrl-D) to exit. 
-hospital_price_transparency> show tables; -+-----------+ -| Table | -+-----------+ -| cpt_hcpcs | -| hospitals | -| prices | -+-----------+ -hospital_price_transparency> describe hospitals; -+----------------+--------------+------+-----+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------------+--------------+------+-----+---------+-------+ -| npi_number | char(16) | NO | PRI | | | -| name | varchar(256) | YES | | | | -| url | varchar(512) | YES | | | | -| street_address | varchar(512) | YES | | | | -| city | varchar(64) | YES | | | | -| state | varchar(32) | YES | | | | -| zip_code | varchar(16) | YES | | | | -| publish_date | date | YES | | | | -+----------------+--------------+------+-----+---------+-------+ -hospital_price_transparency> select npi_number, name, street_address from hospitals limit 3; -+------------+------------------------------------+---------------------+ -| npi_number | name | street_address | -+------------+------------------------------------+---------------------+ -| 1003873225 | The Specialty Hospital Of Meridian | 1314 19th Ave | -| 1023061405 | Grandview Medical Center | 3690 Grandview Pkwy | -| 1023180502 | Medical City Dallas | 7777 Forest Ln | -+------------+------------------------------------+---------------------+ -hospital_price_transparency> exit -Bye -``` - -## Add some data - -There are two main ways to add data into your copy of the -database. You can either import from files, or you can add data by -writing scripts and inserting rows with SQL statements. - -### Importing files - -Use the `dolt table import` command to import CSV or JSON files. Use -the `-u` option to update the table (instead of replacing the -contents). 
- -```sh -% dolt table import -u prices hospital_prices.csv -``` - -### Starting a SQL server - -If you want to write a script to insert data with python or another -programming language, start a SQL server on the command line: - -```sh -% dolt sql-server -Starting server with Config HP="localhost:3306"|T="28800000"|R="false"|L="info" -``` - -Then connect to the database with any standard MySQL connector and -make your edits. - -## See your changes - -After you've inserted some data, you can inspect the changes you made -using `dolt diff`. If you added a lot of rows, use the `--summary` flag -to get a summary instead. - -```sh -% dolt diff -% dolt diff --summary -``` - -## Commit your changes - -These commands work like `git`, if you know `git`. If you don't know -`git`, don't worry! Most people who know `git` don't actually know -`git` either! - -```sh -% dolt add . -% dolt commit -m "This message describes my changes" -``` - -You can repeat these steps as many times as you have more changes to add: - -1. Add data -2. Commit your changes - -Every time you commit it creates a checkpoint you can roll back to if -you mess up later. - -## Push your changes back to DoltHub and create a PR - -When you're done adding data, push the database back to DoltHub and -submit a pull request (PR) to merge them back into the original fork. - -```sh -% dolt push origin master -``` - -![Create new PR](dolthub-pr-1.png) - -![Create new PR](dolthub-pr-2.png) - -## Respond to PR review feedback - -Your PR will be reviewed by the people running the bounty, and they -may ask you to make changes. If they do, then go ahead and make your -changes on your machine, then `dolt push` those new commits back to -DoltHub and your existing PR will automatically be updated with them. - -## Questions? Still need help? - -Come hang out with us on [our -Discord](https://discord.com/invite/RFwfYpu), where the team that -builds Dolt and lots of other customers are available to chat and ask -questions. 
If this guide is missing something obvious, come tell us -there! diff --git a/docs/roadmap.md b/docs/roadmap.md deleted file mode 100644 index c43c22ceea..0000000000 --- a/docs/roadmap.md +++ /dev/null @@ -1,58 +0,0 @@ -# Dolt Feature Roadmap - -Full details on [supported SQL -features](https://docs.dolthub.com/reference/sql/support) are -available on the docs site. - -This is a selection of unimplemented features we're working on. Don't -see what you need on here? [Let us -know!](https://github.com/dolthub/dolt/issues) Paying customers get -their feature requests implemented first. - -Roadmap last updated Apr 2022, next update Jun 2022. - -## Upcoming features - -| Feature | Estimate | -| ------- | --- | -| 99.9% SQL correctness | Q2 2022 | -| Hosted Dolt v1 | Q2 2022 | -| Hash join strategy | Q2 2022 | -| Storage performance | Q2 2022 | -| Lock / unlock tables | Q2 2022 | -| SQL GUI support tests | Q2 2022 | -| `JSON_TABLE()` | Q2 2022 | -| Table / index statistics | Q2 2022 | -| Universal SQL path for CLI | Q2 2022 | -| Pipeline query processing | Q3 2022 | -| Row-level locking (`SELECT FOR UPDATE`) | Q3 2022 | -| All transaction isolation levels | Q3 2022 | -| Postgres Support | 2023 | -| Automatic garbage collection | Unscheduled | -| Collation and charset support | Unscheduled | -| Virtual columns and json indexing | Unscheduled | -| Full text indexes | Unscheduled | -| Spatial indexes | Unscheduled | -| Multiple DBs in one repo | Unscheduled | -| Embedded dolt | Unscheduled | -| Signed commits | Unscheduled | -| Cross-database joins with indexes | Unscheduled | -| More function coverage | Ongoing | - -## Recently launched features - -| Feature | Launch Date | -| ------- | --- | -| Join for update | Oct 2021 | -| Backup and replication | Nov 2021 | -| Commit graph performance | Nov 2021 | -| Persistent SQL configuration | Dec 2021 | -| CREATE / DROP DATABASE | Dec 2021 | -| Hosted Dolt Alpha | Jan 2022 | -| `ROWS` window definitions | Jan 2022 | -| `RANGE` 
window definitions | Jan 2022 | -| DoltLab (on-prem DoltHub) | Jan 2022 | -| Users / grants | Feb 2022 | -| Geometry types and functions | Feb 2022 | -| Better `dolt_diff` table experience | Mar 2022 | - diff --git a/docs/windows.md b/docs/windows.md deleted file mode 100644 index ce0f7e58fd..0000000000 --- a/docs/windows.md +++ /dev/null @@ -1,24 +0,0 @@ -# Windows support - -Dolt is tested and supported on windows! If you find any problems -specific to Windows, please file an -[issue](https://github.com/dolthub/dolt/issues/) and let us know. - -## Installation - -Download the latest Microsoft Installer (`.msi` file) in -[releases](https://github.com/dolthub/dolt/releases) and run it. - -Package manager releases coming soon! - -## Environment - -Dolt runs best under the Windows Subsystem for Linux, or WSL. But it -should also work fine with `cmd.exe` or `powershell`. If you find this -isn't true, please file an -[issue](https://github.com/dolthub/dolt/issues/) and let us know. - -WSL 2 currently has [known -bugs](https://github.com/dolthub/dolt/issues/992), so we recommend -using WSL 1 for now. Or if you do use WSL 2, we recommend using the -Linux `dolt` binary, rather than the Windows `dolt.exe` binary. 
diff --git a/go/cmd/dolt/commands/diff.go b/go/cmd/dolt/commands/diff.go index 28ee48f7c0..2e18a49756 100644 --- a/go/cmd/dolt/commands/diff.go +++ b/go/cmd/dolt/commands/diff.go @@ -23,6 +23,7 @@ import ( "strings" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/types" "github.com/dolthub/dolt/go/cmd/dolt/cli" "github.com/dolthub/dolt/go/cmd/dolt/commands/engine" @@ -35,7 +36,9 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlfmt" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil" + "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/tabular" "github.com/dolthub/dolt/go/libraries/utils/argparser" + "github.com/dolthub/dolt/go/libraries/utils/iohelp" "github.com/dolthub/dolt/go/libraries/utils/set" ) @@ -46,7 +49,8 @@ type diffMode int const ( SchemaOnlyDiff diffPart = 1 // 0b0001 DataOnlyDiff diffPart = 2 // 0b0010 - Summary diffPart = 4 // 0b0100 + Stat diffPart = 4 // 0b0100 + Summary diffPart = 8 // 0b1000 SchemaAndDataDiff = SchemaOnlyDiff | DataOnlyDiff @@ -56,6 +60,7 @@ const ( DataFlag = "data" SchemaFlag = "schema" + StatFlag = "stat" SummaryFlag = "summary" whereParam = "where" limitParam = "limit" @@ -138,7 +143,8 @@ func (cmd DiffCmd) ArgParser() *argparser.ArgParser { ap := argparser.NewArgParser() ap.SupportsFlag(DataFlag, "d", "Show only the data changes, do not show the schema changes (Both shown by default).") ap.SupportsFlag(SchemaFlag, "s", "Show only the schema changes, do not show the data changes (Both shown by default).") - ap.SupportsFlag(SummaryFlag, "", "Show summary of data changes") + ap.SupportsFlag(StatFlag, "", "Show stats of data changes") + ap.SupportsFlag(SummaryFlag, "", "Show summary of data and schema changes") ap.SupportsString(FormatFlag, "r", "result output format", "How to format diff output. Valid values are tabular, sql, json. 
Defaults to tabular.") ap.SupportsString(whereParam, "", "column", "filters columns based on values in the diff. See {{.EmphasisLeft}}dolt diff --help{{.EmphasisRight}} for details.") ap.SupportsInt(limitParam, "", "record_count", "limits to the first N diffs.") @@ -173,9 +179,9 @@ func (cmd DiffCmd) Exec(ctx context.Context, commandStr string, args []string, d } func (cmd DiffCmd) validateArgs(apr *argparser.ArgParseResults) errhand.VerboseError { - if apr.Contains(SummaryFlag) { + if apr.Contains(StatFlag) || apr.Contains(SummaryFlag) { if apr.Contains(SchemaFlag) || apr.Contains(DataFlag) { - return errhand.BuildDError("invalid Arguments: --summary cannot be combined with --schema or --data").Build() + return errhand.BuildDError("invalid Arguments: --stat and --summary cannot be combined with --schema or --data").Build() } } @@ -197,6 +203,8 @@ func parseDiffArgs(ctx context.Context, dEnv *env.DoltEnv, apr *argparser.ArgPar dArgs.diffParts = DataOnlyDiff } else if apr.Contains(SchemaFlag) && !apr.Contains(DataFlag) { dArgs.diffParts = SchemaOnlyDiff + } else if apr.Contains(StatFlag) { + dArgs.diffParts = Stat } else if apr.Contains(SummaryFlag) { dArgs.diffParts = Summary } @@ -248,6 +256,10 @@ func parseDiffArgs(ctx context.Context, dEnv *env.DoltEnv, apr *argparser.ArgPar if err != nil { return nil, err } + if ok { + dArgs.tableSet.Add(tableName) + continue + } if !ok { return nil, fmt.Errorf("table %s does not exist in either revision", tableName) } @@ -467,6 +479,41 @@ func maybeResolve(ctx context.Context, dEnv *env.DoltEnv, spec string) (*doltdb. 
return root, true } +var diffSummarySchema = sql.Schema{ + &sql.Column{Name: "Table name", Type: types.Text, Nullable: false}, + &sql.Column{Name: "Diff type", Type: types.Text, Nullable: false}, + &sql.Column{Name: "Data change", Type: types.Boolean, Nullable: false}, + &sql.Column{Name: "Schema change", Type: types.Boolean, Nullable: false}, +} + +func printDiffSummary(ctx context.Context, tds []diff.TableDelta, dArgs *diffArgs) errhand.VerboseError { + cliWR := iohelp.NopWrCloser(cli.OutStream) + wr := tabular.NewFixedWidthTableWriter(diffSummarySchema, cliWR, 100) + defer wr.Close(ctx) + + for _, td := range tds { + if !dArgs.tableSet.Contains(td.FromName) && !dArgs.tableSet.Contains(td.ToName) { + continue + } + + if td.FromTable == nil && td.ToTable == nil { + return errhand.BuildDError("error: both tables in tableDelta are nil").Build() + } + + summ, err := td.GetSummary(ctx) + if err != nil { + return errhand.BuildDError("could not get table delta summary").AddCause(err).Build() + } + + err = wr.WriteSqlRow(ctx, sql.Row{td.CurName(), summ.DiffType, summ.DataChange, summ.SchemaChange}) + if err != nil { + return errhand.BuildDError("could not write table delta summary").AddCause(err).Build() + } + } + + return nil +} + func diffUserTables(ctx context.Context, dEnv *env.DoltEnv, dArgs *diffArgs) errhand.VerboseError { var err error @@ -490,6 +537,10 @@ func diffUserTables(ctx context.Context, dEnv *env.DoltEnv, dArgs *diffArgs) err return strings.Compare(tableDeltas[i].ToName, tableDeltas[j].ToName) < 0 }) + if dArgs.diffParts&Summary != 0 { + return printDiffSummary(ctx, tableDeltas, dArgs) + } + dw, err := newDiffWriter(dArgs.diffOutput) if err != nil { return errhand.VerboseErrorFromError(err) @@ -538,8 +589,8 @@ func diffUserTable( return errhand.BuildDError("cannot retrieve schema for table %s", td.ToName).AddCause(err).Build() } - if dArgs.diffParts&Summary != 0 { - return printDiffSummary(ctx, td, fromSch.GetAllCols().Size(), toSch.GetAllCols().Size()) 
+ if dArgs.diffParts&Stat != 0 { + return printDiffStat(ctx, td, fromSch.GetAllCols().Size(), toSch.GetAllCols().Size()) } if dArgs.diffParts&SchemaOnlyDiff != 0 { @@ -687,7 +738,6 @@ func diffRows( } fromSch = pkSch.Schema } - if td.ToSch != nil { pkSch, err := sqlutil.FromDoltSchema(td.ToName, td.ToSch) if err != nil { diff --git a/go/cmd/dolt/commands/diff_output.go b/go/cmd/dolt/commands/diff_output.go index afcbd1c316..bd4c9b83da 100644 --- a/go/cmd/dolt/commands/diff_output.go +++ b/go/cmd/dolt/commands/diff_output.go @@ -65,18 +65,18 @@ func newDiffWriter(diffOutput diffOutput) (diffWriter, error) { } } -func printDiffSummary(ctx context.Context, td diff.TableDelta, oldColLen, newColLen int) errhand.VerboseError { +func printDiffStat(ctx context.Context, td diff.TableDelta, oldColLen, newColLen int) errhand.VerboseError { // todo: use errgroup.Group ae := atomicerr.New() - ch := make(chan diff.DiffSummaryProgress) + ch := make(chan diff.DiffStatProgress) go func() { defer close(ch) - err := diff.SummaryForTableDelta(ctx, ch, td) + err := diff.StatForTableDelta(ctx, ch, td) ae.SetIfError(err) }() - acc := diff.DiffSummaryProgress{} + acc := diff.DiffStatProgress{} var count int64 var pos int eP := cli.NewEphemeralPrinter() @@ -119,15 +119,15 @@ func printDiffSummary(ctx context.Context, td diff.TableDelta, oldColLen, newCol } if keyless { - printKeylessSummary(acc) + printKeylessStat(acc) } else { - printSummary(acc, oldColLen, newColLen) + printStat(acc, oldColLen, newColLen) } return nil } -func printSummary(acc diff.DiffSummaryProgress, oldColLen, newColLen int) { +func printStat(acc diff.DiffStatProgress, oldColLen, newColLen int) { numCellInserts, numCellDeletes := sqle.GetCellsAddedAndDeleted(acc, newColLen) rowsUnmodified := uint64(acc.OldRowSize - acc.Changes - acc.Removes) unmodified := pluralize("Row Unmodified", "Rows Unmodified", rowsUnmodified) @@ -161,7 +161,7 @@ func printSummary(acc diff.DiffSummaryProgress, oldColLen, newColLen int) { 
cli.Printf("(%s vs %s)\n\n", oldValues, newValues) } -func printKeylessSummary(acc diff.DiffSummaryProgress) { +func printKeylessStat(acc diff.DiffStatProgress) { insertions := pluralize("Row Added", "Rows Added", acc.Adds) deletions := pluralize("Row Deleted", "Rows Deleted", acc.Removes) diff --git a/go/cmd/dolt/dolt.go b/go/cmd/dolt/dolt.go index eaecb1843f..f1ed8b1a3a 100644 --- a/go/cmd/dolt/dolt.go +++ b/go/cmd/dolt/dolt.go @@ -56,7 +56,7 @@ import ( ) const ( - Version = "0.53.1" + Version = "0.53.2" ) var dumpDocsCommand = &commands.DumpDocsCmd{} diff --git a/go/gen/proto/dolt/services/eventsapi/go.mod b/go/gen/proto/dolt/services/eventsapi/go.mod index 1e95dfbf0f..db88b93e15 100644 --- a/go/gen/proto/dolt/services/eventsapi/go.mod +++ b/go/gen/proto/dolt/services/eventsapi/go.mod @@ -3,10 +3,9 @@ module github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi go 1.13 require ( - github.com/golang/protobuf v1.4.2 - golang.org/x/net v0.0.0-20200602114024-627f9648deb9 // indirect - golang.org/x/sys v0.0.0-20200620081246-981b61492c35 // indirect - golang.org/x/text v0.3.3 // indirect + github.com/golang/protobuf v1.4.2 // indirect + golang.org/x/text v0.3.8 // indirect google.golang.org/genproto v0.0.0-20200622133129-d0ee0c36e670 // indirect google.golang.org/grpc v1.29.1 + google.golang.org/protobuf v1.24.0 ) diff --git a/go/gen/proto/dolt/services/eventsapi/go.sum b/go/gen/proto/dolt/services/eventsapi/go.sum index e175c06a50..7d9e40bd03 100644 --- a/go/gen/proto/dolt/services/eventsapi/go.sum +++ b/go/gen/proto/dolt/services/eventsapi/go.sum @@ -10,7 +10,6 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf 
v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= @@ -24,43 +23,56 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net 
v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200620081246-981b61492c35 h1:wb/9mP8eUAmHfkM8RmpeLq6nUA7c2i5+bQOtcDftjaE= -golang.org/x/sys v0.0.0-20200620081246-981b61492c35/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8 h1:Nw54tB0rB7hY/N0NQvRW8DG4Yk3Q6T9cu9RcFQDu1tc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= @@ -68,8 +80,6 @@ google.golang.org/genproto v0.0.0-20200622133129-d0ee0c36e670 h1:v/N9fZIfu6jopNI google.golang.org/genproto v0.0.0-20200622133129-d0ee0c36e670/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.24.0 h1:vb/1TCsVn3DcJlQ0Gs1yB1pKI6Do2/QNwxdKqmc/b0s= -google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 
google.golang.org/grpc v1.29.1 h1:EC2SB8S04d2r73uptxphDSUG+kTKVgjRPF+N3xpxRB4= diff --git a/go/go.mod b/go/go.mod index 9b223677ec..f55825f48b 100644 --- a/go/go.mod +++ b/go/go.mod @@ -15,7 +15,7 @@ require ( github.com/dolthub/fslock v0.0.3 github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 - github.com/dolthub/vitess v0.0.0-20230216234925-189ffe819e56 + github.com/dolthub/vitess v0.0.0-20230223032306-95d4b04eabad github.com/dustin/go-humanize v1.0.0 github.com/fatih/color v1.13.0 github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568 @@ -58,7 +58,7 @@ require ( github.com/cenkalti/backoff/v4 v4.1.3 github.com/cespare/xxhash v1.1.0 github.com/creasty/defaults v1.6.0 - github.com/dolthub/go-mysql-server v0.14.1-0.20230218000648-8448267c2200 + github.com/dolthub/go-mysql-server v0.14.1-0.20230227175231-786abd289f41 github.com/google/flatbuffers v2.0.6+incompatible github.com/jmoiron/sqlx v1.3.4 github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6 diff --git a/go/go.sum b/go/go.sum index 9935c03f3e..3769e1d2b6 100644 --- a/go/go.sum +++ b/go/go.sum @@ -166,16 +166,16 @@ github.com/dolthub/flatbuffers v1.13.0-dh.1 h1:OWJdaPep22N52O/0xsUevxJ6Qfw1M2txC github.com/dolthub/flatbuffers v1.13.0-dh.1/go.mod h1:CorYGaDmXjHz1Z7i50PYXG1Ricn31GcA2wNOTFIQAKE= github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= -github.com/dolthub/go-mysql-server v0.14.1-0.20230218000648-8448267c2200 h1:j8tumbRo3G2YgE5qcKlAznGfbgHqZa/e3Li7wsuizj8= -github.com/dolthub/go-mysql-server v0.14.1-0.20230218000648-8448267c2200/go.mod h1:BRFyf6PUuoR+iSLZ+JdpjtqgHzo5cT+tF7oHIpVdytY= +github.com/dolthub/go-mysql-server v0.14.1-0.20230227175231-786abd289f41 h1:8vc9pwtRgqb1RIJyWHsTetx+VZnd7pZlzCewTQIXk7Y= +github.com/dolthub/go-mysql-server v0.14.1-0.20230227175231-786abd289f41/go.mod 
h1:I2Mu8LSpwUII53EyBXqJMEKTQH5DUetV4ulP88JVsKA= github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514= github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488/go.mod h1:ehexgi1mPxRTk0Mok/pADALuHbvATulTh6gzr7NzZto= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8= github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474/go.mod h1:kMz7uXOXq4qRriCEyZ/LUeTqraLJCjf0WVZcUi6TxUY= github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE= github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY= -github.com/dolthub/vitess v0.0.0-20230216234925-189ffe819e56 h1:dHuKfUwaDUe847BVN3Wo+4GUGUNdlhuUif4RWkvG3Go= -github.com/dolthub/vitess v0.0.0-20230216234925-189ffe819e56/go.mod h1:oVFIBdqMFEkt4Xz2fzFJBNtzKhDEjwdCF0dzde39iKs= +github.com/dolthub/vitess v0.0.0-20230223032306-95d4b04eabad h1:9FPQtKoqyREEsHfGKNU2DImktOusXTXklLtvTxtIuZ0= +github.com/dolthub/vitess v0.0.0-20230223032306-95d4b04eabad/go.mod h1:oVFIBdqMFEkt4Xz2fzFJBNtzKhDEjwdCF0dzde39iKs= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -638,6 +638,7 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yusufpapurcu/wmi v1.2.2 
h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= @@ -744,6 +745,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.7.0 h1:LapD9S96VoQRhi/GrNTqeBJFrUjs5UHCAtTlgwA5oZA= golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -778,7 +780,6 @@ golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= @@ -789,6 +790,7 @@ golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLd golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod 
h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -810,6 +812,7 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -853,7 +856,6 @@ golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200620081246-981b61492c35/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -876,6 +878,8 @@ golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220111092808-5a964db01320/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -892,6 +896,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -954,6 +959,7 @@ golang.org/x/tools v0.0.0-20200915173823-2db8f0ff891c/go.mod h1:z6u4i615ZeAfBE4X 
golang.org/x/tools v0.0.0-20200918232735-d647fc253266/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.3.0 h1:SrNbZl6ECOS1qFzgTdQfWXZM9XBkiA6tkFrH9YSTPHM= golang.org/x/tools v0.3.0/go.mod h1:/rWhSS2+zyEVwoJf8YAX6L2f0ntZ7Kn/mGgAWcipA5k= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/go/libraries/doltcore/diff/diff_summary.go b/go/libraries/doltcore/diff/diff_stat.go similarity index 76% rename from go/libraries/doltcore/diff/diff_summary.go rename to go/libraries/doltcore/diff/diff_stat.go index 64fcb31ce2..3da53ed5d4 100644 --- a/go/libraries/doltcore/diff/diff_summary.go +++ b/go/libraries/doltcore/diff/diff_stat.go @@ -34,16 +34,16 @@ import ( var ErrPrimaryKeySetChanged = errors.New("primary key set changed") -type DiffSummaryProgress struct { +type DiffStatProgress struct { Adds, Removes, Changes, CellChanges, NewRowSize, OldRowSize, NewCellSize, OldCellSize uint64 } -type prollyReporter func(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffSummaryProgress) error -type nomsReporter func(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffSummaryProgress) error +type prollyReporter func(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffStatProgress) error +type nomsReporter func(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffStatProgress) error -// Summary reports a summary of diff changes between two values +// Stat reports a stat of diff changes between two values // todo: make package private 
once dolthub is migrated -func Summary(ctx context.Context, ch chan DiffSummaryProgress, from, to durable.Index, fromSch, toSch schema.Schema) (err error) { +func Stat(ctx context.Context, ch chan DiffStatProgress, from, to durable.Index, fromSch, toSch schema.Schema) (err error) { fc, err := from.Count() if err != nil { return err @@ -52,7 +52,7 @@ func Summary(ctx context.Context, ch chan DiffSummaryProgress, from, to durable. if err != nil { return err } - ch <- DiffSummaryProgress{OldRowSize: fc, NewRowSize: tc} + ch <- DiffStatProgress{OldRowSize: fc, NewRowSize: tc} fk, tk := schema.IsKeyless(fromSch), schema.IsKeyless(toSch) var keyless bool @@ -69,15 +69,15 @@ func Summary(ctx context.Context, ch chan DiffSummaryProgress, from, to durable. return diffNomsMaps(ctx, ch, keyless, from, to, fromSch, toSch) } -// SummaryForTableDelta pushes diff summary progress messages for the table delta given to the channel given -func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td TableDelta) error { +// StatForTableDelta pushes diff stat progress messages for the table delta given to the channel given +func StatForTableDelta(ctx context.Context, ch chan DiffStatProgress, td TableDelta) error { fromSch, toSch, err := td.GetSchemas(ctx) if err != nil { return errhand.BuildDError("cannot retrieve schema for table %s", td.ToName).AddCause(err).Build() } if !schema.ArePrimaryKeySetsDiffable(td.Format(), fromSch, toSch) { - return fmt.Errorf("failed to compute diff summary for table %s: %w", td.CurName(), ErrPrimaryKeySetChanged) + return fmt.Errorf("failed to compute diff stat for table %s: %w", td.CurName(), ErrPrimaryKeySetChanged) } keyless, err := td.IsKeyless(ctx) @@ -97,7 +97,7 @@ func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td T } } -func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless bool, from, to durable.Index, fromSch, toSch schema.Schema) error { +func diffProllyTrees(ctx 
context.Context, ch chan DiffStatProgress, keyless bool, from, to durable.Index, fromSch, toSch schema.Schema) error { _, vMapping, err := schema.MapSchemaBasedOnTagAndName(fromSch, toSch) if err != nil { return err @@ -123,7 +123,7 @@ func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless b } ctc := uint64(len(toSch.GetAllCols().GetColumns())) * tc rpr = reportPkChanges - ch <- DiffSummaryProgress{ + ch <- DiffStatProgress{ OldRowSize: fc, NewRowSize: tc, OldCellSize: cfc, @@ -140,7 +140,7 @@ func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless b return nil } -func diffNomsMaps(ctx context.Context, ch chan DiffSummaryProgress, keyless bool, fromRows durable.Index, toRows durable.Index, fromSch, toSch schema.Schema) error { +func diffNomsMaps(ctx context.Context, ch chan DiffStatProgress, keyless bool, fromRows durable.Index, toRows durable.Index, fromSch, toSch schema.Schema) error { var rpr nomsReporter if keyless { rpr = reportNomsKeylessChanges @@ -156,7 +156,7 @@ func diffNomsMaps(ctx context.Context, ch chan DiffSummaryProgress, keyless bool } ctc := uint64(len(toSch.GetAllCols().GetColumns())) * tc rpr = reportNomsPkChanges - ch <- DiffSummaryProgress{ + ch <- DiffStatProgress{ OldRowSize: fc, NewRowSize: tc, OldCellSize: cfc, @@ -164,10 +164,10 @@ func diffNomsMaps(ctx context.Context, ch chan DiffSummaryProgress, keyless bool } } - return summaryWithReporter(ctx, ch, durable.NomsMapFromIndex(fromRows), durable.NomsMapFromIndex(toRows), rpr, fromSch, toSch) + return statWithReporter(ctx, ch, durable.NomsMapFromIndex(fromRows), durable.NomsMapFromIndex(toRows), rpr, fromSch, toSch) } -func summaryWithReporter(ctx context.Context, ch chan DiffSummaryProgress, from, to types.Map, rpr nomsReporter, fromSch, toSch schema.Schema) (err error) { +func statWithReporter(ctx context.Context, ch chan DiffStatProgress, from, to types.Map, rpr nomsReporter, fromSch, toSch schema.Schema) (err error) { ad := 
NewAsyncDiffer(1024) ad.Start(ctx, from, to) defer func() { @@ -199,50 +199,50 @@ func summaryWithReporter(ctx context.Context, ch chan DiffSummaryProgress, from, return nil } -func reportPkChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffSummaryProgress) error { - var sum DiffSummaryProgress +func reportPkChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffStatProgress) error { + var stat DiffStatProgress switch change.Type { case tree.AddedDiff: - sum.Adds++ + stat.Adds++ case tree.RemovedDiff: - sum.Removes++ + stat.Removes++ case tree.ModifiedDiff: - sum.CellChanges = prollyCountCellDiff(vMapping, fromD, toD, val.Tuple(change.From), val.Tuple(change.To)) - sum.Changes++ + stat.CellChanges = prollyCountCellDiff(vMapping, fromD, toD, val.Tuple(change.From), val.Tuple(change.To)) + stat.Changes++ default: return errors.New("unknown change type") } select { - case ch <- sum: + case ch <- stat: return nil case <-ctx.Done(): return ctx.Err() } } -func reportKeylessChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffSummaryProgress) error { - var sum DiffSummaryProgress +func reportKeylessChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffStatProgress) error { + var stat DiffStatProgress var n, n2 uint64 switch change.Type { case tree.AddedDiff: n, _ = toD.GetUint64(0, val.Tuple(change.To)) - sum.Adds += n + stat.Adds += n case tree.RemovedDiff: n, _ = fromD.GetUint64(0, val.Tuple(change.From)) - sum.Removes += n + stat.Removes += n case tree.ModifiedDiff: n, _ = fromD.GetUint64(0, val.Tuple(change.From)) n2, _ = toD.GetUint64(0, val.Tuple(change.To)) if n < n2 { - sum.Adds += n2 - n + stat.Adds += n2 - n } else { - sum.Removes += n - n2 + stat.Removes += n - n2 } default: return errors.New("unknown change type") 
} select { - case ch <- sum: + case ch <- stat: return nil case <-ctx.Done(): return ctx.Err() @@ -280,13 +280,13 @@ func prollyCountCellDiff(mapping val.OrdinalMapping, fromD, toD val.TupleDesc, f return changed } -func reportNomsPkChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffSummaryProgress) error { - var summary DiffSummaryProgress +func reportNomsPkChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffStatProgress) error { + var stat DiffStatProgress switch change.ChangeType { case types.DiffChangeAdded: - summary = DiffSummaryProgress{Adds: 1} + stat = DiffStatProgress{Adds: 1} case types.DiffChangeRemoved: - summary = DiffSummaryProgress{Removes: 1} + stat = DiffStatProgress{Removes: 1} case types.DiffChangeModified: oldTuple := change.OldValue.(types.Tuple) newTuple := change.NewValue.(types.Tuple) @@ -294,19 +294,19 @@ func reportNomsPkChanges(ctx context.Context, change *diff.Difference, fromSch, if err != nil { return err } - summary = DiffSummaryProgress{Changes: 1, CellChanges: cellChanges} + stat = DiffStatProgress{Changes: 1, CellChanges: cellChanges} default: return errors.New("unknown change type") } select { - case ch <- summary: + case ch <- stat: return nil case <-ctx.Done(): return ctx.Err() } } -func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffSummaryProgress) error { +func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffStatProgress) error { var oldCard uint64 if change.OldValue != nil { v, err := change.OldValue.(types.Tuple).Get(row.KeylessCardinalityValIdx) @@ -325,18 +325,18 @@ func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, from newCard = uint64(v.(types.Uint)) } - var summary DiffSummaryProgress + var stat DiffStatProgress delta := int64(newCard) - int64(oldCard) if delta > 0 { - 
summary = DiffSummaryProgress{Adds: uint64(delta)} + stat = DiffStatProgress{Adds: uint64(delta)} } else if delta < 0 { - summary = DiffSummaryProgress{Removes: uint64(-delta)} + stat = DiffStatProgress{Removes: uint64(-delta)} } else { return fmt.Errorf("diff with delta = 0 for key: %s", change.KeyValue.HumanReadableString()) } select { - case ch <- summary: + case ch <- stat: return nil case <-ctx.Done(): return ctx.Err() diff --git a/go/libraries/doltcore/diff/table_deltas.go b/go/libraries/doltcore/diff/table_deltas.go index fa94cd4960..b042e4a524 100644 --- a/go/libraries/doltcore/diff/table_deltas.go +++ b/go/libraries/doltcore/diff/table_deltas.go @@ -57,6 +57,13 @@ type TableDelta struct { FromFksParentSch map[string]schema.Schema } +type TableDeltaSummary struct { + DiffType string + DataChange bool + SchemaChange bool + TableName string +} + // GetStagedUnstagedTableDeltas represents staged and unstaged changes as TableDelta slices. func GetStagedUnstagedTableDeltas(ctx context.Context, roots doltdb.Roots) (staged, unstaged []TableDelta, err error) { staged, err = GetTableDeltas(ctx, roots.Head, roots.Staged) @@ -387,6 +394,83 @@ func (td TableDelta) IsKeyless(ctx context.Context) (bool, error) { } } +// isTableDataEmpty return true if the table does not contain any data +func isTableDataEmpty(ctx context.Context, table *doltdb.Table) (bool, error) { + rowData, err := table.GetRowData(ctx) + if err != nil { + return false, err + } + + return rowData.Empty() +} + +// GetSummary returns a summary of the table delta. 
+func (td TableDelta) GetSummary(ctx context.Context) (*TableDeltaSummary, error) { + // Dropping a table is always a schema change, and also a data change if the table contained data + if td.IsDrop() { + isEmpty, err := isTableDataEmpty(ctx, td.FromTable) + if err != nil { + return nil, err + } + + return &TableDeltaSummary{ + TableName: td.FromName, + DataChange: !isEmpty, + SchemaChange: true, + DiffType: "dropped", + }, nil + } + + // Renaming a table is always a schema change, and also a data change if the table data differs + if td.IsRename() { + dataChanged, err := td.HasHashChanged() + if err != nil { + return nil, err + } + + return &TableDeltaSummary{ + TableName: td.ToName, + DataChange: dataChanged, + SchemaChange: true, + DiffType: "renamed", + }, nil + } + + // Creating a table is always a schema change, and also a data change if data was inserted + if td.IsAdd() { + isEmpty, err := isTableDataEmpty(ctx, td.ToTable) + if err != nil { + return nil, err + } + + return &TableDeltaSummary{ + TableName: td.ToName, + DataChange: !isEmpty, + SchemaChange: true, + DiffType: "added", + }, nil + } + + // TODO: Renamed columns without a data change are not accounted for here, + // `dataChanged` is true when it should be false + dataChanged, err := td.HasHashChanged() + if err != nil { + return nil, err + } + + schemaChanged, err := td.HasSchemaChanged(ctx) + if err != nil { + return nil, err + } + + return &TableDeltaSummary{ + TableName: td.ToName, + DataChange: dataChanged, + SchemaChange: schemaChanged, + DiffType: "modified", + }, nil +} + // GetRowData returns the table's row data at the fromRoot and toRoot, or an empty map if the table did not exist. 
func (td TableDelta) GetRowData(ctx context.Context) (from, to durable.Index, err error) { if td.FromTable == nil && td.ToTable == nil { diff --git a/go/libraries/doltcore/merge/merge_noms_rows.go b/go/libraries/doltcore/merge/merge_noms_rows.go index f1e86d8875..a6d1bf2ca8 100644 --- a/go/libraries/doltcore/merge/merge_noms_rows.go +++ b/go/libraries/doltcore/merge/merge_noms_rows.go @@ -60,6 +60,7 @@ func mergeNomsTableData( changeChan, mergeChangeChan := make(chan types.ValueChanged, 32), make(chan types.ValueChanged, 32) + originalCtx := ctx eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { @@ -197,7 +198,7 @@ func mergeNomsTableData( return nil, types.EmptyMap, nil, err } - mergedTable, err := tblEdit.Table(ctx) + mergedTable, err := tblEdit.Table(originalCtx) if err != nil { return nil, types.EmptyMap, nil, err } diff --git a/go/libraries/doltcore/merge/merge_rows.go b/go/libraries/doltcore/merge/merge_rows.go index 3155ff3227..f2f87ddf52 100644 --- a/go/libraries/doltcore/merge/merge_rows.go +++ b/go/libraries/doltcore/merge/merge_rows.go @@ -486,10 +486,10 @@ func calcTableMergeStats(ctx context.Context, tbl *doltdb.Table, mergeTbl *doltd } ae := atomicerr.New() - ch := make(chan diff.DiffSummaryProgress) + ch := make(chan diff.DiffStatProgress) go func() { defer close(ch) - err := diff.Summary(ctx, ch, rows, mergeRows, sch, mergeSch) + err := diff.Stat(ctx, ch, rows, mergeRows, sch, mergeSch) ae.SetIfError(err) }() diff --git a/go/libraries/doltcore/migrate/integration_test.go b/go/libraries/doltcore/migrate/integration_test.go index 0259785d3e..b2f3c5f9e0 100644 --- a/go/libraries/doltcore/migrate/integration_test.go +++ b/go/libraries/doltcore/migrate/integration_test.go @@ -77,6 +77,10 @@ func TestMigration(t *testing.T) { query: "SELECT count(*) FROM dolt_log", expected: []sql.Row{{int64(2)}}, }, + { + query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping", + expected: []sql.Row{{int64(2)}}, + }, }, }, { @@ -109,6 
+113,36 @@ func TestMigration(t *testing.T) { }, }, }, + { + name: "create more commits", + setup: []string{ + "CREATE TABLE test (pk int primary key)", + "INSERT INTO test VALUES (1),(2),(3)", + "CALL dolt_commit('-Am', 'new table')", + "INSERT INTO test VALUES (4)", + "CALL dolt_commit('-am', 'added row 4')", + "INSERT INTO test VALUES (5)", + "CALL dolt_commit('-am', 'added row 5')", + }, + asserts: []assertion{ + { + query: "SELECT count(*) FROM dolt_log", + expected: []sql.Row{{int64(4)}}, + }, + { + query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping", + expected: []sql.Row{{int64(4)}}, + }, + { + query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping WHERE new_commit_hash IN (SELECT commit_hash FROM dolt_log)", + expected: []sql.Row{{int64(4)}}, + }, + { + query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping WHERE new_commit_hash NOT IN (SELECT commit_hash FROM dolt_log)", + expected: []sql.Row{{int64(0)}}, + }, + }, + }, } for _, test := range tests { diff --git a/go/libraries/doltcore/migrate/progress.go b/go/libraries/doltcore/migrate/progress.go index 0a23415b53..12139e6b54 100644 --- a/go/libraries/doltcore/migrate/progress.go +++ b/go/libraries/doltcore/migrate/progress.go @@ -17,8 +17,14 @@ package migrate import ( "context" "fmt" + "io" "time" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" + "github.com/dolthub/dolt/go/libraries/doltcore/ref" + "github.com/dolthub/dolt/go/libraries/doltcore/schema" + "github.com/dolthub/dolt/go/store/datas" + "github.com/dolthub/dolt/go/cmd/dolt/cli" "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" "github.com/dolthub/dolt/go/store/chunks" @@ -31,37 +37,34 @@ import ( "github.com/dolthub/dolt/go/store/val" ) -type ChunkMapping interface { - Has(ctx context.Context, addr hash.Hash) (bool, error) - Get(ctx context.Context, addr hash.Hash) (hash.Hash, error) - Put(ctx context.Context, old, new hash.Hash) error - Close(ctx 
context.Context) error -} +const ( + MigratedCommitsBranch = "dolt_migrated_commits" + MigratedCommitsTable = "dolt_commit_mapping" +) -type CommitStack interface { - Push(ctx context.Context, cm *doltdb.Commit) error - Pop(ctx context.Context) (*doltdb.Commit, error) -} +var ( + mappingSchema, _ = schema.SchemaFromCols(schema.NewColCollection( + schema.NewColumn("old_commit_hash", 0, types.StringKind, true), + schema.NewColumn("new_commit_hash", 1, types.StringKind, false), + )) + desc = val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false}) +) -type Progress interface { - ChunkMapping - CommitStack +// progress maintains the state of migration. +type progress struct { + stack []*doltdb.Commit - Log(ctx context.Context, format string, args ...any) - Close(ctx context.Context) error -} - -// A memory stack with a persisted commit mapping. -type memoryStackProgress struct { - stack []*doltdb.Commit + // mapping tracks migrated commits + // it maps old commit hash to new hash mapping *prolly.MutableMap kb, vb *val.TupleBuilder buffPool pool.BuffPool - vs *types.ValueStore - cs chunks.ChunkStore + + vs *types.ValueStore + cs chunks.ChunkStore } -func newProgress(ctx context.Context, cs chunks.ChunkStore) (Progress, error) { +func newProgress(ctx context.Context, cs chunks.ChunkStore) (*progress, error) { kd := val.NewTupleDescriptor(val.Type{ Enc: val.ByteStringEnc, Nullable: false, @@ -83,7 +86,7 @@ func newProgress(ctx context.Context, cs chunks.ChunkStore) (Progress, error) { kb := val.NewTupleBuilder(kd) vb := val.NewTupleBuilder(vd) - return &memoryStackProgress{ + return &progress{ stack: make([]*doltdb.Commit, 0, 128), mapping: mut, kb: kb, @@ -94,18 +97,18 @@ func newProgress(ctx context.Context, cs chunks.ChunkStore) (Progress, error) { }, nil } -func (mem *memoryStackProgress) Has(ctx context.Context, addr hash.Hash) (ok bool, err error) { - mem.kb.PutByteString(0, addr[:]) - k := mem.kb.Build(mem.buffPool) - return mem.mapping.Has(ctx, k) 
+func (p *progress) Has(ctx context.Context, addr hash.Hash) (ok bool, err error) { + p.kb.PutByteString(0, addr[:]) + k := p.kb.Build(p.buffPool) + return p.mapping.Has(ctx, k) } -func (mem *memoryStackProgress) Get(ctx context.Context, old hash.Hash) (new hash.Hash, err error) { - mem.kb.PutByteString(0, old[:]) - k := mem.kb.Build(mem.buffPool) - err = mem.mapping.Get(ctx, k, func(_, v val.Tuple) error { +func (p *progress) Get(ctx context.Context, old hash.Hash) (new hash.Hash, err error) { + p.kb.PutByteString(0, old[:]) + k := p.kb.Build(p.buffPool) + err = p.mapping.Get(ctx, k, func(_, v val.Tuple) error { if len(v) > 0 { - n, ok := mem.vb.Desc.GetBytes(0, v) + n, ok := p.vb.Desc.GetBytes(0, v) if !ok { return fmt.Errorf("failed to get string address from commit mapping value") } @@ -116,56 +119,185 @@ func (mem *memoryStackProgress) Get(ctx context.Context, old hash.Hash) (new has return } -func (mem *memoryStackProgress) Put(ctx context.Context, old, new hash.Hash) (err error) { - mem.kb.PutByteString(0, old[:]) - k := mem.kb.Build(mem.buffPool) - mem.vb.PutByteString(0, new[:]) - v := mem.vb.Build(mem.buffPool) - err = mem.mapping.Put(ctx, k, v) +func (p *progress) Put(ctx context.Context, old, new hash.Hash) (err error) { + p.kb.PutByteString(0, old[:]) + k := p.kb.Build(p.buffPool) + p.vb.PutByteString(0, new[:]) + v := p.vb.Build(p.buffPool) + err = p.mapping.Put(ctx, k, v) return } -func (mem *memoryStackProgress) Push(ctx context.Context, cm *doltdb.Commit) (err error) { - mem.stack = append(mem.stack, cm) +func (p *progress) Push(ctx context.Context, cm *doltdb.Commit) (err error) { + p.stack = append(p.stack, cm) return } -func (mem *memoryStackProgress) Pop(ctx context.Context) (cm *doltdb.Commit, err error) { - if len(mem.stack) == 0 { +func (p *progress) Pop(ctx context.Context) (cm *doltdb.Commit, err error) { + if len(p.stack) == 0 { return nil, nil } - top := len(mem.stack) - 1 - cm = mem.stack[top] - mem.stack = mem.stack[:top] + top := 
len(p.stack) - 1 + cm = p.stack[top] + p.stack = p.stack[:top] return } -func (mem *memoryStackProgress) Log(ctx context.Context, format string, args ...any) { +func (p *progress) Log(ctx context.Context, format string, args ...any) { cli.Println(time.Now().UTC().String() + " " + fmt.Sprintf(format, args...)) } -func (mem *memoryStackProgress) Close(ctx context.Context) error { - m, err := mem.mapping.Map(ctx) +func (p *progress) Finalize(ctx context.Context) (prolly.Map, error) { + m, err := p.mapping.Map(ctx) if err != nil { - return err + return prolly.Map{}, err } v := shim.ValueFromMap(m) - ref, err := mem.vs.WriteValue(ctx, v) + ref, err := p.vs.WriteValue(ctx, v) if err != nil { - return err + return prolly.Map{}, err } - last, err := mem.vs.Root(ctx) + last, err := p.vs.Root(ctx) if err != nil { - return err + return prolly.Map{}, err } - ok, err := mem.vs.Commit(ctx, last, last) + ok, err := p.vs.Commit(ctx, last, last) if err != nil { - return err - } - if !ok { - return fmt.Errorf("failed to commit, manifest swapped out beneath us") + return prolly.Map{}, err + } else if !ok { + return prolly.Map{}, fmt.Errorf("failed to commit, manifest swapped out beneath us") } - mem.Log(ctx, "Wrote commit mapping!! [commit_mapping_ref: %s]", ref.TargetHash().String()) - return nil + p.Log(ctx, "Wrote commit mapping!! 
[commit_mapping_ref: %s]", ref.TargetHash().String()) + p.Log(ctx, "Commit mapping allow mapping pre-migration commit hashes to post-migration commit hashes, "+ + "it is available on branch '%s' in table '%s'", MigratedCommitsBranch, MigratedCommitsTable) + return m, nil +} + +func persistMigratedCommitMapping(ctx context.Context, ddb *doltdb.DoltDB, mapping prolly.Map) error { + // create a new branch to persist the migrated commit mapping + init, err := ddb.ResolveCommitRef(ctx, ref.NewInternalRef(doltdb.CreationBranch)) + if err != nil { + return err + } + + br := ref.NewBranchRef(MigratedCommitsBranch) + err = ddb.NewBranchAtCommit(ctx, br, init) + if err != nil { + return err + } + + ns, vrw := ddb.NodeStore(), ddb.ValueReadWriter() + m, err := prolly.NewMapFromTuples(ctx, ns, desc, desc) + if err != nil { + return err + } + + rows := m.Mutate() + bld := val.NewTupleBuilder(desc) + + // convert |mapping| values from hash.Hash to string + iter, err := mapping.IterAll(ctx) + if err != nil { + return err + } + + var k, v val.Tuple + kd, vd := mapping.Descriptors() + for { + k, v, err = iter.Next(ctx) + if err == io.EOF { + break + } else if err != nil { + return err + } + + o, _ := kd.GetBytes(0, k) + bld.PutString(0, hash.New(o).String()) + key := bld.Build(ddb.NodeStore().Pool()) + + n, _ := vd.GetBytes(0, v) + bld.PutString(0, hash.New(n).String()) + value := bld.Build(ddb.NodeStore().Pool()) + + if err = rows.Put(ctx, key, value); err != nil { + return err + } + } + + m, err = rows.Map(ctx) + if err != nil { + return err + } + idx := durable.IndexFromProllyMap(m) + + tbl, err := doltdb.NewTable(ctx, vrw, ns, mappingSchema, idx, nil, nil) + if err != nil { + return err + } + + root, err := init.GetRootValue(ctx) + if err != nil { + return err + } + + root, err = root.PutTable(ctx, MigratedCommitsTable, tbl) + if err != nil { + return err + } + + return commitRoot(ctx, ddb, br, root, init) +} + +func commitRoot( + ctx context.Context, + ddb *doltdb.DoltDB, + br 
ref.BranchRef, + root *doltdb.RootValue, + parent *doltdb.Commit, +) error { + roots := doltdb.Roots{ + Head: root, + Working: root, + Staged: root, + } + parents := []*doltdb.Commit{parent} + + meta, err := parent.GetCommitMeta(ctx) + if err != nil { + return err + } + + meta, err = datas.NewCommitMeta(meta.Name, meta.Email, meta.Description) + if err != nil { + return err + } + + pcm, err := ddb.NewPendingCommit(ctx, roots, parents, meta) + if err != nil { + return err + } + + wsr, err := ref.WorkingSetRefForHead(br) + if err != nil { + return err + } + + ws, err := ddb.ResolveWorkingSet(ctx, wsr) + if err != nil { + return err + } + + prev, err := ws.HashOf() + if err != nil { + return err + } + ws = ws.WithWorkingRoot(root).WithStagedRoot(root) + + _, err = ddb.CommitWithWorkingSet(ctx, br, wsr, pcm, ws, prev, &datas.WorkingSetMeta{ + Name: meta.Name, + Email: meta.Email, + Timestamp: uint64(time.Now().Unix()), + }) + return err } diff --git a/go/libraries/doltcore/migrate/transform.go b/go/libraries/doltcore/migrate/transform.go index 2380a744b7..aedfad4689 100644 --- a/go/libraries/doltcore/migrate/transform.go +++ b/go/libraries/doltcore/migrate/transform.go @@ -96,7 +96,7 @@ func migrateWorkingSet(ctx context.Context, menv Environment, brRef ref.BranchRe return new.UpdateWorkingSet(ctx, wsRef, newWs, hash.Hash{}, oldWs.Meta()) } -func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, new *doltdb.DoltDB, prog Progress) error { +func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error { oldHash, err := oldCm.HashOf() if err != nil { return err @@ -204,7 +204,7 @@ func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, return nil } -func migrateInitCommit(ctx context.Context, cm *doltdb.Commit, new *doltdb.DoltDB, prog Progress) error { +func migrateInitCommit(ctx context.Context, cm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error { 
oldHash, err := cm.HashOf() if err != nil { return err @@ -244,7 +244,7 @@ func migrateInitCommit(ctx context.Context, cm *doltdb.Commit, new *doltdb.DoltD return prog.Put(ctx, oldHash, newHash) } -func migrateCommitOptions(ctx context.Context, oldCm *doltdb.Commit, prog Progress) (datas.CommitOptions, error) { +func migrateCommitOptions(ctx context.Context, oldCm *doltdb.Commit, prog *progress) (datas.CommitOptions, error) { parents, err := oldCm.ParentHashes(ctx) if err != nil { return datas.CommitOptions{}, err @@ -414,6 +414,7 @@ func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTb var newRows durable.Index var newSet durable.IndexSet + originalCtx := ctx eg, ctx := errgroup.WithContext(ctx) eg.Go(func() error { @@ -433,13 +434,13 @@ func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTb return nil, err } - ai, err := oldTbl.GetAutoIncrementValue(ctx) + ai, err := oldTbl.GetAutoIncrementValue(originalCtx) if err != nil { return nil, err } autoInc := types.Uint(ai) - return doltdb.NewTable(ctx, vrw, ns, newSch, newRows, newSet, autoInc) + return doltdb.NewTable(originalCtx, vrw, ns, newSch, newRows, newSet, autoInc) } func migrateSchema(ctx context.Context, tableName string, existing schema.Schema) (schema.Schema, error) { diff --git a/go/libraries/doltcore/migrate/traverse.go b/go/libraries/doltcore/migrate/traverse.go index 29f2158a32..19b1964cf6 100644 --- a/go/libraries/doltcore/migrate/traverse.go +++ b/go/libraries/doltcore/migrate/traverse.go @@ -28,7 +28,7 @@ import ( // TraverseDAG traverses |old|, migrating values to |new|. 
func TraverseDAG(ctx context.Context, menv Environment, old, new *doltdb.DoltDB) (err error) { var heads []ref.DoltRef - var prog Progress + var prog *progress heads, err = old.GetHeadRefs(ctx) if err != nil { @@ -42,12 +42,6 @@ func TraverseDAG(ctx context.Context, menv Environment, old, new *doltdb.DoltDB) if err != nil { return err } - defer func() { - cerr := prog.Close(ctx) - if err == nil { - err = cerr - } - }() for i := range heads { if err = traverseRefHistory(ctx, menv, heads[i], old, new, prog); err != nil { @@ -58,10 +52,19 @@ func TraverseDAG(ctx context.Context, menv Environment, old, new *doltdb.DoltDB) if err = validateBranchMapping(ctx, old, new); err != nil { return err } + + // write the migrated commit mapping to a special branch + m, err := prog.Finalize(ctx) + if err != nil { + return err + } + if err = persistMigratedCommitMapping(ctx, new, m); err != nil { + return err + } return nil } -func traverseRefHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog Progress) error { +func traverseRefHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog *progress) error { switch r.GetType() { case ref.BranchRefType: if err := traverseBranchHistory(ctx, menv, r, old, new, prog); err != nil { @@ -87,7 +90,7 @@ func traverseRefHistory(ctx context.Context, menv Environment, r ref.DoltRef, ol } } -func traverseBranchHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog Progress) error { +func traverseBranchHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog *progress) error { cm, err := old.ResolveCommitRef(ctx, r) if err != nil { return err @@ -108,7 +111,7 @@ func traverseBranchHistory(ctx context.Context, menv Environment, r ref.DoltRef, return new.SetHead(ctx, r, newHash) } -func traverseTagHistory(ctx context.Context, menv Environment, r ref.TagRef, old, new *doltdb.DoltDB, prog Progress) error { 
+func traverseTagHistory(ctx context.Context, menv Environment, r ref.TagRef, old, new *doltdb.DoltDB, prog *progress) error { t, err := old.ResolveTag(ctx, r) if err != nil { return err @@ -133,7 +136,7 @@ func traverseTagHistory(ctx context.Context, menv Environment, r ref.TagRef, old return new.NewTagAtCommit(ctx, r, cm, t.Meta) } -func traverseCommitHistory(ctx context.Context, menv Environment, cm *doltdb.Commit, new *doltdb.DoltDB, prog Progress) error { +func traverseCommitHistory(ctx context.Context, menv Environment, cm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error { ch, err := cm.HashOf() if err != nil { return err @@ -180,7 +183,7 @@ func traverseCommitHistory(ctx context.Context, menv Environment, cm *doltdb.Com } } -func firstAbsent(ctx context.Context, p Progress, addrs []hash.Hash) (int, error) { +func firstAbsent(ctx context.Context, p *progress, addrs []hash.Hash) (int, error) { for i := range addrs { ok, err := p.Has(ctx, addrs[i]) if err != nil { diff --git a/go/libraries/doltcore/sqle/binlogreplication/binlog_replica_applier.go b/go/libraries/doltcore/sqle/binlogreplication/binlog_replica_applier.go index c763b3af92..2aa202fc8c 100644 --- a/go/libraries/doltcore/sqle/binlogreplication/binlog_replica_applier.go +++ b/go/libraries/doltcore/sqle/binlogreplication/binlog_replica_applier.go @@ -228,6 +228,20 @@ func (a *binlogReplicaApplier) startReplicationEventStream(ctx *sql.Context, con a.currentPosition = position + // Clear out the format description in case we're reconnecting, so that we don't use the old format description + // to interpret any event messages before we receive the new format description from the new stream. + a.format = mysql.BinlogFormat{} + + // If the source server has binlog checksums enabled (@@global.binlog_checksum), then the replica MUST + // set @master_binlog_checksum to handshake with the server to acknowledge that it knows that checksums + // are in use. 
Without this step, the server will just send back error messages saying that the replica + // does not support the binlog checksum algorithm in use on the primary. + // For more details, see: https://dev.mysql.com/worklog/task/?id=2540 + _, err = conn.ExecuteFetch("set @master_binlog_checksum=@@global.binlog_checksum;", 0, false) + if err != nil { + return err + } + return conn.SendBinlogDumpCommand(serverId, *position) } @@ -271,10 +285,6 @@ func (a *binlogReplicaApplier) replicaBinlogEventHandler(ctx *sql.Context) error return err } continue - } else if strings.Contains(sqlError.Message, "can not handle replication events with the checksum") { - // Ignore any errors about checksums - ctx.GetLogger().Debug("ignoring binlog checksum error message") - continue } } @@ -285,6 +295,19 @@ func (a *binlogReplicaApplier) replicaBinlogEventHandler(ctx *sql.Context) error continue } + // We don't support checksum validation, so we must strip off any checksum data if present, otherwise + // it could get interpreted as part of the data fields and corrupt the fields we pull out. There is not + // a future-proof guarantee on the checksum size, so we can't strip a checksum until we've seen the + // Format binlog event that definitively tells us if checksums are enabled and what algorithm they use. + if a.format.IsZero() == false { + event, _, err = event.StripChecksum(a.format) + if err != nil { + msg := fmt.Sprintf("unable to strip checksum from binlog event: '%v'", err.Error()) + ctx.GetLogger().Error(msg) + DoltBinlogReplicaController.setSqlError(mysql.ERUnknownError, msg) + } + } + err = a.processBinlogEvent(ctx, engine, event) if err != nil { ctx.GetLogger().Errorf("unexpected error of type %T: '%v'", err, err.Error()) @@ -328,6 +351,8 @@ func (a *binlogReplicaApplier) processBinlogEvent(ctx *sql.Context, engine *gms. 
"database": query.Database, "charset": query.Charset, "query": query.SQL, + "options": fmt.Sprintf("0x%x", query.Options), + "sql_mode": fmt.Sprintf("0x%x", query.SqlMode), }).Debug("Received binlog event: Query") // When executing SQL statements sent from the primary, we can't be sure what database was modified unless we @@ -337,6 +362,39 @@ func (a *binlogReplicaApplier) processBinlogEvent(ctx *sql.Context, engine *gms. // avoid issues with correctness, at the cost of being slightly less efficient commitToAllDatabases = true + if query.Options&mysql.QFlagOptionAutoIsNull > 0 { + ctx.GetLogger().Tracef("Setting sql_auto_is_null ON") + ctx.SetSessionVariable(ctx, "sql_auto_is_null", 1) + } else { + ctx.GetLogger().Tracef("Setting sql_auto_is_null OFF") + ctx.SetSessionVariable(ctx, "sql_auto_is_null", 0) + } + + if query.Options&mysql.QFlagOptionNotAutocommit > 0 { + ctx.GetLogger().Tracef("Setting autocommit=0") + ctx.SetSessionVariable(ctx, "autocommit", 0) + } else { + ctx.GetLogger().Tracef("Setting autocommit=1") + ctx.SetSessionVariable(ctx, "autocommit", 1) + } + + if query.Options&mysql.QFlagOptionNoForeignKeyChecks > 0 { + ctx.GetLogger().Tracef("Setting foreign_key_checks=0") + ctx.SetSessionVariable(ctx, "foreign_key_checks", 0) + } else { + ctx.GetLogger().Tracef("Setting foreign_key_checks=1") + ctx.SetSessionVariable(ctx, "foreign_key_checks", 1) + } + + // NOTE: unique_checks is not currently honored by Dolt + if query.Options&mysql.QFlagOptionRelaxedUniqueChecks > 0 { + ctx.GetLogger().Tracef("Setting unique_checks=0") + ctx.SetSessionVariable(ctx, "unique_checks", 0) + } else { + ctx.GetLogger().Tracef("Setting unique_checks=1") + ctx.SetSessionVariable(ctx, "unique_checks", 1) + } + executeQueryWithEngine(ctx, engine, query.SQL) createCommit = strings.ToLower(query.SQL) != "begin" @@ -493,16 +551,18 @@ func (a *binlogReplicaApplier) processBinlogEvent(ctx *sql.Context, engine *gms. 
// processRowEvent processes a WriteRows, DeleteRows, or UpdateRows binlog event and returns an error if any problems // were encountered. func (a *binlogReplicaApplier) processRowEvent(ctx *sql.Context, event mysql.BinlogEvent, engine *gms.Engine) error { + var eventType string switch { case event.IsDeleteRows(): - ctx.GetLogger().Debug("Received binlog event: DeleteRows") + eventType = "DeleteRows" case event.IsWriteRows(): - ctx.GetLogger().Debug("Received binlog event: WriteRows") + eventType = "WriteRows" case event.IsUpdateRows(): - ctx.GetLogger().Debug("Received binlog event: UpdateRows") + eventType = "UpdateRows" default: return fmt.Errorf("unsupported event type: %v", event) } + ctx.GetLogger().Debugf("Received binlog event: %s", eventType) tableId := event.TableID(a.format) tableMap, ok := a.tableMapsById[tableId] @@ -519,16 +579,22 @@ func (a *binlogReplicaApplier) processRowEvent(ctx *sql.Context, event mysql.Bin return err } + ctx.GetLogger().WithFields(logrus.Fields{ + "flags": fmt.Sprintf("%x", rows.Flags), + }).Debugf("Processing rows from %s event", eventType) + flags := rows.Flags - if flags&rowFlag_endOfStatement == rowFlag_endOfStatement { + foreignKeyChecksDisabled := false + if flags&rowFlag_endOfStatement > 0 { // nothing to be done for end of statement; just clear the flag and move on flags = flags &^ rowFlag_endOfStatement } - if flags&rowFlag_noForeignKeyChecks == rowFlag_noForeignKeyChecks { + if flags&rowFlag_noForeignKeyChecks > 0 { + foreignKeyChecksDisabled = true flags = flags &^ rowFlag_noForeignKeyChecks } if flags != 0 { - msg := fmt.Sprintf("unsupported binlog protocol message: DeleteRows event with unsupported flags '%x'", flags) + msg := fmt.Sprintf("unsupported binlog protocol message: row event with unsupported flags '%x'", flags) ctx.GetLogger().Errorf(msg) DoltBinlogReplicaController.setSqlError(mysql.ERUnknownError, msg) } @@ -543,10 +609,9 @@ func (a *binlogReplicaApplier) processRowEvent(ctx *sql.Context, event 
mysql.Bin case event.IsUpdateRows(): ctx.GetLogger().Debugf(" - Updated Rows (table: %s)", tableMap.Name) case event.IsWriteRows(): - ctx.GetLogger().Debugf(" - New Rows (table: %s)", tableMap.Name) + ctx.GetLogger().Debugf(" - Inserted Rows (table: %s)", tableMap.Name) } - foreignKeyChecksDisabled := tableMap.Flags&rowFlag_noForeignKeyChecks > 0 writeSession, tableWriter, err := getTableWriter(ctx, engine, tableMap.Name, tableMap.Database, foreignKeyChecksDisabled) if err != nil { return err diff --git a/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_test.go b/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_test.go index eb7aefbe5e..b08c4e495c 100644 --- a/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_test.go +++ b/go/libraries/doltcore/sqle/binlogreplication/binlog_replication_test.go @@ -263,11 +263,15 @@ func TestForeignKeyChecks(t *testing.T) { startSqlServers(t) startReplication(t, mySqlPort) - // Insert a record with a foreign key check - primaryDatabase.MustExec("CREATE TABLE colors (name varchar(100) primary key);") + // Test that we can execute statement-based replication that requires foreign_key_checks + // being turned off (referenced table doesn't exist yet). 
+ primaryDatabase.MustExec("SET foreign_key_checks = 0;") primaryDatabase.MustExec("CREATE TABLE t1 (pk int primary key, color varchar(100), FOREIGN KEY (color) REFERENCES colors(name));") - primaryDatabase.MustExec("START TRANSACTION;") + primaryDatabase.MustExec("CREATE TABLE colors (name varchar(100) primary key);") primaryDatabase.MustExec("SET foreign_key_checks = 1;") + + // Insert a record with foreign key checks enabled + primaryDatabase.MustExec("START TRANSACTION;") primaryDatabase.MustExec("INSERT INTO colors VALUES ('green'), ('red'), ('blue');") primaryDatabase.MustExec("INSERT INTO t1 VALUES (1, 'red'), (2, 'green');") primaryDatabase.MustExec("COMMIT;") @@ -362,7 +366,7 @@ func TestCharsetsAndCollations(t *testing.T) { // waitForReplicaToCatchUp waits (up to 20s) for the replica to catch up with the primary database. The // lag is measured by checking that gtid_executed is the same on the primary and replica. func waitForReplicaToCatchUp(t *testing.T) { - timeLimit := 20 * time.Second + timeLimit := 60 * time.Second endTime := time.Now().Add(timeLimit) for time.Now().Before(endTime) { replicaGtid := queryGtid(t, replicaDatabase) @@ -486,8 +490,8 @@ func stopDoltSqlServer(t *testing.T) { func startReplication(_ *testing.T, port int) { replicaDatabase.MustExec("SET @@GLOBAL.server_id=123;") replicaDatabase.MustExec( - fmt.Sprintf("change replication source to SOURCE_HOST='localhost', SOURCE_USER='root', "+ - "SOURCE_PASSWORD='', SOURCE_PORT=%v;", port)) + fmt.Sprintf("change replication source to SOURCE_HOST='localhost', SOURCE_USER='replicator', "+ + "SOURCE_PASSWORD='Zqr8_blrGm1!', SOURCE_PORT=%v;", port)) replicaDatabase.MustExec("start replica;") } @@ -588,7 +592,6 @@ func startMySqlServer(dir string) (int, *os.Process, error) { fmt.Sprintf("--port=%v", mySqlPort), "--server-id=11223344", fmt.Sprintf("--socket=mysql-%v.sock", mySqlPort), - "--binlog-checksum=NONE", "--general_log_file="+dir+"general_log", "--log-bin="+dir+"log_bin", 
"--slow_query_log_file="+dir+"slow_query_log", @@ -623,9 +626,11 @@ func startMySqlServer(dir string) (int, *os.Process, error) { primaryDatabase = sqlx.MustOpen("mysql", dsn) os.Chdir(originalCwd) - fmt.Printf("MySQL server started on port %v \n", mySqlPort) + primaryDatabase.MustExec("CREATE USER 'replicator'@'%' IDENTIFIED BY 'Zqr8_blrGm1!';") + primaryDatabase.MustExec("GRANT REPLICATION SLAVE ON *.* TO 'replicator'@'%';") + return mySqlPort, cmd.Process, nil } diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go index aa495e2bd0..84043f0ded 100644 --- a/go/libraries/doltcore/sqle/database_provider.go +++ b/go/libraries/doltcore/sqle/database_provider.go @@ -937,12 +937,14 @@ func (p DoltDatabaseProvider) ExternalStoredProcedures(_ *sql.Context, name stri // TableFunction implements the sql.TableFunctionProvider interface func (p DoltDatabaseProvider) TableFunction(_ *sql.Context, name string) (sql.TableFunction, error) { - // currently, only one table function is supported, if we extend this, we should clean this up - // and store table functions in a map, similar to regular functions. + // TODO: Clean this up and store table functions in a map, similar to regular functions. switch strings.ToLower(name) { case "dolt_diff": dtf := &DiffTableFunction{} return dtf, nil + case "dolt_diff_stat": + dtf := &DiffStatTableFunction{} + return dtf, nil case "dolt_diff_summary": dtf := &DiffSummaryTableFunction{} return dtf, nil diff --git a/go/libraries/doltcore/sqle/dfunctions/hashof.go b/go/libraries/doltcore/sqle/dfunctions/hashof.go index bd938185ee..296f39f7bd 100644 --- a/go/libraries/doltcore/sqle/dfunctions/hashof.go +++ b/go/libraries/doltcore/sqle/dfunctions/hashof.go @@ -1,4 +1,4 @@ -// Copyright 2020 Dolthub, Inc. +// Copyright 2021 Dolthub, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -25,6 +25,7 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/store/hash" ) const HashOfFuncName = "hashof" @@ -80,12 +81,21 @@ func (t *HashOf) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { } else { ref, err := ddb.GetRefByNameInsensitive(ctx, name) if err != nil { - return nil, err - } - - cm, err = ddb.ResolveCommitRef(ctx, ref) - if err != nil { - return nil, err + hsh, parsed := hash.MaybeParse(name) + if parsed { + orgErr := err + cm, err = ddb.ReadCommit(ctx, hsh) + if err != nil { + return nil, orgErr + } + } else { + return nil, err + } + } else { + cm, err = ddb.ResolveCommitRef(ctx, ref) + if err != nil { + return nil, err + } } } diff --git a/go/libraries/doltcore/sqle/dolt_diff_stat_table_function.go b/go/libraries/doltcore/sqle/dolt_diff_stat_table_function.go new file mode 100644 index 0000000000..3a01922ee4 --- /dev/null +++ b/go/libraries/doltcore/sqle/dolt_diff_stat_table_function.go @@ -0,0 +1,571 @@ +// Copyright 2022 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package sqle + +import ( + "errors" + "fmt" + "io" + "math" + "strings" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/types" + "golang.org/x/sync/errgroup" + + "github.com/dolthub/dolt/go/libraries/doltcore/diff" + "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dtables" +) + +var _ sql.TableFunction = (*DiffStatTableFunction)(nil) + +type DiffStatTableFunction struct { + ctx *sql.Context + + fromCommitExpr sql.Expression + toCommitExpr sql.Expression + dotCommitExpr sql.Expression + tableNameExpr sql.Expression + database sql.Database +} + +var diffStatTableSchema = sql.Schema{ + &sql.Column{Name: "table_name", Type: types.LongText, Nullable: false}, + &sql.Column{Name: "rows_unmodified", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "rows_added", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "rows_deleted", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "rows_modified", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "cells_added", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "cells_deleted", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "cells_modified", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "old_row_count", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "new_row_count", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "old_cell_count", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "new_cell_count", Type: types.Int64, Nullable: true}, +} + +// NewInstance creates a new instance of TableFunction interface +func (ds *DiffStatTableFunction) NewInstance(ctx *sql.Context, db sql.Database, expressions []sql.Expression) (sql.Node, error) { + newInstance := &DiffStatTableFunction{ + ctx: ctx, + database: db, + } + + node, err := newInstance.WithExpressions(expressions...) 
+ if err != nil { + return nil, err + } + + return node, nil +} + +// Database implements the sql.Databaser interface +func (ds *DiffStatTableFunction) Database() sql.Database { + return ds.database +} + +// WithDatabase implements the sql.Databaser interface +func (ds *DiffStatTableFunction) WithDatabase(database sql.Database) (sql.Node, error) { + ds.database = database + return ds, nil +} + +// Name implements the sql.TableFunction interface +func (ds *DiffStatTableFunction) Name() string { + return "dolt_diff_stat" +} + +func (ds *DiffStatTableFunction) commitsResolved() bool { + if ds.dotCommitExpr != nil { + return ds.dotCommitExpr.Resolved() + } + return ds.fromCommitExpr.Resolved() && ds.toCommitExpr.Resolved() +} + +// Resolved implements the sql.Resolvable interface +func (ds *DiffStatTableFunction) Resolved() bool { + if ds.tableNameExpr != nil { + return ds.commitsResolved() && ds.tableNameExpr.Resolved() + } + return ds.commitsResolved() +} + +// String implements the Stringer interface +func (ds *DiffStatTableFunction) String() string { + if ds.dotCommitExpr != nil { + if ds.tableNameExpr != nil { + return fmt.Sprintf("DOLT_DIFF_STAT(%s, %s)", ds.dotCommitExpr.String(), ds.tableNameExpr.String()) + } + return fmt.Sprintf("DOLT_DIFF_STAT(%s)", ds.dotCommitExpr.String()) + } + if ds.tableNameExpr != nil { + return fmt.Sprintf("DOLT_DIFF_STAT(%s, %s, %s)", ds.fromCommitExpr.String(), ds.toCommitExpr.String(), ds.tableNameExpr.String()) + } + return fmt.Sprintf("DOLT_DIFF_STAT(%s, %s)", ds.fromCommitExpr.String(), ds.toCommitExpr.String()) +} + +// Schema implements the sql.Node interface. +func (ds *DiffStatTableFunction) Schema() sql.Schema { + return diffStatTableSchema +} + +// Children implements the sql.Node interface. +func (ds *DiffStatTableFunction) Children() []sql.Node { + return nil +} + +// WithChildren implements the sql.Node interface. 
+func (ds *DiffStatTableFunction) WithChildren(children ...sql.Node) (sql.Node, error) { + if len(children) != 0 { + return nil, fmt.Errorf("unexpected children") + } + return ds, nil +} + +// CheckPrivileges implements the interface sql.Node. +func (ds *DiffStatTableFunction) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool { + if ds.tableNameExpr != nil { + if !types.IsText(ds.tableNameExpr.Type()) { + return false + } + + tableNameVal, err := ds.tableNameExpr.Eval(ds.ctx, nil) + if err != nil { + return false + } + tableName, ok := tableNameVal.(string) + if !ok { + return false + } + + // TODO: Add tests for privilege checking + return opChecker.UserHasPrivileges(ctx, + sql.NewPrivilegedOperation(ds.database.Name(), tableName, "", sql.PrivilegeType_Select)) + } + + tblNames, err := ds.database.GetTableNames(ctx) + if err != nil { + return false + } + + var operations []sql.PrivilegedOperation + for _, tblName := range tblNames { + operations = append(operations, sql.NewPrivilegedOperation(ds.database.Name(), tblName, "", sql.PrivilegeType_Select)) + } + + return opChecker.UserHasPrivileges(ctx, operations...) +} + +// Expressions implements the sql.Expressioner interface. +func (ds *DiffStatTableFunction) Expressions() []sql.Expression { + exprs := []sql.Expression{} + if ds.dotCommitExpr != nil { + exprs = append(exprs, ds.dotCommitExpr) + } else { + exprs = append(exprs, ds.fromCommitExpr, ds.toCommitExpr) + } + if ds.tableNameExpr != nil { + exprs = append(exprs, ds.tableNameExpr) + } + return exprs +} + +// WithExpressions implements the sql.Expressioner interface. 
+func (ds *DiffStatTableFunction) WithExpressions(expression ...sql.Expression) (sql.Node, error) { + if len(expression) < 1 { + return nil, sql.ErrInvalidArgumentNumber.New(ds.Name(), "1 to 3", len(expression)) + } + + for _, expr := range expression { + if !expr.Resolved() { + return nil, ErrInvalidNonLiteralArgument.New(ds.Name(), expr.String()) + } + // prepared statements resolve functions beforehand, so above check fails + if _, ok := expr.(sql.FunctionExpression); ok { + return nil, ErrInvalidNonLiteralArgument.New(ds.Name(), expr.String()) + } + } + + if strings.Contains(expression[0].String(), "..") { + if len(expression) < 1 || len(expression) > 2 { + return nil, sql.ErrInvalidArgumentNumber.New(ds.Name(), "1 or 2", len(expression)) + } + ds.dotCommitExpr = expression[0] + if len(expression) == 2 { + ds.tableNameExpr = expression[1] + } + } else { + if len(expression) < 2 || len(expression) > 3 { + return nil, sql.ErrInvalidArgumentNumber.New(ds.Name(), "2 or 3", len(expression)) + } + ds.fromCommitExpr = expression[0] + ds.toCommitExpr = expression[1] + if len(expression) == 3 { + ds.tableNameExpr = expression[2] + } + } + + // validate the expressions + if ds.dotCommitExpr != nil { + if !types.IsText(ds.dotCommitExpr.Type()) { + return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.dotCommitExpr.String()) + } + } else { + if !types.IsText(ds.fromCommitExpr.Type()) { + return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.fromCommitExpr.String()) + } + if !types.IsText(ds.toCommitExpr.Type()) { + return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.toCommitExpr.String()) + } + } + + if ds.tableNameExpr != nil { + if !types.IsText(ds.tableNameExpr.Type()) { + return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.tableNameExpr.String()) + } + } + + return ds, nil +} + +// RowIter implements the sql.Node interface +func (ds *DiffStatTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter, error) { + fromCommitVal, 
toCommitVal, dotCommitVal, tableName, err := ds.evaluateArguments() + if err != nil { + return nil, err + } + + sqledb, ok := ds.database.(SqlDatabase) + if !ok { + return nil, fmt.Errorf("unexpected database type: %T", ds.database) + } + + fromCommitStr, toCommitStr, err := loadCommitStrings(ctx, fromCommitVal, toCommitVal, dotCommitVal, sqledb) + if err != nil { + return nil, err + } + + sess := dsess.DSessFromSess(ctx.Session) + fromRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), fromCommitStr) + if err != nil { + return nil, err + } + + toRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), toCommitStr) + if err != nil { + return nil, err + } + + deltas, err := diff.GetTableDeltas(ctx, fromRoot, toRoot) + if err != nil { + return nil, err + } + + // If tableNameExpr defined, return a single table diff stat result + if ds.tableNameExpr != nil { + delta := findMatchingDelta(deltas, tableName) + diffStat, hasDiff, err := getDiffStatNodeFromDelta(ctx, delta, fromRoot, toRoot, tableName) + if err != nil { + return nil, err + } + if !hasDiff { + return NewDiffStatTableFunctionRowIter([]diffStatNode{}), nil + } + return NewDiffStatTableFunctionRowIter([]diffStatNode{diffStat}), nil + } + + var diffStats []diffStatNode + for _, delta := range deltas { + tblName := delta.ToName + if tblName == "" { + tblName = delta.FromName + } + diffStat, hasDiff, err := getDiffStatNodeFromDelta(ctx, delta, fromRoot, toRoot, tblName) + if err != nil { + if errors.Is(err, diff.ErrPrimaryKeySetChanged) { + ctx.Warn(dtables.PrimaryKeyChangeWarningCode, fmt.Sprintf("stat for table %s cannot be determined. 
Primary key set changed.", tblName)) + // Report an empty diff for tables that have primary key set changes + diffStats = append(diffStats, diffStatNode{tblName: tblName}) + continue + } + return nil, err + } + if hasDiff { + diffStats = append(diffStats, diffStat) + } + } + + return NewDiffStatTableFunctionRowIter(diffStats), nil +} + +// evaluateArguments returns fromCommitVal, toCommitVal, dotCommitVal, and tableName. +// It evaluates the argument expressions to turn them into values this DiffStatTableFunction +// can use. Note that this method only evals the expressions, and doesn't validate the values. +func (ds *DiffStatTableFunction) evaluateArguments() (interface{}, interface{}, interface{}, string, error) { + var tableName string + if ds.tableNameExpr != nil { + tableNameVal, err := ds.tableNameExpr.Eval(ds.ctx, nil) + if err != nil { + return nil, nil, nil, "", err + } + tn, ok := tableNameVal.(string) + if !ok { + return nil, nil, nil, "", ErrInvalidTableName.New(ds.tableNameExpr.String()) + } + tableName = tn + } + + if ds.dotCommitExpr != nil { + dotCommitVal, err := ds.dotCommitExpr.Eval(ds.ctx, nil) + if err != nil { + return nil, nil, nil, "", err + } + + return nil, nil, dotCommitVal, tableName, nil + } + + fromCommitVal, err := ds.fromCommitExpr.Eval(ds.ctx, nil) + if err != nil { + return nil, nil, nil, "", err + } + + toCommitVal, err := ds.toCommitExpr.Eval(ds.ctx, nil) + if err != nil { + return nil, nil, nil, "", err + } + + return fromCommitVal, toCommitVal, nil, tableName, nil +} + +// getDiffStatNodeFromDelta returns diffStatNode object and whether there is data diff or not. It gets tables +// from roots and diff stat if there is a valid table exists in both fromRoot and toRoot. 
+func getDiffStatNodeFromDelta(ctx *sql.Context, delta diff.TableDelta, fromRoot, toRoot *doltdb.RootValue, tableName string) (diffStatNode, bool, error) { + var oldColLen int + var newColLen int + fromTable, _, fromTableExists, err := fromRoot.GetTableInsensitive(ctx, tableName) + if err != nil { + return diffStatNode{}, false, err + } + + if fromTableExists { + fromSch, err := fromTable.GetSchema(ctx) + if err != nil { + return diffStatNode{}, false, err + } + oldColLen = len(fromSch.GetAllCols().GetColumns()) + } + + toTable, _, toTableExists, err := toRoot.GetTableInsensitive(ctx, tableName) + if err != nil { + return diffStatNode{}, false, err + } + + if toTableExists { + toSch, err := toTable.GetSchema(ctx) + if err != nil { + return diffStatNode{}, false, err + } + newColLen = len(toSch.GetAllCols().GetColumns()) + } + + if !fromTableExists && !toTableExists { + return diffStatNode{}, false, sql.ErrTableNotFound.New(tableName) + } + + // no diff from tableDelta + if delta.FromTable == nil && delta.ToTable == nil { + return diffStatNode{}, false, nil + } + + diffStat, hasDiff, keyless, err := getDiffStat(ctx, delta) + if err != nil { + return diffStatNode{}, false, err + } + + return diffStatNode{tableName, diffStat, oldColLen, newColLen, keyless}, hasDiff, nil +} + +// getDiffStat returns diff.DiffStatProgress object and whether there is a data diff or not. 
+func getDiffStat(ctx *sql.Context, td diff.TableDelta) (diff.DiffStatProgress, bool, bool, error) { + // got this method from diff_output.go + + ch := make(chan diff.DiffStatProgress) + + grp, ctx2 := errgroup.WithContext(ctx) + grp.Go(func() error { + defer close(ch) + err := diff.StatForTableDelta(ctx2, ch, td) + return err + }) + + acc := diff.DiffStatProgress{} + var count int64 + grp.Go(func() error { + for { + select { + case p, ok := <-ch: + if !ok { + return nil + } + acc.Adds += p.Adds + acc.Removes += p.Removes + acc.Changes += p.Changes + acc.CellChanges += p.CellChanges + acc.NewRowSize += p.NewRowSize + acc.OldRowSize += p.OldRowSize + acc.NewCellSize += p.NewCellSize + acc.OldCellSize += p.OldCellSize + count++ + case <-ctx2.Done(): + return ctx2.Err() + } + } + }) + + if err := grp.Wait(); err != nil { + return diff.DiffStatProgress{}, false, false, err + } + + keyless, err := td.IsKeyless(ctx) + if err != nil { + return diff.DiffStatProgress{}, false, keyless, err + } + + if (acc.Adds+acc.Removes+acc.Changes) == 0 && (acc.OldCellSize-acc.NewCellSize) == 0 { + return diff.DiffStatProgress{}, false, keyless, nil + } + + return acc, true, keyless, nil +} + +//------------------------------------ +// diffStatTableFunctionRowIter +//------------------------------------ + +var _ sql.RowIter = &diffStatTableFunctionRowIter{} + +type diffStatTableFunctionRowIter struct { + diffStats []diffStatNode + diffIdx int +} + +func (d *diffStatTableFunctionRowIter) incrementIndexes() { + d.diffIdx++ + if d.diffIdx >= len(d.diffStats) { + d.diffIdx = 0 + d.diffStats = nil + } +} + +type diffStatNode struct { + tblName string + diffStat diff.DiffStatProgress + oldColLen int + newColLen int + keyless bool +} + +func NewDiffStatTableFunctionRowIter(ds []diffStatNode) sql.RowIter { + return &diffStatTableFunctionRowIter{ + diffStats: ds, + } +} + +func (d *diffStatTableFunctionRowIter) Next(ctx *sql.Context) (sql.Row, error) { + defer d.incrementIndexes() + if d.diffIdx 
>= len(d.diffStats) { + return nil, io.EOF + } + + if d.diffStats == nil { + return nil, io.EOF + } + + ds := d.diffStats[d.diffIdx] + return getRowFromDiffStat(ds.tblName, ds.diffStat, ds.newColLen, ds.oldColLen, ds.keyless), nil +} + +func (d *diffStatTableFunctionRowIter) Close(context *sql.Context) error { + return nil +} + +// getRowFromDiffStat takes diff.DiffStatProgress and calculates the row_modified, cell_added, cell_deleted. +// If the number of cell change from old to new cell count does not equal to cell_added and/or cell_deleted, there +// must be schema changes that affects cell_added and cell_deleted value addition to the row count * col length number. +func getRowFromDiffStat(tblName string, dsp diff.DiffStatProgress, newColLen, oldColLen int, keyless bool) sql.Row { + // if table is keyless table, match current CLI command result + if keyless { + return sql.Row{ + tblName, // table_name + nil, // rows_unmodified + int64(dsp.Adds), // rows_added + int64(dsp.Removes), // rows_deleted + nil, // rows_modified + nil, // cells_added + nil, // cells_deleted + nil, // cells_modified + nil, // old_row_count + nil, // new_row_count + nil, // old_cell_count + nil, // new_cell_count + } + } + + numCellInserts, numCellDeletes := GetCellsAddedAndDeleted(dsp, newColLen) + rowsUnmodified := dsp.OldRowSize - dsp.Changes - dsp.Removes + + return sql.Row{ + tblName, // table_name + int64(rowsUnmodified), // rows_unmodified + int64(dsp.Adds), // rows_added + int64(dsp.Removes), // rows_deleted + int64(dsp.Changes), // rows_modified + int64(numCellInserts), // cells_added + int64(numCellDeletes), // cells_deleted + int64(dsp.CellChanges), // cells_modified + int64(dsp.OldRowSize), // old_row_count + int64(dsp.NewRowSize), // new_row_count + int64(dsp.OldCellSize), // old_cell_count + int64(dsp.NewCellSize), // new_cell_count + } +} + +// GetCellsAddedAndDeleted calculates cells added and deleted given diff.DiffStatProgress and toCommit table +// column length. 
We use rows added and deleted to calculate cells added and deleted, but it does not include +// cells added and deleted from schema changes. Here we fill those in using total number of cells in each commit table. +func GetCellsAddedAndDeleted(acc diff.DiffStatProgress, newColLen int) (uint64, uint64) { + var numCellInserts, numCellDeletes float64 + rowToCellInserts := float64(acc.Adds) * float64(newColLen) + rowToCellDeletes := float64(acc.Removes) * float64(newColLen) + cellDiff := float64(acc.NewCellSize) - float64(acc.OldCellSize) + if cellDiff > 0 { + numCellInserts = cellDiff + rowToCellDeletes + numCellDeletes = rowToCellDeletes + } else if cellDiff < 0 { + numCellInserts = rowToCellInserts + numCellDeletes = math.Abs(cellDiff) + rowToCellInserts + } else { + if rowToCellInserts != rowToCellDeletes { + numCellDeletes = math.Max(rowToCellDeletes, rowToCellInserts) + numCellInserts = math.Max(rowToCellDeletes, rowToCellInserts) + } else { + numCellDeletes = rowToCellDeletes + numCellInserts = rowToCellInserts + } + } + return uint64(numCellInserts), uint64(numCellDeletes) +} diff --git a/go/libraries/doltcore/sqle/dolt_diff_summary_table_function.go b/go/libraries/doltcore/sqle/dolt_diff_summary_table_function.go index f73af9ca75..3da844f762 100644 --- a/go/libraries/doltcore/sqle/dolt_diff_summary_table_function.go +++ b/go/libraries/doltcore/sqle/dolt_diff_summary_table_function.go @@ -15,19 +15,16 @@ package sqle import ( - "errors" "fmt" "io" - "math" + "sort" "strings" "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" - "golang.org/x/sync/errgroup" "github.com/dolthub/dolt/go/libraries/doltcore/diff" - "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" - "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" + "github.com/dolthub/dolt/go/libraries/doltcore/schema" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dtables" ) @@ -45,17 +42,9 @@ type DiffSummaryTableFunction struct { var diffSummaryTableSchema = 
sql.Schema{ &sql.Column{Name: "table_name", Type: types.LongText, Nullable: false}, - &sql.Column{Name: "rows_unmodified", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "rows_added", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "rows_deleted", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "rows_modified", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "cells_added", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "cells_deleted", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "cells_modified", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "old_row_count", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "new_row_count", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "old_cell_count", Type: types.Int64, Nullable: true}, - &sql.Column{Name: "new_cell_count", Type: types.Int64, Nullable: true}, + &sql.Column{Name: "diff_type", Type: types.Text, Nullable: false}, + &sql.Column{Name: "data_change", Type: types.Boolean, Nullable: false}, + &sql.Column{Name: "schema_change", Type: types.Boolean, Nullable: false}, } // NewInstance creates a new instance of TableFunction interface @@ -254,64 +243,75 @@ func (ds *DiffSummaryTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql. 
return nil, fmt.Errorf("unexpected database type: %T", ds.database) } - fromCommitStr, toCommitStr, err := loadCommitStrings(ctx, fromCommitVal, toCommitVal, dotCommitVal, sqledb) + fromDetails, toDetails, err := loadDetailsForRefs(ctx, fromCommitVal, toCommitVal, dotCommitVal, sqledb) if err != nil { return nil, err } - sess := dsess.DSessFromSess(ctx.Session) - fromRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), fromCommitStr) + deltas, err := diff.GetTableDeltas(ctx, fromDetails.root, toDetails.root) if err != nil { return nil, err } - toRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), toCommitStr) - if err != nil { - return nil, err - } - - deltas, err := diff.GetTableDeltas(ctx, fromRoot, toRoot) - if err != nil { - return nil, err - } + sort.Slice(deltas, func(i, j int) bool { + return strings.Compare(deltas[i].ToName, deltas[j].ToName) < 0 + }) // If tableNameExpr defined, return a single table diff summary result if ds.tableNameExpr != nil { delta := findMatchingDelta(deltas, tableName) - diffSum, hasDiff, err := getDiffSummaryNodeFromDelta(ctx, delta, fromRoot, toRoot, tableName) + + summ, err := getSummaryForDelta(ctx, delta, sqledb, fromDetails, toDetails, true) if err != nil { return nil, err } - if !hasDiff { - return NewDiffSummaryTableFunctionRowIter([]diffSummaryNode{}), nil + + summs := []*diff.TableDeltaSummary{} + if summ != nil { + // Old name of renamed table can be matched, use provided name in result + summ.TableName = tableName + summs = []*diff.TableDeltaSummary{summ} } - return NewDiffSummaryTableFunctionRowIter([]diffSummaryNode{diffSum}), nil + + return NewDiffSummaryTableFunctionRowIter(summs), nil } - var diffSummaries []diffSummaryNode + var diffSummaries []*diff.TableDeltaSummary for _, delta := range deltas { - tblName := delta.ToName - if tblName == "" { - tblName = delta.FromName - } - diffSum, hasDiff, err := getDiffSummaryNodeFromDelta(ctx, delta, fromRoot, toRoot, tblName) + summ, err := 
getSummaryForDelta(ctx, delta, sqledb, fromDetails, toDetails, false) if err != nil { - if errors.Is(err, diff.ErrPrimaryKeySetChanged) { - ctx.Warn(dtables.PrimaryKeyChangeWarningCode, fmt.Sprintf("summary for table %s cannot be determined. Primary key set changed.", tblName)) - // Report an empty diff for tables that have primary key set changes - diffSummaries = append(diffSummaries, diffSummaryNode{tblName: tblName}) - continue - } return nil, err } - if hasDiff { - diffSummaries = append(diffSummaries, diffSum) + if summ != nil { + diffSummaries = append(diffSummaries, summ) } } return NewDiffSummaryTableFunctionRowIter(diffSummaries), nil } +func getSummaryForDelta(ctx *sql.Context, delta diff.TableDelta, sqledb SqlDatabase, fromDetails, toDetails *refDetails, shouldErrorOnPKChange bool) (*diff.TableDeltaSummary, error) { + if delta.FromTable == nil && delta.ToTable == nil { + return nil, nil + } + + if !schema.ArePrimaryKeySetsDiffable(delta.Format(), delta.FromSch, delta.ToSch) { + if shouldErrorOnPKChange { + return nil, fmt.Errorf("failed to compute diff summary for table %s: %w", delta.CurName(), diff.ErrPrimaryKeySetChanged) + } + + ctx.Warn(dtables.PrimaryKeyChangeWarningCode, fmt.Sprintf(dtables.PrimaryKeyChangeWarning, fromDetails.hashStr, toDetails.hashStr)) + return nil, nil + } + + summ, err := delta.GetSummary(ctx) + if err != nil { + return nil, err + } + + return summ, nil +} + // evaluateArguments returns fromCommitVal, toCommitVal, dotCommitVal, and tableName. // It evaluates the argument expressions to turn them into values this DiffSummaryTableFunction // can use. Note that this method only evals the expressions, and doesn't validate the values. @@ -351,107 +351,6 @@ func (ds *DiffSummaryTableFunction) evaluateArguments() (interface{}, interface{ return fromCommitVal, toCommitVal, nil, tableName, nil } -// getDiffSummaryNodeFromDelta returns diffSummaryNode object and whether there is data diff or not. 
It gets tables -// from roots and diff summary if there is a valid table exists in both fromRoot and toRoot. -func getDiffSummaryNodeFromDelta(ctx *sql.Context, delta diff.TableDelta, fromRoot, toRoot *doltdb.RootValue, tableName string) (diffSummaryNode, bool, error) { - var oldColLen int - var newColLen int - fromTable, _, fromTableExists, err := fromRoot.GetTableInsensitive(ctx, tableName) - if err != nil { - return diffSummaryNode{}, false, err - } - - if fromTableExists { - fromSch, err := fromTable.GetSchema(ctx) - if err != nil { - return diffSummaryNode{}, false, err - } - oldColLen = len(fromSch.GetAllCols().GetColumns()) - } - - toTable, _, toTableExists, err := toRoot.GetTableInsensitive(ctx, tableName) - if err != nil { - return diffSummaryNode{}, false, err - } - - if toTableExists { - toSch, err := toTable.GetSchema(ctx) - if err != nil { - return diffSummaryNode{}, false, err - } - newColLen = len(toSch.GetAllCols().GetColumns()) - } - - if !fromTableExists && !toTableExists { - return diffSummaryNode{}, false, sql.ErrTableNotFound.New(tableName) - } - - // no diff from tableDelta - if delta.FromTable == nil && delta.ToTable == nil { - return diffSummaryNode{}, false, nil - } - - diffSum, hasDiff, keyless, err := getDiffSummary(ctx, delta) - if err != nil { - return diffSummaryNode{}, false, err - } - - return diffSummaryNode{tableName, diffSum, oldColLen, newColLen, keyless}, hasDiff, nil -} - -// getDiffSummary returns diff.DiffSummaryProgress object and whether there is a data diff or not. 
-func getDiffSummary(ctx *sql.Context, td diff.TableDelta) (diff.DiffSummaryProgress, bool, bool, error) { - // got this method from diff_output.go - - ch := make(chan diff.DiffSummaryProgress) - - grp, ctx2 := errgroup.WithContext(ctx) - grp.Go(func() error { - defer close(ch) - err := diff.SummaryForTableDelta(ctx2, ch, td) - return err - }) - - acc := diff.DiffSummaryProgress{} - var count int64 - grp.Go(func() error { - for { - select { - case p, ok := <-ch: - if !ok { - return nil - } - acc.Adds += p.Adds - acc.Removes += p.Removes - acc.Changes += p.Changes - acc.CellChanges += p.CellChanges - acc.NewRowSize += p.NewRowSize - acc.OldRowSize += p.OldRowSize - acc.NewCellSize += p.NewCellSize - acc.OldCellSize += p.OldCellSize - count++ - case <-ctx2.Done(): - return ctx2.Err() - } - } - }) - - if err := grp.Wait(); err != nil { - return diff.DiffSummaryProgress{}, false, false, err - } - - keyless, err := td.IsKeyless(ctx) - if err != nil { - return diff.DiffSummaryProgress{}, false, keyless, err - } - - if (acc.Adds+acc.Removes+acc.Changes) == 0 && (acc.OldCellSize-acc.NewCellSize) == 0 { - return diff.DiffSummaryProgress{}, false, keyless, nil - } - - return acc, true, keyless, nil -} - //------------------------------------ // diffSummaryTableFunctionRowIter //------------------------------------ @@ -459,113 +358,47 @@ func getDiffSummary(ctx *sql.Context, td diff.TableDelta) (diff.DiffSummaryProgr var _ sql.RowIter = &diffSummaryTableFunctionRowIter{} type diffSummaryTableFunctionRowIter struct { - diffSums []diffSummaryNode - diffIdx int + summaries []*diff.TableDeltaSummary + diffIdx int } func (d *diffSummaryTableFunctionRowIter) incrementIndexes() { d.diffIdx++ - if d.diffIdx >= len(d.diffSums) { + if d.diffIdx >= len(d.summaries) { d.diffIdx = 0 - d.diffSums = nil + d.summaries = nil } } -type diffSummaryNode struct { - tblName string - diffSummary diff.DiffSummaryProgress - oldColLen int - newColLen int - keyless bool -} - -func 
NewDiffSummaryTableFunctionRowIter(ds []diffSummaryNode) sql.RowIter { +func NewDiffSummaryTableFunctionRowIter(ds []*diff.TableDeltaSummary) sql.RowIter { return &diffSummaryTableFunctionRowIter{ - diffSums: ds, + summaries: ds, } } func (d *diffSummaryTableFunctionRowIter) Next(ctx *sql.Context) (sql.Row, error) { defer d.incrementIndexes() - if d.diffIdx >= len(d.diffSums) { + if d.diffIdx >= len(d.summaries) { return nil, io.EOF } - if d.diffSums == nil { + if d.summaries == nil { return nil, io.EOF } - ds := d.diffSums[d.diffIdx] - return getRowFromDiffSummary(ds.tblName, ds.diffSummary, ds.newColLen, ds.oldColLen, ds.keyless), nil + ds := d.summaries[d.diffIdx] + return getRowFromSummary(ds), nil } func (d *diffSummaryTableFunctionRowIter) Close(context *sql.Context) error { return nil } -// getRowFromDiffSummary takes diff.DiffSummaryProgress and calculates the row_modified, cell_added, cell_deleted. -// If the number of cell change from old to new cell count does not equal to cell_added and/or cell_deleted, there -// must be schema changes that affects cell_added and cell_deleted value addition to the row count * col length number. 
-func getRowFromDiffSummary(tblName string, dsp diff.DiffSummaryProgress, newColLen, oldColLen int, keyless bool) sql.Row { - // if table is keyless table, match current CLI command result - if keyless { - return sql.Row{ - tblName, // table_name - nil, // rows_unmodified - int64(dsp.Adds), // rows_added - int64(dsp.Removes), // rows_deleted - nil, // rows_modified - nil, // cells_added - nil, // cells_deleted - nil, // cells_modified - nil, // old_row_count - nil, // new_row_count - nil, // old_cell_count - nil, // new_cell_count - } - } - - numCellInserts, numCellDeletes := GetCellsAddedAndDeleted(dsp, newColLen) - rowsUnmodified := dsp.OldRowSize - dsp.Changes - dsp.Removes - +func getRowFromSummary(ds *diff.TableDeltaSummary) sql.Row { return sql.Row{ - tblName, // table_name - int64(rowsUnmodified), // rows_unmodified - int64(dsp.Adds), // rows_added - int64(dsp.Removes), // rows_deleted - int64(dsp.Changes), // rows_modified - int64(numCellInserts), // cells_added - int64(numCellDeletes), // cells_deleted - int64(dsp.CellChanges), // cells_modified - int64(dsp.OldRowSize), // old_row_count - int64(dsp.NewRowSize), // new_row_count - int64(dsp.OldCellSize), // old_cell_count - int64(dsp.NewCellSize), // new_cell_count + ds.TableName, // table_name + ds.DiffType, // diff_type + ds.DataChange, // data_change + ds.SchemaChange, // schema_change } } - -// GetCellsAddedAndDeleted calculates cells added and deleted given diff.DiffSummaryProgress and toCommit table -// column length. We use rows added and deleted to calculate cells added and deleted, but it does not include -// cells added and deleted from schema changes. Here we fill those in using total number of cells in each commit table. 
-func GetCellsAddedAndDeleted(acc diff.DiffSummaryProgress, newColLen int) (uint64, uint64) { - var numCellInserts, numCellDeletes float64 - rowToCellInserts := float64(acc.Adds) * float64(newColLen) - rowToCellDeletes := float64(acc.Removes) * float64(newColLen) - cellDiff := float64(acc.NewCellSize) - float64(acc.OldCellSize) - if cellDiff > 0 { - numCellInserts = cellDiff + rowToCellDeletes - numCellDeletes = rowToCellDeletes - } else if cellDiff < 0 { - numCellInserts = rowToCellInserts - numCellDeletes = math.Abs(cellDiff) + rowToCellInserts - } else { - if rowToCellInserts != rowToCellDeletes { - numCellDeletes = math.Max(rowToCellDeletes, rowToCellInserts) - numCellInserts = math.Max(rowToCellDeletes, rowToCellInserts) - } else { - numCellDeletes = rowToCellDeletes - numCellInserts = rowToCellInserts - } - } - return uint64(numCellInserts), uint64(numCellDeletes) -} diff --git a/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go b/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go index 53d58d26d0..cf5acdffeb 100644 --- a/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/unscoped_diff_table.go @@ -52,14 +52,6 @@ type UnscopedDiffTable struct { commitCheck doltdb.CommitFilter } -// tableChange is an internal data structure used to hold the results of processing -// a diff.TableDelta structure into the output data for this system table. 
-type tableChange struct { - tableName string - dataChange bool - schemaChange bool -} - // NewUnscopedDiffTable creates an UnscopedDiffTable func NewUnscopedDiffTable(_ *sql.Context, dbName string, ddb *doltdb.DoltDB, head *doltdb.Commit) sql.Table { return &UnscopedDiffTable{dbName: dbName, ddb: ddb, head: head} @@ -241,20 +233,20 @@ func (d *doltDiffWorkingSetRowItr) Next(ctx *sql.Context) (sql.Row, error) { return nil, io.EOF } - change, err := processTableDelta(ctx, tableDelta) + change, err := tableDelta.GetSummary(ctx) if err != nil { return nil, err } sqlRow := sql.NewRow( changeSet, - change.tableName, + change.TableName, nil, // committer nil, // email nil, // date nil, // message - change.dataChange, - change.schemaChange, + change.DataChange, + change.SchemaChange, ) return sqlRow, nil @@ -288,7 +280,7 @@ type doltDiffCommitHistoryRowItr struct { commits []*doltdb.Commit meta *datas.CommitMeta hash hash.Hash - tableChanges []tableChange + tableChanges []diff.TableDeltaSummary tableChangesIdx int } @@ -358,13 +350,13 @@ func (itr *doltDiffCommitHistoryRowItr) Next(ctx *sql.Context) (sql.Row, error) return sql.NewRow( h.String(), - tableChange.tableName, + tableChange.TableName, meta.Name, meta.Email, meta.Time(), meta.Description, - tableChange.dataChange, - tableChange.schemaChange, + tableChange.DataChange, + tableChange.SchemaChange, ), nil } @@ -399,7 +391,7 @@ func (itr *doltDiffCommitHistoryRowItr) loadTableChanges(ctx context.Context, co // calculateTableChanges calculates the tables that changed in the specified commit, by comparing that // commit with its immediate ancestor commit. 
-func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Context, commit *doltdb.Commit) ([]tableChange, error) { +func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Context, commit *doltdb.Commit) ([]diff.TableDeltaSummary, error) { if len(commit.DatasParents()) == 0 { return nil, nil } @@ -424,9 +416,9 @@ func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Contex return nil, err } - tableChanges := make([]tableChange, len(deltas)) + tableChanges := make([]diff.TableDeltaSummary, len(deltas)) for i := 0; i < len(deltas); i++ { - change, err := processTableDelta(itr.ctx, deltas[i]) + change, err := deltas[i].GetSummary(itr.ctx) if err != nil { return nil, err } @@ -442,68 +434,6 @@ func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Contex return tableChanges, nil } -// processTableDelta processes the specified TableDelta to determine what kind of change it was (i.e. table drop, -// table rename, table create, or data update) and returns a tableChange struct representing the change. 
-func processTableDelta(ctx *sql.Context, delta diff.TableDelta) (*tableChange, error) { - // Dropping a table is always a schema change, and also a data change if the table contained data - if delta.IsDrop() { - isEmpty, err := isTableDataEmpty(ctx, delta.FromTable) - if err != nil { - return nil, err - } - - return &tableChange{ - tableName: delta.FromName, - dataChange: !isEmpty, - schemaChange: true, - }, nil - } - - // Renaming a table is always a schema change, and also a data change if the table data differs - if delta.IsRename() { - dataChanged, err := delta.HasHashChanged() - if err != nil { - return nil, err - } - - return &tableChange{ - tableName: delta.ToName, - dataChange: dataChanged, - schemaChange: true, - }, nil - } - - // Creating a table is always a schema change, and also a data change if data was inserted - if delta.IsAdd() { - isEmpty, err := isTableDataEmpty(ctx, delta.ToTable) - if err != nil { - return nil, err - } - - return &tableChange{ - tableName: delta.ToName, - dataChange: !isEmpty, - schemaChange: true, - }, nil - } - - dataChanged, err := delta.HasHashChanged() - if err != nil { - return nil, err - } - - schemaChanged, err := delta.HasSchemaChanged(ctx) - if err != nil { - return nil, err - } - - return &tableChange{ - tableName: delta.ToName, - dataChange: dataChanged, - schemaChange: schemaChanged, - }, nil -} - // Close closes the iterator. 
func (itr *doltDiffCommitHistoryRowItr) Close(*sql.Context) error { return nil diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go index b135335c9b..a081905d60 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go @@ -26,6 +26,7 @@ import ( "github.com/dolthub/go-mysql-server/enginetest/scriptgen/setup" "github.com/dolthub/go-mysql-server/server" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/analyzer" "github.com/dolthub/go-mysql-server/sql/mysql_db" "github.com/dolthub/go-mysql-server/sql/plan" gmstypes "github.com/dolthub/go-mysql-server/sql/types" @@ -1235,6 +1236,28 @@ func TestDiffTableFunctionPrepared(t *testing.T) { } } +func TestDiffStatTableFunction(t *testing.T) { + harness := newDoltHarness(t) + harness.Setup(setup.MydbData) + for _, test := range DiffStatTableFunctionScriptTests { + harness.engine = nil + t.Run(test.Name, func(t *testing.T) { + enginetest.TestScript(t, harness, test) + }) + } +} + +func TestDiffStatTableFunctionPrepared(t *testing.T) { + harness := newDoltHarness(t) + harness.Setup(setup.MydbData) + for _, test := range DiffStatTableFunctionScriptTests { + harness.engine = nil + t.Run(test.Name, func(t *testing.T) { + enginetest.TestScriptPrepared(t, harness, test) + }) + } +} + func TestDiffSummaryTableFunction(t *testing.T) { harness := newDoltHarness(t) harness.Setup(setup.MydbData) @@ -1351,6 +1374,13 @@ func mustNewEngine(t *testing.T, h enginetest.Harness) *gms.Engine { return e } +var biasedCosters = []analyzer.Coster{ + analyzer.NewInnerBiasedCoster(), + analyzer.NewLookupBiasedCoster(), + analyzer.NewHashBiasedCoster(), + analyzer.NewMergeBiasedCoster(), +} + func TestSystemTableIndexes(t *testing.T) { if !types.IsFormat_DOLT(types.Format_Default) { t.Skip("only new format support system table indexing") @@ -1361,23 +1391,27 @@ func 
TestSystemTableIndexes(t *testing.T) { harness.SkipSetupCommit() e := mustNewEngine(t, harness) defer e.Close() + e.Analyzer.Coster = analyzer.NewMergeBiasedCoster() ctx := enginetest.NewContext(harness) for _, q := range stt.setup { enginetest.RunQuery(t, e, harness, q) } - for _, tt := range stt.queries { - t.Run(fmt.Sprintf("%s: %s", stt.name, tt.query), func(t *testing.T) { - if tt.skip { - t.Skip() - } + for i, c := range []string{"inner", "lookup", "hash", "merge"} { + e.Analyzer.Coster = biasedCosters[i] + for _, tt := range stt.queries { + t.Run(fmt.Sprintf("%s(%s): %s", stt.name, c, tt.query), func(t *testing.T) { + if tt.skip { + t.Skip() + } - ctx = ctx.WithQuery(tt.query) - if tt.exp != nil { - enginetest.TestQueryWithContext(t, ctx, e, harness, tt.query, tt.exp, nil, nil) - } - }) + ctx = ctx.WithQuery(tt.query) + if tt.exp != nil { + enginetest.TestQueryWithContext(t, ctx, e, harness, tt.query, tt.exp, nil, nil) + } + }) + } } } } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go index 6490a0e203..178c53a999 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_harness.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_harness.go @@ -104,6 +104,7 @@ func (d *DoltHarness) resetScripts() []setup.SetupScript { } var resetCmds []setup.SetupScript + resetCmds = append(resetCmds, setup.SetupScript{"SET foreign_key_checks=0;"}) for i := range dbs { db := dbs[i] resetCmds = append(resetCmds, setup.SetupScript{fmt.Sprintf("use %s", db)}) @@ -138,6 +139,7 @@ func (d *DoltHarness) resetScripts() []setup.SetupScript { resetCmds = append(resetCmds, setup.SetupScript{"call dreset('--hard', 'head')"}) } + resetCmds = append(resetCmds, setup.SetupScript{"SET foreign_key_checks=1;"}) resetCmds = append(resetCmds, setup.SetupScript{"use mydb"}) return resetCmds } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go index 
8c5bd812b2..ba8886a6f0 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go @@ -777,6 +777,52 @@ var DoltScripts = []queries.ScriptTest{ }, }, }, + { + Name: "test hashof", + SetUpScript: []string{ + "CREATE TABLE hashof_test (pk int primary key, c1 int)", + "INSERT INTO hashof_test values (1,1), (2,2), (3,3)", + "CALL DOLT_ADD('hashof_test')", + "CALL DOLT_COMMIT('-a', '-m', 'first commit')", + "SET @Commit1 = (SELECT commit_hash FROM DOLT_LOG() LIMIT 1)", + "INSERT INTO hashof_test values (4,4), (5,5), (6,6)", + "CALL DOLT_COMMIT('-a', '-m', 'second commit')", + "SET @Commit2 = (SELECT commit_hash from DOLT_LOG() LIMIT 1)", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT (hashof(@Commit1) = hashof(@Commit2))", + Expected: []sql.Row{{false}}, + }, + { + Query: "SELECT (hashof(@Commit1) = hashof('HEAD~1'))", + Expected: []sql.Row{ + {true}, + }, + }, + { + Query: "SELECT (hashof(@Commit2) = hashof('HEAD'))", + Expected: []sql.Row{ + {true}, + }, + }, + { + Query: "SELECT (hashof(@Commit2) = hashof('main'))", + Expected: []sql.Row{ + {true}, + }, + }, + { + Query: "SELECT hashof('non_branch')", + ExpectedErrStr: "invalid ref spec", + }, + { + // Test that a short commit is invalid. This may change in the future. 
+ Query: "SELECT hashof(left(@Commit2,30))", + ExpectedErrStr: "invalid ref spec", + }, + }, + }, } func makeLargeInsert(sz int) string { @@ -817,6 +863,20 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{ Query: "SELECT * FROM dolt_diff('main~..main', 'test');", ExpectedErr: sql.ErrDatabaseAccessDeniedForUser, }, + { + // Without access to the database, dolt_diff_stat should fail with a database access error + User: "tester", + Host: "localhost", + Query: "SELECT * FROM dolt_diff_stat('main~', 'main', 'test');", + ExpectedErr: sql.ErrDatabaseAccessDeniedForUser, + }, + { + // Without access to the database, dolt_diff_stat with dots should fail with a database access error + User: "tester", + Host: "localhost", + Query: "SELECT * FROM dolt_diff_stat('main~..main', 'test');", + ExpectedErr: sql.ErrDatabaseAccessDeniedForUser, + }, { // Without access to the database, dolt_diff_summary should fail with a database access error User: "tester", @@ -873,6 +933,34 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{ Query: "SELECT * FROM dolt_diff('main~..main', 'test2');", ExpectedErr: sql.ErrPrivilegeCheckFailed, }, + { + // With access to the db, but not the table, dolt_diff_stat should fail + User: "tester", + Host: "localhost", + Query: "SELECT * FROM dolt_diff_stat('main~', 'main', 'test2');", + ExpectedErr: sql.ErrPrivilegeCheckFailed, + }, + { + // With access to the db, but not the table, dolt_diff_stat with dots should fail + User: "tester", + Host: "localhost", + Query: "SELECT * FROM dolt_diff_stat('main~...main', 'test2');", + ExpectedErr: sql.ErrPrivilegeCheckFailed, + }, + { + // With access to the db, dolt_diff_stat should fail for all tables if no access any of tables + User: "tester", + Host: "localhost", + Query: "SELECT * FROM dolt_diff_stat('main~', 'main');", + ExpectedErr: sql.ErrPrivilegeCheckFailed, + }, + { + // With access to the db, dolt_diff_stat with dots should fail for all tables if no access any of tables + User: "tester", + Host: 
"localhost", + Query: "SELECT * FROM dolt_diff_stat('main~...main');", + ExpectedErr: sql.ErrPrivilegeCheckFailed, + }, { // With access to the db, but not the table, dolt_diff_summary should fail User: "tester", @@ -943,6 +1031,20 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{ Query: "SELECT COUNT(*) FROM dolt_diff('main~..main', 'test');", Expected: []sql.Row{{1}}, }, + { + // After granting access to the entire db, dolt_diff_stat should work + User: "tester", + Host: "localhost", + Query: "SELECT COUNT(*) FROM dolt_diff_stat('main~', 'main');", + Expected: []sql.Row{{1}}, + }, + { + // After granting access to the entire db, dolt_diff_stat with dots should work + User: "tester", + Host: "localhost", + Query: "SELECT COUNT(*) FROM dolt_diff_stat('main~...main');", + Expected: []sql.Row{{1}}, + }, { // After granting access to the entire db, dolt_diff_summary should work User: "tester", @@ -985,6 +1087,13 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{ Query: "SELECT * FROM dolt_diff('main~...main', 'test');", ExpectedErr: sql.ErrDatabaseAccessDeniedForUser, }, + { + // After revoking access, dolt_diff_stat should fail + User: "tester", + Host: "localhost", + Query: "SELECT * FROM dolt_diff_stat('main~', 'main', 'test');", + ExpectedErr: sql.ErrDatabaseAccessDeniedForUser, + }, { // After revoking access, dolt_diff_summary should fail User: "tester", diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go index 0a445694cd..85e889f983 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries_diff.go @@ -1345,6 +1345,708 @@ inner join t on to_pk = t.pk;`, }, } +var DiffStatTableFunctionScriptTests = []queries.ScriptTest{ + { + Name: "invalid arguments", + SetUpScript: []string{ + "create table t (pk int primary key, c1 varchar(20), c2 varchar(20));", + "call dolt_add('.')", + "set @Commit1 = '';", + 
"call dolt_commit_hash_out(@Commit1, '-am', 'creating table t');", + + "insert into t values(1, 'one', 'two'), (2, 'two', 'three');", + "set @Commit2 = '';", + "call dolt_commit_hash_out(@Commit2, '-am', 'inserting into t');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat();", + ExpectedErr: sql.ErrInvalidArgumentNumber, + }, + { + Query: "SELECT * from dolt_diff_stat('t');", + ExpectedErr: sql.ErrInvalidArgumentNumber, + }, + { + Query: "SELECT * from dolt_diff_stat('t', @Commit1, @Commit2, 'extra');", + ExpectedErr: sql.ErrInvalidArgumentNumber, + }, + { + Query: "SELECT * from dolt_diff_stat(null, null, null);", + ExpectedErr: sql.ErrInvalidArgumentDetails, + }, + { + Query: "SELECT * from dolt_diff_stat(123, @Commit1, @Commit2);", + ExpectedErr: sql.ErrInvalidArgumentDetails, + }, + { + Query: "SELECT * from dolt_diff_stat('t', 123, @Commit2);", + ExpectedErr: sql.ErrInvalidArgumentDetails, + }, + { + Query: "SELECT * from dolt_diff_stat('t', @Commit1, 123);", + ExpectedErr: sql.ErrInvalidArgumentDetails, + }, + { + Query: "SELECT * from dolt_diff_stat('fake-branch', @Commit2, 't');", + ExpectedErrStr: "branch not found: fake-branch", + }, + { + Query: "SELECT * from dolt_diff_stat('fake-branch..main', 't');", + ExpectedErrStr: "branch not found: fake-branch", + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, 'fake-branch', 't');", + ExpectedErrStr: "branch not found: fake-branch", + }, + { + Query: "SELECT * from dolt_diff_stat('main..fake-branch', 't');", + ExpectedErrStr: "branch not found: fake-branch", + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit2, 'doesnotexist');", + ExpectedErr: sql.ErrTableNotFound, + }, + { + Query: "SELECT * from dolt_diff_stat('main^..main', 'doesnotexist');", + ExpectedErr: sql.ErrTableNotFound, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, concat('fake', '-', 'branch'), 't');", + ExpectedErr: sqle.ErrInvalidNonLiteralArgument, + }, + { + Query: 
"SELECT * from dolt_diff_stat(hashof('main'), @Commit2, 't');", + ExpectedErr: sqle.ErrInvalidNonLiteralArgument, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit2, LOWER('T'));", + ExpectedErr: sqle.ErrInvalidNonLiteralArgument, + }, + { + Query: "SELECT * from dolt_diff_stat('main..main~', LOWER('T'));", + ExpectedErr: sqle.ErrInvalidNonLiteralArgument, + }, + }, + }, + { + Name: "basic case with single table", + SetUpScript: []string{ + "set @Commit0 = HashOf('HEAD');", + "set @Commit1 = '';", + "call dolt_commit_hash_out(@Commit1, '--allow-empty', '-m', 'creating table t');", + + // create table t only + "create table t (pk int primary key, c1 varchar(20), c2 varchar(20));", + "call dolt_add('.')", + "set @Commit2 = '';", + "call dolt_commit_hash_out(@Commit2, '-am', 'creating table t');", + + // insert 1 row into t + "insert into t values(1, 'one', 'two');", + "set @Commit3 = '';", + "call dolt_commit_hash_out(@Commit3, '-am', 'inserting 1 into table t');", + + // insert 2 rows into t and update two cells + "insert into t values(2, 'two', 'three'), (3, 'three', 'four');", + "update t set c1='uno', c2='dos' where pk=1;", + "set @Commit4 = '';", + "call dolt_commit_hash_out(@Commit4, '-am', 'inserting 2 into table t');", + + // drop table t only + "drop table t;", + "set @Commit5 = '';", + "call dolt_commit_hash_out(@Commit5, '-am', 'drop table t');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + // table is added, no data diff, result is empty + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit2, 't');", + Expected: []sql.Row{}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit2, @Commit3, 't');", + Expected: []sql.Row{{"t", 0, 1, 0, 0, 3, 0, 0, 0, 1, 0, 3}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit3, @Commit4, 't');", + Expected: []sql.Row{{"t", 0, 2, 0, 1, 6, 0, 2, 1, 3, 3, 9}}, + }, + { + // change from and to commits + Query: "SELECT * from dolt_diff_stat(@Commit4, @Commit3, 't');", + Expected: 
[]sql.Row{{"t", 0, 0, 2, 1, 0, 6, 2, 3, 1, 9, 3}}, + }, + { + // table is dropped + Query: "SELECT * from dolt_diff_stat(@Commit4, @Commit5, 't');", + Expected: []sql.Row{{"t", 0, 0, 3, 0, 0, 9, 0, 3, 0, 9, 0}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit4, 't');", + Expected: []sql.Row{{"t", 0, 3, 0, 0, 9, 0, 0, 0, 3, 0, 9}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit5, 't');", + ExpectedErr: sql.ErrTableNotFound, + }, + { + Query: ` +SELECT * +from dolt_diff_stat(@Commit3, @Commit4, 't') +inner join t as of @Commit3 on rows_unmodified = t.pk;`, + Expected: []sql.Row{}, + }, + }, + }, + { + Name: "basic case with single keyless table", + SetUpScript: []string{ + "set @Commit0 = HashOf('HEAD');", + "set @Commit1 = '';", + "call dolt_commit_hash_out(@Commit1, '--allow-empty', '-m', 'creating table t');", + + // create table t only + "create table t (id int, c1 varchar(20), c2 varchar(20));", + "call dolt_add('.')", + "set @Commit2 = '';", + "call dolt_commit_hash_out(@Commit2, '-am', 'creating table t');", + + // insert 1 row into t + "insert into t values(1, 'one', 'two');", + "set @Commit3 = '';", + "call dolt_commit_hash_out(@Commit3, '-am', 'inserting 1 into table t');", + + // insert 2 rows into t and update two cells + "insert into t values(2, 'two', 'three'), (3, 'three', 'four');", + "update t set c1='uno', c2='dos' where id=1;", + "set @Commit4 = '';", + "call dolt_commit_hash_out(@Commit4, '-am', 'inserting 2 into table t');", + + // drop table t only + "drop table t;", + "set @Commit5 = '';", + "call dolt_commit_hash_out(@Commit5, '-am', 'drop table t');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + // table is added, no data diff, result is empty + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit2, 't');", + Expected: []sql.Row{}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit2, @Commit3, 't');", + Expected: []sql.Row{{"t", nil, 1, 0, nil, nil, nil, nil, nil, nil, nil, nil}}, + }, + 
{ + // TODO : (correct result is commented out) + // update row for keyless table deletes the row and insert the new row + // this causes row added = 3 and row deleted = 1 + Query: "SELECT * from dolt_diff_stat(@Commit3, @Commit4, 't');", + //Expected: []sql.Row{{"t", nil, 2, 0, nil, nil, nil, nil, nil, nil, nil, nil}}, + Expected: []sql.Row{{"t", nil, 3, 1, nil, nil, nil, nil, nil, nil, nil, nil}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit4, @Commit3, 't');", + //Expected: []sql.Row{{"t", nil, 0, 2, nil, nil, nil, nil, nil, nil, nil, nil}}, + Expected: []sql.Row{{"t", nil, 1, 3, nil, nil, nil, nil, nil, nil, nil, nil}}, + }, + { + // table is dropped + Query: "SELECT * from dolt_diff_stat(@Commit4, @Commit5, 't');", + Expected: []sql.Row{{"t", nil, 0, 3, nil, nil, nil, nil, nil, nil, nil, nil}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit4, 't');", + Expected: []sql.Row{{"t", nil, 3, 0, nil, nil, nil, nil, nil, nil, nil, nil}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit5, 't');", + ExpectedErr: sql.ErrTableNotFound, + }, + }, + }, + { + Name: "basic case with multiple tables", + SetUpScript: []string{ + "set @Commit0 = HashOf('HEAD');", + + // add table t with 1 row + "create table t (pk int primary key, c1 varchar(20), c2 varchar(20));", + "insert into t values(1, 'one', 'two');", + "call dolt_add('.')", + "set @Commit1 = '';", + "call dolt_commit_hash_out(@Commit1, '-am', 'inserting into table t');", + + // add table t2 with 1 row + "create table t2 (pk int primary key, c1 varchar(20), c2 varchar(20));", + "insert into t2 values(100, 'hundred', 'hundert');", + "call dolt_add('.')", + "set @Commit2 = '';", + "call dolt_commit_hash_out(@Commit2, '-am', 'inserting into table t2');", + + // changes on both tables + "insert into t values(2, 'two', 'three'), (3, 'three', 'four'), (4, 'four', 'five');", + "update t set c1='uno', c2='dos' where pk=1;", + "insert into t2 values(101, 'hundred one', 'one');", + 
"set @Commit3 = '';", + "call dolt_commit_hash_out(@Commit3, '-am', 'inserting into table t');", + + // changes on both tables + "delete from t where c2 = 'four';", + "update t2 set c2='zero' where pk=100;", + "set @Commit4 = '';", + "call dolt_commit_hash_out(@Commit4, '-am', 'inserting into table t');", + + // create keyless table + "create table keyless (id int);", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat(@Commit0, @Commit1);", + Expected: []sql.Row{{"t", 0, 1, 0, 0, 3, 0, 0, 0, 1, 0, 3}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit2);", + Expected: []sql.Row{{"t2", 0, 1, 0, 0, 3, 0, 0, 0, 1, 0, 3}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit2, @Commit3);", + Expected: []sql.Row{{"t", 0, 3, 0, 1, 9, 0, 2, 1, 4, 3, 12}, {"t2", 1, 1, 0, 0, 3, 0, 0, 1, 2, 3, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit3, @Commit4);", + Expected: []sql.Row{{"t", 3, 0, 1, 0, 0, 3, 0, 4, 3, 12, 9}, {"t2", 1, 0, 0, 1, 0, 0, 1, 2, 2, 6, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit4, @Commit2);", + Expected: []sql.Row{{"t", 0, 0, 2, 1, 0, 6, 2, 3, 1, 9, 3}, {"t2", 0, 0, 1, 1, 0, 3, 1, 2, 1, 6, 3}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit3, 'WORKING');", + Expected: []sql.Row{{"t", 3, 0, 1, 0, 0, 3, 0, 4, 3, 12, 9}, {"t2", 1, 0, 0, 1, 0, 0, 1, 2, 2, 6, 6}}, + }, + }, + }, + { + Name: "WORKING and STAGED", + SetUpScript: []string{ + "set @Commit0 = HashOf('HEAD');", + + "create table t (pk int primary key, c1 text, c2 text);", + "call dolt_add('.')", + "insert into t values (1, 'one', 'two'), (2, 'three', 'four');", + "set @Commit1 = '';", + "call dolt_commit_hash_out(@Commit1, '-am', 'inserting two rows into table t');", + + "insert into t values (3, 'five', 'six');", + "delete from t where pk = 2", + "update t set c2 = '100' where pk = 1", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat(@Commit1, 'WORKING', 
't')", + Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('STAGED', 'WORKING', 't')", + Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('STAGED..WORKING', 't')", + Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('WORKING', 'STAGED', 't')", + Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('WORKING', 'WORKING', 't')", + Expected: []sql.Row{}, + }, + { + Query: "SELECT * from dolt_diff_stat('WORKING..WORKING', 't')", + Expected: []sql.Row{}, + }, + { + Query: "SELECT * from dolt_diff_stat('STAGED', 'STAGED', 't')", + Expected: []sql.Row{}, + }, + { + Query: "call dolt_add('.')", + SkipResultsCheck: true, + }, + { + Query: "SELECT * from dolt_diff_stat('WORKING', 'STAGED', 't')", + Expected: []sql.Row{}, + }, + { + Query: "SELECT * from dolt_diff_stat('HEAD', 'STAGED', 't')", + Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + }, + }, + }, + { + Name: "diff with branch refs", + SetUpScript: []string{ + "create table t (pk int primary key, c1 varchar(20), c2 varchar(20));", + "call dolt_add('.')", + "set @Commit1 = '';", + "call dolt_commit_hash_out(@Commit1, '-am', 'creating table t');", + + "insert into t values(1, 'one', 'two');", + "set @Commit2 = '';", + "call dolt_commit_hash_out(@Commit2, '-am', 'inserting row 1 into t in main');", + + "CALL DOLT_checkout('-b', 'branch1');", + "alter table t drop column c2;", + "set @Commit3 = '';", + "call dolt_commit_hash_out(@Commit3, '-am', 'dropping column c2 in branch1');", + + "delete from t where pk=1;", + "set @Commit4 = '';", + "call dolt_commit_hash_out(@Commit4, '-am', 'deleting row 1 in branch1');", + + "insert into t values (2, 'two');", + "set @Commit5 = '';", + "call dolt_commit_hash_out(@Commit5, '-am', 'inserting row 2 in branch1');", + + "CALL 
DOLT_checkout('main');", + "insert into t values (2, 'two', 'three');", + "set @Commit6 = '';", + "call dolt_commit_hash_out(@Commit6, '-am', 'inserting row 2 in main');", + + "create table newtable (pk int primary key);", + "insert into newtable values (1), (2);", + "set @Commit7 = '';", + "call dolt_commit_hash_out(@Commit7, '-Am', 'new table newtable');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat('main', 'branch1', 't');", + Expected: []sql.Row{{"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}}, + }, + { + Query: "SELECT * from dolt_diff_stat('main..branch1', 't');", + Expected: []sql.Row{{"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}}, + }, + { + Query: "SELECT * from dolt_diff_stat('main', 'branch1');", + Expected: []sql.Row{ + {"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}, + {"newtable", 0, 0, 2, 0, 0, 2, 0, 2, 0, 2, 0}, + }, + }, + { + Query: "SELECT * from dolt_diff_stat('main..branch1');", + Expected: []sql.Row{ + {"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}, + {"newtable", 0, 0, 2, 0, 0, 2, 0, 2, 0, 2, 0}, + }, + }, + { + Query: "SELECT * from dolt_diff_stat('branch1', 'main', 't');", + Expected: []sql.Row{{"t", 0, 1, 0, 1, 4, 0, 1, 1, 2, 2, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('branch1..main', 't');", + Expected: []sql.Row{{"t", 0, 1, 0, 1, 4, 0, 1, 1, 2, 2, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('main~2', 'branch1', 't');", + Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + }, + { + Query: "SELECT * from dolt_diff_stat('main~2..branch1', 't');", + Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + }, + + // Three dot + { + Query: "SELECT * from dolt_diff_stat('main...branch1', 't');", + Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + }, + { + Query: "SELECT * from dolt_diff_stat('main...branch1');", + Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + }, + { + Query: "SELECT * from dolt_diff_stat('branch1...main', 't');", + Expected: []sql.Row{{"t", 1, 
1, 0, 0, 3, 0, 0, 1, 2, 3, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('branch1...main');", + Expected: []sql.Row{ + {"t", 1, 1, 0, 0, 3, 0, 0, 1, 2, 3, 6}, + {"newtable", 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2}, + }, + }, + { + Query: "SELECT * from dolt_diff_stat('branch1...main^');", + Expected: []sql.Row{{"t", 1, 1, 0, 0, 3, 0, 0, 1, 2, 3, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat('branch1...main', 'newtable');", + Expected: []sql.Row{{"newtable", 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2}}, + }, + { + Query: "SELECT * from dolt_diff_stat('main...main', 'newtable');", + Expected: []sql.Row{}, + }, + }, + }, + { + Name: "schema modification: drop and add column", + SetUpScript: []string{ + "create table t (pk int primary key, c1 varchar(20), c2 varchar(20));", + "call dolt_add('.');", + "insert into t values (1, 'one', 'two'), (2, 'two', 'three');", + "set @Commit1 = '';", + "call dolt_commit_hash_out(@Commit1, '-am', 'inserting row 1, 2 into t');", + + // drop 1 column and add 1 row + "alter table t drop column c2;", + "set @Commit2 = '';", + "call dolt_commit_hash_out(@Commit2, '-am', 'dropping column c2');", + + // drop 1 column and add 1 row + "insert into t values (3, 'three');", + "set @Commit3 = '';", + "call dolt_commit_hash_out(@Commit3, '-am', 'inserting row 3');", + + // add 1 column and 1 row and update + "alter table t add column c2 varchar(20);", + "insert into t values (4, 'four', 'five');", + "update t set c2='foo' where pk=1;", + "set @Commit4 = '';", + "call dolt_commit_hash_out(@Commit4, '-am', 'adding column c2, inserting, and updating data');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit2, 't');", + Expected: []sql.Row{{"t", 0, 0, 0, 2, 0, 2, 0, 2, 2, 6, 4}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit2, @Commit3, 't');", + Expected: []sql.Row{{"t", 2, 1, 0, 0, 2, 0, 0, 2, 3, 4, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit3, 't');", + 
Expected: []sql.Row{{"t", 0, 1, 0, 2, 2, 2, 0, 2, 3, 6, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit3, @Commit4, 't');", + Expected: []sql.Row{{"t", 2, 1, 0, 1, 6, 0, 1, 3, 4, 6, 12}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit4, 't');", + Expected: []sql.Row{{"t", 0, 2, 0, 2, 6, 0, 2, 2, 4, 6, 12}}, + }, + }, + }, + { + Name: "schema modification: rename columns", + SetUpScript: []string{ + "create table t (pk int primary key, c1 varchar(20), c2 int);", + "call dolt_add('.')", + "set @Commit1 = '';", + "call dolt_commit_hash_out(@Commit1, '-am', 'creating table t');", + + "insert into t values(1, 'one', -1), (2, 'two', -2);", + "set @Commit2 = '';", + "call dolt_commit_hash_out(@Commit2, '-am', 'inserting into t');", + + "alter table t rename column c2 to c3;", + "set @Commit3 = '';", + "call dolt_commit_hash_out(@Commit3, '-am', 'renaming column c2 to c3');", + + "insert into t values (3, 'three', -3);", + "update t set c3=1 where pk=1;", + "set @Commit4 = '';", + "call dolt_commit_hash_out(@Commit4, '-am', 'inserting and updating data');", + + "alter table t rename column c3 to c2;", + "insert into t values (4, 'four', -4);", + "set @Commit5 = '';", + "call dolt_commit_hash_out(@Commit5, '-am', 'renaming column c3 to c2, and inserting data');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit2, 't');", + Expected: []sql.Row{{"t", 0, 2, 0, 0, 6, 0, 0, 0, 2, 0, 6}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit2, @Commit3, 't');", + Expected: []sql.Row{}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit3, @Commit4, 't');", + Expected: []sql.Row{{"t", 1, 1, 0, 1, 3, 0, 1, 2, 3, 6, 9}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit4, @Commit5, 't');", + Expected: []sql.Row{{"t", 3, 1, 0, 0, 3, 0, 0, 3, 4, 9, 12}}, + }, + { + Query: "SELECT * from dolt_diff_stat(@Commit1, @Commit5, 't');", + Expected: []sql.Row{{"t", 0, 4, 0, 0, 12, 0, 0, 0, 
4, 0, 12}}, + }, + }, + }, + { + Name: "new table", + SetUpScript: []string{ + "create table t1 (a int primary key, b int)", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "select * from dolt_diff_stat('HEAD', 'WORKING')", + Expected: []sql.Row{}, + }, + { + Query: "select * from dolt_diff_stat('WORKING', 'HEAD')", + Expected: []sql.Row{}, + }, + { + Query: "insert into t1 values (1,2)", + SkipResultsCheck: true, + }, + { + Query: "select * from dolt_diff_stat('HEAD', 'WORKING', 't1')", + Expected: []sql.Row{{"t1", 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 2}}, + }, + { + Query: "select * from dolt_diff_stat('WORKING', 'HEAD', 't1')", + Expected: []sql.Row{{"t1", 0, 0, 1, 0, 0, 2, 0, 1, 0, 2, 0}}, + }, + }, + }, + { + Name: "dropped table", + SetUpScript: []string{ + "create table t1 (a int primary key, b int)", + "call dolt_add('.')", + "insert into t1 values (1,2)", + "call dolt_commit('-am', 'new table')", + "drop table t1", + "call dolt_commit('-am', 'dropped table')", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "select * from dolt_diff_stat('HEAD~', 'HEAD', 't1')", + Expected: []sql.Row{{"t1", 0, 0, 1, 0, 0, 2, 0, 1, 0, 2, 0}}, + }, + { + Query: "select * from dolt_diff_stat('HEAD', 'HEAD~', 't1')", + Expected: []sql.Row{{"t1", 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 2}}, + }, + }, + }, + { + Name: "renamed table", + SetUpScript: []string{ + "create table t1 (a int primary key, b int)", + "call dolt_add('.')", + "insert into t1 values (1,2)", + "call dolt_commit('-am', 'new table')", + "alter table t1 rename to t2", + "call dolt_add('.')", + "insert into t2 values (3,4)", + "call dolt_commit('-am', 'renamed table')", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "select * from dolt_diff_stat('HEAD~', 'HEAD', 't2')", + Expected: []sql.Row{{"t2", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + }, + { + Query: "select * from dolt_diff_stat('HEAD~..HEAD', 't2')", + Expected: []sql.Row{{"t2", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + }, + { + // Old 
table name can be matched as well + Query: "select * from dolt_diff_stat('HEAD~', 'HEAD', 't1')", + Expected: []sql.Row{{"t1", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + }, + { + // Old table name can be matched as well + Query: "select * from dolt_diff_stat('HEAD~..HEAD', 't1')", + Expected: []sql.Row{{"t1", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + }, + }, + }, + { + Name: "add multiple columns, then set and unset a value. Should not show a diff", + SetUpScript: []string{ + "CREATE table t (pk int primary key);", + "Insert into t values (1);", + "CALL DOLT_ADD('.');", + "CALL DOLT_COMMIT('-am', 'setup');", + "alter table t add column col1 int;", + "alter table t add column col2 int;", + "CALL DOLT_ADD('.');", + "CALL DOLT_COMMIT('-am', 'add columns');", + "UPDATE t set col1 = 1 where pk = 1;", + "UPDATE t set col1 = null where pk = 1;", + "CALL DOLT_COMMIT('--allow-empty', '-am', 'fix short tuple');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat('HEAD~2', 'HEAD');", + Expected: []sql.Row{{"t", 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 3}}, + }, + { + Query: "SELECT * from dolt_diff_stat('HEAD~', 'HEAD');", + Expected: []sql.Row{}, + }, + }, + }, + { + Name: "pk set change should throw an error for 3 argument dolt_diff_stat", + SetUpScript: []string{ + "CREATE table t (pk int primary key);", + "INSERT INTO t values (1);", + "CALL DOLT_COMMIT('-Am', 'table with row');", + "ALTER TABLE t ADD col1 int not null default 0;", + "ALTER TABLE t drop primary key;", + "ALTER TABLE t add primary key (pk, col1);", + "CALL DOLT_COMMIT('-am', 'add secondary column with primary key');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat('HEAD~', 'HEAD', 't');", + ExpectedErrStr: "failed to compute diff stat for table t: primary key set changed", + }, + }, + }, + { + Name: "pk set change should report warning for 2 argument dolt_diff_stat", + SetUpScript: []string{ + "CREATE table t (pk int primary key);", + 
"INSERT INTO t values (1);", + "CREATE table t2 (pk int primary key);", + "INSERT INTO t2 values (2);", + "CALL DOLT_COMMIT('-Am', 'multiple tables');", + "ALTER TABLE t ADD col1 int not null default 0;", + "ALTER TABLE t drop primary key;", + "ALTER TABLE t add primary key (pk, col1);", + "INSERT INTO t2 values (3), (4), (5);", + "CALL DOLT_COMMIT('-am', 'add secondary column with primary key to t');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT * from dolt_diff_stat('HEAD~', 'HEAD')", + Expected: []sql.Row{ + {"t", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {"t2", 1, 3, 0, 0, 3, 0, 0, 1, 4, 1, 4}, + }, + ExpectedWarning: dtables.PrimaryKeyChangeWarningCode, + ExpectedWarningsCount: 1, + }, + }, + }, +} + var DiffSummaryTableFunctionScriptTests = []queries.ScriptTest{ { Name: "invalid arguments", @@ -1403,14 +2105,6 @@ var DiffSummaryTableFunctionScriptTests = []queries.ScriptTest{ Query: "SELECT * from dolt_diff_summary('main..fake-branch', 't');", ExpectedErrStr: "branch not found: fake-branch", }, - { - Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2, 'doesnotexist');", - ExpectedErr: sql.ErrTableNotFound, - }, - { - Query: "SELECT * from dolt_diff_summary('main^..main', 'doesnotexist');", - ExpectedErr: sql.ErrTableNotFound, - }, { Query: "SELECT * from dolt_diff_summary(@Commit1, concat('fake', '-', 'branch'), 't');", ExpectedErr: sqle.ErrInvalidNonLiteralArgument, @@ -1423,10 +2117,6 @@ var DiffSummaryTableFunctionScriptTests = []queries.ScriptTest{ Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2, LOWER('T'));", ExpectedErr: sqle.ErrInvalidNonLiteralArgument, }, - { - Query: "SELECT * from dolt_diff_summary('main..main~', LOWER('T'));", - ExpectedErr: sqle.ErrInvalidNonLiteralArgument, - }, }, }, { @@ -1460,41 +2150,39 @@ var DiffSummaryTableFunctionScriptTests = []queries.ScriptTest{ }, Assertions: []queries.ScriptTestAssertion{ { - // table is added, no data diff, result is empty - Query: "SELECT * from 
dolt_diff_summary(@Commit1, @Commit2, 't');", + // table does not exist, empty result + Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2, 'doesnotexist');", Expected: []sql.Row{}, }, + { + // table is added, no data changes + Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2, 't');", + Expected: []sql.Row{{"t", "added", false, true}}, + }, { Query: "SELECT * from dolt_diff_summary(@Commit2, @Commit3, 't');", - Expected: []sql.Row{{"t", 0, 1, 0, 0, 3, 0, 0, 0, 1, 0, 3}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit3, @Commit4, 't');", - Expected: []sql.Row{{"t", 0, 2, 0, 1, 6, 0, 2, 1, 3, 3, 9}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { // change from and to commits Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit3, 't');", - Expected: []sql.Row{{"t", 0, 0, 2, 1, 0, 6, 2, 3, 1, 9, 3}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { // table is dropped Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit5, 't');", - Expected: []sql.Row{{"t", 0, 0, 3, 0, 0, 9, 0, 3, 0, 9, 0}}, + Expected: []sql.Row{{"t", "dropped", true, true}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit4, 't');", - Expected: []sql.Row{{"t", 0, 3, 0, 0, 9, 0, 0, 0, 3, 0, 9}}, + Expected: []sql.Row{{"t", "added", true, true}}, }, { - Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit5, 't');", - ExpectedErr: sql.ErrTableNotFound, - }, - { - Query: ` -SELECT * -from dolt_diff_summary(@Commit3, @Commit4, 't') -inner join t as of @Commit3 on rows_unmodified = t.pk;`, + Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit5, 't');", Expected: []sql.Row{}, }, }, @@ -1532,37 +2220,32 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, { // table is added, no data diff, result is empty Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2, 't');", - Expected: []sql.Row{}, + Expected: []sql.Row{{"t", "added", false, true}}, }, { Query: 
"SELECT * from dolt_diff_summary(@Commit2, @Commit3, 't');", - Expected: []sql.Row{{"t", nil, 1, 0, nil, nil, nil, nil, nil, nil, nil, nil}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { - // TODO : (correct result is commented out) - // update row for keyless table deletes the row and insert the new row - // this causes row added = 3 and row deleted = 1 - Query: "SELECT * from dolt_diff_summary(@Commit3, @Commit4, 't');", - //Expected: []sql.Row{{"t", nil, 2, 0, nil, nil, nil, nil, nil, nil, nil, nil}}, - Expected: []sql.Row{{"t", nil, 3, 1, nil, nil, nil, nil, nil, nil, nil, nil}}, + Query: "SELECT * from dolt_diff_summary(@Commit3, @Commit4, 't');", + Expected: []sql.Row{{"t", "modified", true, false}}, }, { - Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit3, 't');", - //Expected: []sql.Row{{"t", nil, 0, 2, nil, nil, nil, nil, nil, nil, nil, nil}}, - Expected: []sql.Row{{"t", nil, 1, 3, nil, nil, nil, nil, nil, nil, nil, nil}}, + Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit3, 't');", + Expected: []sql.Row{{"t", "modified", true, false}}, }, { // table is dropped Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit5, 't');", - Expected: []sql.Row{{"t", nil, 0, 3, nil, nil, nil, nil, nil, nil, nil, nil}}, + Expected: []sql.Row{{"t", "dropped", true, true}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit4, 't');", - Expected: []sql.Row{{"t", nil, 3, 0, nil, nil, nil, nil, nil, nil, nil, nil}}, + Expected: []sql.Row{{"t", "added", true, true}}, }, { - Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit5, 't');", - ExpectedErr: sql.ErrTableNotFound, + Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit5, 't');", + Expected: []sql.Row{}, }, }, }, @@ -1604,27 +2287,32 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "SELECT * from dolt_diff_summary(@Commit0, @Commit1);", - Expected: []sql.Row{{"t", 0, 1, 0, 0, 3, 0, 0, 0, 1, 0, 3}}, + 
Expected: []sql.Row{{"t", "added", true, true}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2);", - Expected: []sql.Row{{"t2", 0, 1, 0, 0, 3, 0, 0, 0, 1, 0, 3}}, + Expected: []sql.Row{{"t2", "added", true, true}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit2, @Commit3);", - Expected: []sql.Row{{"t", 0, 3, 0, 1, 9, 0, 2, 1, 4, 3, 12}, {"t2", 1, 1, 0, 0, 3, 0, 0, 1, 2, 3, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}, {"t2", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit3, @Commit4);", - Expected: []sql.Row{{"t", 3, 0, 1, 0, 0, 3, 0, 4, 3, 12, 9}, {"t2", 1, 0, 0, 1, 0, 0, 1, 2, 2, 6, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}, {"t2", "modified", true, false}}, }, { - Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit2);", - Expected: []sql.Row{{"t", 0, 0, 2, 1, 0, 6, 2, 3, 1, 9, 3}, {"t2", 0, 0, 1, 1, 0, 3, 1, 2, 1, 6, 3}}, + Query: "SELECT * from dolt_diff_summary(@Commit0, @Commit4);", + Expected: []sql.Row{{"t", "added", true, true}, {"t2", "added", true, true}}, + }, + { + Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit2);", + + Expected: []sql.Row{{"t", "modified", true, false}, {"t2", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit3, 'WORKING');", - Expected: []sql.Row{{"t", 3, 0, 1, 0, 0, 3, 0, 4, 3, 12, 9}, {"t2", 1, 0, 0, 1, 0, 0, 1, 2, 2, 6, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}, {"t2", "modified", true, false}, {"keyless", "added", false, true}}, }, }, }, @@ -1646,19 +2334,19 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "SELECT * from dolt_diff_summary(@Commit1, 'WORKING', 't')", - Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary('STAGED', 'WORKING', 't')", - Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + 
Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary('STAGED..WORKING', 't')", - Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary('WORKING', 'STAGED', 't')", - Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary('WORKING', 'WORKING', 't')", @@ -1682,7 +2370,7 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, }, { Query: "SELECT * from dolt_diff_summary('HEAD', 'STAGED', 't')", - Expected: []sql.Row{{"t", 0, 1, 1, 1, 3, 3, 1, 2, 2, 6, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, }, }, @@ -1724,70 +2412,70 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "SELECT * from dolt_diff_summary('main', 'branch1', 't');", - Expected: []sql.Row{{"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('main..branch1', 't');", - Expected: []sql.Row{{"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('main', 'branch1');", Expected: []sql.Row{ - {"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}, - {"newtable", 0, 0, 2, 0, 0, 2, 0, 2, 0, 2, 0}, + {"t", "modified", true, true}, + {"newtable", "dropped", true, true}, }, }, { Query: "SELECT * from dolt_diff_summary('main..branch1');", Expected: []sql.Row{ - {"t", 0, 0, 1, 1, 0, 4, 0, 2, 1, 6, 2}, - {"newtable", 0, 0, 2, 0, 0, 2, 0, 2, 0, 2, 0}, + {"t", "modified", true, true}, + {"newtable", "dropped", true, true}, }, }, { Query: "SELECT * from dolt_diff_summary('branch1', 'main', 't');", - Expected: []sql.Row{{"t", 0, 1, 0, 1, 4, 0, 1, 1, 2, 2, 6}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * 
from dolt_diff_summary('branch1..main', 't');", - Expected: []sql.Row{{"t", 0, 1, 0, 1, 4, 0, 1, 1, 2, 2, 6}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('main~2', 'branch1', 't');", - Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('main~2..branch1', 't');", - Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, // Three dot { Query: "SELECT * from dolt_diff_summary('main...branch1', 't');", - Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('main...branch1');", - Expected: []sql.Row{{"t", 0, 1, 1, 0, 2, 3, 0, 1, 1, 3, 2}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('branch1...main', 't');", - Expected: []sql.Row{{"t", 1, 1, 0, 0, 3, 0, 0, 1, 2, 3, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary('branch1...main');", Expected: []sql.Row{ - {"t", 1, 1, 0, 0, 3, 0, 0, 1, 2, 3, 6}, - {"newtable", 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2}, + {"t", "modified", true, false}, + {"newtable", "added", true, true}, }, }, { Query: "SELECT * from dolt_diff_summary('branch1...main^');", - Expected: []sql.Row{{"t", 1, 1, 0, 0, 3, 0, 0, 1, 2, 3, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary('branch1...main', 'newtable');", - Expected: []sql.Row{{"newtable", 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2}}, + Expected: []sql.Row{{"newtable", "added", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('main...main', 'newtable');", @@ -1804,43 +2492,51 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, "set @Commit1 = '';", "call dolt_commit_hash_out(@Commit1, '-am', 'inserting row 
1, 2 into t');", - // drop 1 column and add 1 row + // drop 1 column "alter table t drop column c2;", "set @Commit2 = '';", "call dolt_commit_hash_out(@Commit2, '-am', 'dropping column c2');", - // drop 1 column and add 1 row + // add 1 row "insert into t values (3, 'three');", "set @Commit3 = '';", "call dolt_commit_hash_out(@Commit3, '-am', 'inserting row 3');", - // add 1 column and 1 row and update + // add 1 column "alter table t add column c2 varchar(20);", + "set @Commit4 = '';", + "call dolt_commit_hash_out(@Commit4, '-am', 'adding column c2');", + + // add 1 row and update "insert into t values (4, 'four', 'five');", "update t set c2='foo' where pk=1;", - "set @Commit4 = '';", - "call dolt_commit_hash_out(@Commit4, '-am', 'adding column c2, inserting, and updating data');", + "set @Commit5 = '';", + "call dolt_commit_hash_out(@Commit5, '-am', 'inserting and updating data');", }, Assertions: []queries.ScriptTestAssertion{ { Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2, 't');", - Expected: []sql.Row{{"t", 0, 0, 0, 2, 0, 2, 0, 2, 2, 6, 4}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit2, @Commit3, 't');", - Expected: []sql.Row{{"t", 2, 1, 0, 0, 2, 0, 0, 2, 3, 4, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit3, 't');", - Expected: []sql.Row{{"t", 0, 1, 0, 2, 2, 2, 0, 2, 3, 6, 6}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit3, @Commit4, 't');", - Expected: []sql.Row{{"t", 2, 1, 0, 1, 6, 0, 1, 3, 4, 6, 12}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { - Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit4, 't');", - Expected: []sql.Row{{"t", 0, 2, 0, 2, 6, 0, 2, 2, 4, 6, 12}}, + Query: "SELECT * from dolt_diff_summary(@Commit3, @Commit5, 't');", + Expected: []sql.Row{{"t", "modified", true, true}}, + }, + { + Query: "SELECT * from 
dolt_diff_summary(@Commit1, @Commit5, 't');", + Expected: []sql.Row{{"t", "modified", true, false}}, }, }, }, @@ -1852,19 +2548,23 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, "set @Commit1 = '';", "call dolt_commit_hash_out(@Commit1, '-am', 'creating table t');", + // add rows "insert into t values(1, 'one', -1), (2, 'two', -2);", "set @Commit2 = '';", "call dolt_commit_hash_out(@Commit2, '-am', 'inserting into t');", + // rename column "alter table t rename column c2 to c3;", "set @Commit3 = '';", "call dolt_commit_hash_out(@Commit3, '-am', 'renaming column c2 to c3');", + // add row and update "insert into t values (3, 'three', -3);", "update t set c3=1 where pk=1;", "set @Commit4 = '';", "call dolt_commit_hash_out(@Commit4, '-am', 'inserting and updating data');", + // rename column and add row "alter table t rename column c3 to c2;", "insert into t values (4, 'four', -4);", "set @Commit5 = '';", @@ -1873,23 +2573,23 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit2, 't');", - Expected: []sql.Row{{"t", 0, 2, 0, 0, 6, 0, 0, 0, 2, 0, 6}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit2, @Commit3, 't');", - Expected: []sql.Row{}, + Expected: []sql.Row{{"t", "modified", true, true}}, // TODO: Data change should be false for renamed column }, { Query: "SELECT * from dolt_diff_summary(@Commit3, @Commit4, 't');", - Expected: []sql.Row{{"t", 1, 1, 0, 1, 3, 0, 1, 2, 3, 6, 9}}, + Expected: []sql.Row{{"t", "modified", true, false}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit4, @Commit5, 't');", - Expected: []sql.Row{{"t", 3, 1, 0, 0, 3, 0, 0, 3, 4, 9, 12}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary(@Commit1, @Commit5, 't');", - Expected: []sql.Row{{"t", 0, 4, 0, 0, 12, 0, 0, 0, 4, 0, 12}}, + Expected: []sql.Row{{"t", 
"modified", true, false}}, }, }, }, @@ -1901,11 +2601,11 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "select * from dolt_diff_summary('HEAD', 'WORKING')", - Expected: []sql.Row{}, + Expected: []sql.Row{{"t1", "added", false, true}}, }, { Query: "select * from dolt_diff_summary('WORKING', 'HEAD')", - Expected: []sql.Row{}, + Expected: []sql.Row{{"t1", "dropped", false, true}}, }, { Query: "insert into t1 values (1,2)", @@ -1913,11 +2613,11 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, }, { Query: "select * from dolt_diff_summary('HEAD', 'WORKING', 't1')", - Expected: []sql.Row{{"t1", 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 2}}, + Expected: []sql.Row{{"t1", "added", true, true}}, }, { Query: "select * from dolt_diff_summary('WORKING', 'HEAD', 't1')", - Expected: []sql.Row{{"t1", 0, 0, 1, 0, 0, 2, 0, 1, 0, 2, 0}}, + Expected: []sql.Row{{"t1", "dropped", true, true}}, }, }, }, @@ -1934,11 +2634,11 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "select * from dolt_diff_summary('HEAD~', 'HEAD', 't1')", - Expected: []sql.Row{{"t1", 0, 0, 1, 0, 0, 2, 0, 1, 0, 2, 0}}, + Expected: []sql.Row{{"t1", "dropped", true, true}}, }, { Query: "select * from dolt_diff_summary('HEAD', 'HEAD~', 't1')", - Expected: []sql.Row{{"t1", 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 2}}, + Expected: []sql.Row{{"t1", "added", true, true}}, }, }, }, @@ -1957,21 +2657,29 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "select * from dolt_diff_summary('HEAD~', 'HEAD', 't2')", - Expected: []sql.Row{{"t2", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + Expected: []sql.Row{{"t2", "renamed", true, true}}, }, { Query: "select * from dolt_diff_summary('HEAD~..HEAD', 't2')", - Expected: []sql.Row{{"t2", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + Expected: []sql.Row{{"t2", "renamed", true, true}}, + }, + { + Query: "select * from 
dolt_diff_summary('HEAD~', 'HEAD')", + Expected: []sql.Row{{"t2", "renamed", true, true}}, + }, + { + Query: "select * from dolt_diff_summary('HEAD~..HEAD')", + Expected: []sql.Row{{"t2", "renamed", true, true}}, }, { // Old table name can be matched as well Query: "select * from dolt_diff_summary('HEAD~', 'HEAD', 't1')", - Expected: []sql.Row{{"t1", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + Expected: []sql.Row{{"t1", "renamed", true, true}}, }, { // Old table name can be matched as well Query: "select * from dolt_diff_summary('HEAD~..HEAD', 't1')", - Expected: []sql.Row{{"t1", 1, 1, 0, 0, 2, 0, 0, 1, 2, 2, 4}}, + Expected: []sql.Row{{"t1", "renamed", true, true}}, }, }, }, @@ -1979,7 +2687,7 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Name: "add multiple columns, then set and unset a value. Should not show a diff", SetUpScript: []string{ "CREATE table t (pk int primary key);", - "Insert into t values (1);", + "insert into t values (1);", "CALL DOLT_ADD('.');", "CALL DOLT_COMMIT('-am', 'setup');", "alter table t add column col1 int;", @@ -1993,7 +2701,7 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, Assertions: []queries.ScriptTestAssertion{ { Query: "SELECT * from dolt_diff_summary('HEAD~2', 'HEAD');", - Expected: []sql.Row{{"t", 1, 0, 0, 0, 2, 0, 0, 1, 1, 1, 3}}, + Expected: []sql.Row{{"t", "modified", true, true}}, }, { Query: "SELECT * from dolt_diff_summary('HEAD~', 'HEAD');", @@ -2037,8 +2745,7 @@ inner join t as of @Commit3 on rows_unmodified = t.pk;`, { Query: "SELECT * from dolt_diff_summary('HEAD~', 'HEAD')", Expected: []sql.Row{ - {"t", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {"t2", 1, 3, 0, 0, 3, 0, 0, 1, 4, 1, 4}, + {"t2", "modified", true, false}, }, ExpectedWarning: dtables.PrimaryKeyChangeWarningCode, ExpectedWarningsCount: 1, diff --git a/go/store/chunks/memory_store.go b/go/store/chunks/memory_store.go index 42787f33cb..45d4202090 100644 --- a/go/store/chunks/memory_store.go +++ b/go/store/chunks/memory_store.go @@ -68,6 +68,9 
@@ func (ms *MemoryStorage) NewViewWithDefaultFormat() ChunkStore { // Get retrieves the Chunk with the Hash h, returning EmptyChunk if it's not // present. func (ms *MemoryStorage) Get(ctx context.Context, h hash.Hash) (Chunk, error) { + if err := ctx.Err(); err != nil { + return Chunk{}, err + } ms.mu.RLock() defer ms.mu.RUnlock() if c, ok := ms.data[h]; ok { @@ -207,6 +210,9 @@ func (ms *MemoryStoreView) errorIfDangling(ctx context.Context, addrs hash.HashS } func (ms *MemoryStoreView) Put(ctx context.Context, c Chunk, getAddrs GetAddrsCb) error { + if err := ctx.Err(); err != nil { + return err + } addrs, err := getAddrs(ctx, c) if err != nil { return err diff --git a/go/store/diff/summary.go b/go/store/diff/summary.go deleted file mode 100644 index 250e0d08bd..0000000000 --- a/go/store/diff/summary.go +++ /dev/null @@ -1,331 +0,0 @@ -// Copyright 2019 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// This file incorporates work covered by the following copyright and -// permission notice: -// -// Copyright 2016 Attic Labs, Inc. All rights reserved. 
-// Licensed under the Apache License, version 2.0: -// http://www.apache.org/licenses/LICENSE-2.0 - -package diff - -import ( - "context" - "fmt" - "sync/atomic" - - humanize "github.com/dustin/go-humanize" - "golang.org/x/sync/errgroup" - - "github.com/dolthub/dolt/go/store/d" - "github.com/dolthub/dolt/go/store/datas" - "github.com/dolthub/dolt/go/store/types" - "github.com/dolthub/dolt/go/store/util/status" -) - -// Summary prints a summary of the diff between two values to stdout. -func Summary(ctx context.Context, vr1 types.ValueReader, vr2 types.ValueReader, value1, value2 types.Value) { - if is1, err := datas.IsCommit(value1); err != nil { - panic(err) - } else if is1 { - if is2, err := datas.IsCommit(value2); err != nil { - panic(err) - } else if is2 { - fmt.Println("Comparing commit values") - - var err error - value1, err = datas.GetCommittedValue(ctx, vr1, value1) - d.PanicIfError(err) - - value2, err = datas.GetCommittedValue(ctx, vr2, value2) - d.PanicIfError(err) - } - } - - var singular, plural string - if value1.Kind() == value2.Kind() { - switch value1.Kind() { - case types.StructKind: - singular = "field" - plural = "fields" - case types.MapKind: - singular = "entry" - plural = "entries" - default: - singular = "value" - plural = "values" - } - } - - eg, ctx := errgroup.WithContext(ctx) - var rp atomic.Value - ch := make(chan diffSummaryProgress) - - eg.Go(func() (err error) { - defer close(ch) - defer func() { - if r := recover(); r != nil { - rp.Store(r) - err = fmt.Errorf("panic") - } - }() - err = diffSummary(ctx, ch, value1, value2) - return - }) - eg.Go(func() error { - acc := diffSummaryProgress{} - LOOP: - for { - select { - case p, ok := <-ch: - if !ok { - break LOOP - } - acc.Adds += p.Adds - acc.Removes += p.Removes - acc.Changes += p.Changes - acc.NewSize += p.NewSize - acc.OldSize += p.OldSize - if status.WillPrint() { - formatStatus(acc, singular, plural) - } - case <-ctx.Done(): - return ctx.Err() - } - } - formatStatus(acc, 
singular, plural) - status.Done() - return nil - }) - - if err := eg.Wait(); err != nil { - if r := rp.Load(); r != nil { - panic(r) - } - panic(err) - } - -} - -type diffSummaryProgress struct { - Adds, Removes, Changes, NewSize, OldSize uint64 -} - -func diffSummary(ctx context.Context, ch chan diffSummaryProgress, v1, v2 types.Value) error { - if !v1.Equals(v2) { - if ShouldDescend(v1, v2) { - var err error - switch v1.Kind() { - case types.ListKind: - err = diffSummaryList(ctx, ch, v1.(types.List), v2.(types.List)) - case types.MapKind: - err = diffSummaryMap(ctx, ch, v1.(types.Map), v2.(types.Map)) - case types.SetKind: - err = diffSummarySet(ctx, ch, v1.(types.Set), v2.(types.Set)) - case types.StructKind: - err = diffSummaryStructs(ctx, ch, v1.(types.Struct), v2.(types.Struct)) - default: - panic("Unrecognized type in diff function") - } - if err != nil { - return err - } - } else { - ch <- diffSummaryProgress{Adds: 1, Removes: 1, NewSize: 1, OldSize: 1} - } - } - return nil -} - -func diffSummaryList(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.List) error { - select { - case ch <- diffSummaryProgress{OldSize: v1.Len(), NewSize: v2.Len()}: - case <-ctx.Done(): - return ctx.Err() - } - - spliceChan := make(chan types.Splice) - eg, ctx := errgroup.WithContext(ctx) - - var rp atomic.Value - eg.Go(func() (err error) { - defer close(spliceChan) - defer func() { - if r := recover(); r != nil { - rp.Store(r) - err = fmt.Errorf("panic") - } - }() - return v2.Diff(ctx, v1, spliceChan) - }) - - eg.Go(func() (err error) { - defer func() { - if r := recover(); r != nil { - rp.Store(r) - err = fmt.Errorf("panic") - } - }() - LOOP: - for { - select { - case splice, ok := <-spliceChan: - if !ok { - break LOOP - } - var summary diffSummaryProgress - if splice.SpRemoved == splice.SpAdded { - summary = diffSummaryProgress{Changes: splice.SpRemoved} - } else { - summary = diffSummaryProgress{Adds: splice.SpAdded, Removes: splice.SpRemoved} - } - select { - 
case ch <- summary: - case <-ctx.Done(): - return ctx.Err() - } - case <-ctx.Done(): - return ctx.Err() - } - } - return nil - }) - - if err := eg.Wait(); err != nil { - if r := rp.Load(); r != nil { - panic(r) - } - return err - } - return nil -} - -func diffSummaryMap(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.Map) error { - return diffSummaryValueChanged(ctx, ch, v1.Len(), v2.Len(), func(ctx context.Context, changeChan chan<- types.ValueChanged) error { - return v2.Diff(ctx, v1, changeChan) - }) -} - -func diffSummarySet(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.Set) error { - return diffSummaryValueChanged(ctx, ch, v1.Len(), v2.Len(), func(ctx context.Context, changeChan chan<- types.ValueChanged) error { - return v2.Diff(ctx, v1, changeChan) - }) -} - -func diffSummaryStructs(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.Struct) error { - // TODO: Operate on values directly - t1, err := types.TypeOf(v1) - if err != nil { - return err - } - - t2, err := types.TypeOf(v2) - if err != nil { - return err - } - - size1 := uint64(t1.Desc.(types.StructDesc).Len()) - size2 := uint64(t2.Desc.(types.StructDesc).Len()) - return diffSummaryValueChanged(ctx, ch, size1, size2, func(ctx context.Context, changeChan chan<- types.ValueChanged) error { - return v2.Diff(ctx, v1, changeChan) - }) -} - -func diffSummaryValueChanged(ctx context.Context, ch chan<- diffSummaryProgress, oldSize, newSize uint64, f diffFunc) error { - select { - case ch <- diffSummaryProgress{OldSize: oldSize, NewSize: newSize}: - case <-ctx.Done(): - return ctx.Err() - } - - changeChan := make(chan types.ValueChanged) - - eg, ctx := errgroup.WithContext(ctx) - - var rp atomic.Value - eg.Go(func() (err error) { - defer close(changeChan) - defer func() { - if r := recover(); r != nil { - rp.Store(r) - err = fmt.Errorf("panic") - } - }() - return f(ctx, changeChan) - }) - eg.Go(func() error { - return reportChanges(ctx, ch, changeChan) - }) - 
if err := eg.Wait(); err != nil { - if r := rp.Load(); r != nil { - panic(r) - } - return err - } - return nil -} - -func reportChanges(ctx context.Context, ch chan<- diffSummaryProgress, changeChan chan types.ValueChanged) error { -LOOP: - for { - select { - case change, ok := <-changeChan: - if !ok { - break LOOP - } - var summary diffSummaryProgress - switch change.ChangeType { - case types.DiffChangeAdded: - summary = diffSummaryProgress{Adds: 1} - case types.DiffChangeRemoved: - summary = diffSummaryProgress{Removes: 1} - case types.DiffChangeModified: - summary = diffSummaryProgress{Changes: 1} - default: - panic("unknown change type") - } - select { - case ch <- summary: - return nil - case <-ctx.Done(): - return ctx.Err() - } - case <-ctx.Done(): - return ctx.Err() - } - } - return nil -} - -func formatStatus(acc diffSummaryProgress, singular, plural string) { - pluralize := func(singular, plural string, n uint64) string { - var noun string - if n != 1 { - noun = plural - } else { - noun = singular - } - return fmt.Sprintf("%s %s", humanize.Comma(int64(n)), noun) - } - - insertions := pluralize("insertion", "insertions", acc.Adds) - deletions := pluralize("deletion", "deletions", acc.Removes) - changes := pluralize("change", "changes", acc.Changes) - - oldValues := pluralize(singular, plural, acc.OldSize) - newValues := pluralize(singular, plural, acc.NewSize) - - status.Printf("%s (%.2f%%), %s (%.2f%%), %s (%.2f%%), (%s vs %s)", insertions, (float64(100*acc.Adds) / float64(acc.OldSize)), deletions, (float64(100*acc.Removes) / float64(acc.OldSize)), changes, (float64(100*acc.Changes) / float64(acc.OldSize)), oldValues, newValues) -} diff --git a/go/store/nbs/aws_table_persister.go b/go/store/nbs/aws_table_persister.go index f27c85b10e..242f399bf5 100644 --- a/go/store/nbs/aws_table_persister.go +++ b/go/store/nbs/aws_table_persister.go @@ -335,27 +335,28 @@ func (s partsByPartNum) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s3p awsTablePersister) 
ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) { +func (s3p awsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) { plan, err := planRangeCopyConjoin(sources, stats) if err != nil { - return nil, err + return nil, nil, err } if plan.chunkCount == 0 { - return emptyChunkSource{}, nil + return emptyChunkSource{}, nil, nil } t1 := time.Now() name := nameFromSuffixes(plan.suffixes()) err = s3p.executeCompactionPlan(ctx, plan, name.String()) if err != nil { - return nil, err + return nil, nil, err } verbose.Logger(ctx).Sugar().Debugf("Compacted table of %d Kb in %s", plan.totalCompressedData/1024, time.Since(t1)) tra := &s3TableReaderAt{&s3ObjectReader{s3: s3p.s3, bucket: s3p.bucket, readRl: s3p.rl, ns: s3p.ns}, name} - return newReaderFromIndexData(ctx, s3p.q, plan.mergedIndex, name, tra, s3BlockSize) + cs, err := newReaderFromIndexData(ctx, s3p.q, plan.mergedIndex, name, tra, s3BlockSize) + return cs, func() {}, err } func (s3p awsTablePersister) executeCompactionPlan(ctx context.Context, plan compactionPlan, key string) error { diff --git a/go/store/nbs/aws_table_persister_test.go b/go/store/nbs/aws_table_persister_test.go index 49df3f4ef8..e2cdc930f6 100644 --- a/go/store/nbs/aws_table_persister_test.go +++ b/go/store/nbs/aws_table_persister_test.go @@ -381,7 +381,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) { chunks := smallChunks[:len(smallChunks)-1] sources := makeSources(s3p, chunks) - src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) + src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) require.NoError(t, err) defer src.close() for _, s := range sources { @@ -402,7 +402,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) { s3p := newPersister(s3svc, ddb) sources := makeSources(s3p, smallChunks) - src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) + src, _, err := 
s3p.ConjoinAll(context.Background(), sources, &Stats{}) require.NoError(t, err) defer src.close() for _, s := range sources { @@ -443,7 +443,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) { sources[i], err = s3p.Persist(context.Background(), mt, nil, &Stats{}) require.NoError(t, err) } - src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) + src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) require.NoError(t, err) defer src.close() for _, s := range sources { @@ -484,7 +484,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) { require.NoError(t, err) sources := chunkSources{cs1, cs2} - src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) + src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) require.NoError(t, err) defer src.close() for _, s := range sources { @@ -539,7 +539,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) { require.NoError(t, err) sources = append(sources, cs) - src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) + src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{}) require.NoError(t, err) defer src.close() for _, s := range sources { diff --git a/go/store/nbs/bs_persister.go b/go/store/nbs/bs_persister.go index cfb6e9136d..b41e694847 100644 --- a/go/store/nbs/bs_persister.go +++ b/go/store/nbs/bs_persister.go @@ -78,7 +78,7 @@ func (bsp *blobstorePersister) Persist(ctx context.Context, mt *memTable, haver } // ConjoinAll implements tablePersister. 
-func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) { +func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) { var sized []sourceWithSize for _, src := range sources { sized = append(sized, sourceWithSize{src, src.currentSize()}) @@ -86,7 +86,7 @@ func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSour plan, err := planConjoin(sized, stats) if err != nil { - return nil, err + return nil, nil, err } address := nameFromSuffixes(plan.suffixes()) name := address.String() @@ -101,24 +101,25 @@ func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSour for _, src := range plan.sources.sws { sub, err := bsp.getRecordsSubObject(ctx, src.source) if err != nil { - return nil, err + return nil, nil, err } conjoinees = append(conjoinees, sub) } // first concatenate all the sub-objects to create a composite sub-object if _, err = bsp.bs.Concatenate(ctx, name+tableRecordsExt, conjoinees); err != nil { - return nil, err + return nil, nil, err } if _, err = blobstore.PutBytes(ctx, bsp.bs, name+tableTailExt, plan.mergedIndex); err != nil { - return nil, err + return nil, nil, err } // then concatenate into a final blob if _, err = bsp.bs.Concatenate(ctx, name, []string{name + tableRecordsExt, name + tableTailExt}); err != nil { - return emptyChunkSource{}, err + return emptyChunkSource{}, nil, err } - return newBSChunkSource(ctx, bsp.bs, address, plan.chunkCount, bsp.q, stats) + cs, err := newBSChunkSource(ctx, bsp.bs, address, plan.chunkCount, bsp.q, stats) + return cs, func() {}, err } func (bsp *blobstorePersister) getRecordsSubObject(ctx context.Context, cs chunkSource) (name string, err error) { @@ -233,6 +234,14 @@ type bsTableReaderAt struct { bs blobstore.Blobstore } +func (bsTRA *bsTableReaderAt) Close() error { + return nil +} + +func (bsTRA *bsTableReaderAt) clone() 
(tableReaderAt, error) { + return bsTRA, nil +} + func (bsTRA *bsTableReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) { rc, _, err := bsTRA.bs.Get(ctx, bsTRA.key, blobstore.AllRange) return rc, err diff --git a/go/store/nbs/conjoiner.go b/go/store/nbs/conjoiner.go index 68cf6bc489..bc0085b84c 100644 --- a/go/store/nbs/conjoiner.go +++ b/go/store/nbs/conjoiner.go @@ -94,6 +94,7 @@ func (c noopConjoiner) chooseConjoinees(sources []tableSpec) (conjoinees, keeper func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) (manifestContents, error) { var conjoined tableSpec var conjoinees, keepers, appendixSpecs []tableSpec + var cleanup cleanupFunc for { if conjoinees == nil { @@ -110,7 +111,7 @@ func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents, return manifestContents{}, err } - conjoined, err = conjoinTables(ctx, conjoinees, p, stats) + conjoined, cleanup, err = conjoinTables(ctx, conjoinees, p, stats) if err != nil { return manifestContents{}, err } @@ -140,11 +141,18 @@ func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents, } if newContents.lock == upstream.lock { + cleanup() return upstream, nil } - // Optimistic lock failure. Someone else moved to the root, the set of tables, or both out from under us. - // If we can re-use the conjoin we already performed, we want to try again. Currently, we will only do so if ALL conjoinees are still present upstream. If we can't re-use...then someone else almost certainly landed a conjoin upstream. In this case, bail and let clients ask again if they think they still can't proceed. + // Optimistic lock failure. Someone else moved to the root, the + // set of tables, or both out from under us. If we can re-use + // the conjoin we already performed, we want to try again. + // Currently, we will only do so if ALL conjoinees are still + // present upstream. 
If we can't re-use...then someone else + // almost certainly landed a conjoin upstream. In this case, + // bail and let clients ask again if they think they still + // can't proceed. // If the appendix has changed we simply bail // and let the client retry @@ -186,7 +194,7 @@ func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents, } } -func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister, stats *Stats) (conjoined tableSpec, err error) { +func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister, stats *Stats) (conjoined tableSpec, cleanup cleanupFunc, err error) { eg, ectx := errgroup.WithContext(ctx) toConjoin := make(chunkSources, len(conjoinees)) @@ -205,14 +213,14 @@ func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister } }() if err = eg.Wait(); err != nil { - return tableSpec{}, err + return tableSpec{}, nil, err } t1 := time.Now() - conjoinedSrc, err := p.ConjoinAll(ctx, toConjoin, stats) + conjoinedSrc, cleanup, err := p.ConjoinAll(ctx, toConjoin, stats) if err != nil { - return tableSpec{}, err + return tableSpec{}, nil, err } defer conjoinedSrc.close() @@ -221,7 +229,7 @@ func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister cnt, err := conjoinedSrc.count() if err != nil { - return tableSpec{}, err + return tableSpec{}, nil, err } stats.ChunksPerConjoin.Sample(uint64(cnt)) @@ -229,9 +237,9 @@ func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister h := conjoinedSrc.hash() cnt, err = conjoinedSrc.count() if err != nil { - return tableSpec{}, err + return tableSpec{}, nil, err } - return tableSpec{h, cnt}, nil + return tableSpec{h, cnt}, cleanup, nil } func toSpecs(srcs chunkSources) ([]tableSpec, error) { diff --git a/go/store/nbs/dynamo_table_reader.go b/go/store/nbs/dynamo_table_reader.go index 1c15bae08a..8fb17da449 100644 --- a/go/store/nbs/dynamo_table_reader.go +++ 
b/go/store/nbs/dynamo_table_reader.go @@ -54,6 +54,14 @@ func (t tableNotInDynamoErr) Error() string { return fmt.Sprintf("NBS table %s not present in DynamoDB table %s", t.nbs, t.dynamo) } +func (dtra *dynamoTableReaderAt) Close() error { + return nil +} + +func (dtra *dynamoTableReaderAt) clone() (tableReaderAt, error) { + return dtra, nil +} + func (dtra *dynamoTableReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) { data, err := dtra.ddb.ReadTable(ctx, dtra.h, &Stats{}) if err != nil { diff --git a/go/store/nbs/fd_cache.go b/go/store/nbs/fd_cache.go deleted file mode 100644 index aa45667be1..0000000000 --- a/go/store/nbs/fd_cache.go +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2019 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// This file incorporates work covered by the following copyright and -// permission notice: -// -// Copyright 2017 Attic Labs, Inc. All rights reserved. -// Licensed under the Apache License, version 2.0: -// http://www.apache.org/licenses/LICENSE-2.0 - -package nbs - -import ( - "os" - "sort" - "sync" -) - -func newFDCache(targetSize int) *fdCache { - return &fdCache{targetSize: targetSize, cache: map[string]fdCacheEntry{}} -} - -// fdCache ref-counts open file descriptors, but doesn't keep a hard cap on -// the number of open files. Once the cache's target size is exceeded, opening -// a new file causes the cache to try to get the cache back to the target size -// by closing fds with zero refs. 
If there aren't enough such fds, fdCache -// gives up and tries again next time a caller refs a file. -type fdCache struct { - targetSize int - mu sync.Mutex - cache map[string]fdCacheEntry -} - -type fdCacheEntry struct { - refCount uint32 - f *os.File -} - -// RefFile returns an opened *os.File for the file at |path|, or an error -// indicating why the file could not be opened. If the cache already had an -// entry for |path|, RefFile increments its refcount and returns the cached -// pointer. If not, it opens the file and caches the pointer for others to -// use. If RefFile returns an error, it's guaranteed that no refCounts were -// changed, so it's an error to make a subsequent call to UnrefFile(). -// This is intended for clients that hold fds for extremely short periods. -func (fc *fdCache) RefFile(path string) (f *os.File, err error) { - refFile := func() *os.File { - if ce, present := fc.cache[path]; present { - ce.refCount++ - fc.cache[path] = ce - return ce.f - } - return nil - } - - f = func() *os.File { - fc.mu.Lock() - defer fc.mu.Unlock() - return refFile() - }() - if f != nil { - return f, nil - } - - // Very much want this to be outside the lock, but the downside is that multiple callers may get here concurrently. That means we need to deal with the raciness below. - f, err = os.Open(path) - if err != nil { - return nil, err - } - - fc.mu.Lock() - defer fc.mu.Unlock() - if cached := refFile(); cached != nil { - // Someone beat us to it, so close f and return cached fd - f.Close() - return cached, nil - } - // I won the race! - fc.cache[path] = fdCacheEntry{f: f, refCount: 1} - return f, nil -} - -// UnrefFile reduces the refcount of the entry at |path|. If the cache is over -// |fc.targetSize|, UnrefFile makes a best effort to shrink the cache by dumping -// entries with a zero refcount. 
If there aren't enough zero refcount entries -// to drop to get the cache back to |fc.targetSize|, the cache will remain -// over |fc.targetSize| until the next call to UnrefFile(). -func (fc *fdCache) UnrefFile(path string) error { - fc.mu.Lock() - defer fc.mu.Unlock() - if ce, present := fc.cache[path]; present { - ce.refCount-- - fc.cache[path] = ce - } - if len(fc.cache) > fc.targetSize { - // Sadly, we can't remove items from a map while iterating, so we'll record the stuff we want to drop and then do it after - needed := len(fc.cache) - fc.targetSize - toDrop := make([]string, 0, needed) - for p, ce := range fc.cache { - if ce.refCount != 0 { - continue - } - toDrop = append(toDrop, p) - err := ce.f.Close() - - if err != nil { - return err - } - - needed-- - if needed == 0 { - break - } - } - for _, p := range toDrop { - delete(fc.cache, p) - } - } - - return nil -} - -// ShrinkCache forcefully removes all file handles with a refcount of zero. -func (fc *fdCache) ShrinkCache() error { - fc.mu.Lock() - defer fc.mu.Unlock() - toDrop := make([]string, 0, len(fc.cache)) - for p, ce := range fc.cache { - if ce.refCount != 0 { - continue - } - toDrop = append(toDrop, p) - err := ce.f.Close() - - if err != nil { - return err - } - } - - for _, p := range toDrop { - delete(fc.cache, p) - } - - return nil -} - -// Drop dumps the entire cache and closes all currently open files. -func (fc *fdCache) Drop() { - fc.mu.Lock() - defer fc.mu.Unlock() - for _, ce := range fc.cache { - ce.f.Close() - } - fc.cache = map[string]fdCacheEntry{} -} - -// reportEntries is meant for testing. 
-func (fc *fdCache) reportEntries() sort.StringSlice { - fc.mu.Lock() - defer fc.mu.Unlock() - ret := make(sort.StringSlice, 0, len(fc.cache)) - for p := range fc.cache { - ret = append(ret, p) - } - sort.Sort(ret) - return ret -} diff --git a/go/store/nbs/fd_cache_test.go b/go/store/nbs/fd_cache_test.go deleted file mode 100644 index 7b538b02e7..0000000000 --- a/go/store/nbs/fd_cache_test.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2019 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// This file incorporates work covered by the following copyright and -// permission notice: -// -// Copyright 2017 Attic Labs, Inc. All rights reserved. 
-// Licensed under the Apache License, version 2.0: -// http://www.apache.org/licenses/LICENSE-2.0 - -package nbs - -import ( - "fmt" - "os" - "path/filepath" - "sort" - "sync" - "testing" - - "github.com/dolthub/dolt/go/libraries/utils/file" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestFDCache(t *testing.T) { - dir := makeTempDir(t) - defer file.RemoveAll(dir) - - paths := [3]string{} - for i := range paths { - name := fmt.Sprintf("file%d", i) - paths[i] = filepath.Join(dir, name) - err := os.WriteFile(paths[i], []byte(name), 0644) - require.NoError(t, err) - } - - refNoError := func(fc *fdCache, p string, assert *assert.Assertions) *os.File { - f, err := fc.RefFile(p) - require.NoError(t, err) - assert.NotNil(f) - return f - } - - t.Run("ConcurrentOpen", func(t *testing.T) { - assert := assert.New(t) - concurrency := 3 - fc := newFDCache(3) - defer fc.Drop() - - trigger := make(chan struct{}) - wg := sync.WaitGroup{} - for i := 0; i < concurrency; i++ { - wg.Add(1) - go func() { - defer wg.Done() - <-trigger - fc.RefFile(paths[0]) - }() - } - close(trigger) - wg.Wait() - - present := fc.reportEntries() - if assert.Len(present, 1) { - ce := fc.cache[present[0]] - assert.EqualValues(concurrency, ce.refCount) - } - }) - - t.Run("NoEvictions", func(t *testing.T) { - assert := assert.New(t) - fc := newFDCache(2) - defer fc.Drop() - f := refNoError(fc, paths[0], assert) - - f2 := refNoError(fc, paths[1], assert) - assert.NotEqual(f, f2) - - dup := refNoError(fc, paths[0], assert) - assert.Equal(f, dup) - }) - - t.Run("Evictions", func(t *testing.T) { - assert := assert.New(t) - fc := newFDCache(1) - defer fc.Drop() - - f0 := refNoError(fc, paths[0], assert) - f1 := refNoError(fc, paths[1], assert) - assert.NotEqual(f0, f1) - - // f0 wasn't evicted, because that doesn't happen until UnrefFile() - dup := refNoError(fc, paths[0], assert) - assert.Equal(f0, dup) - - expected := sort.StringSlice(paths[:2]) - 
sort.Sort(expected) - assert.EqualValues(expected, fc.reportEntries()) - - // Unreffing f1 now should evict it - err := fc.UnrefFile(paths[1]) - require.NoError(t, err) - assert.EqualValues(paths[:1], fc.reportEntries()) - - // Bring f1 back so we can test multiple evictions in a row - f1 = refNoError(fc, paths[1], assert) - assert.NotEqual(f0, f1) - - // After adding f3, we should be able to evict both f0 and f1 - f2 := refNoError(fc, paths[2], assert) - assert.NotEqual(f0, f2) - assert.NotEqual(f1, f2) - - err = fc.UnrefFile(paths[0]) - require.NoError(t, err) - err = fc.UnrefFile(paths[0]) - require.NoError(t, err) - err = fc.UnrefFile(paths[1]) - require.NoError(t, err) - - assert.EqualValues(paths[2:], fc.reportEntries()) - }) -} diff --git a/go/store/nbs/file_table_persister.go b/go/store/nbs/file_table_persister.go index a22a59951f..b3a339591b 100644 --- a/go/store/nbs/file_table_persister.go +++ b/go/store/nbs/file_table_persister.go @@ -33,20 +33,17 @@ import ( "time" "github.com/dolthub/dolt/go/libraries/utils/file" - "github.com/dolthub/dolt/go/store/d" "github.com/dolthub/dolt/go/store/util/tempfiles" ) const tempTablePrefix = "nbs_table_" -func newFSTablePersister(dir string, fc *fdCache, q MemoryQuotaProvider) tablePersister { - d.PanicIfTrue(fc == nil) - return &fsTablePersister{dir, fc, q} +func newFSTablePersister(dir string, q MemoryQuotaProvider) tablePersister { + return &fsTablePersister{dir, q} } type fsTablePersister struct { dir string - fc *fdCache q MemoryQuotaProvider } @@ -54,7 +51,7 @@ var _ tablePersister = &fsTablePersister{} var _ tableFilePersister = &fsTablePersister{} func (ftp *fsTablePersister) Open(ctx context.Context, name addr, chunkCount uint32, stats *Stats) (chunkSource, error) { - return newFileTableReader(ctx, ftp.dir, name, chunkCount, ftp.q, ftp.fc) + return newFileTableReader(ctx, ftp.dir, name, chunkCount, ftp.q) } func (ftp *fsTablePersister) Exists(ctx context.Context, name addr, chunkCount uint32, stats *Stats) 
(bool, error) { @@ -154,11 +151,6 @@ func (ftp *fsTablePersister) persistTable(ctx context.Context, name addr, data [ } newName := filepath.Join(ftp.dir, name.String()) - err = ftp.fc.ShrinkCache() - - if err != nil { - return nil, err - } err = file.Rename(tempName, newName) @@ -169,15 +161,14 @@ func (ftp *fsTablePersister) persistTable(ctx context.Context, name addr, data [ return ftp.Open(ctx, name, chunkCount, stats) } -func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) { +func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) { plan, err := planRangeCopyConjoin(sources, stats) - if err != nil { - return emptyChunkSource{}, err + return emptyChunkSource{}, nil, err } if plan.chunkCount == 0 { - return emptyChunkSource{}, nil + return emptyChunkSource{}, nil, nil } name := nameFromSuffixes(plan.suffixes()) @@ -224,18 +215,24 @@ func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSource return temp.Name(), nil }() - if err != nil { - return nil, err + return nil, nil, err } err = file.Rename(tempName, filepath.Join(ftp.dir, name.String())) - if err != nil { - return nil, err + return nil, nil, err } - return ftp.Open(ctx, name, plan.chunkCount, stats) + cs, err := ftp.Open(ctx, name, plan.chunkCount, stats) + if err != nil { + return nil, nil, err + } + return cs, func() { + for _, s := range sources { + file.Remove(filepath.Join(ftp.dir, s.hash().String())) + } + }, nil } func (ftp *fsTablePersister) PruneTableFiles(ctx context.Context, contents manifestContents, mtime time.Time) error { @@ -247,12 +244,6 @@ func (ftp *fsTablePersister) PruneTableFiles(ctx context.Context, contents manif return err } - err = ftp.fc.ShrinkCache() - - if err != nil { - return err - } - ea := make(gcErrAccum) for _, info := range fileInfos { if info.IsDir() { diff --git a/go/store/nbs/file_table_persister_test.go 
b/go/store/nbs/file_table_persister_test.go index 151a632d01..c32ca49364 100644 --- a/go/store/nbs/file_table_persister_test.go +++ b/go/store/nbs/file_table_persister_test.go @@ -35,57 +35,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestFSTableCacheOnOpen(t *testing.T) { - assert := assert.New(t) - dir := makeTempDir(t) - defer file.RemoveAll(dir) - - names := []addr{} - cacheSize := 2 - fc := newFDCache(cacheSize) - defer fc.Drop() - fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{}) - - // Create some tables manually, load them into the cache - func() { - for i := 0; i < cacheSize; i++ { - name, err := writeTableData(dir, []byte{byte(i)}) - require.NoError(t, err) - names = append(names, name) - } - for _, name := range names { - tr, err := fts.Open(context.Background(), name, 1, nil) - require.NoError(t, err) - defer tr.close() - } - }() - - // Tables should still be cached and on disk - for i, name := range names { - src, err := fts.Open(context.Background(), name, 1, nil) - require.NoError(t, err) - defer src.close() - h := computeAddr([]byte{byte(i)}) - assert.True(src.has(h)) - } - - // Kick a table out of the cache - name, err := writeTableData(dir, []byte{0xff}) - require.NoError(t, err) - tr, err := fts.Open(context.Background(), name, 1, nil) - require.NoError(t, err) - defer tr.close() - - present := fc.reportEntries() - // Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size - assert.Len(present, cacheSize) - - err = fc.ShrinkCache() - require.NoError(t, err) - err = removeTables(dir, names...) 
- require.NoError(t, err) -} - func makeTempDir(t *testing.T) string { dir, err := os.MkdirTemp("", "") require.NoError(t, err) @@ -122,9 +71,7 @@ func TestFSTablePersisterPersist(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) defer file.RemoveAll(dir) - fc := newFDCache(defaultMaxTables) - defer fc.Drop() - fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{}) + fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{}) src, err := persistTableData(fts, testChunks...) require.NoError(t, err) @@ -163,9 +110,7 @@ func TestFSTablePersisterPersistNoData(t *testing.T) { dir := makeTempDir(t) defer file.RemoveAll(dir) - fc := newFDCache(defaultMaxTables) - defer fc.Drop() - fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{}) + fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{}) src, err := fts.Persist(context.Background(), mt, existingTable, &Stats{}) require.NoError(t, err) @@ -175,41 +120,6 @@ func TestFSTablePersisterPersistNoData(t *testing.T) { assert.True(os.IsNotExist(err), "%v", err) } -func TestFSTablePersisterCacheOnPersist(t *testing.T) { - assert := assert.New(t) - dir := makeTempDir(t) - fc := newFDCache(1) - defer fc.Drop() - fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{}) - defer file.RemoveAll(dir) - - var name addr - func() { - src, err := persistTableData(fts, testChunks...) 
- require.NoError(t, err) - defer src.close() - name = src.hash() - }() - - // Table should still be cached - src, err := fts.Open(context.Background(), name, uint32(len(testChunks)), nil) - require.NoError(t, err) - defer src.close() - assertChunksInReader(testChunks, src, assert) - - // Evict |name| from cache - tr, err := persistTableData(fts, []byte{0xff}) - require.NoError(t, err) - defer tr.close() - - present := fc.reportEntries() - // Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size - assert.Len(present, 1) - - err = removeTables(dir, name) - require.NoError(t, err) -} - func TestFSTablePersisterConjoinAll(t *testing.T) { ctx := context.Background() assert := assert.New(t) @@ -218,9 +128,7 @@ func TestFSTablePersisterConjoinAll(t *testing.T) { dir := makeTempDir(t) defer file.RemoveAll(dir) - fc := newFDCache(len(sources)) - defer fc.Drop() - fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{}) + fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{}) for i, c := range testChunks { randChunk := make([]byte, (i+1)*13) @@ -237,7 +145,7 @@ func TestFSTablePersisterConjoinAll(t *testing.T) { } }() - src, err := fts.ConjoinAll(ctx, sources, &Stats{}) + src, _, err := fts.ConjoinAll(ctx, sources, &Stats{}) require.NoError(t, err) defer src.close() @@ -251,10 +159,6 @@ func TestFSTablePersisterConjoinAll(t *testing.T) { defer tr.close() assertChunksInReader(testChunks, tr, assert) } - - present := fc.reportEntries() - // Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size - assert.Len(present, len(sources)) } func TestFSTablePersisterConjoinAllDups(t *testing.T) { @@ -262,29 +166,24 @@ func TestFSTablePersisterConjoinAllDups(t *testing.T) { assert := assert.New(t) dir := makeTempDir(t) defer file.RemoveAll(dir) - fc := newFDCache(defaultMaxTables) - defer fc.Drop() - fts := newFSTablePersister(dir, fc, 
&UnlimitedQuotaProvider{}) + fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{}) reps := 3 sources := make(chunkSources, reps) - for i := 0; i < reps; i++ { - mt := newMemTable(1 << 10) - for _, c := range testChunks { - mt.addChunk(computeAddr(c), c) - } - - var err error - sources[i], err = fts.Persist(ctx, mt, nil, &Stats{}) - require.NoError(t, err) + mt := newMemTable(1 << 10) + for _, c := range testChunks { + mt.addChunk(computeAddr(c), c) } - defer func() { - for _, s := range sources { - s.close() - } - }() - src, err := fts.ConjoinAll(ctx, sources, &Stats{}) + var err error + sources[0], err = fts.Persist(ctx, mt, nil, &Stats{}) + require.NoError(t, err) + sources[1], err = sources[0].clone() + require.NoError(t, err) + sources[2], err = sources[0].clone() + require.NoError(t, err) + + src, _, err := fts.ConjoinAll(ctx, sources, &Stats{}) require.NoError(t, err) defer src.close() diff --git a/go/store/nbs/file_table_reader.go b/go/store/nbs/file_table_reader.go index e6e14f0c4e..3dfdf367ef 100644 --- a/go/store/nbs/file_table_reader.go +++ b/go/store/nbs/file_table_reader.go @@ -33,8 +33,7 @@ import ( type fileTableReader struct { tableReader - fc *fdCache - h addr + h addr } const ( @@ -52,16 +51,15 @@ func tableFileExists(ctx context.Context, dir string, h addr) (bool, error) { return err == nil, err } -func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint32, q MemoryQuotaProvider, fc *fdCache) (cs chunkSource, err error) { +func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint32, q MemoryQuotaProvider) (cs chunkSource, err error) { path := filepath.Join(dir, h.String()) + var f *os.File index, sz, err := func() (ti onHeapTableIndex, sz int64, err error) { - // Be careful with how |f| is used below. |RefFile| returns a cached // os.File pointer so the code needs to use f in a concurrency-safe // manner. Moving the file offset is BAD. 
- var f *os.File - f, err = fc.RefFile(path) + f, err = os.Open(path) if err != nil { return } @@ -103,14 +101,6 @@ func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint return } - defer func() { - unrefErr := fc.UnrefFile(path) - if unrefErr != nil && err == nil { - q.ReleaseQuotaBytes(len(b)) - err = unrefErr - } - }() - ti, err = parseTableIndex(ctx, b, q) if err != nil { q.ReleaseQuotaBytes(len(b)) @@ -120,72 +110,77 @@ func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint return }() if err != nil { + if f != nil { + f.Close() + } return nil, err } if chunkCount != index.chunkCount() { index.Close() + f.Close() return nil, errors.New("unexpected chunk count") } - tr, err := newTableReader(index, &cacheReaderAt{path, fc, sz}, fileBlockSize) + tr, err := newTableReader(index, &fileReaderAt{f, path, sz}, fileBlockSize) if err != nil { index.Close() + f.Close() return nil, err } return &fileTableReader{ tr, - fc, h, }, nil } -func (mmtr *fileTableReader) hash() addr { - return mmtr.h +func (ftr *fileTableReader) hash() addr { + return ftr.h } -func (mmtr *fileTableReader) close() error { - return mmtr.tableReader.close() +func (ftr *fileTableReader) Close() error { + return ftr.tableReader.close() } -func (mmtr *fileTableReader) clone() (chunkSource, error) { - tr, err := mmtr.tableReader.clone() +func (ftr *fileTableReader) clone() (chunkSource, error) { + tr, err := ftr.tableReader.clone() if err != nil { return &fileTableReader{}, err } - return &fileTableReader{tr, mmtr.fc, mmtr.h}, nil + return &fileTableReader{tr, ftr.h}, nil } -type cacheReaderAt struct { +type fileReaderAt struct { + f *os.File path string - fc *fdCache sz int64 } -func (cra *cacheReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) { - return io.NopCloser(io.LimitReader(&readerAdapter{cra, 0, ctx}, cra.sz)), nil +func (fra *fileReaderAt) clone() (tableReaderAt, error) { + f, err := os.Open(fra.path) + if err != nil { + return 
nil, err + } + return &fileReaderAt{ + f, + fra.path, + fra.sz, + }, nil } -func (cra *cacheReaderAt) ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) { - var r io.ReaderAt +func (fra *fileReaderAt) Close() error { + return fra.f.Close() +} + +func (fra *fileReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) { + return os.Open(fra.path) +} + +func (fra *fileReaderAt) ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) { t1 := time.Now() - - if r, err = cra.fc.RefFile(cra.path); err != nil { - return - } - defer func() { stats.FileBytesPerRead.Sample(uint64(len(p))) stats.FileReadLatency.SampleTimeSince(t1) }() - - defer func() { - unrefErr := cra.fc.UnrefFile(cra.path) - - if err == nil { - err = unrefErr - } - }() - - return r.ReadAt(p, off) + return fra.f.ReadAt(p, off) } diff --git a/go/store/nbs/file_table_reader_test.go b/go/store/nbs/file_table_reader_test.go index 8df22ebbce..82400776cf 100644 --- a/go/store/nbs/file_table_reader_test.go +++ b/go/store/nbs/file_table_reader_test.go @@ -40,9 +40,6 @@ func TestMmapTableReader(t *testing.T) { require.NoError(t, err) defer file.RemoveAll(dir) - fc := newFDCache(1) - defer fc.Drop() - chunks := [][]byte{ []byte("hello2"), []byte("goodbye2"), @@ -54,7 +51,7 @@ func TestMmapTableReader(t *testing.T) { err = os.WriteFile(filepath.Join(dir, h.String()), tableData, 0666) require.NoError(t, err) - trc, err := newFileTableReader(ctx, dir, h, uint32(len(chunks)), &UnlimitedQuotaProvider{}, fc) + trc, err := newFileTableReader(ctx, dir, h, uint32(len(chunks)), &UnlimitedQuotaProvider{}) require.NoError(t, err) defer trc.close() assertChunksInReader(chunks, trc, assert) diff --git a/go/store/nbs/fs_table_cache.go b/go/store/nbs/fs_table_cache.go deleted file mode 100644 index 7b48f60a53..0000000000 --- a/go/store/nbs/fs_table_cache.go +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright 2019 Dolthub, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// This file incorporates work covered by the following copyright and -// permission notice: -// -// Copyright 2017 Attic Labs, Inc. All rights reserved. -// Licensed under the Apache License, version 2.0: -// http://www.apache.org/licenses/LICENSE-2.0 - -package nbs - -import ( - "errors" - "io" - "os" - "path/filepath" - "strings" - "sync" - - "github.com/dolthub/dolt/go/libraries/utils/file" - - "github.com/dolthub/dolt/go/store/atomicerr" - "github.com/dolthub/dolt/go/store/util/sizecache" - "github.com/dolthub/dolt/go/store/util/tempfiles" -) - -type tableCache interface { - checkout(h addr) (io.ReaderAt, error) - checkin(h addr) error - store(h addr, data io.Reader, size uint64) error -} - -type fsTableCache struct { - dir string - cache *sizecache.SizeCache - fd *fdCache -} - -func newFSTableCache(dir string, cacheSize uint64, maxOpenFds int) (*fsTableCache, error) { - ftc := &fsTableCache{dir: dir, fd: newFDCache(maxOpenFds)} - ftc.cache = sizecache.NewWithExpireCallback(cacheSize, func(elm interface{}) { - ftc.expire(elm.(addr)) - }) - - err := ftc.init(maxOpenFds) - - if err != nil { - return nil, err - } - - return ftc, nil -} - -func (ftc *fsTableCache) init(concurrency int) error { - type finfo struct { - path string - h addr - size uint64 - } - infos := make(chan finfo) - errc := make(chan error, 1) - go func() { - isTableFile := func(info os.FileInfo) bool { - return info.Mode().IsRegular() 
&& ValidateAddr(info.Name()) - } - isTempTableFile := func(info os.FileInfo) bool { - return info.Mode().IsRegular() && strings.HasPrefix(info.Name(), tempTablePrefix) - } - defer close(errc) - defer close(infos) - // No select needed for this send, since errc is buffered. - errc <- filepath.Walk(ftc.dir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if path == ftc.dir { - return nil - } - if isTempTableFile(info) { - // ignore failure to remove temp file - _ = file.Remove(path) - return nil - } - if !isTableFile(info) { - return errors.New(path + " is not a table file; cache dir must contain only table files") - } - - ad, err := parseAddr(info.Name()) - - if err != nil { - return err - } - - infos <- finfo{path, ad, uint64(info.Size())} - return nil - }) - }() - - ae := atomicerr.New() - wg := sync.WaitGroup{} - wg.Add(concurrency) - for i := 0; i < concurrency; i++ { - go func() { - defer wg.Done() - for info := range infos { - if ae.IsSet() { - break - } - - ftc.cache.Add(info.h, info.size, true) - _, err := ftc.fd.RefFile(info.path) - - if err != nil { - ae.SetIfError(err) - break - } - - err = ftc.fd.UnrefFile(info.path) - - if err != nil { - ae.SetIfError(err) - break - } - } - }() - } - wg.Wait() - - err := <-errc - - if err != nil { - return err - } - - if err := ae.Get(); err != nil { - return err - } - - return nil -} - -func (ftc *fsTableCache) checkout(h addr) (io.ReaderAt, error) { - if _, ok := ftc.cache.Get(h); !ok { - return nil, nil - } - - fd, err := ftc.fd.RefFile(filepath.Join(ftc.dir, h.String())) - - if err != nil { - return nil, err - } - - return fd, nil -} - -func (ftc *fsTableCache) checkin(h addr) error { - return ftc.fd.UnrefFile(filepath.Join(ftc.dir, h.String())) -} - -func (ftc *fsTableCache) store(h addr, data io.Reader, size uint64) error { - path := filepath.Join(ftc.dir, h.String()) - tempName, err := func() (name string, ferr error) { - var temp *os.File - temp, ferr = 
tempfiles.MovableTempFileProvider.NewFile(ftc.dir, tempTablePrefix) - - if ferr != nil { - return "", ferr - } - - defer func() { - closeErr := temp.Close() - - if ferr == nil { - ferr = closeErr - } - }() - - _, ferr = io.Copy(temp, data) - - if ferr != nil { - return "", ferr - } - - return temp.Name(), nil - }() - - if err != nil { - return err - } - - err = ftc.fd.ShrinkCache() - - if err != nil { - return err - } - - err = file.Rename(tempName, path) - - if err != nil { - return err - } - - ftc.cache.Add(h, size, true) - - // Prime the file in the fd cache ignore err - if _, err = ftc.fd.RefFile(path); err == nil { - err := ftc.fd.UnrefFile(path) - - if err != nil { - return err - } - } - - return nil -} - -func (ftc *fsTableCache) expire(h addr) error { - return file.Remove(filepath.Join(ftc.dir, h.String())) -} diff --git a/go/store/nbs/fs_table_cache_test.go b/go/store/nbs/fs_table_cache_test.go deleted file mode 100644 index 65c0d9c4ad..0000000000 --- a/go/store/nbs/fs_table_cache_test.go +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2019 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// This file incorporates work covered by the following copyright and -// permission notice: -// -// Copyright 2017 Attic Labs, Inc. All rights reserved. 
-// Licensed under the Apache License, version 2.0: -// http://www.apache.org/licenses/LICENSE-2.0 - -package nbs - -import ( - "bytes" - "io" - "os" - "path/filepath" - "sort" - "testing" - - "github.com/dolthub/dolt/go/libraries/utils/file" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestFSTableCache(t *testing.T) { - datas := [][]byte{[]byte("hello"), []byte("world"), []byte("goodbye")} - sort.SliceStable(datas, func(i, j int) bool { return len(datas[i]) < len(datas[j]) }) - - t.Run("ExpireLRU", func(t *testing.T) { - t.Parallel() - dir := makeTempDir(t) - defer file.RemoveAll(dir) - - sum := 0 - for _, s := range datas[1:] { - sum += len(s) - } - - tc, err := newFSTableCache(dir, uint64(sum), len(datas)) - require.NoError(t, err) - for _, d := range datas { - err := tc.store(computeAddr(d), bytes.NewReader(d), uint64(len(d))) - require.NoError(t, err) - } - - expiredName := computeAddr(datas[0]) - r, err := tc.checkout(expiredName) - require.NoError(t, err) - assert.Nil(t, r) - _, fserr := os.Stat(filepath.Join(dir, expiredName.String())) - assert.True(t, os.IsNotExist(fserr)) - - for _, d := range datas[1:] { - name := computeAddr(d) - r, err := tc.checkout(name) - require.NoError(t, err) - assert.NotNil(t, r) - assertDataInReaderAt(t, d, r) - _, fserr := os.Stat(filepath.Join(dir, name.String())) - assert.False(t, os.IsNotExist(fserr)) - } - }) - - t.Run("Init", func(t *testing.T) { - t.Run("Success", func(t *testing.T) { - t.Parallel() - dir := makeTempDir(t) - defer file.RemoveAll(dir) - assert := assert.New(t) - - var names []addr - for i := byte(0); i < 4; i++ { - name := computeAddr([]byte{i}) - require.NoError(t, os.WriteFile(filepath.Join(dir, name.String()), nil, 0666)) - names = append(names, name) - } - - ftc, err := newFSTableCache(dir, 1024, 4) - require.NoError(t, err) - assert.NotNil(ftc) - - for _, name := range names { - assert.NotNil(ftc.checkout(name)) - } - }) - - t.Run("BadFile", func(t 
*testing.T) { - t.Parallel() - dir := makeTempDir(t) - defer file.RemoveAll(dir) - - require.NoError(t, os.WriteFile(filepath.Join(dir, "boo"), nil, 0666)) - _, err := newFSTableCache(dir, 1024, 4) - assert.Error(t, err) - }) - - t.Run("ClearTempFile", func(t *testing.T) { - t.Parallel() - dir := makeTempDir(t) - defer file.RemoveAll(dir) - - tempFile := filepath.Join(dir, tempTablePrefix+"boo") - require.NoError(t, os.WriteFile(tempFile, nil, 0666)) - _, err := newFSTableCache(dir, 1024, 4) - require.NoError(t, err) - _, fserr := os.Stat(tempFile) - assert.True(t, os.IsNotExist(fserr)) - }) - - t.Run("Dir", func(t *testing.T) { - t.Parallel() - dir := makeTempDir(t) - defer file.RemoveAll(dir) - require.NoError(t, os.Mkdir(filepath.Join(dir, "sub"), 0777)) - _, err := newFSTableCache(dir, 1024, 4) - assert.Error(t, err) - }) - }) -} - -func assertDataInReaderAt(t *testing.T, data []byte, r io.ReaderAt) { - p := make([]byte, len(data)) - n, err := r.ReadAt(p, 0) - require.NoError(t, err) - assert.Equal(t, len(data), n) - assert.Equal(t, data, p) -} diff --git a/go/store/nbs/journal.go b/go/store/nbs/journal.go index e4d7adea14..9b02cd226d 100644 --- a/go/store/nbs/journal.go +++ b/go/store/nbs/journal.go @@ -40,7 +40,7 @@ func UseJournalStore(path string) bool { if chunkJournalFeatureFlag { return true } - ok, err := journalFileExists(filepath.Join(path, chunkJournalAddr)) + ok, err := fileExists(filepath.Join(path, chunkJournalAddr)) if err != nil { panic(err) } @@ -78,22 +78,34 @@ func newChunkJournal(ctx context.Context, nbfVers, dir string, m manifest, p *fs j := &chunkJournal{path: path, backing: m, persister: p} j.contents.nbfVers = nbfVers - ok, err := journalFileExists(path) + ok, err := fileExists(path) if err != nil { return nil, err } else if ok { - // only open a journalWriter if the journal file exists, + // only bootstrap journalWriter if the journal file exists, // otherwise we wait to open in case we're cloning - if err = j.openJournal(ctx); err != 
nil { + if err = j.bootstrapJournalWriter(ctx); err != nil { return nil, err } } return j, nil } -func (j *chunkJournal) openJournal(ctx context.Context) (err error) { +// bootstrapJournalWriter initializes the journalWriter, which manages access to the +// journal file for this chunkJournal. The bootstrapping process differed depending +// on whether a journal file exists at startup time. +// +// If a journal file does not exist, we create one and commit a root hash record +// which we read from the manifest file. +// +// If a journal file does exist, we process its records to build up an index of its +// resident chunks. Processing journal records is potentially accelerated by an index +// file (see indexRec). The journal file is the source of truth for latest root hash. +// As we process journal records, we keep track of the latest root hash record we see +// and update the manifest file with the last root hash we saw. +func (j *chunkJournal) bootstrapJournalWriter(ctx context.Context) (err error) { var ok bool - ok, err = journalFileExists(j.path) + ok, err = fileExists(j.path) if err != nil { return err } @@ -116,7 +128,7 @@ func (j *chunkJournal) openJournal(ctx context.Context) (err error) { } if ok { // write the current root hash to the journal file - if err = j.wr.writeRootHash(contents.root); err != nil { + if err = j.wr.commitRootHash(contents.root); err != nil { return } j.contents = contents @@ -185,7 +197,7 @@ func (j *chunkJournal) Persist(ctx context.Context, mt *memTable, haver chunkRea } // ConjoinAll implements tablePersister. 
-func (j *chunkJournal) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) { +func (j *chunkJournal) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) { return j.persister.ConjoinAll(ctx, sources, stats) } @@ -258,7 +270,7 @@ func (j *chunkJournal) Update(ctx context.Context, lastLock addr, next manifestC } } - if err := j.wr.writeRootHash(next.root); err != nil { + if err := j.wr.commitRootHash(next.root); err != nil { return manifestContents{}, err } j.contents = next @@ -300,7 +312,7 @@ func (j *chunkJournal) ParseIfExists(ctx context.Context, stats *Stats, readHook func (j *chunkJournal) maybeInit(ctx context.Context) (err error) { if j.wr == nil { - err = j.openJournal(ctx) + err = j.bootstrapJournalWriter(ctx) } return } diff --git a/go/store/nbs/journal_chunk_source.go b/go/store/nbs/journal_chunk_source.go index 3bf7f1fc8c..5cf0ec54e3 100644 --- a/go/store/nbs/journal_chunk_source.go +++ b/go/store/nbs/journal_chunk_source.go @@ -25,29 +25,6 @@ import ( "github.com/dolthub/dolt/go/store/hash" ) -// recLookup contains journalRec lookup metadata. -type recLookup struct { - // journalOff is the file offset of the journalRec. - journalOff int64 - - // recordLen is the length of the journalRec. - recordLen uint32 - - // payloadOff is the offset of the payload within the - // journalRec, it's used for converting to a Range. - payloadOff uint32 -} - -// rangeFromLookup converts a recLookup to a Range, -// used when computing GetDownloadLocs. -func rangeFromLookup(l recLookup) Range { - return Range{ - // see journalRec for serialization format - Offset: uint64(l.journalOff) + uint64(l.payloadOff), - Length: l.recordLen - (l.payloadOff + journalRecChecksumSz), - } -} - // journalChunkSource is a chunkSource that reads chunks // from a chunkJournal. 
Unlike other NBS chunkSources, // it is not immutable and its set of chunks grows as diff --git a/go/store/nbs/journal_index_record.go b/go/store/nbs/journal_index_record.go new file mode 100644 index 0000000000..1dc1f66222 --- /dev/null +++ b/go/store/nbs/journal_index_record.go @@ -0,0 +1,268 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nbs + +import ( + "bufio" + "bytes" + "context" + "encoding/binary" + "fmt" + "io" + "sort" + + "github.com/dolthub/dolt/go/store/d" + "github.com/dolthub/dolt/go/store/hash" +) + +// indexRec is a record in a chunk journal index file. Index records +// serve as out-of-band chunk indexes into the chunk journal that allow +// bootstrapping the journal without reading each record in the journal. +// +// Like journalRec, its serialization format uses uint8 tag prefixes +// to identify fields and allow for format evolution. +type indexRec struct { + // index record length + length uint32 + + // root hash of commit when this index record was written + lastRoot hash.Hash + + // file offsets for the region of the journal file + // that |payload| indexes. end points to a root hash + // record in the journal containing |lastRoot|. + // we expect a sequence of index records to cover + // contiguous regions of the journal file. 
+ start, end uint64 + + // index record kind + kind indexRecKind + + // encoded chunk index + payload []byte + + // index record crc32 checksum + checksum uint32 +} + +type indexRecKind uint8 + +const ( + unknownIndexRecKind indexRecKind = 0 + tableIndexRecKind indexRecKind = 1 +) + +type indexRecTag uint8 + +const ( + unknownIndexRecTag indexRecTag = 0 + lastRootIndexRecTag indexRecTag = 1 + startOffsetIndexRecTag indexRecTag = 2 + endOffsetIndexRecTag indexRecTag = 3 + kindIndexRecTag indexRecTag = 4 + payloadIndexRecTag indexRecTag = 5 +) + +const ( + indexRecTagSz = 1 + indexRecLenSz = 4 + indexRecKindSz = 1 + indexRecLastRootSz = 20 + indexRecOffsetSz = 8 + indexRecChecksumSz = 4 +) + +func journalIndexRecordSize(idx []byte) (recordSz uint32) { + recordSz += indexRecLenSz + recordSz += indexRecTagSz + indexRecLastRootSz + recordSz += indexRecTagSz + indexRecOffsetSz + recordSz += indexRecTagSz + indexRecOffsetSz + recordSz += indexRecTagSz + indexRecKindSz + recordSz += indexRecTagSz // payload tag + recordSz += uint32(len(idx)) + recordSz += indexRecChecksumSz + return +} + +func writeJournalIndexRecord(buf []byte, root hash.Hash, start, end uint64, idx []byte) (n uint32) { + // length + l := journalIndexRecordSize(idx) + writeUint32(buf[:indexRecLenSz], l) + n += indexRecLenSz + // last root + buf[n] = byte(lastRootIndexRecTag) + n += indexRecTagSz + copy(buf[n:], root[:]) + n += indexRecLastRootSz + // start offset + buf[n] = byte(startOffsetIndexRecTag) + n += indexRecTagSz + writeUint64(buf[n:], start) + n += indexRecOffsetSz + // end offset + buf[n] = byte(endOffsetIndexRecTag) + n += indexRecTagSz + writeUint64(buf[n:], end) + n += indexRecOffsetSz + // kind + buf[n] = byte(kindIndexRecTag) + n += indexRecTagSz + buf[n] = byte(tableIndexRecKind) + n += indexRecKindSz + // payload + buf[n] = byte(payloadIndexRecTag) + n += indexRecTagSz + copy(buf[n:], idx) + n += uint32(len(idx)) + // checksum + writeUint32(buf[n:], crc(buf[:n])) + n += 
indexRecChecksumSz + d.PanicIfFalse(l == n) + return +} + +func readJournalIndexRecord(buf []byte) (rec indexRec, err error) { + rec.length = readUint32(buf) + buf = buf[indexRecLenSz:] + for len(buf) > indexRecChecksumSz { + tag := indexRecTag(buf[0]) + buf = buf[indexRecTagSz:] + switch tag { + case lastRootIndexRecTag: + copy(rec.lastRoot[:], buf) + buf = buf[indexRecLastRootSz:] + case startOffsetIndexRecTag: + rec.start = readUint64(buf) + buf = buf[indexRecOffsetSz:] + case endOffsetIndexRecTag: + rec.end = readUint64(buf) + buf = buf[indexRecOffsetSz:] + case kindIndexRecTag: + rec.kind = indexRecKind(buf[0]) + buf = buf[indexRecKindSz:] + case payloadIndexRecTag: + sz := len(buf) - indexRecChecksumSz + rec.payload = buf[:sz] + buf = buf[sz:] + case unknownIndexRecTag: + fallthrough + default: + err = fmt.Errorf("unknown record field tag: %d", tag) + return + } + } + rec.checksum = readUint32(buf[:indexRecChecksumSz]) + return +} + +func validateIndexRecord(buf []byte) (ok bool) { + if len(buf) > (indexRecLenSz + indexRecChecksumSz) { + off := len(buf) - indexRecChecksumSz + ok = crc(buf[:off]) == readUint32(buf[off:]) + } + return +} + +// processIndexRecords reads a sequence of index records from |r| and passes them to the callback. While reading records +// it makes some basic assertions that the sequence is well-formed and indexes a contiguous region for the journal file. 
+func processIndexRecords(ctx context.Context, r io.ReadSeeker, sz int64, cb func(o int64, r indexRec) error) (err error) { + var ( + buf []byte + off int64 + prev uint64 + ) + + rdr := bufio.NewReader(r) + for off < sz { + // peek to read next record size + if buf, err = rdr.Peek(uint32Size); err != nil { + break + } + + l := readUint32(buf) + if int64(l) > sz { + return fmt.Errorf("invalid record size %d for index file of size %d", l, sz) + } + if len(buf) < int(l) { + buf = make([]byte, l) + } + if _, err = io.ReadFull(rdr, buf); err != nil { + break + } + + // we do not zero-fill the journal index and expect + // only complete records that will checksum + if !validateIndexRecord(buf) { + return fmt.Errorf("failed to checksum index record at %d", off) + } + + var rec indexRec + if rec, err = readJournalIndexRecord(buf); err != nil { + return err + } else if rec.start != prev { + return fmt.Errorf("index records do not cover contiguous region (%d != %d)", rec.end, prev) + } + + if err = cb(off, rec); err != nil { + return err + } + prev = rec.end + off += int64(len(buf)) + } + if err == nil && off != sz { + err = fmt.Errorf("failed to process entire journal index (%d < %d)", off, sz) + } else if err == io.EOF { + err = nil + } + return +} + +type lookup struct { + a addr + r Range +} + +const lookupSize = addrSize + offsetSize + lengthSize + +// serializeLookups serializes |lookups| using the table file chunk index format. 
+func serializeLookups(lookups []lookup) (index []byte) { + index = make([]byte, len(lookups)*lookupSize) + sort.Slice(lookups, func(i, j int) bool { // sort by addr + return bytes.Compare(lookups[i].a[:], lookups[j].a[:]) < 0 + }) + buf := index + for _, l := range lookups { + copy(buf, l.a[:]) + buf = buf[addrSize:] + binary.BigEndian.PutUint64(buf, l.r.Offset) + buf = buf[offsetSize:] + binary.BigEndian.PutUint32(buf, l.r.Length) + buf = buf[lengthSize:] + } + return +} + +func deserializeLookups(index []byte) (lookups []lookup) { + lookups = make([]lookup, len(index)/lookupSize) + for i := range lookups { + copy(lookups[i].a[:], index) + index = index[addrSize:] + lookups[i].r.Offset = binary.BigEndian.Uint64(index) + index = index[offsetSize:] + lookups[i].r.Length = binary.BigEndian.Uint32(index) + index = index[lengthSize:] + } + return +} diff --git a/go/store/nbs/journal_index_record_test.go b/go/store/nbs/journal_index_record_test.go new file mode 100644 index 0000000000..5d39568681 --- /dev/null +++ b/go/store/nbs/journal_index_record_test.go @@ -0,0 +1,205 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package nbs + +import ( + "bytes" + "context" + "math/rand" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/store/d" + "github.com/dolthub/dolt/go/store/hash" +) + +func TestRoundTripIndexRecords(t *testing.T) { + t.Run("table index record", func(t *testing.T) { + start := uint64(0) + for i := 0; i < 64; i++ { + end := start + (rand.Uint64() % 1024) + rec, buf := makeTableIndexRecord(start, end) + start = end + assert.Equal(t, rec.length, uint32(len(buf))) + b := make([]byte, rec.length) + n := writeJournalIndexRecord(b, rec.lastRoot, rec.start, rec.end, mustPayload(rec)) + assert.Equal(t, n, rec.length) + assert.Equal(t, buf, b) + r, err := readJournalIndexRecord(buf) + assert.NoError(t, err) + assert.Equal(t, rec, r) + } + }) +} + +func TestUnknownIndexRecordTag(t *testing.T) { + // test behavior encountering unknown tag + buf := makeUnknownTagIndexRecord() + // checksum is ok + ok := validateIndexRecord(buf) + assert.True(t, ok) + // reading record fails + _, err := readJournalIndexRecord(buf) + assert.Error(t, err) +} + +func TestProcessIndexRecords(t *testing.T) { + const cnt = 1024 + ctx := context.Background() + records := make([]indexRec, cnt) + buffers := make([][]byte, cnt) + index := make([]byte, cnt*1024) + + var off uint32 + var start uint64 + for i := range records { + end := start + (rand.Uint64() % 1024) + r, b := makeTableIndexRecord(start, end) + start = end + off += writeJournalIndexRecord(index[off:], r.lastRoot, r.start, r.end, mustPayload(r)) + records[i], buffers[i] = r, b + } + index = index[:off] + + var i, sum int + check := func(o int64, r indexRec) (_ error) { + require.True(t, i < cnt) + assert.Equal(t, records[i], r) + assert.Equal(t, sum, int(o)) + sum += len(buffers[i]) + i++ + return + } + + err := processIndexRecords(ctx, bytes.NewReader(index), int64(len(index)), check) + assert.Equal(t, cnt, i) + require.NoError(t, err) + + i, sum = 0, 0 + // write a 
bogus record to the end and process again + index = appendCorruptIndexRecord(index) + err = processIndexRecords(ctx, bytes.NewReader(index), int64(len(index)), check) + assert.Equal(t, cnt, i) + assert.Error(t, err) // fails to checksum +} + +func TestRoundTripLookups(t *testing.T) { + exp := makeLookups(128) + buf := serializeLookups(exp) + act := deserializeLookups(buf) + assert.Equal(t, exp, act) + +} + +func makeTableIndexRecord(start, end uint64) (indexRec, []byte) { + payload := randBuf(100) + sz := journalIndexRecordSize(payload) + lastRoot := hash.Of([]byte("fake commit")) + + var n int + buf := make([]byte, sz) + + // length + writeUint32(buf[n:], uint32(len(buf))) + n += indexRecLenSz + + // last root + buf[n] = byte(lastRootIndexRecTag) + n += indexRecTagSz + copy(buf[n:], lastRoot[:]) + n += len(lastRoot[:]) + + // start offset + buf[n] = byte(startOffsetIndexRecTag) + n += indexRecTagSz + writeUint64(buf[n:], start) + n += indexRecOffsetSz + + // stop offset + buf[n] = byte(endOffsetIndexRecTag) + n += indexRecTagSz + writeUint64(buf[n:], end) + n += indexRecOffsetSz + + // kind + buf[n] = byte(kindIndexRecTag) + n += indexRecTagSz + buf[n] = byte(tableIndexRecKind) + n += indexRecKindSz + + // payload + buf[n] = byte(payloadIndexRecTag) + n += indexRecTagSz + copy(buf[n:], payload) + n += len(payload) + + // checksum + c := crc(buf[:len(buf)-indexRecChecksumSz]) + writeUint32(buf[len(buf)-indexRecChecksumSz:], c) + + r := indexRec{ + length: uint32(len(buf)), + lastRoot: lastRoot, + start: start, + end: end, + kind: tableIndexRecKind, + payload: payload, + checksum: c, + } + return r, buf +} + +func makeUnknownTagIndexRecord() (buf []byte) { + const fakeTag indexRecTag = 111 + _, buf = makeTableIndexRecord(0, 128) + // overwrite recKind + buf[indexRecLenSz] = byte(fakeTag) + // redo checksum + c := crc(buf[:len(buf)-indexRecChecksumSz]) + writeUint32(buf[len(buf)-indexRecChecksumSz:], c) + return +} + +func appendCorruptIndexRecord(buf []byte) []byte 
{ + tail := make([]byte, journalIndexRecordSize(nil)) + rand.Read(tail) + // write a valid size, kind + writeUint32(tail, uint32(len(tail))) + tail[journalRecLenSz] = byte(tableIndexRecKind) + return append(buf, tail...) +} + +func mustPayload(rec indexRec) []byte { + d.PanicIfFalse(rec.kind == tableIndexRecKind) + return rec.payload +} + +func makeLookups(cnt int) (lookups []lookup) { + lookups = make([]lookup, cnt) + buf := make([]byte, cnt*addrSize) + rand.Read(buf) + var off uint64 + for i := range lookups { + copy(lookups[i].a[:], buf) + buf = buf[addrSize:] + lookups[i].r.Offset = off + l := rand.Uint32() % 1024 + lookups[i].r.Length = l + off += uint64(l) + } + return +} diff --git a/go/store/nbs/journal_record.go b/go/store/nbs/journal_record.go index 1c64ff5f20..b853a8284f 100644 --- a/go/store/nbs/journal_record.go +++ b/go/store/nbs/journal_record.go @@ -22,6 +22,7 @@ import ( "io" "github.com/dolthub/dolt/go/store/d" + "github.com/dolthub/dolt/go/store/hash" ) // journalRec is a record in a chunk journal. 
Its serialization format uses @@ -113,7 +114,7 @@ func rootHashRecordSize() (recordSz int) { func writeChunkRecord(buf []byte, c CompressedChunk) (n uint32) { // length l, _ := chunkRecordSize(c) - writeUint(buf[:journalRecLenSz], l) + writeUint32(buf[:journalRecLenSz], l) n += journalRecLenSz // kind buf[n] = byte(kindJournalRecTag) @@ -131,7 +132,7 @@ func writeChunkRecord(buf []byte, c CompressedChunk) (n uint32) { copy(buf[n:], c.FullCompressedChunk) n += uint32(len(c.FullCompressedChunk)) // checksum - writeUint(buf[n:], crc(buf[:n])) + writeUint32(buf[n:], crc(buf[:n])) n += journalRecChecksumSz d.PanicIfFalse(l == n) return @@ -140,7 +141,7 @@ func writeChunkRecord(buf []byte, c CompressedChunk) (n uint32) { func writeRootHashRecord(buf []byte, root addr) (n uint32) { // length l := rootHashRecordSize() - writeUint(buf[:journalRecLenSz], uint32(l)) + writeUint32(buf[:journalRecLenSz], uint32(l)) n += journalRecLenSz // kind buf[n] = byte(kindJournalRecTag) @@ -154,13 +155,13 @@ func writeRootHashRecord(buf []byte, root addr) (n uint32) { n += journalRecAddrSz // empty payload // checksum - writeUint(buf[n:], crc(buf[:n])) + writeUint32(buf[n:], crc(buf[:n])) n += journalRecChecksumSz return } func readJournalRecord(buf []byte) (rec journalRec, err error) { - rec.length = readUint(buf) + rec.length = readUint32(buf) buf = buf[journalRecLenSz:] for len(buf) > journalRecChecksumSz { tag := journalRecTag(buf[0]) @@ -183,25 +184,29 @@ func readJournalRecord(buf []byte) (rec journalRec, err error) { return } } - rec.checksum = readUint(buf[:journalRecChecksumSz]) + rec.checksum = readUint32(buf[:journalRecChecksumSz]) return } func validateJournalRecord(buf []byte) (ok bool) { if len(buf) > (journalRecLenSz + journalRecChecksumSz) { off := len(buf) - journalRecChecksumSz - ok = crc(buf[:off]) == readUint(buf[off:]) + ok = crc(buf[:off]) == readUint32(buf[off:]) } return } -func processJournalRecords(ctx context.Context, r io.ReadSeeker, cb func(o int64, r 
journalRec) error) (int64, error) { +func processJournalRecords(ctx context.Context, r io.ReadSeeker, off int64, cb func(o int64, r journalRec) error) (int64, error) { var ( buf []byte - off int64 err error ) + // start processing records from |off| + if _, err = r.Seek(off, io.SeekStart); err != nil { + return 0, err + } + rdr := bufio.NewReaderSize(r, journalWriterBuffSize) for { // peek to read next record size @@ -209,7 +214,7 @@ func processJournalRecords(ctx context.Context, r io.ReadSeeker, cb func(o int64 break } - l := readUint(buf) + l := readUint32(buf) if l > journalRecMaxSz { break } else if buf, err = rdr.Peek(int(l)); err != nil { @@ -245,10 +250,43 @@ func processJournalRecords(ctx context.Context, r io.ReadSeeker, cb func(o int64 return off, nil } -func readUint(buf []byte) uint32 { +func peekRootHashAt(journal io.ReaderAt, offset int64) (root hash.Hash, err error) { + buf := make([]byte, 1024) // assumes len(rec) < 1024 + if _, err = journal.ReadAt(buf, offset); err != nil { + return + } + sz := readUint32(buf) + if sz > journalRecMaxSz { + err = fmt.Errorf("invalid root hash record size at %d", offset) + return + } + buf = buf[:sz] + if !validateIndexRecord(buf) { + err = fmt.Errorf("failed to validate root hash record at %d", offset) + return + } + var rec journalRec + if rec, err = readJournalRecord(buf); err != nil { + return + } else if rec.kind != rootHashJournalRecKind { + err = fmt.Errorf("expected root hash record, got kind: %d", rec.kind) + return + } + return hash.Hash(rec.address), nil +} + +func readUint32(buf []byte) uint32 { return binary.BigEndian.Uint32(buf) } -func writeUint(buf []byte, u uint32) { +func writeUint32(buf []byte, u uint32) { binary.BigEndian.PutUint32(buf, u) } + +func readUint64(buf []byte) uint64 { + return binary.BigEndian.Uint64(buf) +} + +func writeUint64(buf []byte, u uint64) { + binary.BigEndian.PutUint64(buf, u) +} diff --git a/go/store/nbs/journal_record_test.go b/go/store/nbs/journal_record_test.go index 
a1bb86e3ef..f74dc57d42 100644 --- a/go/store/nbs/journal_record_test.go +++ b/go/store/nbs/journal_record_test.go @@ -28,7 +28,7 @@ import ( "github.com/dolthub/dolt/go/store/hash" ) -func TestRoundTripRecords(t *testing.T) { +func TestRoundTripJournalRecords(t *testing.T) { t.Run("chunk record", func(t *testing.T) { for i := 0; i < 64; i++ { rec, buf := makeChunkRecord() @@ -57,16 +57,18 @@ func TestRoundTripRecords(t *testing.T) { }) } -func TestUnknownTag(t *testing.T) { +func TestUnknownJournalRecordTag(t *testing.T) { // test behavior encountering unknown tag - buf := makeUnknownTagRecord() + buf := makeUnknownTagJournalRecord() + // checksum is ok ok := validateJournalRecord(buf) assert.True(t, ok) + // reading record fails _, err := readJournalRecord(buf) assert.Error(t, err) } -func TestProcessRecords(t *testing.T) { +func TestProcessJournalRecords(t *testing.T) { const cnt = 1024 ctx := context.Background() records := make([]journalRec, cnt) @@ -97,15 +99,15 @@ func TestProcessRecords(t *testing.T) { return } - n, err := processJournalRecords(ctx, bytes.NewReader(journal), check) + n, err := processJournalRecords(ctx, bytes.NewReader(journal), 0, check) assert.Equal(t, cnt, i) assert.Equal(t, int(off), int(n)) require.NoError(t, err) i, sum = 0, 0 // write a bogus record to the end and process again - writeCorruptRecord(journal[off:]) - n, err = processJournalRecords(ctx, bytes.NewReader(journal), check) + writeCorruptJournalRecord(journal[off:]) + n, err = processJournalRecords(ctx, bytes.NewReader(journal), 0, check) assert.Equal(t, cnt, i) assert.Equal(t, int(off), int(n)) require.NoError(t, err) @@ -133,7 +135,7 @@ func makeChunkRecord() (journalRec, []byte) { var n int buf := make([]byte, sz) // length - writeUint(buf[n:], uint32(len(buf))) + writeUint32(buf[n:], uint32(len(buf))) n += journalRecLenSz // kind buf[n] = byte(kindJournalRecTag) @@ -152,7 +154,7 @@ func makeChunkRecord() (journalRec, []byte) { n += len(payload) // checksum c := 
crc(buf[:len(buf)-journalRecChecksumSz]) - writeUint(buf[len(buf)-journalRecChecksumSz:], c) + writeUint32(buf[len(buf)-journalRecChecksumSz:], c) r := journalRec{ length: uint32(len(buf)), @@ -169,7 +171,7 @@ func makeRootHashRecord() (journalRec, []byte) { var n int buf := make([]byte, rootHashRecordSize()) // length - writeUint(buf[n:], uint32(len(buf))) + writeUint32(buf[n:], uint32(len(buf))) n += journalRecLenSz // kind buf[n] = byte(kindJournalRecTag) @@ -183,7 +185,7 @@ func makeRootHashRecord() (journalRec, []byte) { n += journalRecAddrSz // checksum c := crc(buf[:len(buf)-journalRecChecksumSz]) - writeUint(buf[len(buf)-journalRecChecksumSz:], c) + writeUint32(buf[len(buf)-journalRecChecksumSz:], c) r := journalRec{ length: uint32(len(buf)), kind: rootHashJournalRecKind, @@ -193,23 +195,23 @@ func makeRootHashRecord() (journalRec, []byte) { return r, buf } -func makeUnknownTagRecord() (buf []byte) { +func makeUnknownTagJournalRecord() (buf []byte) { const fakeTag journalRecTag = 111 _, buf = makeRootHashRecord() // overwrite recKind buf[journalRecLenSz] = byte(fakeTag) // redo checksum c := crc(buf[:len(buf)-journalRecChecksumSz]) - writeUint(buf[len(buf)-journalRecChecksumSz:], c) + writeUint32(buf[len(buf)-journalRecChecksumSz:], c) return } -func writeCorruptRecord(buf []byte) (n uint32) { +func writeCorruptJournalRecord(buf []byte) (n uint32) { n = uint32(rootHashRecordSize()) // fill with random data rand.Read(buf[:n]) // write a valid size, kind - writeUint(buf, n) + writeUint32(buf, n) buf[journalRecLenSz] = byte(rootHashJournalRecKind) return } diff --git a/go/store/nbs/journal_test.go b/go/store/nbs/journal_test.go index ae1a9a67d5..939062dd1e 100644 --- a/go/store/nbs/journal_test.go +++ b/go/store/nbs/journal_test.go @@ -36,7 +36,7 @@ func makeTestChunkJournal(t *testing.T) *chunkJournal { m, err := getFileManifest(ctx, dir, syncFlush) require.NoError(t, err) q := NewUnlimitedMemQuotaProvider() - p := newFSTablePersister(dir, globalFDCache, q) + 
p := newFSTablePersister(dir, q) nbf := types.Format_Default.VersionString() j, err := newChunkJournal(ctx, nbf, dir, m, p.(*fsTablePersister)) require.NoError(t, err) diff --git a/go/store/nbs/journal_writer.go b/go/store/nbs/journal_writer.go index 4841d88e91..4dc2395025 100644 --- a/go/store/nbs/journal_writer.go +++ b/go/store/nbs/journal_writer.go @@ -34,6 +34,9 @@ const ( journalWriterBuffSize = 1024 * 1024 chunkJournalAddr = "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" + + journalIndexFileName = "journal.idx" + journalIndexDefaultMaxNovel = 64 * 1024 ) var ( @@ -44,7 +47,7 @@ func isJournalAddr(a addr) bool { return a == journalAddr } -func journalFileExists(path string) (bool, error) { +func fileExists(path string) (bool, error) { var err error if path, err = filepath.Abs(path); err != nil { return false, err @@ -54,7 +57,7 @@ func journalFileExists(path string) (bool, error) { if errors.Is(err, os.ErrNotExist) { return false, nil } else if info.IsDir() { - return true, fmt.Errorf("expected file %s found directory", chunkJournalName) + return true, fmt.Errorf("expected file %s, found directory", path) } return true, nil } @@ -79,8 +82,7 @@ func openJournalWriter(ctx context.Context, path string) (wr *journalWriter, exi return &journalWriter{ buf: make([]byte, 0, journalWriterBuffSize), - lookups: make(map[addr]recLookup), - file: f, + journal: f, path: path, }, true, nil } @@ -119,20 +121,25 @@ func createJournalWriter(ctx context.Context, path string) (wr *journalWriter, e return &journalWriter{ buf: make([]byte, 0, journalWriterBuffSize), - lookups: make(map[addr]recLookup), - file: f, + journal: f, path: path, }, nil } type journalWriter struct { - buf []byte - lookups map[addr]recLookup - file *os.File + buf []byte + + journal *os.File off int64 - uncmpSz uint64 + indexed int64 path string - lock sync.RWMutex + uncmpSz uint64 + + ranges rangeIndex + index *os.File + maxNovel int + + lock sync.RWMutex } var _ io.Closer = &journalWriter{} @@ -142,14 +149,72 @@ var 
_ io.Closer = &journalWriter{} func (wr *journalWriter) bootstrapJournal(ctx context.Context) (last hash.Hash, err error) { wr.lock.Lock() defer wr.lock.Unlock() - wr.off, err = processJournalRecords(ctx, wr.file, func(o int64, r journalRec) error { + + if wr.maxNovel == 0 { + wr.maxNovel = journalIndexDefaultMaxNovel + } + wr.ranges = newRangeIndex() + + p := filepath.Join(filepath.Dir(wr.path), journalIndexFileName) + var ok bool + ok, err = fileExists(p) + if err != nil { + return + } else if ok { + wr.index, err = os.OpenFile(p, os.O_RDWR, 0666) + } else { + wr.index, err = os.OpenFile(p, os.O_RDWR|os.O_CREATE, 0666) + } + if err != nil { + return + } + + if ok { + var info os.FileInfo + if info, err = wr.index.Stat(); err != nil { + return hash.Hash{}, err + } + err = processIndexRecords(ctx, wr.index, info.Size(), func(o int64, r indexRec) (err error) { + switch r.kind { + case tableIndexRecKind: + // |r.end| is expected to point to a root hash record in |wr.journal| + // containing a hash equal to |r.lastRoot|, validate this here + var h hash.Hash + if h, err = peekRootHashAt(wr.journal, int64(r.end)); err != nil { + return err + } else if h != r.lastRoot { + return fmt.Errorf("invalid index record hash (%s != %s)", h.String(), r.lastRoot.String()) + } + // populate range hashmap + for _, l := range deserializeLookups(r.payload) { + wr.ranges.put(l.a, l.r) + } + // record a high-water-mark for the indexed portion of the journal + wr.indexed = int64(r.end) + // todo: uncompressed size + default: + return fmt.Errorf("unknown index record kind (%d)", r.kind) + } + return nil + }) + if err != nil { + // todo: issue warning on corrupt index recovery + if err = wr.corruptIndexRecovery(ctx); err != nil { + return + } + } + wr.ranges.flatten() + } + + // process the non-indexed portion of the journal starting at |wr.indexed|, + // at minimum the non-indexed portion will include a root hash record + wr.off, err = processJournalRecords(ctx, wr.journal, wr.indexed, 
func(o int64, r journalRec) error { switch r.kind { case chunkJournalRecKind: - wr.lookups[r.address] = recLookup{ - journalOff: o, - recordLen: r.length, - payloadOff: r.payloadOffset(), - } + wr.ranges.put(r.address, Range{ + Offset: uint64(o) + uint64(r.payloadOffset()), + Length: uint32(len(r.payload)), + }) wr.uncmpSz += r.uncompressedPayloadSize() case rootHashJournalRecKind: last = hash.Hash(r.address) @@ -164,11 +229,26 @@ func (wr *journalWriter) bootstrapJournal(ctx context.Context) (last hash.Hash, return } +// corruptIndexRecovery handles a corrupted or malformed journal index by truncating +// the index file and restarting the journal bootstrapping process without an index. +func (wr *journalWriter) corruptIndexRecovery(ctx context.Context) (err error) { + if _, err = wr.index.Seek(0, io.SeekStart); err != nil { + return + } + if err = wr.index.Truncate(0); err != nil { + return + } + // reset bootstrapping state + wr.off, wr.indexed, wr.uncmpSz = 0, 0, 0 + wr.ranges = newRangeIndex() + return +} + // hasAddr returns true if the journal contains a chunk with addr |h|. 
func (wr *journalWriter) hasAddr(h addr) (ok bool) { wr.lock.RLock() defer wr.lock.RUnlock() - _, ok = wr.lookups[h] + _, ok = wr.ranges.get(h) return } @@ -176,25 +256,15 @@ func (wr *journalWriter) hasAddr(h addr) (ok bool) { func (wr *journalWriter) getCompressedChunk(h addr) (CompressedChunk, error) { wr.lock.RLock() defer wr.lock.RUnlock() - l, ok := wr.lookups[h] + r, ok := wr.ranges.get(h) if !ok { return CompressedChunk{}, nil } - - buf := make([]byte, l.recordLen) - if _, err := wr.readAt(buf, l.journalOff); err != nil { + buf := make([]byte, r.Length) + if _, err := wr.readAt(buf, int64(r.Offset)); err != nil { return CompressedChunk{}, nil } - - rec, err := readJournalRecord(buf) - if err != nil { - return CompressedChunk{}, err - } else if h != rec.address { - err = fmt.Errorf("chunk record hash does not match (%s != %s)", - h.String(), rec.address.String()) - return CompressedChunk{}, err - } - return NewCompressedChunk(hash.Hash(h), rec.payload) + return NewCompressedChunk(hash.Hash(h), buf) } // getRange returns a Range for the chunk with addr |h|. 
@@ -206,11 +276,7 @@ func (wr *journalWriter) getRange(h addr) (rng Range, ok bool, err error) { } wr.lock.RLock() defer wr.lock.RUnlock() - var l recLookup - l, ok = wr.lookups[h] - if ok { - rng = rangeFromLookup(l) - } + rng, ok = wr.ranges.get(h) return } @@ -218,35 +284,53 @@ func (wr *journalWriter) getRange(h addr) (rng Range, ok bool, err error) { func (wr *journalWriter) writeCompressedChunk(cc CompressedChunk) error { wr.lock.Lock() defer wr.lock.Unlock() - l, o := chunkRecordSize(cc) - rec := recLookup{ - journalOff: wr.offset(), - recordLen: l, - payloadOff: o, + recordLen, payloadOff := chunkRecordSize(cc) + rng := Range{ + Offset: uint64(wr.offset()) + uint64(payloadOff), + Length: uint32(len(cc.FullCompressedChunk)), } - buf, err := wr.getBytes(int(rec.recordLen)) + buf, err := wr.getBytes(int(recordLen)) if err != nil { return err } _ = writeChunkRecord(buf, cc) - wr.lookups[addr(cc.H)] = rec + wr.ranges.put(addr(cc.H), rng) return nil } -// writeRootHash commits |root| to the journal and syncs the file to disk. -func (wr *journalWriter) writeRootHash(root hash.Hash) error { +// commitRootHash commits |root| to the journal and syncs the file to disk. 
+func (wr *journalWriter) commitRootHash(root hash.Hash) error { wr.lock.Lock() defer wr.lock.Unlock() buf, err := wr.getBytes(rootHashRecordSize()) if err != nil { return err } - _ = writeRootHashRecord(buf, addr(root)) - + n := writeRootHashRecord(buf, addr(root)) if err = wr.flush(); err != nil { return err } - return wr.file.Sync() + if err = wr.journal.Sync(); err != nil { + return err + } + if wr.ranges.novelCount() > wr.maxNovel { + o := wr.offset() - int64(n) // pre-commit journal offset + err = wr.flushIndexRecord(root, o) + } + return err +} + +func (wr *journalWriter) flushIndexRecord(root hash.Hash, end int64) (err error) { + payload := serializeLookups(wr.ranges.novelLookups()) + buf := make([]byte, journalIndexRecordSize(payload)) + writeJournalIndexRecord(buf, root, uint64(wr.indexed), uint64(end), payload) + if _, err = wr.index.Write(buf); err != nil { + return err + } + wr.ranges.flatten() + // set a new high-water-mark for the indexed portion of the journal + wr.indexed = end + return } // readAt reads len(p) bytes from the journal at offset |off|. @@ -262,7 +346,7 @@ func (wr *journalWriter) readAt(p []byte, off int64) (n int, err error) { bp = p[fread:] p = p[:fread] } - if n, err = wr.file.ReadAt(p, off); err != nil { + if n, err = wr.journal.ReadAt(p, off); err != nil { return 0, err } off = 0 @@ -294,7 +378,7 @@ func (wr *journalWriter) getBytes(n int) (buf []byte, err error) { // flush writes buffered data into the journal file. 
func (wr *journalWriter) flush() (err error) { - if _, err = wr.file.WriteAt(wr.buf, wr.off); err != nil { + if _, err = wr.journal.WriteAt(wr.buf, wr.off); err != nil { return err } wr.off += int64(len(wr.buf)) @@ -351,7 +435,7 @@ func (wr *journalWriter) uncompressedSize() uint64 { func (wr *journalWriter) recordCount() uint32 { wr.lock.RLock() defer wr.lock.RUnlock() - return uint32(len(wr.lookups)) + return wr.ranges.count() } func (wr *journalWriter) Close() (err error) { @@ -360,11 +444,71 @@ func (wr *journalWriter) Close() (err error) { if err = wr.flush(); err != nil { return err } - if cerr := wr.file.Sync(); cerr != nil { + if cerr := wr.journal.Sync(); cerr != nil { err = cerr } - if cerr := wr.file.Close(); cerr != nil { + if cerr := wr.journal.Close(); cerr != nil { err = cerr } return } + +type rangeIndex struct { + novel map[addr]Range + cached map[addr]Range +} + +func newRangeIndex() rangeIndex { + return rangeIndex{ + novel: make(map[addr]Range), + cached: make(map[addr]Range), + } +} + +func (idx rangeIndex) get(a addr) (rng Range, ok bool) { + rng, ok = idx.novel[a] + if !ok { + rng, ok = idx.cached[a] + } + return +} + +func (idx rangeIndex) put(a addr, rng Range) { + idx.novel[a] = rng +} + +func (idx rangeIndex) iter(cb func(addr, Range)) { + for a, r := range idx.novel { + cb(a, r) + } + for a, r := range idx.cached { + cb(a, r) + } +} + +func (idx rangeIndex) count() uint32 { + return uint32(len(idx.novel) + len(idx.cached)) +} + +func (idx rangeIndex) novelCount() int { + return len(idx.novel) +} + +func (idx rangeIndex) novelLookups() (lookups []lookup) { + lookups = make([]lookup, 0, len(idx.novel)) + for a, r := range idx.novel { + lookups = append(lookups, lookup{a: a, r: r}) + } + return +} + +func (idx rangeIndex) flatten() { + if len(idx.cached) == 0 { + idx.cached = idx.novel + } else { + for a, r := range idx.novel { + idx.cached[a] = r + } + } + idx.novel = make(map[addr]Range) +} diff --git a/go/store/nbs/journal_writer_test.go 
b/go/store/nbs/journal_writer_test.go index c8db1f8ae1..bb9a33d3f5 100644 --- a/go/store/nbs/journal_writer_test.go +++ b/go/store/nbs/journal_writer_test.go @@ -16,33 +16,33 @@ package nbs import ( "context" - "fmt" "math/rand" + "os" "path/filepath" "testing" - "github.com/dolthub/dolt/go/store/chunks" - "github.com/dolthub/dolt/go/store/hash" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/store/chunks" + "github.com/dolthub/dolt/go/store/hash" ) -type operation struct { - kind opKind - buf []byte - readAt int64 -} +func TestJournalWriterReadWrite(t *testing.T) { + type opKind byte -type opKind byte + type operation struct { + kind opKind + buf []byte + readAt int64 + } -const ( - readOp opKind = iota - writeOp - flushOp -) + const ( + readOp opKind = iota + writeOp + flushOp + ) -func TestJournalWriter(t *testing.T) { tests := []struct { name string size int @@ -145,14 +145,13 @@ func TestJournalWriter(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - ctx := context.Background() - j, err := createJournalWriter(ctx, newTestFilePath(t)) - require.NotNil(t, j) - require.NoError(t, err) + path := newTestFilePath(t) + j := newTestJournalWriter(t, path) // set specific buffer size j.buf = make([]byte, 0, test.size) var off int64 + var err error for i, op := range test.ops { switch op.kind { case readOp: @@ -176,53 +175,56 @@ func TestJournalWriter(t *testing.T) { } assert.Equal(t, off, j.offset()) } - assert.NoError(t, j.Close()) }) } } -func TestJournalWriterWriteCompressedChunk(t *testing.T) { +func newTestJournalWriter(t *testing.T, path string) *journalWriter { ctx := context.Background() - j, err := createJournalWriter(ctx, newTestFilePath(t)) - require.NotNil(t, j) + j, err := createJournalWriter(ctx, path) require.NoError(t, err) + require.NotNil(t, j) + _, err = j.bootstrapJournal(ctx) + require.NoError(t, err) + return j +} - data := randomCompressedChunks() - 
+func TestJournalWriterWriteCompressedChunk(t *testing.T) { + path := newTestFilePath(t) + j := newTestJournalWriter(t, path) + data := randomCompressedChunks(1024) for a, cc := range data { - err = j.writeCompressedChunk(cc) + err := j.writeCompressedChunk(cc) require.NoError(t, err) - l := j.lookups[a] - validateLookup(t, j, l, cc) + r, _ := j.ranges.get(a) + validateLookup(t, j, r, cc) } - for a, l := range j.lookups { - validateLookup(t, j, l, data[a]) - } - require.NoError(t, j.Close()) + j.ranges.iter(func(a addr, r Range) { + validateLookup(t, j, r, data[a]) + }) } func TestJournalWriterBootstrap(t *testing.T) { ctx := context.Background() path := newTestFilePath(t) - j, err := createJournalWriter(ctx, path) - require.NotNil(t, j) - require.NoError(t, err) - - data := randomCompressedChunks() + j := newTestJournalWriter(t, path) + data := randomCompressedChunks(1024) + var last hash.Hash for _, cc := range data { - err = j.writeCompressedChunk(cc) + err := j.writeCompressedChunk(cc) require.NoError(t, err) + last = cc.Hash() } - assert.NoError(t, j.Close()) + require.NoError(t, j.commitRootHash(last)) - j, _, err = openJournalWriter(ctx, path) + j, _, err := openJournalWriter(ctx, path) require.NoError(t, err) _, err = j.bootstrapJournal(ctx) require.NoError(t, err) - for a, l := range j.lookups { - validateLookup(t, j, l, data[a]) - } + j.ranges.iter(func(a addr, r Range) { + validateLookup(t, j, r, data[a]) + }) source := journalChunkSource{journal: j} for a, cc := range data { @@ -232,56 +234,171 @@ func TestJournalWriterBootstrap(t *testing.T) { require.NoError(t, err) assert.Equal(t, ch.Data(), buf) } - require.NoError(t, j.Close()) } -func validateLookup(t *testing.T, j *journalWriter, l recLookup, cc CompressedChunk) { - b := make([]byte, l.recordLen) - n, err := j.readAt(b, l.journalOff) +func validateLookup(t *testing.T, j *journalWriter, r Range, cc CompressedChunk) { + buf := make([]byte, r.Length) + _, err := j.readAt(buf, int64(r.Offset)) 
require.NoError(t, err) - assert.Equal(t, int(l.recordLen), n) - rec, err := readJournalRecord(b) - require.NoError(t, err) - assert.Equal(t, hash.Hash(rec.address), cc.Hash()) - assert.Equal(t, rec.payload, cc.FullCompressedChunk) + act, err := NewCompressedChunk(cc.H, buf) + assert.NoError(t, err) + assert.Equal(t, cc.FullCompressedChunk, act.FullCompressedChunk) } func TestJournalWriterSyncClose(t *testing.T) { - ctx := context.Background() - j, err := createJournalWriter(ctx, newTestFilePath(t)) - require.NotNil(t, j) - require.NoError(t, err) - _, err = j.bootstrapJournal(ctx) - require.NoError(t, err) - - // close triggers flush + path := newTestFilePath(t) + j := newTestJournalWriter(t, path) p := []byte("sit") buf, err := j.getBytes(len(p)) require.NoError(t, err) copy(buf, p) - err = j.Close() - require.NoError(t, err) + j.flush() assert.Equal(t, 0, len(j.buf)) assert.Equal(t, 3, int(j.off)) } func newTestFilePath(t *testing.T) string { - name := fmt.Sprintf("journal%d.log", rand.Intn(65536)) - return filepath.Join(t.TempDir(), name) + path, err := os.MkdirTemp("", "") + require.NoError(t, err) + return filepath.Join(path, "journal.log") } -func randomCompressedChunks() (compressed map[addr]CompressedChunk) { - buf := make([]byte, 1024*1024) - rand.Read(buf) +func TestJournalIndexBootstrap(t *testing.T) { + // potentially indexed region of a journal + type epoch struct { + records map[addr]CompressedChunk + last hash.Hash + } + makeEpoch := func() (e epoch) { + e.records = randomCompressedChunks(64) + for h := range e.records { + e.last = hash.Hash(h) + break + } + return + } + + tests := []struct { + name string + epochs []epoch + novel epoch + }{ + { + name: "smoke test", + epochs: []epoch{makeEpoch()}, + }, + { + name: "non-indexed journal", + epochs: nil, + novel: makeEpoch(), + }, + { + name: "partially indexed journal", + epochs: []epoch{makeEpoch()}, + novel: makeEpoch(), + }, + { + name: "multiple index records", + epochs: []epoch{ + makeEpoch(), + 
makeEpoch(), + makeEpoch(), + }, + novel: makeEpoch(), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx := context.Background() + path := newTestFilePath(t) + j := newTestJournalWriter(t, path) + // setup + epochs := append(test.epochs, test.novel) + for i, e := range epochs { + for _, cc := range e.records { + assert.NoError(t, j.writeCompressedChunk(cc)) + if rand.Int()%10 == 0 { // periodic commits + assert.NoError(t, j.commitRootHash(cc.H)) + } + } + o := j.offset() // precommit offset + assert.NoError(t, j.commitRootHash(e.last)) // commit |e.last| + if i == len(epochs)-1 { + break // don't index |test.novel| + } + assert.NoError(t, j.flushIndexRecord(e.last, o)) // write index record + } + + validateJournal := func(p string, expected []epoch) { + journal, ok, err := openJournalWriter(ctx, p) + require.NoError(t, err) + require.True(t, ok) + // bootstrap journal and validate chunk records + last, err := journal.bootstrapJournal(ctx) + assert.NoError(t, err) + for _, e := range expected { + var act CompressedChunk + for a, exp := range e.records { + act, err = journal.getCompressedChunk(a) + assert.NoError(t, err) + assert.Equal(t, exp, act) + } + } + assert.Equal(t, expected[len(expected)-1].last, last) + } + idxPath := filepath.Join(filepath.Dir(path), journalIndexFileName) + + before, err := os.Stat(idxPath) + require.NoError(t, err) + if len(test.epochs) > 0 { // expect index + assert.True(t, before.Size() > 0) + } else { + assert.Equal(t, int64(0), before.Size()) + } + + // bootstrap journal using index + validateJournal(path, epochs) + // assert journal index unchanged + info, err := os.Stat(idxPath) + require.NoError(t, err) + assert.Equal(t, before.Size(), info.Size()) + + // bootstrap journal without index + corruptJournalIndex(t, idxPath) + validateJournal(path, epochs) + // assert corrupt index cleaned up + info, err = os.Stat(idxPath) + require.NoError(t, err) + assert.Equal(t, int64(0), info.Size()) + }) + } 
+} + +func randomCompressedChunks(cnt int) (compressed map[addr]CompressedChunk) { compressed = make(map[addr]CompressedChunk) - for { + var buf []byte + for i := 0; i < cnt; i++ { k := rand.Intn(51) + 50 if k >= len(buf) { - return + buf = make([]byte, 64*1024) + rand.Read(buf) } c := chunks.NewChunk(buf[:k]) buf = buf[k:] compressed[addr(c.Hash())] = ChunkToCompressedChunk(c) } + return +} + +func corruptJournalIndex(t *testing.T, path string) { + f, err := os.OpenFile(path, os.O_RDWR, 0666) + require.NoError(t, err) + info, err := f.Stat() + require.NoError(t, err) + buf := make([]byte, 64) + rand.Read(buf) + _, err = f.WriteAt(buf, info.Size()/2) + require.NoError(t, err) } diff --git a/go/store/nbs/mem_table_test.go b/go/store/nbs/mem_table_test.go index 77f4e0575a..c279ededa6 100644 --- a/go/store/nbs/mem_table_test.go +++ b/go/store/nbs/mem_table_test.go @@ -190,6 +190,14 @@ func tableReaderAtFromBytes(b []byte) tableReaderAt { return tableReaderAtAdapter{bytes.NewReader(b)} } +func (adapter tableReaderAtAdapter) Close() error { + return nil +} + +func (adapter tableReaderAtAdapter) clone() (tableReaderAt, error) { + return adapter, nil +} + func (adapter tableReaderAtAdapter) Reader(ctx context.Context) (io.ReadCloser, error) { r := *adapter.br return io.NopCloser(&r), nil diff --git a/go/store/nbs/root_tracker_test.go b/go/store/nbs/root_tracker_test.go index d62c3ae128..32a69b3360 100644 --- a/go/store/nbs/root_tracker_test.go +++ b/go/store/nbs/root_tracker_test.go @@ -528,12 +528,12 @@ func (ftp fakeTablePersister) Persist(ctx context.Context, mt *memTable, haver c return chunkSourceAdapter{cs, name}, nil } -func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) { +func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) { name, data, chunkCount, err := compactSourcesToBuffer(sources) if err != nil { - return nil, 
err + return nil, nil, err } else if chunkCount == 0 { - return emptyChunkSource{}, nil + return emptyChunkSource{}, func() {}, nil } ftp.mu.Lock() @@ -542,14 +542,14 @@ func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSourc ti, err := parseTableIndexByCopy(ctx, data, ftp.q) if err != nil { - return nil, err + return nil, nil, err } cs, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize) if err != nil { - return nil, err + return nil, nil, err } - return chunkSourceAdapter{cs, name}, nil + return chunkSourceAdapter{cs, name}, func() {}, nil } func compactSourcesToBuffer(sources chunkSources) (name addr, data []byte, chunkCount uint32, err error) { diff --git a/go/store/nbs/s3_table_reader.go b/go/store/nbs/s3_table_reader.go index 68079834c2..179d5b9a38 100644 --- a/go/store/nbs/s3_table_reader.go +++ b/go/store/nbs/s3_table_reader.go @@ -60,6 +60,14 @@ type s3svc interface { PutObjectWithContext(ctx aws.Context, input *s3.PutObjectInput, opts ...request.Option) (*s3.PutObjectOutput, error) } +func (s3tra *s3TableReaderAt) Close() error { + return nil +} + +func (s3tra *s3TableReaderAt) clone() (tableReaderAt, error) { + return s3tra, nil +} + func (s3tra *s3TableReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) { return s3tra.s3.Reader(ctx, s3tra.h) } diff --git a/go/store/nbs/store.go b/go/store/nbs/store.go index ea514d9951..e64695bbeb 100644 --- a/go/store/nbs/store.go +++ b/go/store/nbs/store.go @@ -68,14 +68,11 @@ const ( var ( cacheOnce = sync.Once{} makeManifestManager func(manifest) manifestManager - globalFDCache *fdCache ) var tracer = otel.Tracer("github.com/dolthub/dolt/go/store/nbs") func makeGlobalCaches() { - globalFDCache = newFDCache(defaultMaxTables) - manifestCache := newManifestCache(defaultManifestCacheSize) manifestLocks := newManifestLocks() makeManifestManager = func(m manifest) manifestManager { return manifestManager{m, manifestCache, manifestLocks} } @@ -479,7 +476,7 @@ func 
newLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSi if err != nil { return nil, err } - p := newFSTablePersister(dir, globalFDCache, q) + p := newFSTablePersister(dir, q) c := conjoinStrategy(inlineConjoiner{maxTables}) return newNomsBlockStore(ctx, nbfVerStr, makeManifestManager(m), p, q, c, memTableSize) @@ -495,7 +492,7 @@ func NewLocalJournalingStore(ctx context.Context, nbfVers, dir string, q MemoryQ if err != nil { return nil, err } - p := newFSTablePersister(dir, globalFDCache, q) + p := newFSTablePersister(dir, q) journal, err := newChunkJournal(ctx, nbfVers, dir, m, p.(*fsTablePersister)) if err != nil { @@ -615,6 +612,9 @@ func (nbs *NomsBlockStore) putChunk(ctx context.Context, c chunks.Chunk, getAddr } func (nbs *NomsBlockStore) addChunk(ctx context.Context, ch chunks.Chunk, addrs hash.HashSet, checker refCheck) (bool, error) { + if err := ctx.Err(); err != nil { + return false, err + } nbs.mu.Lock() defer nbs.mu.Unlock() nbs.waitForGC() diff --git a/go/store/nbs/store_test.go b/go/store/nbs/store_test.go index 19aaa797b3..55020d1567 100644 --- a/go/store/nbs/store_test.go +++ b/go/store/nbs/store_test.go @@ -58,14 +58,14 @@ func makeTestLocalStore(t *testing.T, maxTableFiles int) (st *NomsBlockStore, no type fileToData map[string][]byte -func populateLocalStore(t *testing.T, st *NomsBlockStore, numTableFiles int) fileToData { +func writeLocalTableFiles(t *testing.T, st *NomsBlockStore, numTableFiles, seed int) (map[string]int, fileToData) { ctx := context.Background() fileToData := make(fileToData, numTableFiles) - fileIDToNumChunks := make(map[string]int) + fileIDToNumChunks := make(map[string]int, numTableFiles) for i := 0; i < numTableFiles; i++ { var chunkData [][]byte for j := 0; j < i+1; j++ { - chunkData = append(chunkData, []byte(fmt.Sprintf("%d:%d", i, j))) + chunkData = append(chunkData, []byte(fmt.Sprintf("%d:%d:%d", i, j, seed))) } data, addr, err := buildTable(chunkData) require.NoError(t, err) @@ -77,9 +77,14 @@ 
func populateLocalStore(t *testing.T, st *NomsBlockStore, numTableFiles int) fil }) require.NoError(t, err) } + return fileIDToNumChunks, fileToData +} + +func populateLocalStore(t *testing.T, st *NomsBlockStore, numTableFiles int) fileToData { + ctx := context.Background() + fileIDToNumChunks, fileToData := writeLocalTableFiles(t, st, numTableFiles, 0) err := st.AddTableFilesToManifest(ctx, fileIDToNumChunks) require.NoError(t, err) - return fileToData } @@ -190,8 +195,10 @@ func TestNBSPruneTableFiles(t *testing.T) { numTableFiles := 64 maxTableFiles := 16 st, nomsDir, _ := makeTestLocalStore(t, maxTableFiles) - fileToData := populateLocalStore(t, st, numTableFiles) defer st.Close() + fileToData := populateLocalStore(t, st, numTableFiles) + + _, toDeleteToData := writeLocalTableFiles(t, st, numTableFiles, 32) // add a chunk and flush to trigger a conjoin c := chunks.NewChunk([]byte("it's a boy!")) @@ -212,6 +219,9 @@ func TestNBSPruneTableFiles(t *testing.T) { // assert some input table files were conjoined assert.NotEmpty(t, absent) + toDelete := tfSet.findAbsent(toDeleteToData) + assert.Len(t, toDelete, len(toDeleteToData)) + currTableFiles := func(dirName string) *set.StrSet { infos, err := os.ReadDir(dirName) require.NoError(t, err) @@ -228,7 +238,7 @@ func TestNBSPruneTableFiles(t *testing.T) { for _, tf := range sources { assert.True(t, preGC.Contains(tf.FileID())) } - for _, fileName := range absent { + for _, fileName := range toDelete { assert.True(t, preGC.Contains(fileName)) } @@ -237,11 +247,14 @@ func TestNBSPruneTableFiles(t *testing.T) { postGC := currTableFiles(nomsDir) for _, tf := range sources { - assert.True(t, preGC.Contains(tf.FileID())) + assert.True(t, postGC.Contains(tf.FileID())) } for _, fileName := range absent { assert.False(t, postGC.Contains(fileName)) } + for _, fileName := range toDelete { + assert.False(t, postGC.Contains(fileName)) + } infos, err := os.ReadDir(nomsDir) require.NoError(t, err) diff --git 
a/go/store/nbs/table_persister.go b/go/store/nbs/table_persister.go index 220906c962..e1b898a8a9 100644 --- a/go/store/nbs/table_persister.go +++ b/go/store/nbs/table_persister.go @@ -34,6 +34,8 @@ import ( var errCacheMiss = errors.New("index cache miss") +type cleanupFunc func() + // tablePersister allows interaction with persistent storage. It provides // primitives for pushing the contents of a memTable to persistent storage, // opening persistent tables for reading, and conjoining a number of existing @@ -45,8 +47,10 @@ type tablePersister interface { Persist(ctx context.Context, mt *memTable, haver chunkReader, stats *Stats) (chunkSource, error) // ConjoinAll conjoins all chunks in |sources| into a single, new - // chunkSource. - ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) + // chunkSource. It returns a |cleanupFunc| which can be called to + // potentially release resources associated with the |sources| once + // they are no longer needed. + ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) // Open a table named |name|, containing |chunkCount| chunks. Open(ctx context.Context, name addr, chunkCount uint32, stats *Stats) (chunkSource, error) diff --git a/go/store/nbs/table_reader.go b/go/store/nbs/table_reader.go index 3306103d10..8f7f5fb435 100644 --- a/go/store/nbs/table_reader.go +++ b/go/store/nbs/table_reader.go @@ -131,6 +131,8 @@ func (ir indexResult) Length() uint32 { type tableReaderAt interface { ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) Reader(ctx context.Context) (io.ReadCloser, error) + Close() error + clone() (tableReaderAt, error) } // tableReader implements get & has queries against a single nbs table. goroutine safe. 
@@ -663,7 +665,12 @@ func (tr tableReader) currentSize() uint64 { } func (tr tableReader) close() error { - return tr.idx.Close() + err := tr.idx.Close() + if err != nil { + tr.r.Close() + return err + } + return tr.r.Close() } func (tr tableReader) clone() (tableReader, error) { @@ -671,22 +678,15 @@ func (tr tableReader) clone() (tableReader, error) { if err != nil { return tableReader{}, err } + r, err := tr.r.clone() + if err != nil { + idx.Close() + return tableReader{}, err + } return tableReader{ prefixes: tr.prefixes, idx: idx, - r: tr.r, + r: r, blockSize: tr.blockSize, }, nil } - -type readerAdapter struct { - rat tableReaderAt - off int64 - ctx context.Context -} - -func (ra *readerAdapter) Read(p []byte) (n int, err error) { - n, err = ra.rat.ReadAtWithStats(ra.ctx, p, ra.off, &Stats{}) - ra.off += int64(n) - return -} diff --git a/go/store/nbs/table_set.go b/go/store/nbs/table_set.go index 4cfc0ae386..87459c5aae 100644 --- a/go/store/nbs/table_set.go +++ b/go/store/nbs/table_set.go @@ -114,6 +114,9 @@ func (ts tableSet) hasMany(addrs []hasRecord) (bool, error) { } func (ts tableSet) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) { + if err := ctx.Err(); err != nil { + return nil, err + } f := func(css chunkSourceSet) ([]byte, error) { for _, haver := range css { data, err := haver.get(ctx, h, stats) diff --git a/go/store/prolly/tuple_map.go b/go/store/prolly/tuple_map.go index 171fb65196..c779a3d8d0 100644 --- a/go/store/prolly/tuple_map.go +++ b/go/store/prolly/tuple_map.go @@ -35,12 +35,6 @@ type Map struct { valDesc val.TupleDesc } -type DiffSummary struct { - Adds, Removes uint64 - Changes, CellChanges uint64 - NewSize, OldSize uint64 -} - // NewMap creates an empty prolly Tree Map func NewMap(node tree.Node, ns tree.NodeStore, keyDesc, valDesc val.TupleDesc) Map { tuples := tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{ diff --git a/go/utils/copyrightshdrs/main.go b/go/utils/copyrightshdrs/main.go index cb3361c3fd..e0e59629bc 
100644 --- a/go/utils/copyrightshdrs/main.go +++ b/go/utils/copyrightshdrs/main.go @@ -137,7 +137,6 @@ var CopiedNomsFiles []CopiedNomsFile = []CopiedNomsFile{ {Path: "store/diff/patch.go", NomsPath: "go/diff/patch.go", HadCopyrightNotice: true}, {Path: "store/diff/patch_test.go", NomsPath: "go/diff/patch_test.go", HadCopyrightNotice: true}, {Path: "store/diff/print_diff.go", NomsPath: "go/diff/print_diff.go", HadCopyrightNotice: true}, - {Path: "store/diff/summary.go", NomsPath: "go/diff/summary.go", HadCopyrightNotice: true}, {Path: "store/hash/base32.go", NomsPath: "go/hash/base32.go", HadCopyrightNotice: true}, {Path: "store/hash/base32_test.go", NomsPath: "go/hash/base32_test.go", HadCopyrightNotice: true}, {Path: "store/hash/hash.go", NomsPath: "go/hash/hash.go", HadCopyrightNotice: true}, @@ -183,15 +182,11 @@ var CopiedNomsFiles []CopiedNomsFile = []CopiedNomsFile{ {Path: "store/nbs/dynamo_manifest_test.go", NomsPath: "go/nbs/dynamo_manifest_test.go", HadCopyrightNotice: true}, {Path: "store/nbs/dynamo_table_reader.go", NomsPath: "go/nbs/dynamo_table_reader.go", HadCopyrightNotice: true}, {Path: "store/nbs/dynamo_table_reader_test.go", NomsPath: "go/nbs/dynamo_table_reader_test.go", HadCopyrightNotice: true}, - {Path: "store/nbs/fd_cache.go", NomsPath: "go/nbs/fd_cache.go", HadCopyrightNotice: true}, - {Path: "store/nbs/fd_cache_test.go", NomsPath: "go/nbs/fd_cache_test.go", HadCopyrightNotice: true}, {Path: "store/nbs/file_manifest.go", NomsPath: "go/nbs/file_manifest.go", HadCopyrightNotice: true}, {Path: "store/nbs/file_manifest_test.go", NomsPath: "go/nbs/file_manifest_test.go", HadCopyrightNotice: true}, {Path: "store/nbs/file_table_persister.go", NomsPath: "go/nbs/file_table_persister.go", HadCopyrightNotice: true}, {Path: "store/nbs/file_table_persister_test.go", NomsPath: "go/nbs/file_table_persister_test.go", HadCopyrightNotice: true}, {Path: "store/nbs/frag/main.go", NomsPath: "go/nbs/frag/main.go", HadCopyrightNotice: true}, - {Path: 
"store/nbs/fs_table_cache.go", NomsPath: "go/nbs/fs_table_cache.go", HadCopyrightNotice: true}, - {Path: "store/nbs/fs_table_cache_test.go", NomsPath: "go/nbs/fs_table_cache_test.go", HadCopyrightNotice: true}, {Path: "store/nbs/manifest.go", NomsPath: "go/nbs/manifest.go", HadCopyrightNotice: true}, {Path: "store/nbs/manifest_cache.go", NomsPath: "go/nbs/manifest_cache.go", HadCopyrightNotice: true}, {Path: "store/nbs/manifest_cache_test.go", NomsPath: "go/nbs/manifest_cache_test.go", HadCopyrightNotice: true}, diff --git a/docs/Dolt-Logo@3x.svg b/images/Dolt-Logo@3x.svg similarity index 100% rename from docs/Dolt-Logo@3x.svg rename to images/Dolt-Logo@3x.svg diff --git a/docs/getting-started-new-updates.png b/images/getting-started-new-updates.png similarity index 100% rename from docs/getting-started-new-updates.png rename to images/getting-started-new-updates.png diff --git a/docs/getting-started-tp-connect.png b/images/getting-started-tp-connect.png similarity index 100% rename from docs/getting-started-tp-connect.png rename to images/getting-started-tp-connect.png diff --git a/docs/getting-started-tp.png b/images/getting-started-tp.png similarity index 100% rename from docs/getting-started-tp.png rename to images/getting-started-tp.png diff --git a/integration-tests/bats/diff-stat.bats b/integration-tests/bats/diff-stat.bats new file mode 100644 index 0000000000..0551a45758 --- /dev/null +++ b/integration-tests/bats/diff-stat.bats @@ -0,0 +1,341 @@ +#!/usr/bin/env bats +load $BATS_TEST_DIRNAME/helper/common.bash + +setup() { + setup_common + + dolt sql < employees.csv +"id","first name","last name","title","start date","end date" +0,tim,sehn,ceo,"","" +1,aaron,son,founder,"","" +2,brian,hendricks,founder,"","" +DELIM + dolt table import -c -pk=id employees employees.csv + dolt add employees + dolt commit -m "Added employees table with data" + + dolt sql -q "alter table employees add city longtext" + dolt sql -q "insert into employees values (3, 'taylor', 
'bantle', 'software engineer', '', '', 'Santa Monica')" + + run dolt diff --stat + [ "$status" -eq 0 ] + [[ "$output" =~ "3 Rows Unmodified (100.00%)" ]] || false + [[ "$output" =~ "1 Row Added (33.33%)" ]] || false + [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false + [[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false + [[ "$output" =~ "10 Cells Added (55.56%)" ]] || false + [[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false + [[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false + [[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false + + run dolt diff --summary + [ "$status" -eq 0 ] + [[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false + [[ "$output" =~ "| employees | modified | true | true |" ]] || false + + dolt sql -q "replace into employees values (0, 'tim', 'sehn', 'ceo', '2 years ago', '', 'Santa Monica')" + + dolt diff --stat + run dolt diff --stat + [ "$status" -eq 0 ] + [[ "$output" =~ "2 Rows Unmodified (66.67%)" ]] || false + [[ "$output" =~ "1 Row Added (33.33%)" ]] || false + [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false + [[ "$output" =~ "1 Row Modified (33.33%)" ]] || false + [[ "$output" =~ "10 Cells Added (55.56%)" ]] || false + [[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false + [[ "$output" =~ "2 Cells Modified (11.11%)" ]] || false + [[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false + + run dolt diff --summary + [ "$status" -eq 0 ] + [[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false + [[ "$output" =~ "| employees | modified | true | true |" ]] || false +} + +@test "diff-stat: stat/summary gets summaries for all tables with changes" { + dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)" + dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)" + dolt sql < employees.csv -"id","first name","last name","title","start date","end date" -0,tim,sehn,ceo,"","" -1,aaron,son,founder,"","" -2,brian,hendricks,founder,"","" -DELIM - 
dolt table import -c -pk=id employees employees.csv - dolt add employees - dolt commit -m "Added employees table with data" - - dolt sql -q "alter table employees add city longtext" - dolt sql -q "insert into employees values (3, 'taylor', 'bantle', 'software engineer', '', '', 'Santa Monica')" - - dolt diff --summary - run dolt diff --summary - [ "$status" -eq 0 ] - [[ "$output" =~ "3 Rows Unmodified (100.00%)" ]] || false - [[ "$output" =~ "1 Row Added (33.33%)" ]] || false - [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false - [[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false - [[ "$output" =~ "10 Cells Added (55.56%)" ]] || false - [[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false - [[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false - [[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false - - dolt sql -q "replace into employees values (0, 'tim', 'sehn', 'ceo', '2 years ago', '', 'Santa Monica')" - - dolt diff --summary - run dolt diff --summary - [ "$status" -eq 0 ] - [[ "$output" =~ "2 Rows Unmodified (66.67%)" ]] || false - [[ "$output" =~ "1 Row Added (33.33%)" ]] || false - [[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false - [[ "$output" =~ "1 Row Modified (33.33%)" ]] || false - [[ "$output" =~ "10 Cells Added (55.56%)" ]] || false - [[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false - [[ "$output" =~ "2 Cells Modified (11.11%)" ]] || false - [[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false -} - -@test "diff: summary gets summaries for all tables with changes" { - dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)" - dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)" - dolt sql <> .dolt/noms/b0f6n6b1ej7a9ovalt0rr80bsentq807 + done + BEFORE=$(du -c .dolt/noms/ | grep total | sed 's/[^0-9]*//g') run dolt gc --shallow [ "$status" -eq 0 ] diff --git a/integration-tests/bats/import-replace-tables.bats b/integration-tests/bats/import-replace-tables.bats index b0547f4d64..f8b19d583f 100644 --- 
a/integration-tests/bats/import-replace-tables.bats +++ b/integration-tests/bats/import-replace-tables.bats @@ -324,7 +324,7 @@ SQL dolt add . dolt commit --allow-empty -m "update table from parquet file" - run dolt diff --summary main new_branch + run dolt diff --stat main new_branch [ "$status" -eq 0 ] [[ "$output" = "" ]] || false } diff --git a/integration-tests/bats/import-update-tables.bats b/integration-tests/bats/import-update-tables.bats index 27976902d8..57f669c5c4 100644 --- a/integration-tests/bats/import-update-tables.bats +++ b/integration-tests/bats/import-update-tables.bats @@ -440,7 +440,7 @@ DELIM dolt add . dolt commit --allow-empty -m "update table from parquet file" - run dolt diff --summary main new_branch + run dolt diff --stat main new_branch [ "$status" -eq 0 ] [[ "$output" = "" ]] || false } diff --git a/integration-tests/bats/keyless.bats b/integration-tests/bats/keyless.bats index 46f1727f24..bd89c1d002 100644 --- a/integration-tests/bats/keyless.bats +++ b/integration-tests/bats/keyless.bats @@ -204,14 +204,14 @@ SQL [[ "${#lines[@]}" = "13" ]] || false } -@test "keyless: diff --summary" { +@test "keyless: diff --stat" { dolt sql <