Merge branch 'main' into zachmu/multi-db

Zach Musgrave
2023-02-28 09:30:06 -08:00
101 changed files with 4098 additions and 2819 deletions
+61
@@ -0,0 +1,61 @@
import os
import shutil
import sys
import random

if len(sys.argv) != 5:
    print("usage: python3 data.py <output-dir> <table-num> <row-num> <add-num>")
    sys.exit(1)

table_dir = sys.argv[1]
tables = int(sys.argv[2])
rows = int(sys.argv[3])
adds = int(sys.argv[4])

if __name__ == "__main__":
    # start from a clean output directory
    if os.path.exists(table_dir):
        shutil.rmtree(table_dir, ignore_errors=True)
    os.makedirs(table_dir)

    ys = [i for i in range(rows + adds)]
    random.shuffle(ys)

    with open(f"{table_dir}/create.sql", "+w") as f:
        for i in range(tables):
            if i == 0:
                f.write(f"create table table{i} (x int primary key, y int, z int, key y_idx(y));\n")
            else:
                f.write(f"create table table{i} (x int primary key, y int, z int, key y_idx(y), foreign key (y) references table{i-1}(y));\n")

    for j in range(tables):
        with open(f"{table_dir}/table{j}.csv", "+w") as f:
            f.write("x,y,z\n")
            for i in range(rows):
                f.write(f"{i},{ys[i]},{i}\n")

    with open(f"{table_dir}/branch.sql", "+w") as f:
        for i in range(tables):
            f.write("set foreign_key_checks = 0;\n")
            f.write("set unique_checks = 0;\n")
            f.write(f"insert into table{i} values\n")
            for j, k in enumerate(ys[rows:rows+adds]):
                if j == 0:
                    f.write(" ")
                else:
                    f.write(", ")
                f.write(f"({rows+j},{k},{rows+j})")
            f.write(";\n")

    with open(f"{table_dir}/diverge_main.sql", "+w") as f:
        for i in range(tables):
            f.write("set foreign_key_checks = 0;\n")
            f.write("set unique_checks = 0;\n")
            f.write(f"insert into table{i} values\n")
            for j, k in enumerate(ys[rows:rows+adds]):
                if j == 0:
                    f.write(" ")
                else:
                    f.write(", ")
                f.write(f"({rows+j},{k+1},{rows+j})")
            f.write(";\n")
+38
@@ -0,0 +1,38 @@
#!/bin/bash

if [ "$#" -ne 2 ]; then
    echo "usage: setup.sh <dolt-dir> <data-dir>"
    exit 1
fi

DIR=$1
DATA=$2

rm -rf "$DIR"
mkdir "$DIR"
cd "$DIR"

dolt init
dolt sql < "$DATA/create.sql"

i=0
for t in $(ls "$DATA/" | grep "table"); do
    echo "$t"
    dolt table import --disable-fk-checks -u "table${i}" "$DATA/$t"
    ((i++))
done

dolt commit -Am "add tables"
dolt sql < "$DATA/diverge_main.sql"
dolt commit -Am "add rows to conflict"

dolt checkout -b feature
dolt reset --hard head~1
dolt sql < "$DATA/branch.sql"
dolt commit -Am "new branch"
+1 -1
@@ -53,7 +53,7 @@ jobs:
fi
- name: Configure AWS Credentials
if: ${{ env.use_credentials == 'true' }}
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+2 -2
@@ -85,7 +85,7 @@ jobs:
fi
- name: Configure AWS Credentials
if: ${{ env.use_credentials == 'true' }}
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -159,7 +159,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+1 -1
@@ -19,7 +19,7 @@ jobs:
uses: ./.github/actions/orm-tests
- name: Configure AWS Credentials
if: ${{ failure() }}
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+1 -1
@@ -11,7 +11,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+1 -1
@@ -134,7 +134,7 @@ jobs:
- name: Configure AWS Credentials
if: ${{ github.event.client_payload.email_recipient != '' }}
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+1 -1
@@ -24,7 +24,7 @@ jobs:
sudo cp ./aws-iam-authenticator /usr/local/bin/aws-iam-authenticator
aws-iam-authenticator version
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+1 -1
@@ -18,7 +18,7 @@ jobs:
with:
version: 'v1.23.6'
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+1 -1
@@ -17,7 +17,7 @@ jobs:
with:
version: 'v1.23.6'
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+70
@@ -0,0 +1,70 @@
name: Run Merge Benchmark on Pull Requests
on:
  pull_request:
    types: [ opened ]
  issue_comment:
    types: [ created ]

jobs:
  validate-commentor:
    runs-on: ubuntu-22.04
    outputs:
      valid: ${{ steps.set_valid.outputs.valid }}
    steps:
      - uses: actions/checkout@v3
      - name: Validate Commentor
        id: set_valid
        run: ./.github/scripts/performance-benchmarking/validate-commentor.sh "$ACTOR"
        env:
          ACTOR: ${{ github.actor }}

  check-comments:
    runs-on: ubuntu-22.04
    needs: validate-commentor
    if: ${{ needs.validate-commentor.outputs.valid == 'true' }}
    outputs:
      benchmark: ${{ steps.set_benchmark.outputs.benchmark }}
      comment-body: ${{ steps.set_body.outputs.body }}
    steps:
      - name: Check for Deploy Trigger
        uses: dolthub/pull-request-comment-trigger@master
        id: check
        with:
          trigger: '#merge-benchmark'
          reaction: rocket
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Set Benchmark
        if: ${{ steps.check.outputs.triggered == 'true' }}
        id: set_benchmark
        run: |
          echo "benchmark=true" >> $GITHUB_OUTPUT

  performance:
    runs-on: ubuntu-22.04
    needs: [validate-commentor, check-comments]
    if: ${{ needs.check-comments.outputs.benchmark == 'true' }}
    name: Trigger Benchmark Merge Workflow
    steps:
      - uses: dolthub/pull-request-comment-branch@v3
        id: comment-branch
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
      - name: Get pull number
        uses: actions/github-script@v6
        id: get_pull_number
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: core.setOutput("pull_number", JSON.stringify(context.issue.number));
      - uses: peter-evans/repository-dispatch@v2.0.0
        with:
          token: ${{ secrets.REPO_ACCESS_TOKEN }}
          event-type: benchmark-merge
          client-payload: |
            {
              "version": "${{ steps.comment-branch.outputs.head_sha }}",
              "commit_to_branch": "${{ steps.comment-branch.outputs.head_sha }}",
              "actor": "${{ github.actor }}",
              "issue_id": "${{ steps.get_pull_number.outputs.pull_number }}"
            }
+183
@@ -0,0 +1,183 @@
name: Merge Benchmarks
on:
  repository_dispatch:
    types: [ benchmark-merge ]

env:
  SCRIPT_DIR: '.github/scripts/merge-perf'
  RESULT_TABLE_NAME: 'merge_perf_results'
  DOLTHUB_DB: 'import-perf/merge-perf'

jobs:
  bench:
    name: Benchmark
    defaults:
      run:
        shell: bash
    strategy:
      fail-fast: true
    runs-on: ubuntu-latest
    steps:
      - name: Set up Go 1.x
        id: go
        uses: actions/setup-go@v3
        with:
          go-version: ^1.19
      - name: Setup Python 3.x
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Dolt version
        id: version
        run: |
          version=${{ github.event.client_payload.version }}
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.event.client_payload.version }}
      - name: Install dolt
        working-directory: ./go
        run: go install ./cmd/dolt
      - name: Run bench
        id: bench
        run: |
          dolt config --global --add user.email "merge-perf@dolthub.com"
          dolt config --global --add user.name "merge-perf"
          gw=$GITHUB_WORKSPACE
          DATADIR=$gw/data
          TABLE_NUM=2
          ROW_NUM=1000000
          ADD_NUM=60000
          python ${{ env.SCRIPT_DIR }}/data.py $DATADIR $TABLE_NUM $ROW_NUM $ADD_NUM
          TMPDIR=$gw/tmp
          ./${{ env.SCRIPT_DIR }}/setup.sh $TMPDIR $DATADIR
          TIMES=$gw/time.log
          cd $TMPDIR
          latency=$(python3 -c "import time, subprocess; start = time.time(); res=subprocess.run(['dolt', 'merge', 'main'], capture_output=True); output = res.stdout + res.stderr if res.returncode != 0 else time.time() - start; print(output); exit(res.returncode)")
          RESULTS=$gw/results.sql
          echo "CREATE TABLE ${{ env.RESULT_TABLE_NAME }} (name varchar(50) primary key, table_cnt int, run_cnt int, add_cnt int, conflict_cnt int, fks bool, latency float);" >> $RESULTS
          echo "INSERT INTO ${{ env.RESULT_TABLE_NAME }} values ('1m rows, 60k conflicts', 2, $ROW_NUM, $ADD_NUM, $ADD_NUM, true, $latency);" >> $RESULTS
          echo "::set-output name=result_path::$RESULTS"
      - name: Report
        id: report
        run: |
          gw=$GITHUB_WORKSPACE
          in="${{ steps.bench.outputs.result_path }}"
          query="select name, round(latency, 2) as latency from ${{ env.RESULT_TABLE_NAME }}"
          summaryq="select round(avg(latency), 2) as avg from ${{ env.RESULT_TABLE_NAME }}"
          out="$gw/results.csv"
          dolt_dir="$gw/merge-perf"
          dolt config --global --add user.email "merge-perf@dolthub.com"
          dolt config --global --add user.name "merge-perf"
          echo '${{ secrets.DOLTHUB_IMPORT_PERF_CREDS_VALUE }}' | dolt creds import
          dolt clone ${{ env.DOLTHUB_DB }} "$dolt_dir"
          cd "$dolt_dir"
          branch="${{ github.event.client_payload.commit_to_branch }}"
          # checkout branch
          if [ -z "$(dolt sql -q "select 1 from dolt_branches where name = '$branch';")" ]; then
            dolt checkout -b $branch
          else
            dolt checkout $branch
          fi
          dolt sql -q "drop table if exists ${{ env.RESULT_TABLE_NAME }}"
          # load results
          dolt sql < "$in"
          # push results to dolthub
          dolt add ${{ env.RESULT_TABLE_NAME }}
          dolt commit -m "CI commit"
          dolt push -f origin $branch
          # generate report
          dolt sql -r csv -q "$query" > "$out"
          cat "$out"
          echo "::set-output name=report_path::$out"
          avg=$(dolt sql -r csv -q "$summaryq" | tail -1)
          echo "::set-output name=avg::$avg"
      - name: Format Results
        id: html
        if: ${{ github.event.client_payload.email_recipient != '' }}
        run: |
          gw="$GITHUB_WORKSPACE"
          in="${{ steps.report.outputs.report_path }}"
          out="$gw/results.html"
          echo "<table>" > "$out"
          print_header=true
          while read line; do
            if "$print_header"; then
              echo "  <tr><th>${line//,/</th><th>}</th></tr>" >> "$out"
              print_header=false
              continue
            fi
            echo "  <tr><td>${line//,/</td><td>}</td></tr>" >> "$out"
          done < "$in"
          echo "</table>" >> "$out"
          avg="${{ steps.report.outputs.avg }}"
          echo "<table><tr><th>Average</th></tr><tr><td>$avg</td></tr></table>" >> "$out"
          cat "$out"
          echo "::set-output name=html::$out"
      - name: Configure AWS Credentials
        if: ${{ github.event.client_payload.email_recipient != '' }}
        uses: aws-actions/configure-aws-credentials@v1-node16
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: us-west-2
      - name: Send Email
        uses: ./.github/actions/ses-email-action
        if: ${{ github.event.client_payload.email_recipient != '' }}
        with:
          region: us-west-2
          toAddresses: '["${{ github.event.client_payload.email_recipient }}"]'
          subject: 'System Table Performance Benchmarks: ${{ github.event.client_payload.version }}'
          bodyPath: ${{ steps.html.outputs.html }}
          template: 'SysbenchTemplate'
      - name: Read CSV
        if: ${{ github.event.client_payload.issue_id != '' }}
        id: csv
        uses: juliangruber/read-file-action@v1
        with:
          path: "${{ steps.report.outputs.report_path }}"
      - name: Create MD
        if: ${{ github.event.client_payload.issue_id != '' }}
        uses: petems/csv-to-md-table-action@master
        id: md
        with:
          csvinput: ${{ steps.csv.outputs.content }}
      - uses: mshick/add-pr-comment@v2
        if: ${{ github.event.client_payload.issue_id != '' }}
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          issue: ${{ github.event.client_payload.issue_id }}
          message-failure: merge benchmark failed
          message-cancelled: merge benchmark cancelled
          allow-repeats: true
          message: |
            @${{ github.event.client_payload.actor }} __DOLT__
            ${{ steps.md.outputs.markdown-table }}
@@ -52,3 +52,14 @@ jobs:
"commit_to_branch": "nightly",
"actor": "${{ github.actor }}"
}
- uses: peter-evans/repository-dispatch@v2.0.0
with:
token: ${{ secrets.REPO_ACCESS_TOKEN }}
event-type: benchmark-merge
client-payload: |
{
"email_recipient": "${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}",
"version": "${{ github.sha }}",
"commit_to_branch": "nightly",
"actor": "${{ github.actor }}"
}
+1 -1
@@ -13,7 +13,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v3
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+1 -1
@@ -135,7 +135,7 @@ jobs:
- name: Configure AWS Credentials
if: ${{ github.event.client_payload.email_recipient != '' }}
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@567d4149d67f15f52b09796bea6573fc32952783
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+4 -4
@@ -1,4 +1,4 @@
<img height="100" src="./docs/Dolt-Logo@3x.svg"/>
<img height="100" src="./images/Dolt-Logo@3x.svg"/>
# Dolt is Git for Data!
@@ -526,11 +526,11 @@ Hate the command line? Let's use [Tableplus](https://tableplus.com/) to make som
Now, to connect you must select MySQL as the connection type. Then enter a name for your connection, `getting_started` as your database, and `root` as your user.
![Tableplus Connection](./docs/getting-started-tp-connect.png)
![Tableplus Connection](./images/getting-started-tp-connect.png)
Click connect and you'll be presented with a familiar database workbench GUI.
![Tableplus](./docs/getting-started-tp.png)
![Tableplus](./images/getting-started-tp.png)
## Make changes on a branch
@@ -549,7 +549,7 @@ call dolt_commit('-am', 'Modifications on a branch');
Here's the result in Tableplus.
![New Updates](./docs/getting-started-new-updates.png)
![New Updates](./images/getting-started-new-updates.png)
Back in my terminal, I cannot see the table modifications made in Tableplus because they happened on a different branch than the one I have checked out in my session.
-132
@@ -1,132 +0,0 @@
#!/bin/bash
# This script installs and starts a dolt server on your Unix-compatible computer.
if test -z "$BASH_VERSION"; then
echo "Please run this script using bash, not sh or any other shell. It should be run as root." >&2
exit 1
fi
_() {
install_dolt() {
# Install Dolt if it doesn't already exist
echo "Installing Dolt..."
if ! command -v dolt &> /dev/null
then
sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | bash'
fi
}
setup_configs() {
# Set up the dolt user along with core dolt configurations
echo "Setting up Configurations..."
# Check if the user "dolt" already exists. If it exists, double-check that it is okay to continue
if id -u "dolt" &> /dev/null; then
echo "The user dolt already exists"
read -r -p "Do you want to continue adding privileges to the existing user dolt? " response
response=${response,,} # tolower
if ! ([[ $response =~ ^(yes|y| ) ]] || [[ -z $response ]]); then
exit 1
fi
else
# add the user if `dolt` doesn't exist
useradd -r -m -d /var/lib/doltdb dolt
fi
cd /var/lib/doltdb
read -e -p "Enter an email associated with your user: " -i "dolt-user@dolt.com" email
read -e -p "Enter a username associated with your user: " -i "Dolt Server Account" username
sudo -u dolt dolt config --global --add user.email "$email"
sudo -u dolt dolt config --global --add user.name "$username"
}
# Database creation
database_configuration() {
echo "Setting up the dolt database..."
read -e -p "Input the name of your database: " -i "mydb" db_name
local db_dir="databases/$db_name"
cd /var/lib/doltdb
sudo -u dolt mkdir -p $db_dir
cd $db_dir
sudo -u dolt dolt init
}
# Setup and Start daemon
start_server() {
echo "Starting the server"
cd ~
cat > dolt_config.yaml<<EOF
log_level: info
behavior:
read_only: false
autocommit: true
user:
name: root
password: ""
listener:
host: localhost
port: 3306
max_connections: 100
read_timeout_millis: 28800000
write_timeout_millis: 28800000
tls_key: null
tls_cert: null
require_secure_transport: null
databases: []
performance:
query_parallelism: null
EOF
cat > doltdb.service<<EOF
[Unit]
Description=dolt SQL server
After=network.target
[Install]
WantedBy=multi-user.target
[Service]
User=dolt
Group=dolt
ExecStart=/usr/local/bin/dolt sql-server --config=dolt_config.yaml
WorkingDirectory=/var/lib/doltdb/databases/$db_name
KillSignal=SIGTERM
SendSIGKILL=no
EOF
sudo chown root:root doltdb.service
sudo chmod 644 doltdb.service
sudo mv doltdb.service /etc/systemd/system
sudo cp dolt_config.yaml /var/lib/doltdb/databases/$db_name
sudo systemctl daemon-reload
sudo systemctl enable doltdb.service
sudo systemctl start doltdb
}
validate_status() {
if systemctl --state=active | grep "doltdb.service"; then
echo "Sever successfully started..."
else
echo "ERROR: Server did not start properly..."
fi
}
install_dolt
setup_configs
database_configuration
start_server
validate_status
}
_ "$0" "$@"
-1
File diff suppressed because one or more lines are too long (image, 12 KiB)

-10
@@ -1,10 +0,0 @@
<svg width="163" height="56" viewBox="0 0 163 56" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M28.87 7.0459V45.8632C28.8654 46.7997 28.498 47.6965 27.8476 48.3591C27.1971 49.0217 26.316 49.3964 25.3957 49.402H10.4953C9.5713 49.402 8.68489 49.0298 8.0299 48.3666C7.3749 47.7035 7.00462 46.8034 7 45.8632V24.7722C7.00462 23.832 7.3749 22.9319 8.0299 22.2688C8.68489 21.6056 9.5713 21.2334 10.4953 21.2334H22.2115" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M156.3 49.4019H145.283" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M156.026 21.5259H134.174" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M145.336 7.0498V49.4024" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M72.2752 7.68311H59.049C56.6669 7.68311 54.7358 9.64808 54.7358 12.072V44.8074C54.7358 47.2313 56.6669 49.1963 59.049 49.1963H72.2752C74.6573 49.1963 76.5884 47.2313 76.5884 44.8074V12.072C76.5884 9.64808 74.6573 7.68311 72.2752 7.68311Z" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M119.586 49.4019H99.418" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M110.344 7.0498V49.4024" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M109.884 7H98.7939" stroke="#29E3C1" stroke-width="12.6599" stroke-linecap="round" stroke-linejoin="round"/>
</svg>

Binary file not shown (48 KiB).

Binary file not shown (22 KiB).

Binary file not shown (142 KiB).

Binary file not shown (33 KiB).

-85
@@ -1,85 +0,0 @@
# Dolt FAQ
## Why is it called Dolt? Are you calling me dumb?
It's named `dolt` to pay homage to [how Linus Torvalds named
git](https://en.wikipedia.org/wiki/Git#Naming):
> Torvalds sarcastically quipped about the name git (which means
> "unpleasant person" in British English slang): "I'm an egotistical
> bastard, and I name all my projects after myself. First 'Linux',
> now 'git'."
We wanted a word meaning "idiot", starting with D for Data,
short enough to type on the command line, and
not taken in the standard command line lexicon. So,
`dolt`.
## The MySQL shell gives me an error: `Can't connect to local MySQL server through socket '/tmp/mysql.sock'`
The MySQL shell will try to connect through a socket file on many OSes.
To force it to use TCP instead, give it the loopback address like this:
```bash
% mysql --host 127.0.0.1 ...
```
## What does `@@autocommit` do?
This is a SQL variable that you can turn on for your SQL session like so:
`SET @@autocommit = 1`
It's on by default in the MySQL shell, as well as in most clients. But
some clients (notably the Python MySQL connector) turn it off by
default.
You must commit your changes for them to persist after your session
ends, either by setting `@@autocommit` to on, or by issuing `COMMIT`
statements manually.
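For example, here is a minimal sketch using the Python connector mentioned above (it assumes a Dolt SQL server running locally and an existing table `t`; both are illustrative):

```python
import mysql.connector

# The Python connector leaves autocommit off by default, so edits are
# discarded when the session ends unless you commit them.
conn = mysql.connector.connect(host="127.0.0.1", port=3306,
                               user="root", database="mydb")
cur = conn.cursor()

cur.execute("SET @@autocommit = 1")           # option 1: turn autocommit on
cur.execute("INSERT INTO t (x) VALUES (1)")   # persisted automatically

# option 2: leave autocommit off and commit explicitly
# cur.execute("INSERT INTO t (x) VALUES (2)")
# conn.commit()

conn.close()
```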
## What's the difference between `COMMIT` and `DOLT_COMMIT()`?
`COMMIT` is a standard SQL statement that commits a transaction. In
dolt, it just flushes any pending changes in the current SQL session
to disk, updating the working set. HEAD stays the same, but your
working set changes. This means your edits will persist after this
session ends.
`DOLT_COMMIT()` commits the current SQL transaction, then creates a
new dolt commit on the current branch. It's the same as if you run
`dolt commit` from the command line.
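A short sketch of the difference, again via the Python connector (the server address, database, and table names are illustrative):

```python
import mysql.connector

conn = mysql.connector.connect(host="127.0.0.1", port=3306,
                               user="root", database="mydb")
cur = conn.cursor()

cur.execute("INSERT INTO t (x) VALUES (1)")
conn.commit()  # SQL COMMIT: flushes to the working set; HEAD is unchanged

# DOLT_COMMIT() additionally creates a dolt commit on the current branch,
# just like running `dolt commit` on the command line.
cur.execute("CALL DOLT_COMMIT('-am', 'insert a row')")
conn.close()
```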
## I want each of my connected SQL users to get their own branch to make changes on, then merge them back into `main` when they're done making edits. How do I do that?
We are glad you asked! This is a common use case, and giving each user
their own branch is something we've spent a lot of time getting
right. For more details on how to use this pattern effectively, see
[using branches](https://docs.dolthub.com/reference/sql/branches).
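As a rough sketch of the pattern (the branch and table names here are made up; see the docs linked above for the full story), each user can check out a private branch over SQL and merge it back when done:

```python
import mysql.connector

conn = mysql.connector.connect(host="127.0.0.1", port=3306,
                               user="root", database="mydb")
cur = conn.cursor()

cur.execute("CALL DOLT_CHECKOUT('-b', 'alice/edits')")  # private branch
cur.execute("UPDATE t SET x = x + 1 WHERE id = 1")
cur.execute("CALL DOLT_COMMIT('-am', 'alice: bump x')")

cur.execute("CALL DOLT_CHECKOUT('main')")               # back to main
cur.execute("CALL DOLT_MERGE('alice/edits')")           # fold the edits in
conn.close()
```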
## Does Dolt support transactions?
Yes, transactions should work exactly the same as in MySQL, but with
fewer locks for competing writes.
It's also possible for different sessions to connect to different
branches on the same server. See [using
branches](https://docs.dolthub.com/reference/sql/branches) for details.
## What SQL features / syntax are supported?
Most of them! Check out [the docs for the full list of supported
features](https://docs.dolthub.com/reference/sql/support).
You can check out what we're working on next on our
[roadmap](roadmap.md). Paying customers get their feature requests
bumped to the front of the line.
## Does Dolt support my favorite SQL workbench / tool?
Probably! Have you tried it? If you try it and it doesn't work, [let
us know with an issue](https://github.com/dolthub/dolt/issues) or in
[our Discord](https://discord.com/invite/RFwfYpu) and we'll see what
we can do. A lot of times we can fix small compatibility issues really
quick, like the same week. And even if we can't, we want to know about
it! Our goal is to be a 100% drop-in replacement for MySQL.
-182
@@ -1,182 +0,0 @@
# Dolt quickstart guide
This is a one-page guide to getting you started with Dolt as quickly
as possible. If you're trying to participate in a
[data bounty](https://www.dolthub.com/bounties), this will get you
up and running. We think bounties are the most engaging way to get
started using Dolt and DoltHub and understand how it all works.
This guide is intended for new data bounty participants, and is geared
to that use case. You can find more complete documentation on how to
use Dolt in the [README](../README.md) and in the [DoltHub
documentation](https://docs.dolthub.com/introduction/installation).
## Install Dolt
```sh
% sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | bash'
```
For Windows installation, see [here](windows.md).
## Configure dolt
```sh
% dolt config --global --add user.email YOU@DOMAIN.COM
% dolt config --global --add user.name "YOUR NAME"
```
## Fork the data bounty
Forking a database makes a private copy for you to edit. Find the
database you want to edit, then click the "Fork" button on the top
left.
![Forking a repository](dolthub-fork.png)
## Clone your fork
Cloning your fork of the database downloads it to your local computer
so you can make changes to it. Click
"Clone" to find the command to copy and paste into your terminal. This
clone command will be different for every fork, so you can't just copy
and paste the command in the text below.
![Cloning a repository](dolthub-clone.png)
Run the command, then cd into the database directory.
```sh
% dolt clone dolthub/hospital-price-transparency
% cd hospital-price-transparency
```
## Inspect the data
Get familiar with the tables and their columns. The easiest way to do
this is by using SQL commands. `show tables` and `describe <tablename>` are good commands to use when exploring a new database.
```sql
% dolt sql
# Welcome to the DoltSQL shell.
# Statements must be terminated with ';'.
# "exit" or "quit" (or Ctrl-D) to exit.
hospital_price_transparency> show tables;
+-----------+
| Table |
+-----------+
| cpt_hcpcs |
| hospitals |
| prices |
+-----------+
hospital_price_transparency> describe hospitals;
+----------------+--------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+----------------+--------------+------+-----+---------+-------+
| npi_number | char(16) | NO | PRI | | |
| name | varchar(256) | YES | | | |
| url | varchar(512) | YES | | | |
| street_address | varchar(512) | YES | | | |
| city | varchar(64) | YES | | | |
| state | varchar(32) | YES | | | |
| zip_code | varchar(16) | YES | | | |
| publish_date | date | YES | | | |
+----------------+--------------+------+-----+---------+-------+
hospital_price_transparency> select npi_number, name, street_address from hospitals limit 3;
+------------+------------------------------------+---------------------+
| npi_number | name | street_address |
+------------+------------------------------------+---------------------+
| 1003873225 | The Specialty Hospital Of Meridian | 1314 19th Ave |
| 1023061405 | Grandview Medical Center | 3690 Grandview Pkwy |
| 1023180502 | Medical City Dallas | 7777 Forest Ln |
+------------+------------------------------------+---------------------+
hospital_price_transparency> exit
Bye
```
## Add some data
There are two main ways to add data into your copy of the
database. You can either import from files, or you can add data by
writing scripts and inserting rows with SQL statements.
### Importing files
Use the `dolt table import` command to import CSV or JSON files. Use
the `-u` option to update the table (instead of replacing the
contents).
```sh
% dolt table import -u prices hospital_prices.csv
```
### Starting a SQL server
If you want to write a script to insert data with python or another
programming language, start a SQL server on the command line:
```sh
% dolt sql-server
Starting server with Config HP="localhost:3306"|T="28800000"|R="false"|L="info"
```
Then connect to the database with any standard MySQL connector and
make your edits.
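For example, here is a minimal Python sketch against the server started above (the row values are made up; the table is the `hospitals` table shown earlier):

```python
import mysql.connector

conn = mysql.connector.connect(host="127.0.0.1", port=3306, user="root",
                               database="hospital_price_transparency")
cur = conn.cursor()

rows = [("1999999999", "Example Hospital", "123 Main St")]  # made-up row
cur.executemany(
    "INSERT INTO hospitals (npi_number, name, street_address) VALUES (%s, %s, %s)",
    rows,
)
conn.commit()  # the Python connector leaves autocommit off by default
conn.close()
```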
## See your changes
After you've inserted some data, you can inspect the changes you made
using `dolt diff`. If you added a lot of rows, use the `--summary` flag
to get a summary instead.
```sh
% dolt diff
% dolt diff --summary
```
## Commit your changes
These commands work like `git`, if you know `git`. If you don't know
`git`, don't worry! Most people who know `git` don't actually know
`git` either!
```sh
% dolt add .
% dolt commit -m "This message describes my changes"
```
You can repeat these steps as many times as you have more changes to add:
1. Add data
2. Commit your changes
Every time you commit it creates a checkpoint you can roll back to if
you mess up later.
## Push your changes back to DoltHub and create a PR
When you're done adding data, push the database back to DoltHub and
submit a pull request (PR) to merge your changes back into the original database.
```sh
% dolt push origin master
```
![Create new PR](dolthub-pr-1.png)
![Create new PR](dolthub-pr-2.png)
## Respond to PR review feedback
Your PR will be reviewed by the people running the bounty, and they
may ask you to make changes. If they do, then go ahead and make your
changes on your machine, then `dolt push` those new commits back to
DoltHub and your existing PR will automatically be updated with them.
## Questions? Still need help?
Come hang out with us on [our
Discord](https://discord.com/invite/RFwfYpu), where the team that
builds Dolt and lots of other customers are available to chat and ask
questions. If this guide is missing something obvious, come tell us
there!
-58
@@ -1,58 +0,0 @@
# Dolt Feature Roadmap
Full details on [supported SQL
features](https://docs.dolthub.com/reference/sql/support) are
available on the docs site.
This is a selection of unimplemented features we're working on. Don't
see what you need on here? [Let us
know!](https://github.com/dolthub/dolt/issues) Paying customers get
their feature requests implemented first.
Roadmap last updated Apr 2022, next update Jun 2022.
## Upcoming features
| Feature | Estimate |
| ------- | --- |
| 99.9% SQL correctness | Q2 2022 |
| Hosted Dolt v1 | Q2 2022 |
| Hash join strategy | Q2 2022 |
| Storage performance | Q2 2022 |
| Lock / unlock tables | Q2 2022 |
| SQL GUI support tests | Q2 2022 |
| `JSON_TABLE()` | Q2 2022 |
| Table / index statistics | Q2 2022 |
| Universal SQL path for CLI | Q2 2022 |
| Pipeline query processing | Q3 2022 |
| Row-level locking (`SELECT FOR UPDATE`) | Q3 2022 |
| All transaction isolation levels | Q3 2022 |
| Postgres Support | 2023 |
| Automatic garbage collection | Unscheduled |
| Collation and charset support | Unscheduled |
| Virtual columns and json indexing | Unscheduled |
| Full text indexes | Unscheduled |
| Spatial indexes | Unscheduled |
| Multiple DBs in one repo | Unscheduled |
| Embedded dolt | Unscheduled |
| Signed commits | Unscheduled |
| Cross-database joins with indexes | Unscheduled |
| More function coverage | Ongoing |
## Recently launched features
| Feature | Launch Date |
| ------- | --- |
| Join for update | Oct 2021 |
| Backup and replication | Nov 2021 |
| Commit graph performance | Nov 2021 |
| Persistent SQL configuration | Dec 2021 |
| CREATE / DROP DATABASE | Dec 2021 |
| Hosted Dolt Alpha | Jan 2022 |
| `ROWS` window definitions | Jan 2022 |
| `RANGE` window definitions | Jan 2022 |
| DoltLab (on-prem DoltHub) | Jan 2022 |
| Users / grants | Feb 2022 |
| Geometry types and functions | Feb 2022 |
| Better `dolt_diff` table experience | Mar 2022 |
-24
@@ -1,24 +0,0 @@
# Windows support
Dolt is tested and supported on Windows! If you find any problems
specific to Windows, please file an
[issue](https://github.com/dolthub/dolt/issues/) and let us know.
## Installation
Download the latest Microsoft Installer (`.msi` file) in
[releases](https://github.com/dolthub/dolt/releases) and run it.
Package manager releases coming soon!
## Environment
Dolt runs best under the Windows Subsystem for Linux, or WSL. But it
should also work fine with `cmd.exe` or `powershell`. If you find this
isn't true, please file an
[issue](https://github.com/dolthub/dolt/issues/) and let us know.
WSL 2 currently has [known
bugs](https://github.com/dolthub/dolt/issues/992), so we recommend
using WSL 1 for now. Or if you do use WSL 2, we recommend using the
Linux `dolt` binary, rather than the Windows `dolt.exe` binary.
+57 -7
@@ -23,6 +23,7 @@ import (
"strings"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/cmd/dolt/commands/engine"
@@ -35,7 +36,9 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlfmt"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil"
"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/tabular"
"github.com/dolthub/dolt/go/libraries/utils/argparser"
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
"github.com/dolthub/dolt/go/libraries/utils/set"
)
@@ -46,7 +49,8 @@ type diffMode int
const (
SchemaOnlyDiff diffPart = 1 // 0b0001
DataOnlyDiff diffPart = 2 // 0b0010
Summary diffPart = 4 // 0b0100
Stat diffPart = 4 // 0b0100
Summary diffPart = 8 // 0b1000
SchemaAndDataDiff = SchemaOnlyDiff | DataOnlyDiff
@@ -56,6 +60,7 @@ const (
DataFlag = "data"
SchemaFlag = "schema"
StatFlag = "stat"
SummaryFlag = "summary"
whereParam = "where"
limitParam = "limit"
@@ -138,7 +143,8 @@ func (cmd DiffCmd) ArgParser() *argparser.ArgParser {
ap := argparser.NewArgParser()
ap.SupportsFlag(DataFlag, "d", "Show only the data changes, do not show the schema changes (Both shown by default).")
ap.SupportsFlag(SchemaFlag, "s", "Show only the schema changes, do not show the data changes (Both shown by default).")
ap.SupportsFlag(SummaryFlag, "", "Show summary of data changes")
ap.SupportsFlag(StatFlag, "", "Show stats of data changes")
ap.SupportsFlag(SummaryFlag, "", "Show summary of data and schema changes")
ap.SupportsString(FormatFlag, "r", "result output format", "How to format diff output. Valid values are tabular, sql, json. Defaults to tabular.")
ap.SupportsString(whereParam, "", "column", "filters columns based on values in the diff. See {{.EmphasisLeft}}dolt diff --help{{.EmphasisRight}} for details.")
ap.SupportsInt(limitParam, "", "record_count", "limits to the first N diffs.")
@@ -173,9 +179,9 @@ func (cmd DiffCmd) Exec(ctx context.Context, commandStr string, args []string, d
}
func (cmd DiffCmd) validateArgs(apr *argparser.ArgParseResults) errhand.VerboseError {
if apr.Contains(SummaryFlag) {
if apr.Contains(StatFlag) || apr.Contains(SummaryFlag) {
if apr.Contains(SchemaFlag) || apr.Contains(DataFlag) {
return errhand.BuildDError("invalid Arguments: --summary cannot be combined with --schema or --data").Build()
return errhand.BuildDError("invalid Arguments: --stat and --summary cannot be combined with --schema or --data").Build()
}
}
@@ -197,6 +203,8 @@ func parseDiffArgs(ctx context.Context, dEnv *env.DoltEnv, apr *argparser.ArgPar
dArgs.diffParts = DataOnlyDiff
} else if apr.Contains(SchemaFlag) && !apr.Contains(DataFlag) {
dArgs.diffParts = SchemaOnlyDiff
} else if apr.Contains(StatFlag) {
dArgs.diffParts = Stat
} else if apr.Contains(SummaryFlag) {
dArgs.diffParts = Summary
}
@@ -248,6 +256,10 @@ func parseDiffArgs(ctx context.Context, dEnv *env.DoltEnv, apr *argparser.ArgPar
if err != nil {
return nil, err
}
if ok {
dArgs.tableSet.Add(tableName)
continue
}
if !ok {
return nil, fmt.Errorf("table %s does not exist in either revision", tableName)
}
@@ -467,6 +479,41 @@ func maybeResolve(ctx context.Context, dEnv *env.DoltEnv, spec string) (*doltdb.
return root, true
}
var diffSummarySchema = sql.Schema{
&sql.Column{Name: "Table name", Type: types.Text, Nullable: false},
&sql.Column{Name: "Diff type", Type: types.Text, Nullable: false},
&sql.Column{Name: "Data change", Type: types.Boolean, Nullable: false},
&sql.Column{Name: "Schema change", Type: types.Boolean, Nullable: false},
}
func printDiffSummary(ctx context.Context, tds []diff.TableDelta, dArgs *diffArgs) errhand.VerboseError {
cliWR := iohelp.NopWrCloser(cli.OutStream)
wr := tabular.NewFixedWidthTableWriter(diffSummarySchema, cliWR, 100)
defer wr.Close(ctx)
for _, td := range tds {
if !dArgs.tableSet.Contains(td.FromName) && !dArgs.tableSet.Contains(td.ToName) {
continue
}
if td.FromTable == nil && td.ToTable == nil {
return errhand.BuildDError("error: both tables in tableDelta are nil").Build()
}
summ, err := td.GetSummary(ctx)
if err != nil {
return errhand.BuildDError("could not get table delta summary").AddCause(err).Build()
}
err = wr.WriteSqlRow(ctx, sql.Row{td.CurName(), summ.DiffType, summ.DataChange, summ.SchemaChange})
if err != nil {
return errhand.BuildDError("could not write table delta summary").AddCause(err).Build()
}
}
return nil
}
func diffUserTables(ctx context.Context, dEnv *env.DoltEnv, dArgs *diffArgs) errhand.VerboseError {
var err error
@@ -490,6 +537,10 @@ func diffUserTables(ctx context.Context, dEnv *env.DoltEnv, dArgs *diffArgs) err
return strings.Compare(tableDeltas[i].ToName, tableDeltas[j].ToName) < 0
})
if dArgs.diffParts&Summary != 0 {
return printDiffSummary(ctx, tableDeltas, dArgs)
}
dw, err := newDiffWriter(dArgs.diffOutput)
if err != nil {
return errhand.VerboseErrorFromError(err)
@@ -538,8 +589,8 @@ func diffUserTable(
return errhand.BuildDError("cannot retrieve schema for table %s", td.ToName).AddCause(err).Build()
}
if dArgs.diffParts&Summary != 0 {
return printDiffSummary(ctx, td, fromSch.GetAllCols().Size(), toSch.GetAllCols().Size())
if dArgs.diffParts&Stat != 0 {
return printDiffStat(ctx, td, fromSch.GetAllCols().Size(), toSch.GetAllCols().Size())
}
if dArgs.diffParts&SchemaOnlyDiff != 0 {
@@ -687,7 +738,6 @@ func diffRows(
}
fromSch = pkSch.Schema
}
if td.ToSch != nil {
pkSch, err := sqlutil.FromDoltSchema(td.ToName, td.ToSch)
if err != nil {
+8 -8
@@ -65,18 +65,18 @@ func newDiffWriter(diffOutput diffOutput) (diffWriter, error) {
}
}
func printDiffSummary(ctx context.Context, td diff.TableDelta, oldColLen, newColLen int) errhand.VerboseError {
func printDiffStat(ctx context.Context, td diff.TableDelta, oldColLen, newColLen int) errhand.VerboseError {
// todo: use errgroup.Group
ae := atomicerr.New()
ch := make(chan diff.DiffSummaryProgress)
ch := make(chan diff.DiffStatProgress)
go func() {
defer close(ch)
err := diff.SummaryForTableDelta(ctx, ch, td)
err := diff.StatForTableDelta(ctx, ch, td)
ae.SetIfError(err)
}()
acc := diff.DiffSummaryProgress{}
acc := diff.DiffStatProgress{}
var count int64
var pos int
eP := cli.NewEphemeralPrinter()
@@ -119,15 +119,15 @@ func printDiffSummary(ctx context.Context, td diff.TableDelta, oldColLen, newCol
}
if keyless {
printKeylessSummary(acc)
printKeylessStat(acc)
} else {
printSummary(acc, oldColLen, newColLen)
printStat(acc, oldColLen, newColLen)
}
return nil
}
func printSummary(acc diff.DiffSummaryProgress, oldColLen, newColLen int) {
func printStat(acc diff.DiffStatProgress, oldColLen, newColLen int) {
numCellInserts, numCellDeletes := sqle.GetCellsAddedAndDeleted(acc, newColLen)
rowsUnmodified := uint64(acc.OldRowSize - acc.Changes - acc.Removes)
unmodified := pluralize("Row Unmodified", "Rows Unmodified", rowsUnmodified)
@@ -161,7 +161,7 @@ func printSummary(acc diff.DiffSummaryProgress, oldColLen, newColLen int) {
cli.Printf("(%s vs %s)\n\n", oldValues, newValues)
}
func printKeylessSummary(acc diff.DiffSummaryProgress) {
func printKeylessStat(acc diff.DiffStatProgress) {
insertions := pluralize("Row Added", "Rows Added", acc.Adds)
deletions := pluralize("Row Deleted", "Rows Deleted", acc.Removes)
+1 -1
@@ -56,7 +56,7 @@ import (
)
const (
Version = "0.53.1"
Version = "0.53.2"
)
var dumpDocsCommand = &commands.DumpDocsCmd{}
+3 -4
@@ -3,10 +3,9 @@ module github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi
go 1.13
require (
github.com/golang/protobuf v1.4.2
golang.org/x/net v0.0.0-20200602114024-627f9648deb9 // indirect
golang.org/x/sys v0.0.0-20200620081246-981b61492c35 // indirect
golang.org/x/text v0.3.3 // indirect
github.com/golang/protobuf v1.4.2 // indirect
golang.org/x/text v0.3.8 // indirect
google.golang.org/genproto v0.0.0-20200622133129-d0ee0c36e670 // indirect
google.golang.org/grpc v1.29.1
google.golang.org/protobuf v1.24.0
)
+23 -13
@@ -10,7 +10,6 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
@@ -24,43 +23,56 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200620081246-981b61492c35 h1:wb/9mP8eUAmHfkM8RmpeLq6nUA7c2i5+bQOtcDftjaE=
golang.org/x/sys v0.0.0-20200620081246-981b61492c35/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8 h1:Nw54tB0rB7hY/N0NQvRW8DG4Yk3Q6T9cu9RcFQDu1tc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
@@ -68,8 +80,6 @@ google.golang.org/genproto v0.0.0-20200622133129-d0ee0c36e670 h1:v/N9fZIfu6jopNI
google.golang.org/genproto v0.0.0-20200622133129-d0ee0c36e670/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.24.0 h1:vb/1TCsVn3DcJlQ0Gs1yB1pKI6Do2/QNwxdKqmc/b0s=
google.golang.org/grpc v1.24.0/go.mod h1:XDChyiUovWa60DnaeDeZmSW86xtLtjtZbwvSiRnRtcA=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.29.1 h1:EC2SB8S04d2r73uptxphDSUG+kTKVgjRPF+N3xpxRB4=
+2 -2
@@ -15,7 +15,7 @@ require (
github.com/dolthub/fslock v0.0.3
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81
github.com/dolthub/vitess v0.0.0-20230216234925-189ffe819e56
github.com/dolthub/vitess v0.0.0-20230223032306-95d4b04eabad
github.com/dustin/go-humanize v1.0.0
github.com/fatih/color v1.13.0
github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568
@@ -58,7 +58,7 @@ require (
github.com/cenkalti/backoff/v4 v4.1.3
github.com/cespare/xxhash v1.1.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/go-mysql-server v0.14.1-0.20230218000648-8448267c2200
github.com/dolthub/go-mysql-server v0.14.1-0.20230227175231-786abd289f41
github.com/google/flatbuffers v2.0.6+incompatible
github.com/jmoiron/sqlx v1.3.4
github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
+12 -6
@@ -166,16 +166,16 @@ github.com/dolthub/flatbuffers v1.13.0-dh.1 h1:OWJdaPep22N52O/0xsUevxJ6Qfw1M2txC
github.com/dolthub/flatbuffers v1.13.0-dh.1/go.mod h1:CorYGaDmXjHz1Z7i50PYXG1Ricn31GcA2wNOTFIQAKE=
github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-mysql-server v0.14.1-0.20230218000648-8448267c2200 h1:j8tumbRo3G2YgE5qcKlAznGfbgHqZa/e3Li7wsuizj8=
github.com/dolthub/go-mysql-server v0.14.1-0.20230218000648-8448267c2200/go.mod h1:BRFyf6PUuoR+iSLZ+JdpjtqgHzo5cT+tF7oHIpVdytY=
github.com/dolthub/go-mysql-server v0.14.1-0.20230227175231-786abd289f41 h1:8vc9pwtRgqb1RIJyWHsTetx+VZnd7pZlzCewTQIXk7Y=
github.com/dolthub/go-mysql-server v0.14.1-0.20230227175231-786abd289f41/go.mod h1:I2Mu8LSpwUII53EyBXqJMEKTQH5DUetV4ulP88JVsKA=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488/go.mod h1:ehexgi1mPxRTk0Mok/pADALuHbvATulTh6gzr7NzZto=
github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474 h1:xTrR+l5l+1Lfq0NvhiEsctylXinUMFhhsqaEcl414p8=
github.com/dolthub/jsonpath v0.0.0-20210609232853-d49537a30474/go.mod h1:kMz7uXOXq4qRriCEyZ/LUeTqraLJCjf0WVZcUi6TxUY=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY=
github.com/dolthub/vitess v0.0.0-20230216234925-189ffe819e56 h1:dHuKfUwaDUe847BVN3Wo+4GUGUNdlhuUif4RWkvG3Go=
github.com/dolthub/vitess v0.0.0-20230216234925-189ffe819e56/go.mod h1:oVFIBdqMFEkt4Xz2fzFJBNtzKhDEjwdCF0dzde39iKs=
github.com/dolthub/vitess v0.0.0-20230223032306-95d4b04eabad h1:9FPQtKoqyREEsHfGKNU2DImktOusXTXklLtvTxtIuZ0=
github.com/dolthub/vitess v0.0.0-20230223032306-95d4b04eabad/go.mod h1:oVFIBdqMFEkt4Xz2fzFJBNtzKhDEjwdCF0dzde39iKs=
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
@@ -638,6 +638,7 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg=
github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
@@ -744,6 +745,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.7.0 h1:LapD9S96VoQRhi/GrNTqeBJFrUjs5UHCAtTlgwA5oZA=
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -778,7 +780,6 @@ golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
@@ -789,6 +790,7 @@ golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLd
golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
@@ -810,6 +812,7 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -853,7 +856,6 @@ golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200620081246-981b61492c35/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -876,6 +878,8 @@ golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220111092808-5a964db01320/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -892,6 +896,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -954,6 +959,7 @@ golang.org/x/tools v0.0.0-20200915173823-2db8f0ff891c/go.mod h1:z6u4i615ZeAfBE4X
golang.org/x/tools v0.0.0-20200918232735-d647fc253266/go.mod h1:z6u4i615ZeAfBE4XtMziQW1fSVJXACjjbWkB/mvPzlU=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.3.0 h1:SrNbZl6ECOS1qFzgTdQfWXZM9XBkiA6tkFrH9YSTPHM=
golang.org/x/tools v0.3.0/go.mod h1:/rWhSS2+zyEVwoJf8YAX6L2f0ntZ7Kn/mGgAWcipA5k=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -34,16 +34,16 @@ import (
var ErrPrimaryKeySetChanged = errors.New("primary key set changed")
type DiffSummaryProgress struct {
type DiffStatProgress struct {
Adds, Removes, Changes, CellChanges, NewRowSize, OldRowSize, NewCellSize, OldCellSize uint64
}
type prollyReporter func(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffSummaryProgress) error
type nomsReporter func(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffSummaryProgress) error
type prollyReporter func(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffStatProgress) error
type nomsReporter func(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffStatProgress) error
// Summary reports a summary of diff changes between two values
// Stat reports statistics of diff changes between two values
// todo: make package private once dolthub is migrated
func Summary(ctx context.Context, ch chan DiffSummaryProgress, from, to durable.Index, fromSch, toSch schema.Schema) (err error) {
func Stat(ctx context.Context, ch chan DiffStatProgress, from, to durable.Index, fromSch, toSch schema.Schema) (err error) {
fc, err := from.Count()
if err != nil {
return err
@@ -52,7 +52,7 @@ func Summary(ctx context.Context, ch chan DiffSummaryProgress, from, to durable.
if err != nil {
return err
}
ch <- DiffSummaryProgress{OldRowSize: fc, NewRowSize: tc}
ch <- DiffStatProgress{OldRowSize: fc, NewRowSize: tc}
fk, tk := schema.IsKeyless(fromSch), schema.IsKeyless(toSch)
var keyless bool
@@ -69,15 +69,15 @@ func Summary(ctx context.Context, ch chan DiffSummaryProgress, from, to durable.
return diffNomsMaps(ctx, ch, keyless, from, to, fromSch, toSch)
}
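Callers aggregate these progress messages themselves; a minimal consumption sketch, assuming fromIdx, toIdx, fromSch, and toSch are in scope (calcTableMergeStats further down shows the real usage):
ch := make(chan diff.DiffStatProgress)
go func() {
defer close(ch)
_ = diff.Stat(ctx, ch, fromIdx, toIdx, fromSch, toSch) // error handling elided for brevity
}()
var acc diff.DiffStatProgress
for p := range ch {
// count fields arrive incrementally; size fields arrive once up front
acc.Adds += p.Adds
acc.Removes += p.Removes
acc.Changes += p.Changes
acc.CellChanges += p.CellChanges
acc.OldRowSize += p.OldRowSize
acc.NewRowSize += p.NewRowSize
}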
// SummaryForTableDelta pushes diff summary progress messages for the table delta given to the channel given
func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td TableDelta) error {
// StatForTableDelta pushes diff stat progress messages for the given table delta to the given channel
func StatForTableDelta(ctx context.Context, ch chan DiffStatProgress, td TableDelta) error {
fromSch, toSch, err := td.GetSchemas(ctx)
if err != nil {
return errhand.BuildDError("cannot retrieve schema for table %s", td.ToName).AddCause(err).Build()
}
if !schema.ArePrimaryKeySetsDiffable(td.Format(), fromSch, toSch) {
return fmt.Errorf("failed to compute diff summary for table %s: %w", td.CurName(), ErrPrimaryKeySetChanged)
return fmt.Errorf("failed to compute diff stat for table %s: %w", td.CurName(), ErrPrimaryKeySetChanged)
}
keyless, err := td.IsKeyless(ctx)
@@ -97,7 +97,7 @@ func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td T
}
}
func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless bool, from, to durable.Index, fromSch, toSch schema.Schema) error {
func diffProllyTrees(ctx context.Context, ch chan DiffStatProgress, keyless bool, from, to durable.Index, fromSch, toSch schema.Schema) error {
_, vMapping, err := schema.MapSchemaBasedOnTagAndName(fromSch, toSch)
if err != nil {
return err
@@ -123,7 +123,7 @@ func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless b
}
ctc := uint64(len(toSch.GetAllCols().GetColumns())) * tc
rpr = reportPkChanges
ch <- DiffSummaryProgress{
ch <- DiffStatProgress{
OldRowSize: fc,
NewRowSize: tc,
OldCellSize: cfc,
@@ -140,7 +140,7 @@ func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless b
return nil
}
func diffNomsMaps(ctx context.Context, ch chan DiffSummaryProgress, keyless bool, fromRows durable.Index, toRows durable.Index, fromSch, toSch schema.Schema) error {
func diffNomsMaps(ctx context.Context, ch chan DiffStatProgress, keyless bool, fromRows durable.Index, toRows durable.Index, fromSch, toSch schema.Schema) error {
var rpr nomsReporter
if keyless {
rpr = reportNomsKeylessChanges
@@ -156,7 +156,7 @@ func diffNomsMaps(ctx context.Context, ch chan DiffSummaryProgress, keyless bool
}
ctc := uint64(len(toSch.GetAllCols().GetColumns())) * tc
rpr = reportNomsPkChanges
ch <- DiffSummaryProgress{
ch <- DiffStatProgress{
OldRowSize: fc,
NewRowSize: tc,
OldCellSize: cfc,
@@ -164,10 +164,10 @@ func diffNomsMaps(ctx context.Context, ch chan DiffSummaryProgress, keyless bool
}
}
return summaryWithReporter(ctx, ch, durable.NomsMapFromIndex(fromRows), durable.NomsMapFromIndex(toRows), rpr, fromSch, toSch)
return statWithReporter(ctx, ch, durable.NomsMapFromIndex(fromRows), durable.NomsMapFromIndex(toRows), rpr, fromSch, toSch)
}
func summaryWithReporter(ctx context.Context, ch chan DiffSummaryProgress, from, to types.Map, rpr nomsReporter, fromSch, toSch schema.Schema) (err error) {
func statWithReporter(ctx context.Context, ch chan DiffStatProgress, from, to types.Map, rpr nomsReporter, fromSch, toSch schema.Schema) (err error) {
ad := NewAsyncDiffer(1024)
ad.Start(ctx, from, to)
defer func() {
@@ -199,50 +199,50 @@ func summaryWithReporter(ctx context.Context, ch chan DiffSummaryProgress, from,
return nil
}
func reportPkChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffSummaryProgress) error {
var sum DiffSummaryProgress
func reportPkChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffStatProgress) error {
var stat DiffStatProgress
switch change.Type {
case tree.AddedDiff:
sum.Adds++
stat.Adds++
case tree.RemovedDiff:
sum.Removes++
stat.Removes++
case tree.ModifiedDiff:
sum.CellChanges = prollyCountCellDiff(vMapping, fromD, toD, val.Tuple(change.From), val.Tuple(change.To))
sum.Changes++
stat.CellChanges = prollyCountCellDiff(vMapping, fromD, toD, val.Tuple(change.From), val.Tuple(change.To))
stat.Changes++
default:
return errors.New("unknown change type")
}
select {
case ch <- sum:
case ch <- stat:
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func reportKeylessChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffSummaryProgress) error {
var sum DiffSummaryProgress
func reportKeylessChanges(ctx context.Context, vMapping val.OrdinalMapping, fromD, toD val.TupleDesc, change tree.Diff, ch chan<- DiffStatProgress) error {
var stat DiffStatProgress
var n, n2 uint64
switch change.Type {
case tree.AddedDiff:
n, _ = toD.GetUint64(0, val.Tuple(change.To))
sum.Adds += n
stat.Adds += n
case tree.RemovedDiff:
n, _ = fromD.GetUint64(0, val.Tuple(change.From))
sum.Removes += n
stat.Removes += n
case tree.ModifiedDiff:
n, _ = fromD.GetUint64(0, val.Tuple(change.From))
n2, _ = toD.GetUint64(0, val.Tuple(change.To))
if n < n2 {
sum.Adds += n2 - n
stat.Adds += n2 - n
} else {
sum.Removes += n - n2
stat.Removes += n - n2
}
default:
return errors.New("unknown change type")
}
select {
case ch <- sum:
case ch <- stat:
return nil
case <-ctx.Done():
return ctx.Err()
@@ -280,13 +280,13 @@ func prollyCountCellDiff(mapping val.OrdinalMapping, fromD, toD val.TupleDesc, f
return changed
}
func reportNomsPkChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffSummaryProgress) error {
var summary DiffSummaryProgress
func reportNomsPkChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffStatProgress) error {
var stat DiffStatProgress
switch change.ChangeType {
case types.DiffChangeAdded:
summary = DiffSummaryProgress{Adds: 1}
stat = DiffStatProgress{Adds: 1}
case types.DiffChangeRemoved:
summary = DiffSummaryProgress{Removes: 1}
stat = DiffStatProgress{Removes: 1}
case types.DiffChangeModified:
oldTuple := change.OldValue.(types.Tuple)
newTuple := change.NewValue.(types.Tuple)
@@ -294,19 +294,19 @@ func reportNomsPkChanges(ctx context.Context, change *diff.Difference, fromSch,
if err != nil {
return err
}
summary = DiffSummaryProgress{Changes: 1, CellChanges: cellChanges}
stat = DiffStatProgress{Changes: 1, CellChanges: cellChanges}
default:
return errors.New("unknown change type")
}
select {
case ch <- summary:
case ch <- stat:
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffSummaryProgress) error {
func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, fromSch, toSch schema.Schema, ch chan<- DiffStatProgress) error {
var oldCard uint64
if change.OldValue != nil {
v, err := change.OldValue.(types.Tuple).Get(row.KeylessCardinalityValIdx)
@@ -325,18 +325,18 @@ func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, from
newCard = uint64(v.(types.Uint))
}
var summary DiffSummaryProgress
var stat DiffStatProgress
delta := int64(newCard) - int64(oldCard)
if delta > 0 {
summary = DiffSummaryProgress{Adds: uint64(delta)}
stat = DiffStatProgress{Adds: uint64(delta)}
} else if delta < 0 {
summary = DiffSummaryProgress{Removes: uint64(-delta)}
stat = DiffStatProgress{Removes: uint64(-delta)}
} else {
return fmt.Errorf("diff with delta = 0 for key: %s", change.KeyValue.HumanReadableString())
}
select {
case ch <- summary:
case ch <- stat:
return nil
case <-ctx.Done():
return ctx.Err()
@@ -57,6 +57,13 @@ type TableDelta struct {
FromFksParentSch map[string]schema.Schema
}
type TableDeltaSummary struct {
DiffType string
DataChange bool
SchemaChange bool
TableName string
}
// GetStagedUnstagedTableDeltas represents staged and unstaged changes as TableDelta slices.
func GetStagedUnstagedTableDeltas(ctx context.Context, roots doltdb.Roots) (staged, unstaged []TableDelta, err error) {
staged, err = GetTableDeltas(ctx, roots.Head, roots.Staged)
@@ -387,6 +394,83 @@ func (td TableDelta) IsKeyless(ctx context.Context) (bool, error) {
}
}
// isTableDataEmpty returns true if the table does not contain any data
func isTableDataEmpty(ctx context.Context, table *doltdb.Table) (bool, error) {
rowData, err := table.GetRowData(ctx)
if err != nil {
return false, err
}
return rowData.Empty()
}
// GetSummary returns a summary of the table delta.
func (td TableDelta) GetSummary(ctx context.Context) (*TableDeltaSummary, error) {
// Dropping a table is always a schema change, and also a data change if the table contained data
if td.IsDrop() {
isEmpty, err := isTableDataEmpty(ctx, td.FromTable)
if err != nil {
return nil, err
}
return &TableDeltaSummary{
TableName: td.FromName,
DataChange: !isEmpty,
SchemaChange: true,
DiffType: "dropped",
}, nil
}
// Renaming a table is always a schema change, and also a data change if the table data differs
if td.IsRename() {
dataChanged, err := td.HasHashChanged()
if err != nil {
return nil, err
}
return &TableDeltaSummary{
TableName: td.ToName,
DataChange: dataChanged,
SchemaChange: true,
DiffType: "renamed",
}, nil
}
// Creating a table is always a schema change, and also a data change if data was inserted
if td.IsAdd() {
isEmpty, err := isTableDataEmpty(ctx, td.ToTable)
if err != nil {
return nil, err
}
return &TableDeltaSummary{
TableName: td.ToName,
DataChange: !isEmpty,
SchemaChange: true,
DiffType: "added",
}, nil
}
// TODO: Renamed columns without a data change are not accounted for here;
// `dataChanged` is true when it should be false
dataChanged, err := td.HasHashChanged()
if err != nil {
return nil, err
}
schemaChanged, err := td.HasSchemaChanged(ctx)
if err != nil {
return nil, err
}
return &TableDeltaSummary{
TableName: td.ToName,
DataChange: dataChanged,
SchemaChange: schemaChanged,
DiffType: "modified",
}, nil
}
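A hedged sketch, from a caller's perspective, of how these summaries might be consumed (fromRoot and toRoot assumed in scope):
deltas, err := diff.GetTableDeltas(ctx, fromRoot, toRoot)
if err != nil {
return err
}
for _, td := range deltas {
summary, err := td.GetSummary(ctx)
if err != nil {
return err
}
// DiffType is one of "dropped", "renamed", "added", or "modified"
fmt.Printf("%s: %s (data change: %t, schema change: %t)\n",
summary.TableName, summary.DiffType, summary.DataChange, summary.SchemaChange)
}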
// GetRowData returns the table's row data at the fromRoot and toRoot, or an empty map if the table did not exist.
func (td TableDelta) GetRowData(ctx context.Context) (from, to durable.Index, err error) {
if td.FromTable == nil && td.ToTable == nil {
@@ -60,6 +60,7 @@ func mergeNomsTableData(
changeChan, mergeChangeChan := make(chan types.ValueChanged, 32), make(chan types.ValueChanged, 32)
originalCtx := ctx
eg, ctx := errgroup.WithContext(ctx)
eg.Go(func() error {
@@ -197,7 +198,7 @@ func mergeNomsTableData(
return nil, types.EmptyMap, nil, err
}
mergedTable, err := tblEdit.Table(ctx)
mergedTable, err := tblEdit.Table(originalCtx)
if err != nil {
return nil, types.EmptyMap, nil, err
}
+2 -2
View File
@@ -486,10 +486,10 @@ func calcTableMergeStats(ctx context.Context, tbl *doltdb.Table, mergeTbl *doltd
}
ae := atomicerr.New()
ch := make(chan diff.DiffSummaryProgress)
ch := make(chan diff.DiffStatProgress)
go func() {
defer close(ch)
err := diff.Summary(ctx, ch, rows, mergeRows, sch, mergeSch)
err := diff.Stat(ctx, ch, rows, mergeRows, sch, mergeSch)
ae.SetIfError(err)
}()
@@ -77,6 +77,10 @@ func TestMigration(t *testing.T) {
query: "SELECT count(*) FROM dolt_log",
expected: []sql.Row{{int64(2)}},
},
{
query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping",
expected: []sql.Row{{int64(2)}},
},
},
},
{
@@ -109,6 +113,36 @@ func TestMigration(t *testing.T) {
},
},
},
{
name: "create more commits",
setup: []string{
"CREATE TABLE test (pk int primary key)",
"INSERT INTO test VALUES (1),(2),(3)",
"CALL dolt_commit('-Am', 'new table')",
"INSERT INTO test VALUES (4)",
"CALL dolt_commit('-am', 'added row 4')",
"INSERT INTO test VALUES (5)",
"CALL dolt_commit('-am', 'added row 5')",
},
asserts: []assertion{
{
query: "SELECT count(*) FROM dolt_log",
expected: []sql.Row{{int64(4)}},
},
{
query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping",
expected: []sql.Row{{int64(4)}},
},
{
query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping WHERE new_commit_hash IN (SELECT commit_hash FROM dolt_log)",
expected: []sql.Row{{int64(4)}},
},
{
query: "SELECT count(*) FROM `dolt/dolt_migrated_commits`.dolt_commit_mapping WHERE new_commit_hash NOT IN (SELECT commit_hash FROM dolt_log)",
expected: []sql.Row{{int64(0)}},
},
},
},
}
for _, test := range tests {
+193 -61
View File
@@ -17,8 +17,14 @@ package migrate
import (
"context"
"fmt"
"io"
"time"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
"github.com/dolthub/dolt/go/libraries/doltcore/ref"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/store/datas"
"github.com/dolthub/dolt/go/cmd/dolt/cli"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/store/chunks"
@@ -31,37 +37,34 @@ import (
"github.com/dolthub/dolt/go/store/val"
)
type ChunkMapping interface {
Has(ctx context.Context, addr hash.Hash) (bool, error)
Get(ctx context.Context, addr hash.Hash) (hash.Hash, error)
Put(ctx context.Context, old, new hash.Hash) error
Close(ctx context.Context) error
}
const (
MigratedCommitsBranch = "dolt_migrated_commits"
MigratedCommitsTable = "dolt_commit_mapping"
)
type CommitStack interface {
Push(ctx context.Context, cm *doltdb.Commit) error
Pop(ctx context.Context) (*doltdb.Commit, error)
}
var (
mappingSchema, _ = schema.SchemaFromCols(schema.NewColCollection(
schema.NewColumn("old_commit_hash", 0, types.StringKind, true),
schema.NewColumn("new_commit_hash", 1, types.StringKind, false),
))
desc = val.NewTupleDescriptor(val.Type{Enc: val.StringEnc, Nullable: false})
)
type Progress interface {
ChunkMapping
CommitStack
// progress maintains the state of the migration.
type progress struct {
stack []*doltdb.Commit
Log(ctx context.Context, format string, args ...any)
Close(ctx context.Context) error
}
// A memory stack with a persisted commit mapping.
type memoryStackProgress struct {
stack []*doltdb.Commit
// mapping tracks migrated commits;
// it maps old commit hashes to new hashes
mapping *prolly.MutableMap
kb, vb *val.TupleBuilder
buffPool pool.BuffPool
vs *types.ValueStore
cs chunks.ChunkStore
vs *types.ValueStore
cs chunks.ChunkStore
}
func newProgress(ctx context.Context, cs chunks.ChunkStore) (Progress, error) {
func newProgress(ctx context.Context, cs chunks.ChunkStore) (*progress, error) {
kd := val.NewTupleDescriptor(val.Type{
Enc: val.ByteStringEnc,
Nullable: false,
@@ -83,7 +86,7 @@ func newProgress(ctx context.Context, cs chunks.ChunkStore) (Progress, error) {
kb := val.NewTupleBuilder(kd)
vb := val.NewTupleBuilder(vd)
return &memoryStackProgress{
return &progress{
stack: make([]*doltdb.Commit, 0, 128),
mapping: mut,
kb: kb,
@@ -94,18 +97,18 @@ func newProgress(ctx context.Context, cs chunks.ChunkStore) (Progress, error) {
}, nil
}
func (mem *memoryStackProgress) Has(ctx context.Context, addr hash.Hash) (ok bool, err error) {
mem.kb.PutByteString(0, addr[:])
k := mem.kb.Build(mem.buffPool)
return mem.mapping.Has(ctx, k)
func (p *progress) Has(ctx context.Context, addr hash.Hash) (ok bool, err error) {
p.kb.PutByteString(0, addr[:])
k := p.kb.Build(p.buffPool)
return p.mapping.Has(ctx, k)
}
func (mem *memoryStackProgress) Get(ctx context.Context, old hash.Hash) (new hash.Hash, err error) {
mem.kb.PutByteString(0, old[:])
k := mem.kb.Build(mem.buffPool)
err = mem.mapping.Get(ctx, k, func(_, v val.Tuple) error {
func (p *progress) Get(ctx context.Context, old hash.Hash) (new hash.Hash, err error) {
p.kb.PutByteString(0, old[:])
k := p.kb.Build(p.buffPool)
err = p.mapping.Get(ctx, k, func(_, v val.Tuple) error {
if len(v) > 0 {
n, ok := mem.vb.Desc.GetBytes(0, v)
n, ok := p.vb.Desc.GetBytes(0, v)
if !ok {
return fmt.Errorf("failed to get string address from commit mapping value")
}
@@ -116,56 +119,185 @@ func (mem *memoryStackProgress) Get(ctx context.Context, old hash.Hash) (new has
return
}
func (mem *memoryStackProgress) Put(ctx context.Context, old, new hash.Hash) (err error) {
mem.kb.PutByteString(0, old[:])
k := mem.kb.Build(mem.buffPool)
mem.vb.PutByteString(0, new[:])
v := mem.vb.Build(mem.buffPool)
err = mem.mapping.Put(ctx, k, v)
func (p *progress) Put(ctx context.Context, old, new hash.Hash) (err error) {
p.kb.PutByteString(0, old[:])
k := p.kb.Build(p.buffPool)
p.vb.PutByteString(0, new[:])
v := p.vb.Build(p.buffPool)
err = p.mapping.Put(ctx, k, v)
return
}
func (mem *memoryStackProgress) Push(ctx context.Context, cm *doltdb.Commit) (err error) {
mem.stack = append(mem.stack, cm)
func (p *progress) Push(ctx context.Context, cm *doltdb.Commit) (err error) {
p.stack = append(p.stack, cm)
return
}
func (mem *memoryStackProgress) Pop(ctx context.Context) (cm *doltdb.Commit, err error) {
if len(mem.stack) == 0 {
func (p *progress) Pop(ctx context.Context) (cm *doltdb.Commit, err error) {
if len(p.stack) == 0 {
return nil, nil
}
top := len(mem.stack) - 1
cm = mem.stack[top]
mem.stack = mem.stack[:top]
top := len(p.stack) - 1
cm = p.stack[top]
p.stack = p.stack[:top]
return
}
func (mem *memoryStackProgress) Log(ctx context.Context, format string, args ...any) {
func (p *progress) Log(ctx context.Context, format string, args ...any) {
cli.Println(time.Now().UTC().String() + " " + fmt.Sprintf(format, args...))
}
func (mem *memoryStackProgress) Close(ctx context.Context) error {
m, err := mem.mapping.Map(ctx)
func (p *progress) Finalize(ctx context.Context) (prolly.Map, error) {
m, err := p.mapping.Map(ctx)
if err != nil {
return err
return prolly.Map{}, err
}
v := shim.ValueFromMap(m)
ref, err := mem.vs.WriteValue(ctx, v)
ref, err := p.vs.WriteValue(ctx, v)
if err != nil {
return err
return prolly.Map{}, err
}
last, err := mem.vs.Root(ctx)
last, err := p.vs.Root(ctx)
if err != nil {
return err
return prolly.Map{}, err
}
ok, err := mem.vs.Commit(ctx, last, last)
ok, err := p.vs.Commit(ctx, last, last)
if err != nil {
return err
}
if !ok {
return fmt.Errorf("failed to commit, manifest swapped out beneath us")
return prolly.Map{}, err
} else if !ok {
return prolly.Map{}, fmt.Errorf("failed to commit, manifest swapped out beneath us")
}
mem.Log(ctx, "Wrote commit mapping!! [commit_mapping_ref: %s]", ref.TargetHash().String())
return nil
p.Log(ctx, "Wrote commit mapping!! [commit_mapping_ref: %s]", ref.TargetHash().String())
p.Log(ctx, "Commit mapping allow mapping pre-migration commit hashes to post-migration commit hashes, "+
"it is available on branch '%s' in table '%s'", MigratedCommitsBranch, MigratedCommitsTable)
return m, nil
}
func persistMigratedCommitMapping(ctx context.Context, ddb *doltdb.DoltDB, mapping prolly.Map) error {
// create a new branch to persist the migrated commit mapping
init, err := ddb.ResolveCommitRef(ctx, ref.NewInternalRef(doltdb.CreationBranch))
if err != nil {
return err
}
br := ref.NewBranchRef(MigratedCommitsBranch)
err = ddb.NewBranchAtCommit(ctx, br, init)
if err != nil {
return err
}
ns, vrw := ddb.NodeStore(), ddb.ValueReadWriter()
m, err := prolly.NewMapFromTuples(ctx, ns, desc, desc)
if err != nil {
return err
}
rows := m.Mutate()
bld := val.NewTupleBuilder(desc)
// convert |mapping| values from hash.Hash to string
iter, err := mapping.IterAll(ctx)
if err != nil {
return err
}
var k, v val.Tuple
kd, vd := mapping.Descriptors()
for {
k, v, err = iter.Next(ctx)
if err == io.EOF {
break
} else if err != nil {
return err
}
o, _ := kd.GetBytes(0, k)
bld.PutString(0, hash.New(o).String())
key := bld.Build(ddb.NodeStore().Pool())
n, _ := vd.GetBytes(0, v)
bld.PutString(0, hash.New(n).String())
value := bld.Build(ddb.NodeStore().Pool())
if err = rows.Put(ctx, key, value); err != nil {
return err
}
}
m, err = rows.Map(ctx)
if err != nil {
return err
}
idx := durable.IndexFromProllyMap(m)
tbl, err := doltdb.NewTable(ctx, vrw, ns, mappingSchema, idx, nil, nil)
if err != nil {
return err
}
root, err := init.GetRootValue(ctx)
if err != nil {
return err
}
root, err = root.PutTable(ctx, MigratedCommitsTable, tbl)
if err != nil {
return err
}
return commitRoot(ctx, ddb, br, root, init)
}
func commitRoot(
ctx context.Context,
ddb *doltdb.DoltDB,
br ref.BranchRef,
root *doltdb.RootValue,
parent *doltdb.Commit,
) error {
roots := doltdb.Roots{
Head: root,
Working: root,
Staged: root,
}
parents := []*doltdb.Commit{parent}
meta, err := parent.GetCommitMeta(ctx)
if err != nil {
return err
}
meta, err = datas.NewCommitMeta(meta.Name, meta.Email, meta.Description)
if err != nil {
return err
}
pcm, err := ddb.NewPendingCommit(ctx, roots, parents, meta)
if err != nil {
return err
}
wsr, err := ref.WorkingSetRefForHead(br)
if err != nil {
return err
}
ws, err := ddb.ResolveWorkingSet(ctx, wsr)
if err != nil {
return err
}
prev, err := ws.HashOf()
if err != nil {
return err
}
ws = ws.WithWorkingRoot(root).WithStagedRoot(root)
_, err = ddb.CommitWithWorkingSet(ctx, br, wsr, pcm, ws, prev, &datas.WorkingSetMeta{
Name: meta.Name,
Email: meta.Email,
Timestamp: uint64(time.Now().Unix()),
})
return err
}
+6 -5
View File
@@ -96,7 +96,7 @@ func migrateWorkingSet(ctx context.Context, menv Environment, brRef ref.BranchRe
return new.UpdateWorkingSet(ctx, wsRef, newWs, hash.Hash{}, oldWs.Meta())
}
func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, new *doltdb.DoltDB, prog Progress) error {
func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error {
oldHash, err := oldCm.HashOf()
if err != nil {
return err
@@ -204,7 +204,7 @@ func migrateCommit(ctx context.Context, menv Environment, oldCm *doltdb.Commit,
return nil
}
func migrateInitCommit(ctx context.Context, cm *doltdb.Commit, new *doltdb.DoltDB, prog Progress) error {
func migrateInitCommit(ctx context.Context, cm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error {
oldHash, err := cm.HashOf()
if err != nil {
return err
@@ -244,7 +244,7 @@ func migrateInitCommit(ctx context.Context, cm *doltdb.Commit, new *doltdb.DoltD
return prog.Put(ctx, oldHash, newHash)
}
func migrateCommitOptions(ctx context.Context, oldCm *doltdb.Commit, prog Progress) (datas.CommitOptions, error) {
func migrateCommitOptions(ctx context.Context, oldCm *doltdb.Commit, prog *progress) (datas.CommitOptions, error) {
parents, err := oldCm.ParentHashes(ctx)
if err != nil {
return datas.CommitOptions{}, err
@@ -414,6 +414,7 @@ func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTb
var newRows durable.Index
var newSet durable.IndexSet
originalCtx := ctx
eg, ctx := errgroup.WithContext(ctx)
eg.Go(func() error {
@@ -433,13 +434,13 @@ func migrateTable(ctx context.Context, newSch schema.Schema, oldParentTbl, oldTb
return nil, err
}
ai, err := oldTbl.GetAutoIncrementValue(ctx)
ai, err := oldTbl.GetAutoIncrementValue(originalCtx)
if err != nil {
return nil, err
}
autoInc := types.Uint(ai)
return doltdb.NewTable(ctx, vrw, ns, newSch, newRows, newSet, autoInc)
return doltdb.NewTable(originalCtx, vrw, ns, newSch, newRows, newSet, autoInc)
}
func migrateSchema(ctx context.Context, tableName string, existing schema.Schema) (schema.Schema, error) {
+15 -12
View File
@@ -28,7 +28,7 @@ import (
// TraverseDAG traverses |old|, migrating values to |new|.
func TraverseDAG(ctx context.Context, menv Environment, old, new *doltdb.DoltDB) (err error) {
var heads []ref.DoltRef
var prog Progress
var prog *progress
heads, err = old.GetHeadRefs(ctx)
if err != nil {
@@ -42,12 +42,6 @@ func TraverseDAG(ctx context.Context, menv Environment, old, new *doltdb.DoltDB)
if err != nil {
return err
}
defer func() {
cerr := prog.Close(ctx)
if err == nil {
err = cerr
}
}()
for i := range heads {
if err = traverseRefHistory(ctx, menv, heads[i], old, new, prog); err != nil {
@@ -58,10 +52,19 @@ func TraverseDAG(ctx context.Context, menv Environment, old, new *doltdb.DoltDB)
if err = validateBranchMapping(ctx, old, new); err != nil {
return err
}
// write the migrated commit mapping to a special branch
m, err := prog.Finalize(ctx)
if err != nil {
return err
}
if err = persistMigratedCommitMapping(ctx, new, m); err != nil {
return err
}
return nil
}
func traverseRefHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog Progress) error {
func traverseRefHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog *progress) error {
switch r.GetType() {
case ref.BranchRefType:
if err := traverseBranchHistory(ctx, menv, r, old, new, prog); err != nil {
@@ -87,7 +90,7 @@ func traverseRefHistory(ctx context.Context, menv Environment, r ref.DoltRef, ol
}
}
func traverseBranchHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog Progress) error {
func traverseBranchHistory(ctx context.Context, menv Environment, r ref.DoltRef, old, new *doltdb.DoltDB, prog *progress) error {
cm, err := old.ResolveCommitRef(ctx, r)
if err != nil {
return err
@@ -108,7 +111,7 @@ func traverseBranchHistory(ctx context.Context, menv Environment, r ref.DoltRef,
return new.SetHead(ctx, r, newHash)
}
func traverseTagHistory(ctx context.Context, menv Environment, r ref.TagRef, old, new *doltdb.DoltDB, prog Progress) error {
func traverseTagHistory(ctx context.Context, menv Environment, r ref.TagRef, old, new *doltdb.DoltDB, prog *progress) error {
t, err := old.ResolveTag(ctx, r)
if err != nil {
return err
@@ -133,7 +136,7 @@ func traverseTagHistory(ctx context.Context, menv Environment, r ref.TagRef, old
return new.NewTagAtCommit(ctx, r, cm, t.Meta)
}
func traverseCommitHistory(ctx context.Context, menv Environment, cm *doltdb.Commit, new *doltdb.DoltDB, prog Progress) error {
func traverseCommitHistory(ctx context.Context, menv Environment, cm *doltdb.Commit, new *doltdb.DoltDB, prog *progress) error {
ch, err := cm.HashOf()
if err != nil {
return err
@@ -180,7 +183,7 @@ func traverseCommitHistory(ctx context.Context, menv Environment, cm *doltdb.Com
}
}
func firstAbsent(ctx context.Context, p Progress, addrs []hash.Hash) (int, error) {
func firstAbsent(ctx context.Context, p *progress, addrs []hash.Hash) (int, error) {
for i := range addrs {
ok, err := p.Has(ctx, addrs[i])
if err != nil {
@@ -228,6 +228,20 @@ func (a *binlogReplicaApplier) startReplicationEventStream(ctx *sql.Context, con
a.currentPosition = position
// Clear out the format description in case we're reconnecting, so that we don't use the old format description
// to interpret any event messages before we receive the new format description from the new stream.
a.format = mysql.BinlogFormat{}
// If the source server has binlog checksums enabled (@@global.binlog_checksum), then the replica MUST
// set @master_binlog_checksum to handshake with the server to acknowledge that it knows that checksums
// are in use. Without this step, the server will just send back error messages saying that the replica
// does not support the binlog checksum algorithm in use on the primary.
// For more details, see: https://dev.mysql.com/worklog/task/?id=2540
_, err = conn.ExecuteFetch("set @master_binlog_checksum=@@global.binlog_checksum;", 0, false)
if err != nil {
return err
}
return conn.SendBinlogDumpCommand(serverId, *position)
}
@@ -271,10 +285,6 @@ func (a *binlogReplicaApplier) replicaBinlogEventHandler(ctx *sql.Context) error
return err
}
continue
} else if strings.Contains(sqlError.Message, "can not handle replication events with the checksum") {
// Ignore any errors about checksums
ctx.GetLogger().Debug("ignoring binlog checksum error message")
continue
}
}
@@ -285,6 +295,19 @@ func (a *binlogReplicaApplier) replicaBinlogEventHandler(ctx *sql.Context) error
continue
}
// We don't support checksum validation, so we must strip off any checksum data if present, otherwise
// it could get interpreted as part of the data fields and corrupt the fields we pull out. There is not
// a future-proof guarantee on the checksum size, so we can't strip a checksum until we've seen the
// Format binlog event that definitively tells us if checksums are enabled and what algorithm they use.
if !a.format.IsZero() {
event, _, err = event.StripChecksum(a.format)
if err != nil {
msg := fmt.Sprintf("unable to strip checksum from binlog event: '%v'", err.Error())
ctx.GetLogger().Error(msg)
DoltBinlogReplicaController.setSqlError(mysql.ERUnknownError, msg)
}
}
err = a.processBinlogEvent(ctx, engine, event)
if err != nil {
ctx.GetLogger().Errorf("unexpected error of type %T: '%v'", err, err.Error())
@@ -328,6 +351,8 @@ func (a *binlogReplicaApplier) processBinlogEvent(ctx *sql.Context, engine *gms.
"database": query.Database,
"charset": query.Charset,
"query": query.SQL,
"options": fmt.Sprintf("0x%x", query.Options),
"sql_mode": fmt.Sprintf("0x%x", query.SqlMode),
}).Debug("Received binlog event: Query")
// When executing SQL statements sent from the primary, we can't be sure what database was modified unless we
@@ -337,6 +362,39 @@ func (a *binlogReplicaApplier) processBinlogEvent(ctx *sql.Context, engine *gms.
// avoid issues with correctness, at the cost of being slightly less efficient
commitToAllDatabases = true
if query.Options&mysql.QFlagOptionAutoIsNull > 0 {
ctx.GetLogger().Tracef("Setting sql_auto_is_null ON")
ctx.SetSessionVariable(ctx, "sql_auto_is_null", 1)
} else {
ctx.GetLogger().Tracef("Setting sql_auto_is_null OFF")
ctx.SetSessionVariable(ctx, "sql_auto_is_null", 0)
}
if query.Options&mysql.QFlagOptionNotAutocommit > 0 {
ctx.GetLogger().Tracef("Setting autocommit=0")
ctx.SetSessionVariable(ctx, "autocommit", 0)
} else {
ctx.GetLogger().Tracef("Setting autocommit=1")
ctx.SetSessionVariable(ctx, "autocommit", 1)
}
if query.Options&mysql.QFlagOptionNoForeignKeyChecks > 0 {
ctx.GetLogger().Tracef("Setting foreign_key_checks=0")
ctx.SetSessionVariable(ctx, "foreign_key_checks", 0)
} else {
ctx.GetLogger().Tracef("Setting foreign_key_checks=1")
ctx.SetSessionVariable(ctx, "foreign_key_checks", 1)
}
// NOTE: unique_checks is not currently honored by Dolt
if query.Options&mysql.QFlagOptionRelaxedUniqueChecks > 0 {
ctx.GetLogger().Tracef("Setting unique_checks=0")
ctx.SetSessionVariable(ctx, "unique_checks", 0)
} else {
ctx.GetLogger().Tracef("Setting unique_checks=1")
ctx.SetSessionVariable(ctx, "unique_checks", 1)
}
executeQueryWithEngine(ctx, engine, query.SQL)
createCommit = strings.ToLower(query.SQL) != "begin"
@@ -493,16 +551,18 @@ func (a *binlogReplicaApplier) processBinlogEvent(ctx *sql.Context, engine *gms.
// processRowEvent processes a WriteRows, DeleteRows, or UpdateRows binlog event and returns an error if any problems
// were encountered.
func (a *binlogReplicaApplier) processRowEvent(ctx *sql.Context, event mysql.BinlogEvent, engine *gms.Engine) error {
var eventType string
switch {
case event.IsDeleteRows():
ctx.GetLogger().Debug("Received binlog event: DeleteRows")
eventType = "DeleteRows"
case event.IsWriteRows():
ctx.GetLogger().Debug("Received binlog event: WriteRows")
eventType = "WriteRows"
case event.IsUpdateRows():
ctx.GetLogger().Debug("Received binlog event: UpdateRows")
eventType = "UpdateRows"
default:
return fmt.Errorf("unsupported event type: %v", event)
}
ctx.GetLogger().Debugf("Received binlog event: %s", eventType)
tableId := event.TableID(a.format)
tableMap, ok := a.tableMapsById[tableId]
@@ -519,16 +579,22 @@ func (a *binlogReplicaApplier) processRowEvent(ctx *sql.Context, event mysql.Bin
return err
}
ctx.GetLogger().WithFields(logrus.Fields{
"flags": fmt.Sprintf("%x", rows.Flags),
}).Debugf("Processing rows from %s event", eventType)
flags := rows.Flags
if flags&rowFlag_endOfStatement == rowFlag_endOfStatement {
foreignKeyChecksDisabled := false
if flags&rowFlag_endOfStatement > 0 {
// nothing to be done for end of statement; just clear the flag and move on
flags = flags &^ rowFlag_endOfStatement
}
if flags&rowFlag_noForeignKeyChecks == rowFlag_noForeignKeyChecks {
if flags&rowFlag_noForeignKeyChecks > 0 {
foreignKeyChecksDisabled = true
flags = flags &^ rowFlag_noForeignKeyChecks
}
if flags != 0 {
msg := fmt.Sprintf("unsupported binlog protocol message: DeleteRows event with unsupported flags '%x'", flags)
msg := fmt.Sprintf("unsupported binlog protocol message: row event with unsupported flags '%x'", flags)
ctx.GetLogger().Errorf(msg)
DoltBinlogReplicaController.setSqlError(mysql.ERUnknownError, msg)
}
@@ -543,10 +609,9 @@ func (a *binlogReplicaApplier) processRowEvent(ctx *sql.Context, event mysql.Bin
case event.IsUpdateRows():
ctx.GetLogger().Debugf(" - Updated Rows (table: %s)", tableMap.Name)
case event.IsWriteRows():
ctx.GetLogger().Debugf(" - New Rows (table: %s)", tableMap.Name)
ctx.GetLogger().Debugf(" - Inserted Rows (table: %s)", tableMap.Name)
}
foreignKeyChecksDisabled := tableMap.Flags&rowFlag_noForeignKeyChecks > 0
writeSession, tableWriter, err := getTableWriter(ctx, engine, tableMap.Name, tableMap.Database, foreignKeyChecksDisabled)
if err != nil {
return err
@@ -263,11 +263,15 @@ func TestForeignKeyChecks(t *testing.T) {
startSqlServers(t)
startReplication(t, mySqlPort)
// Insert a record with a foreign key check
primaryDatabase.MustExec("CREATE TABLE colors (name varchar(100) primary key);")
// Test that we can execute statement-based replication that requires foreign_key_checks
// being turned off (referenced table doesn't exist yet).
primaryDatabase.MustExec("SET foreign_key_checks = 0;")
primaryDatabase.MustExec("CREATE TABLE t1 (pk int primary key, color varchar(100), FOREIGN KEY (color) REFERENCES colors(name));")
primaryDatabase.MustExec("START TRANSACTION;")
primaryDatabase.MustExec("CREATE TABLE colors (name varchar(100) primary key);")
primaryDatabase.MustExec("SET foreign_key_checks = 1;")
// Insert a record with foreign key checks enabled
primaryDatabase.MustExec("START TRANSACTION;")
primaryDatabase.MustExec("INSERT INTO colors VALUES ('green'), ('red'), ('blue');")
primaryDatabase.MustExec("INSERT INTO t1 VALUES (1, 'red'), (2, 'green');")
primaryDatabase.MustExec("COMMIT;")
@@ -362,7 +366,7 @@ func TestCharsetsAndCollations(t *testing.T) {
// waitForReplicaToCatchUp waits (up to 60s) for the replica to catch up with the primary database. The
// lag is measured by checking that gtid_executed is the same on the primary and replica.
func waitForReplicaToCatchUp(t *testing.T) {
timeLimit := 20 * time.Second
timeLimit := 60 * time.Second
endTime := time.Now().Add(timeLimit)
for time.Now().Before(endTime) {
replicaGtid := queryGtid(t, replicaDatabase)
@@ -486,8 +490,8 @@ func stopDoltSqlServer(t *testing.T) {
func startReplication(_ *testing.T, port int) {
replicaDatabase.MustExec("SET @@GLOBAL.server_id=123;")
replicaDatabase.MustExec(
fmt.Sprintf("change replication source to SOURCE_HOST='localhost', SOURCE_USER='root', "+
"SOURCE_PASSWORD='', SOURCE_PORT=%v;", port))
fmt.Sprintf("change replication source to SOURCE_HOST='localhost', SOURCE_USER='replicator', "+
"SOURCE_PASSWORD='Zqr8_blrGm1!', SOURCE_PORT=%v;", port))
replicaDatabase.MustExec("start replica;")
}
@@ -588,7 +592,6 @@ func startMySqlServer(dir string) (int, *os.Process, error) {
fmt.Sprintf("--port=%v", mySqlPort),
"--server-id=11223344",
fmt.Sprintf("--socket=mysql-%v.sock", mySqlPort),
"--binlog-checksum=NONE",
"--general_log_file="+dir+"general_log",
"--log-bin="+dir+"log_bin",
"--slow_query_log_file="+dir+"slow_query_log",
@@ -623,9 +626,11 @@ func startMySqlServer(dir string) (int, *os.Process, error) {
primaryDatabase = sqlx.MustOpen("mysql", dsn)
os.Chdir(originalCwd)
fmt.Printf("MySQL server started on port %v \n", mySqlPort)
primaryDatabase.MustExec("CREATE USER 'replicator'@'%' IDENTIFIED BY 'Zqr8_blrGm1!';")
primaryDatabase.MustExec("GRANT REPLICATION SLAVE ON *.* TO 'replicator'@'%';")
return mySqlPort, cmd.Process, nil
}
@@ -937,12 +937,14 @@ func (p DoltDatabaseProvider) ExternalStoredProcedures(_ *sql.Context, name stri
// TableFunction implements the sql.TableFunctionProvider interface
func (p DoltDatabaseProvider) TableFunction(_ *sql.Context, name string) (sql.TableFunction, error) {
// currently, only one table function is supported, if we extend this, we should clean this up
// and store table functions in a map, similar to regular functions.
// TODO: Clean this up and store table functions in a map, similar to regular functions.
switch strings.ToLower(name) {
case "dolt_diff":
dtf := &DiffTableFunction{}
return dtf, nil
case "dolt_diff_stat":
dtf := &DiffStatTableFunction{}
return dtf, nil
case "dolt_diff_summary":
dtf := &DiffSummaryTableFunction{}
return dtf, nil
@@ -1,4 +1,4 @@
// Copyright 2020 Dolthub, Inc.
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/store/hash"
)
const HashOfFuncName = "hashof"
@@ -80,12 +81,21 @@ func (t *HashOf) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
} else {
ref, err := ddb.GetRefByNameInsensitive(ctx, name)
if err != nil {
return nil, err
}
cm, err = ddb.ResolveCommitRef(ctx, ref)
if err != nil {
return nil, err
hsh, parsed := hash.MaybeParse(name)
if parsed {
orgErr := err
cm, err = ddb.ReadCommit(ctx, hsh)
if err != nil {
return nil, orgErr
}
} else {
return nil, err
}
} else {
cm, err = ddb.ResolveCommitRef(ctx, ref)
if err != nil {
return nil, err
}
}
}
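A hedged usage note for the fallback added above (queries illustrative; the placeholder hash is not a real value):
// SELECT HASHOF('main');           -- resolves the branch ref, as before
// SELECT HASHOF('<commit-hash>');  -- when ref resolution fails, the argument is
//                                  -- parsed as a commit hash and read directly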
@@ -0,0 +1,571 @@
// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sqle
import (
"errors"
"fmt"
"io"
"math"
"strings"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
"golang.org/x/sync/errgroup"
"github.com/dolthub/dolt/go/libraries/doltcore/diff"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dtables"
)
var _ sql.TableFunction = (*DiffStatTableFunction)(nil)
type DiffStatTableFunction struct {
ctx *sql.Context
fromCommitExpr sql.Expression
toCommitExpr sql.Expression
dotCommitExpr sql.Expression
tableNameExpr sql.Expression
database sql.Database
}
var diffStatTableSchema = sql.Schema{
&sql.Column{Name: "table_name", Type: types.LongText, Nullable: false},
&sql.Column{Name: "rows_unmodified", Type: types.Int64, Nullable: true},
&sql.Column{Name: "rows_added", Type: types.Int64, Nullable: true},
&sql.Column{Name: "rows_deleted", Type: types.Int64, Nullable: true},
&sql.Column{Name: "rows_modified", Type: types.Int64, Nullable: true},
&sql.Column{Name: "cells_added", Type: types.Int64, Nullable: true},
&sql.Column{Name: "cells_deleted", Type: types.Int64, Nullable: true},
&sql.Column{Name: "cells_modified", Type: types.Int64, Nullable: true},
&sql.Column{Name: "old_row_count", Type: types.Int64, Nullable: true},
&sql.Column{Name: "new_row_count", Type: types.Int64, Nullable: true},
&sql.Column{Name: "old_cell_count", Type: types.Int64, Nullable: true},
&sql.Column{Name: "new_cell_count", Type: types.Int64, Nullable: true},
}
// NewInstance creates a new instance of the TableFunction interface
func (ds *DiffStatTableFunction) NewInstance(ctx *sql.Context, db sql.Database, expressions []sql.Expression) (sql.Node, error) {
newInstance := &DiffStatTableFunction{
ctx: ctx,
database: db,
}
node, err := newInstance.WithExpressions(expressions...)
if err != nil {
return nil, err
}
return node, nil
}
// Database implements the sql.Databaser interface
func (ds *DiffStatTableFunction) Database() sql.Database {
return ds.database
}
// WithDatabase implements the sql.Databaser interface
func (ds *DiffStatTableFunction) WithDatabase(database sql.Database) (sql.Node, error) {
ds.database = database
return ds, nil
}
// Name implements the sql.TableFunction interface
func (ds *DiffStatTableFunction) Name() string {
return "dolt_diff_stat"
}
func (ds *DiffStatTableFunction) commitsResolved() bool {
if ds.dotCommitExpr != nil {
return ds.dotCommitExpr.Resolved()
}
return ds.fromCommitExpr.Resolved() && ds.toCommitExpr.Resolved()
}
// Resolved implements the sql.Resolvable interface
func (ds *DiffStatTableFunction) Resolved() bool {
if ds.tableNameExpr != nil {
return ds.commitsResolved() && ds.tableNameExpr.Resolved()
}
return ds.commitsResolved()
}
// String implements the Stringer interface
func (ds *DiffStatTableFunction) String() string {
if ds.dotCommitExpr != nil {
if ds.tableNameExpr != nil {
return fmt.Sprintf("DOLT_DIFF_STAT(%s, %s)", ds.dotCommitExpr.String(), ds.tableNameExpr.String())
}
return fmt.Sprintf("DOLT_DIFF_STAT(%s)", ds.dotCommitExpr.String())
}
if ds.tableNameExpr != nil {
return fmt.Sprintf("DOLT_DIFF_STAT(%s, %s, %s)", ds.fromCommitExpr.String(), ds.toCommitExpr.String(), ds.tableNameExpr.String())
}
return fmt.Sprintf("DOLT_DIFF_STAT(%s, %s)", ds.fromCommitExpr.String(), ds.toCommitExpr.String())
}
// Schema implements the sql.Node interface.
func (ds *DiffStatTableFunction) Schema() sql.Schema {
return diffStatTableSchema
}
// Children implements the sql.Node interface.
func (ds *DiffStatTableFunction) Children() []sql.Node {
return nil
}
// WithChildren implements the sql.Node interface.
func (ds *DiffStatTableFunction) WithChildren(children ...sql.Node) (sql.Node, error) {
if len(children) != 0 {
return nil, fmt.Errorf("unexpected children")
}
return ds, nil
}
// CheckPrivileges implements the interface sql.Node.
func (ds *DiffStatTableFunction) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool {
if ds.tableNameExpr != nil {
if !types.IsText(ds.tableNameExpr.Type()) {
return false
}
tableNameVal, err := ds.tableNameExpr.Eval(ds.ctx, nil)
if err != nil {
return false
}
tableName, ok := tableNameVal.(string)
if !ok {
return false
}
// TODO: Add tests for privilege checking
return opChecker.UserHasPrivileges(ctx,
sql.NewPrivilegedOperation(ds.database.Name(), tableName, "", sql.PrivilegeType_Select))
}
tblNames, err := ds.database.GetTableNames(ctx)
if err != nil {
return false
}
var operations []sql.PrivilegedOperation
for _, tblName := range tblNames {
operations = append(operations, sql.NewPrivilegedOperation(ds.database.Name(), tblName, "", sql.PrivilegeType_Select))
}
return opChecker.UserHasPrivileges(ctx, operations...)
}
// Expressions implements the sql.Expressioner interface.
func (ds *DiffStatTableFunction) Expressions() []sql.Expression {
exprs := []sql.Expression{}
if ds.dotCommitExpr != nil {
exprs = append(exprs, ds.dotCommitExpr)
} else {
exprs = append(exprs, ds.fromCommitExpr, ds.toCommitExpr)
}
if ds.tableNameExpr != nil {
exprs = append(exprs, ds.tableNameExpr)
}
return exprs
}
// WithExpressions implements the sql.Expressioner interface.
func (ds *DiffStatTableFunction) WithExpressions(expression ...sql.Expression) (sql.Node, error) {
if len(expression) < 1 {
return nil, sql.ErrInvalidArgumentNumber.New(ds.Name(), "1 to 3", len(expression))
}
for _, expr := range expression {
if !expr.Resolved() {
return nil, ErrInvalidNonLiteralArgument.New(ds.Name(), expr.String())
}
// prepared statements resolve functions beforehand, so the above check fails
if _, ok := expr.(sql.FunctionExpression); ok {
return nil, ErrInvalidNonLiteralArgument.New(ds.Name(), expr.String())
}
}
if strings.Contains(expression[0].String(), "..") {
if len(expression) < 1 || len(expression) > 2 {
return nil, sql.ErrInvalidArgumentNumber.New(ds.Name(), "1 or 2", len(expression))
}
ds.dotCommitExpr = expression[0]
if len(expression) == 2 {
ds.tableNameExpr = expression[1]
}
} else {
if len(expression) < 2 || len(expression) > 3 {
return nil, sql.ErrInvalidArgumentNumber.New(ds.Name(), "2 or 3", len(expression))
}
ds.fromCommitExpr = expression[0]
ds.toCommitExpr = expression[1]
if len(expression) == 3 {
ds.tableNameExpr = expression[2]
}
}
// validate the expressions
if ds.dotCommitExpr != nil {
if !types.IsText(ds.dotCommitExpr.Type()) {
return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.dotCommitExpr.String())
}
} else {
if !types.IsText(ds.fromCommitExpr.Type()) {
return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.fromCommitExpr.String())
}
if !types.IsText(ds.toCommitExpr.Type()) {
return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.toCommitExpr.String())
}
}
if ds.tableNameExpr != nil {
if !types.IsText(ds.tableNameExpr.Type()) {
return nil, sql.ErrInvalidArgumentDetails.New(ds.Name(), ds.tableNameExpr.String())
}
}
return ds, nil
}
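A hedged usage sketch mirroring the validation above (ref and table names are illustrative):
// Accepted argument shapes for the new table function:
//   SELECT * FROM dolt_diff_stat('from_ref', 'to_ref');             -- two refs, all tables
//   SELECT * FROM dolt_diff_stat('from_ref', 'to_ref', 'mytable');  -- two refs, one table
//   SELECT * FROM dolt_diff_stat('from_ref..to_ref');               -- dot expression, all tables
//   SELECT * FROM dolt_diff_stat('from_ref..to_ref', 'mytable');    -- dot expression, one table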
// RowIter implements the sql.Node interface
func (ds *DiffStatTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql.RowIter, error) {
fromCommitVal, toCommitVal, dotCommitVal, tableName, err := ds.evaluateArguments()
if err != nil {
return nil, err
}
sqledb, ok := ds.database.(SqlDatabase)
if !ok {
return nil, fmt.Errorf("unexpected database type: %T", ds.database)
}
fromCommitStr, toCommitStr, err := loadCommitStrings(ctx, fromCommitVal, toCommitVal, dotCommitVal, sqledb)
if err != nil {
return nil, err
}
sess := dsess.DSessFromSess(ctx.Session)
fromRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), fromCommitStr)
if err != nil {
return nil, err
}
toRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), toCommitStr)
if err != nil {
return nil, err
}
deltas, err := diff.GetTableDeltas(ctx, fromRoot, toRoot)
if err != nil {
return nil, err
}
// If tableNameExpr is defined, return a single-table diff stat result
if ds.tableNameExpr != nil {
delta := findMatchingDelta(deltas, tableName)
diffStat, hasDiff, err := getDiffStatNodeFromDelta(ctx, delta, fromRoot, toRoot, tableName)
if err != nil {
return nil, err
}
if !hasDiff {
return NewDiffStatTableFunctionRowIter([]diffStatNode{}), nil
}
return NewDiffStatTableFunctionRowIter([]diffStatNode{diffStat}), nil
}
var diffStats []diffStatNode
for _, delta := range deltas {
tblName := delta.ToName
if tblName == "" {
tblName = delta.FromName
}
diffStat, hasDiff, err := getDiffStatNodeFromDelta(ctx, delta, fromRoot, toRoot, tblName)
if err != nil {
if errors.Is(err, diff.ErrPrimaryKeySetChanged) {
ctx.Warn(dtables.PrimaryKeyChangeWarningCode, fmt.Sprintf("stat for table %s cannot be determined. Primary key set changed.", tblName))
// Report an empty diff for tables that have primary key set changes
diffStats = append(diffStats, diffStatNode{tblName: tblName})
continue
}
return nil, err
}
if hasDiff {
diffStats = append(diffStats, diffStat)
}
}
return NewDiffStatTableFunctionRowIter(diffStats), nil
}
// evaluateArguments returns fromCommitVal, toCommitVal, dotCommitVal, and tableName.
// It evaluates the argument expressions to turn them into values this DiffStatTableFunction
// can use. Note that this method only evals the expressions, and doesn't validate the values.
func (ds *DiffStatTableFunction) evaluateArguments() (interface{}, interface{}, interface{}, string, error) {
var tableName string
if ds.tableNameExpr != nil {
tableNameVal, err := ds.tableNameExpr.Eval(ds.ctx, nil)
if err != nil {
return nil, nil, nil, "", err
}
tn, ok := tableNameVal.(string)
if !ok {
return nil, nil, nil, "", ErrInvalidTableName.New(ds.tableNameExpr.String())
}
tableName = tn
}
if ds.dotCommitExpr != nil {
dotCommitVal, err := ds.dotCommitExpr.Eval(ds.ctx, nil)
if err != nil {
return nil, nil, nil, "", err
}
return nil, nil, dotCommitVal, tableName, nil
}
fromCommitVal, err := ds.fromCommitExpr.Eval(ds.ctx, nil)
if err != nil {
return nil, nil, nil, "", err
}
toCommitVal, err := ds.toCommitExpr.Eval(ds.ctx, nil)
if err != nil {
return nil, nil, nil, "", err
}
return fromCommitVal, toCommitVal, nil, tableName, nil
}
// getDiffStatNodeFromDelta returns a diffStatNode object and whether there is a data diff or not. It gets tables
// from the roots and computes the diff stat when a valid table exists in either fromRoot or toRoot.
func getDiffStatNodeFromDelta(ctx *sql.Context, delta diff.TableDelta, fromRoot, toRoot *doltdb.RootValue, tableName string) (diffStatNode, bool, error) {
var oldColLen int
var newColLen int
fromTable, _, fromTableExists, err := fromRoot.GetTableInsensitive(ctx, tableName)
if err != nil {
return diffStatNode{}, false, err
}
if fromTableExists {
fromSch, err := fromTable.GetSchema(ctx)
if err != nil {
return diffStatNode{}, false, err
}
oldColLen = len(fromSch.GetAllCols().GetColumns())
}
toTable, _, toTableExists, err := toRoot.GetTableInsensitive(ctx, tableName)
if err != nil {
return diffStatNode{}, false, err
}
if toTableExists {
toSch, err := toTable.GetSchema(ctx)
if err != nil {
return diffStatNode{}, false, err
}
newColLen = len(toSch.GetAllCols().GetColumns())
}
if !fromTableExists && !toTableExists {
return diffStatNode{}, false, sql.ErrTableNotFound.New(tableName)
}
// no diff from tableDelta
if delta.FromTable == nil && delta.ToTable == nil {
return diffStatNode{}, false, nil
}
diffStat, hasDiff, keyless, err := getDiffStat(ctx, delta)
if err != nil {
return diffStatNode{}, false, err
}
return diffStatNode{tableName, diffStat, oldColLen, newColLen, keyless}, hasDiff, nil
}
// getDiffStat returns a diff.DiffStatProgress object, whether there is a data diff, and whether the table is keyless.
func getDiffStat(ctx *sql.Context, td diff.TableDelta) (diff.DiffStatProgress, bool, bool, error) {
// got this method from diff_output.go
ch := make(chan diff.DiffStatProgress)
grp, ctx2 := errgroup.WithContext(ctx)
grp.Go(func() error {
defer close(ch)
err := diff.StatForTableDelta(ctx2, ch, td)
return err
})
acc := diff.DiffStatProgress{}
var count int64
grp.Go(func() error {
for {
select {
case p, ok := <-ch:
if !ok {
return nil
}
acc.Adds += p.Adds
acc.Removes += p.Removes
acc.Changes += p.Changes
acc.CellChanges += p.CellChanges
acc.NewRowSize += p.NewRowSize
acc.OldRowSize += p.OldRowSize
acc.NewCellSize += p.NewCellSize
acc.OldCellSize += p.OldCellSize
count++
case <-ctx2.Done():
return ctx2.Err()
}
}
})
if err := grp.Wait(); err != nil {
return diff.DiffStatProgress{}, false, false, err
}
keyless, err := td.IsKeyless(ctx)
if err != nil {
return diff.DiffStatProgress{}, false, keyless, err
}
if (acc.Adds+acc.Removes+acc.Changes) == 0 && (acc.OldCellSize-acc.NewCellSize) == 0 {
return diff.DiffStatProgress{}, false, keyless, nil
}
return acc, true, keyless, nil
}
//------------------------------------
// diffStatTableFunctionRowIter
//------------------------------------
var _ sql.RowIter = &diffStatTableFunctionRowIter{}
type diffStatTableFunctionRowIter struct {
diffStats []diffStatNode
diffIdx int
}
func (d *diffStatTableFunctionRowIter) incrementIndexes() {
d.diffIdx++
if d.diffIdx >= len(d.diffStats) {
d.diffIdx = 0
d.diffStats = nil
}
}
type diffStatNode struct {
tblName string
diffStat diff.DiffStatProgress
oldColLen int
newColLen int
keyless bool
}
func NewDiffStatTableFunctionRowIter(ds []diffStatNode) sql.RowIter {
return &diffStatTableFunctionRowIter{
diffStats: ds,
}
}
func (d *diffStatTableFunctionRowIter) Next(ctx *sql.Context) (sql.Row, error) {
defer d.incrementIndexes()
if d.diffIdx >= len(d.diffStats) {
return nil, io.EOF
}
if d.diffStats == nil {
return nil, io.EOF
}
ds := d.diffStats[d.diffIdx]
return getRowFromDiffStat(ds.tblName, ds.diffStat, ds.newColLen, ds.oldColLen, ds.keyless), nil
}
func (d *diffStatTableFunctionRowIter) Close(context *sql.Context) error {
return nil
}
// getRowFromDiffStat takes a diff.DiffStatProgress and calculates the rows_modified, cells_added, and cells_deleted
// values. If the change in cell count between the old and new tables does not equal cells_added and/or cells_deleted,
// there must be schema changes, which contribute to cells_added and cells_deleted beyond the row count * column length product.
func getRowFromDiffStat(tblName string, dsp diff.DiffStatProgress, newColLen, oldColLen int, keyless bool) sql.Row {
// if the table is keyless, match the current CLI command result
if keyless {
return sql.Row{
tblName, // table_name
nil, // rows_unmodified
int64(dsp.Adds), // rows_added
int64(dsp.Removes), // rows_deleted
nil, // rows_modified
nil, // cells_added
nil, // cells_deleted
nil, // cells_modified
nil, // old_row_count
nil, // new_row_count
nil, // old_cell_count
nil, // new_cell_count
}
}
numCellInserts, numCellDeletes := GetCellsAddedAndDeleted(dsp, newColLen)
rowsUnmodified := dsp.OldRowSize - dsp.Changes - dsp.Removes
return sql.Row{
tblName, // table_name
int64(rowsUnmodified), // rows_unmodified
int64(dsp.Adds), // rows_added
int64(dsp.Removes), // rows_deleted
int64(dsp.Changes), // rows_modified
int64(numCellInserts), // cells_added
int64(numCellDeletes), // cells_deleted
int64(dsp.CellChanges), // cells_modified
int64(dsp.OldRowSize), // old_row_count
int64(dsp.NewRowSize), // new_row_count
int64(dsp.OldCellSize), // old_cell_count
int64(dsp.NewCellSize), // new_cell_count
}
}
// GetCellsAddedAndDeleted calculates cells added and deleted given a diff.DiffStatProgress and the toCommit table's
// column length. We use rows added and deleted to calculate cells added and deleted, but that alone does not include
// cells added and deleted by schema changes. Here we fill those in using the total number of cells in each commit's table.
func GetCellsAddedAndDeleted(acc diff.DiffStatProgress, newColLen int) (uint64, uint64) {
var numCellInserts, numCellDeletes float64
rowToCellInserts := float64(acc.Adds) * float64(newColLen)
rowToCellDeletes := float64(acc.Removes) * float64(newColLen)
cellDiff := float64(acc.NewCellSize) - float64(acc.OldCellSize)
if cellDiff > 0 {
numCellInserts = cellDiff + rowToCellDeletes
numCellDeletes = rowToCellDeletes
} else if cellDiff < 0 {
numCellInserts = rowToCellInserts
numCellDeletes = math.Abs(cellDiff) + rowToCellInserts
} else {
if rowToCellInserts != rowToCellDeletes {
numCellDeletes = math.Max(rowToCellDeletes, rowToCellInserts)
numCellInserts = math.Max(rowToCellDeletes, rowToCellInserts)
} else {
numCellDeletes = rowToCellDeletes
numCellInserts = rowToCellInserts
}
}
return uint64(numCellInserts), uint64(numCellDeletes)
}
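// Worked example (illustrative, values hypothetical): a diff that adds 2 rows
// and removes 1 row against a 3-column new schema gives
//   rowToCellInserts = 2*3 = 6 and rowToCellDeletes = 1*3 = 3.
// If a column was also added so that cellDiff = NewCellSize-OldCellSize = +5,
// the function reports cells_added = 5+3 = 8 and cells_deleted = 3, keeping
// the invariant cells_added - cells_deleted == cellDiff.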
@@ -15,19 +15,16 @@
package sqle
import (
"errors"
"fmt"
"io"
"math"
"sort"
"strings"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
"golang.org/x/sync/errgroup"
"github.com/dolthub/dolt/go/libraries/doltcore/diff"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dtables"
)
@@ -45,17 +42,9 @@ type DiffSummaryTableFunction struct {
var diffSummaryTableSchema = sql.Schema{
&sql.Column{Name: "table_name", Type: types.LongText, Nullable: false},
&sql.Column{Name: "rows_unmodified", Type: types.Int64, Nullable: true},
&sql.Column{Name: "rows_added", Type: types.Int64, Nullable: true},
&sql.Column{Name: "rows_deleted", Type: types.Int64, Nullable: true},
&sql.Column{Name: "rows_modified", Type: types.Int64, Nullable: true},
&sql.Column{Name: "cells_added", Type: types.Int64, Nullable: true},
&sql.Column{Name: "cells_deleted", Type: types.Int64, Nullable: true},
&sql.Column{Name: "cells_modified", Type: types.Int64, Nullable: true},
&sql.Column{Name: "old_row_count", Type: types.Int64, Nullable: true},
&sql.Column{Name: "new_row_count", Type: types.Int64, Nullable: true},
&sql.Column{Name: "old_cell_count", Type: types.Int64, Nullable: true},
&sql.Column{Name: "new_cell_count", Type: types.Int64, Nullable: true},
&sql.Column{Name: "diff_type", Type: types.Text, Nullable: false},
&sql.Column{Name: "data_change", Type: types.Boolean, Nullable: false},
&sql.Column{Name: "schema_change", Type: types.Boolean, Nullable: false},
}
// NewInstance creates a new instance of TableFunction interface
@@ -254,64 +243,75 @@ func (ds *DiffSummaryTableFunction) RowIter(ctx *sql.Context, row sql.Row) (sql.
return nil, fmt.Errorf("unexpected database type: %T", ds.database)
}
fromCommitStr, toCommitStr, err := loadCommitStrings(ctx, fromCommitVal, toCommitVal, dotCommitVal, sqledb)
fromDetails, toDetails, err := loadDetailsForRefs(ctx, fromCommitVal, toCommitVal, dotCommitVal, sqledb)
if err != nil {
return nil, err
}
sess := dsess.DSessFromSess(ctx.Session)
fromRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), fromCommitStr)
deltas, err := diff.GetTableDeltas(ctx, fromDetails.root, toDetails.root)
if err != nil {
return nil, err
}
toRoot, _, err := sess.ResolveRootForRef(ctx, sqledb.Name(), toCommitStr)
if err != nil {
return nil, err
}
deltas, err := diff.GetTableDeltas(ctx, fromRoot, toRoot)
if err != nil {
return nil, err
}
sort.Slice(deltas, func(i, j int) bool {
return strings.Compare(deltas[i].ToName, deltas[j].ToName) < 0
})
// If tableNameExpr is defined, return the diff summary result for a single table
if ds.tableNameExpr != nil {
delta := findMatchingDelta(deltas, tableName)
diffSum, hasDiff, err := getDiffSummaryNodeFromDelta(ctx, delta, fromRoot, toRoot, tableName)
summ, err := getSummaryForDelta(ctx, delta, sqledb, fromDetails, toDetails, true)
if err != nil {
return nil, err
}
if !hasDiff {
return NewDiffSummaryTableFunctionRowIter([]diffSummaryNode{}), nil
summs := []*diff.TableDeltaSummary{}
if summ != nil {
// The old name of a renamed table can be matched; use the provided name in the result
summ.TableName = tableName
summs = []*diff.TableDeltaSummary{summ}
}
return NewDiffSummaryTableFunctionRowIter([]diffSummaryNode{diffSum}), nil
return NewDiffSummaryTableFunctionRowIter(summs), nil
}
var diffSummaries []diffSummaryNode
var diffSummaries []*diff.TableDeltaSummary
for _, delta := range deltas {
tblName := delta.ToName
if tblName == "" {
tblName = delta.FromName
}
diffSum, hasDiff, err := getDiffSummaryNodeFromDelta(ctx, delta, fromRoot, toRoot, tblName)
summ, err := getSummaryForDelta(ctx, delta, sqledb, fromDetails, toDetails, false)
if err != nil {
if errors.Is(err, diff.ErrPrimaryKeySetChanged) {
ctx.Warn(dtables.PrimaryKeyChangeWarningCode, fmt.Sprintf("summary for table %s cannot be determined. Primary key set changed.", tblName))
// Report an empty diff for tables that have primary key set changes
diffSummaries = append(diffSummaries, diffSummaryNode{tblName: tblName})
continue
}
return nil, err
}
if hasDiff {
diffSummaries = append(diffSummaries, diffSum)
if summ != nil {
diffSummaries = append(diffSummaries, summ)
}
}
return NewDiffSummaryTableFunctionRowIter(diffSummaries), nil
}
func getSummaryForDelta(ctx *sql.Context, delta diff.TableDelta, sqledb SqlDatabase, fromDetails, toDetails *refDetails, shouldErrorOnPKChange bool) (*diff.TableDeltaSummary, error) {
if delta.FromTable == nil && delta.ToTable == nil {
return nil, nil
}
if !schema.ArePrimaryKeySetsDiffable(delta.Format(), delta.FromSch, delta.ToSch) {
if shouldErrorOnPKChange {
return nil, fmt.Errorf("failed to compute diff summary for table %s: %w", delta.CurName(), diff.ErrPrimaryKeySetChanged)
}
ctx.Warn(dtables.PrimaryKeyChangeWarningCode, fmt.Sprintf(dtables.PrimaryKeyChangeWarning, fromDetails.hashStr, toDetails.hashStr))
return nil, nil
}
summ, err := delta.GetSummary(ctx)
if err != nil {
return nil, err
}
return summ, nil
}
// evaluateArguments returns fromCommitVal, toCommitVal, dotCommitVal, and tableName.
// It evaluates the argument expressions to turn them into values this DiffSummaryTableFunction
// can use. Note that this method only evaluates the expressions; it does not validate the values.
@@ -351,107 +351,6 @@ func (ds *DiffSummaryTableFunction) evaluateArguments() (interface{}, interface{
return fromCommitVal, toCommitVal, nil, tableName, nil
}
// getDiffSummaryNodeFromDelta returns a diffSummaryNode and whether there is a data diff. It fetches the
// tables from the roots and computes the diff summary when a valid table exists in both fromRoot and toRoot.
func getDiffSummaryNodeFromDelta(ctx *sql.Context, delta diff.TableDelta, fromRoot, toRoot *doltdb.RootValue, tableName string) (diffSummaryNode, bool, error) {
var oldColLen int
var newColLen int
fromTable, _, fromTableExists, err := fromRoot.GetTableInsensitive(ctx, tableName)
if err != nil {
return diffSummaryNode{}, false, err
}
if fromTableExists {
fromSch, err := fromTable.GetSchema(ctx)
if err != nil {
return diffSummaryNode{}, false, err
}
oldColLen = len(fromSch.GetAllCols().GetColumns())
}
toTable, _, toTableExists, err := toRoot.GetTableInsensitive(ctx, tableName)
if err != nil {
return diffSummaryNode{}, false, err
}
if toTableExists {
toSch, err := toTable.GetSchema(ctx)
if err != nil {
return diffSummaryNode{}, false, err
}
newColLen = len(toSch.GetAllCols().GetColumns())
}
if !fromTableExists && !toTableExists {
return diffSummaryNode{}, false, sql.ErrTableNotFound.New(tableName)
}
// no diff from tableDelta
if delta.FromTable == nil && delta.ToTable == nil {
return diffSummaryNode{}, false, nil
}
diffSum, hasDiff, keyless, err := getDiffSummary(ctx, delta)
if err != nil {
return diffSummaryNode{}, false, err
}
return diffSummaryNode{tableName, diffSum, oldColLen, newColLen, keyless}, hasDiff, nil
}
// getDiffSummary returns diff.DiffSummaryProgress object and whether there is a data diff or not.
func getDiffSummary(ctx *sql.Context, td diff.TableDelta) (diff.DiffSummaryProgress, bool, bool, error) {
// this method was adapted from diff_output.go
ch := make(chan diff.DiffSummaryProgress)
grp, ctx2 := errgroup.WithContext(ctx)
grp.Go(func() error {
defer close(ch)
err := diff.SummaryForTableDelta(ctx2, ch, td)
return err
})
acc := diff.DiffSummaryProgress{}
var count int64
grp.Go(func() error {
for {
select {
case p, ok := <-ch:
if !ok {
return nil
}
acc.Adds += p.Adds
acc.Removes += p.Removes
acc.Changes += p.Changes
acc.CellChanges += p.CellChanges
acc.NewRowSize += p.NewRowSize
acc.OldRowSize += p.OldRowSize
acc.NewCellSize += p.NewCellSize
acc.OldCellSize += p.OldCellSize
count++
case <-ctx2.Done():
return ctx2.Err()
}
}
})
if err := grp.Wait(); err != nil {
return diff.DiffSummaryProgress{}, false, false, err
}
keyless, err := td.IsKeyless(ctx)
if err != nil {
return diff.DiffSummaryProgress{}, false, keyless, err
}
if (acc.Adds+acc.Removes+acc.Changes) == 0 && (acc.OldCellSize-acc.NewCellSize) == 0 {
return diff.DiffSummaryProgress{}, false, keyless, nil
}
return acc, true, keyless, nil
}
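// The function above is an instance of a common errgroup fan-in pattern: one
// goroutine produces progress values on a channel and closes it, another
// accumulates until the channel closes or the context is cancelled. A minimal
// self-contained sketch (sumProgress and produce are hypothetical names):
func sumProgress(ctx context.Context, produce func(context.Context, chan<- int) error) (int, error) {
	grp, ctx := errgroup.WithContext(ctx)
	ch := make(chan int)
	grp.Go(func() error {
		defer close(ch) // the producer owns the channel and must close it
		return produce(ctx, ch)
	})
	var total int
	grp.Go(func() error {
		for {
			select {
			case n, ok := <-ch:
				if !ok {
					return nil // channel closed: producer finished cleanly
				}
				total += n
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	})
	if err := grp.Wait(); err != nil {
		return 0, err
	}
	return total, nil
}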
//------------------------------------
// diffSummaryTableFunctionRowIter
//------------------------------------
@@ -459,113 +358,47 @@ func getDiffSummary(ctx *sql.Context, td diff.TableDelta) (diff.DiffSummaryProgr
var _ sql.RowIter = &diffSummaryTableFunctionRowIter{}
type diffSummaryTableFunctionRowIter struct {
diffSums []diffSummaryNode
diffIdx int
summaries []*diff.TableDeltaSummary
diffIdx int
}
func (d *diffSummaryTableFunctionRowIter) incrementIndexes() {
d.diffIdx++
if d.diffIdx >= len(d.diffSums) {
if d.diffIdx >= len(d.summaries) {
d.diffIdx = 0
d.diffSums = nil
d.summaries = nil
}
}
type diffSummaryNode struct {
tblName string
diffSummary diff.DiffSummaryProgress
oldColLen int
newColLen int
keyless bool
}
func NewDiffSummaryTableFunctionRowIter(ds []diffSummaryNode) sql.RowIter {
func NewDiffSummaryTableFunctionRowIter(ds []*diff.TableDeltaSummary) sql.RowIter {
return &diffSummaryTableFunctionRowIter{
diffSums: ds,
summaries: ds,
}
}
func (d *diffSummaryTableFunctionRowIter) Next(ctx *sql.Context) (sql.Row, error) {
defer d.incrementIndexes()
if d.diffIdx >= len(d.diffSums) {
if d.diffIdx >= len(d.summaries) {
return nil, io.EOF
}
if d.diffSums == nil {
if d.summaries == nil {
return nil, io.EOF
}
ds := d.diffSums[d.diffIdx]
return getRowFromDiffSummary(ds.tblName, ds.diffSummary, ds.newColLen, ds.oldColLen, ds.keyless), nil
ds := d.summaries[d.diffIdx]
return getRowFromSummary(ds), nil
}
func (d *diffSummaryTableFunctionRowIter) Close(context *sql.Context) error {
return nil
}
// getRowFromDiffSummary takes a diff.DiffSummaryProgress and computes the rows_modified, cells_added, and
// cells_deleted values. If the change in total cell count from the old table to the new one does not
// match cells_added and/or cells_deleted, there must be schema changes whose effect on those values
// goes beyond the row count * column length calculation.
func getRowFromDiffSummary(tblName string, dsp diff.DiffSummaryProgress, newColLen, oldColLen int, keyless bool) sql.Row {
// if the table is keyless, match the current CLI command output
if keyless {
return sql.Row{
tblName, // table_name
nil, // rows_unmodified
int64(dsp.Adds), // rows_added
int64(dsp.Removes), // rows_deleted
nil, // rows_modified
nil, // cells_added
nil, // cells_deleted
nil, // cells_modified
nil, // old_row_count
nil, // new_row_count
nil, // old_cell_count
nil, // new_cell_count
}
}
numCellInserts, numCellDeletes := GetCellsAddedAndDeleted(dsp, newColLen)
rowsUnmodified := dsp.OldRowSize - dsp.Changes - dsp.Removes
func getRowFromSummary(ds *diff.TableDeltaSummary) sql.Row {
return sql.Row{
tblName, // table_name
int64(rowsUnmodified), // rows_unmodified
int64(dsp.Adds), // rows_added
int64(dsp.Removes), // rows_deleted
int64(dsp.Changes), // rows_modified
int64(numCellInserts), // cells_added
int64(numCellDeletes), // cells_deleted
int64(dsp.CellChanges), // cells_modified
int64(dsp.OldRowSize), // old_row_count
int64(dsp.NewRowSize), // new_row_count
int64(dsp.OldCellSize), // old_cell_count
int64(dsp.NewCellSize), // new_cell_count
ds.TableName, // table_name
ds.DiffType, // diff_type
ds.DataChange, // data_change
ds.SchemaChange, // schema_change
}
}
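// With the slimmed schema above, each summary row now carries just four
// columns. Illustrative output (table name and values hypothetical):
//
//   > SELECT * FROM dolt_diff_summary('main~', 'main');
//   +------------+-----------+-------------+---------------+
//   | table_name | diff_type | data_change | schema_change |
//   +------------+-----------+-------------+---------------+
//   | t1         | modified  | true        | false         |
//   +------------+-----------+-------------+---------------+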
// GetCellsAddedAndDeleted calculates the number of cells added and deleted given a diff.DiffSummaryProgress
// and the column count of the toCommit table. Rows added and deleted give a first estimate of cells added
// and deleted, but that estimate excludes cells affected by schema changes; we fill those in using the
// total cell count of the table at each commit.
func GetCellsAddedAndDeleted(acc diff.DiffSummaryProgress, newColLen int) (uint64, uint64) {
var numCellInserts, numCellDeletes float64
rowToCellInserts := float64(acc.Adds) * float64(newColLen)
rowToCellDeletes := float64(acc.Removes) * float64(newColLen)
cellDiff := float64(acc.NewCellSize) - float64(acc.OldCellSize)
if cellDiff > 0 {
numCellInserts = cellDiff + rowToCellDeletes
numCellDeletes = rowToCellDeletes
} else if cellDiff < 0 {
numCellInserts = rowToCellInserts
numCellDeletes = math.Abs(cellDiff) + rowToCellInserts
} else {
if rowToCellInserts != rowToCellDeletes {
numCellDeletes = math.Max(rowToCellDeletes, rowToCellInserts)
numCellInserts = math.Max(rowToCellDeletes, rowToCellInserts)
} else {
numCellDeletes = rowToCellDeletes
numCellInserts = rowToCellInserts
}
}
return uint64(numCellInserts), uint64(numCellDeletes)
}
@@ -52,14 +52,6 @@ type UnscopedDiffTable struct {
commitCheck doltdb.CommitFilter
}
// tableChange is an internal data structure that holds the result of processing a
// diff.TableDelta into the output data for this system table.
type tableChange struct {
tableName string
dataChange bool
schemaChange bool
}
// NewUnscopedDiffTable creates an UnscopedDiffTable
func NewUnscopedDiffTable(_ *sql.Context, dbName string, ddb *doltdb.DoltDB, head *doltdb.Commit) sql.Table {
return &UnscopedDiffTable{dbName: dbName, ddb: ddb, head: head}
@@ -241,20 +233,20 @@ func (d *doltDiffWorkingSetRowItr) Next(ctx *sql.Context) (sql.Row, error) {
return nil, io.EOF
}
change, err := processTableDelta(ctx, tableDelta)
change, err := tableDelta.GetSummary(ctx)
if err != nil {
return nil, err
}
sqlRow := sql.NewRow(
changeSet,
change.tableName,
change.TableName,
nil, // committer
nil, // email
nil, // date
nil, // message
change.dataChange,
change.schemaChange,
change.DataChange,
change.SchemaChange,
)
return sqlRow, nil
@@ -288,7 +280,7 @@ type doltDiffCommitHistoryRowItr struct {
commits []*doltdb.Commit
meta *datas.CommitMeta
hash hash.Hash
tableChanges []tableChange
tableChanges []diff.TableDeltaSummary
tableChangesIdx int
}
@@ -358,13 +350,13 @@ func (itr *doltDiffCommitHistoryRowItr) Next(ctx *sql.Context) (sql.Row, error)
return sql.NewRow(
h.String(),
tableChange.tableName,
tableChange.TableName,
meta.Name,
meta.Email,
meta.Time(),
meta.Description,
tableChange.dataChange,
tableChange.schemaChange,
tableChange.DataChange,
tableChange.SchemaChange,
), nil
}
@@ -399,7 +391,7 @@ func (itr *doltDiffCommitHistoryRowItr) loadTableChanges(ctx context.Context, co
// calculateTableChanges determines which tables changed in the specified commit by comparing it
// with its immediate ancestor commit.
func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Context, commit *doltdb.Commit) ([]tableChange, error) {
func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Context, commit *doltdb.Commit) ([]diff.TableDeltaSummary, error) {
if len(commit.DatasParents()) == 0 {
return nil, nil
}
@@ -424,9 +416,9 @@ func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Contex
return nil, err
}
tableChanges := make([]tableChange, len(deltas))
tableChanges := make([]diff.TableDeltaSummary, len(deltas))
for i := 0; i < len(deltas); i++ {
change, err := processTableDelta(itr.ctx, deltas[i])
change, err := deltas[i].GetSummary(itr.ctx)
if err != nil {
return nil, err
}
@@ -442,68 +434,6 @@ func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Contex
return tableChanges, nil
}
// processTableDelta processes the specified TableDelta to determine what kind of change it was (i.e. table drop,
// table rename, table create, or data update) and returns a tableChange struct representing the change.
func processTableDelta(ctx *sql.Context, delta diff.TableDelta) (*tableChange, error) {
// Dropping a table is always a schema change, and also a data change if the table contained data
if delta.IsDrop() {
isEmpty, err := isTableDataEmpty(ctx, delta.FromTable)
if err != nil {
return nil, err
}
return &tableChange{
tableName: delta.FromName,
dataChange: !isEmpty,
schemaChange: true,
}, nil
}
// Renaming a table is always a schema change, and also a data change if the table data differs
if delta.IsRename() {
dataChanged, err := delta.HasHashChanged()
if err != nil {
return nil, err
}
return &tableChange{
tableName: delta.ToName,
dataChange: dataChanged,
schemaChange: true,
}, nil
}
// Creating a table is always a schema change, and also a data change if data was inserted
if delta.IsAdd() {
isEmpty, err := isTableDataEmpty(ctx, delta.ToTable)
if err != nil {
return nil, err
}
return &tableChange{
tableName: delta.ToName,
dataChange: !isEmpty,
schemaChange: true,
}, nil
}
dataChanged, err := delta.HasHashChanged()
if err != nil {
return nil, err
}
schemaChanged, err := delta.HasSchemaChanged(ctx)
if err != nil {
return nil, err
}
return &tableChange{
tableName: delta.ToName,
dataChange: dataChanged,
schemaChange: schemaChanged,
}, nil
}
// Close closes the iterator.
func (itr *doltDiffCommitHistoryRowItr) Close(*sql.Context) error {
return nil
@@ -26,6 +26,7 @@ import (
"github.com/dolthub/go-mysql-server/enginetest/scriptgen/setup"
"github.com/dolthub/go-mysql-server/server"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/analyzer"
"github.com/dolthub/go-mysql-server/sql/mysql_db"
"github.com/dolthub/go-mysql-server/sql/plan"
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
@@ -1235,6 +1236,28 @@ func TestDiffTableFunctionPrepared(t *testing.T) {
}
}
func TestDiffStatTableFunction(t *testing.T) {
harness := newDoltHarness(t)
harness.Setup(setup.MydbData)
for _, test := range DiffStatTableFunctionScriptTests {
harness.engine = nil
t.Run(test.Name, func(t *testing.T) {
enginetest.TestScript(t, harness, test)
})
}
}
func TestDiffStatTableFunctionPrepared(t *testing.T) {
harness := newDoltHarness(t)
harness.Setup(setup.MydbData)
for _, test := range DiffStatTableFunctionScriptTests {
harness.engine = nil
t.Run(test.Name, func(t *testing.T) {
enginetest.TestScriptPrepared(t, harness, test)
})
}
}
func TestDiffSummaryTableFunction(t *testing.T) {
harness := newDoltHarness(t)
harness.Setup(setup.MydbData)
@@ -1351,6 +1374,13 @@ func mustNewEngine(t *testing.T, h enginetest.Harness) *gms.Engine {
return e
}
var biasedCosters = []analyzer.Coster{
analyzer.NewInnerBiasedCoster(),
analyzer.NewLookupBiasedCoster(),
analyzer.NewHashBiasedCoster(),
analyzer.NewMergeBiasedCoster(),
}
func TestSystemTableIndexes(t *testing.T) {
if !types.IsFormat_DOLT(types.Format_Default) {
t.Skip("only new format support system table indexing")
@@ -1361,23 +1391,27 @@ func TestSystemTableIndexes(t *testing.T) {
harness.SkipSetupCommit()
e := mustNewEngine(t, harness)
defer e.Close()
e.Analyzer.Coster = analyzer.NewMergeBiasedCoster()
ctx := enginetest.NewContext(harness)
for _, q := range stt.setup {
enginetest.RunQuery(t, e, harness, q)
}
for _, tt := range stt.queries {
t.Run(fmt.Sprintf("%s: %s", stt.name, tt.query), func(t *testing.T) {
if tt.skip {
t.Skip()
}
for i, c := range []string{"inner", "lookup", "hash", "merge"} {
e.Analyzer.Coster = biasedCosters[i]
for _, tt := range stt.queries {
t.Run(fmt.Sprintf("%s(%s): %s", stt.name, c, tt.query), func(t *testing.T) {
if tt.skip {
t.Skip()
}
ctx = ctx.WithQuery(tt.query)
if tt.exp != nil {
enginetest.TestQueryWithContext(t, ctx, e, harness, tt.query, tt.exp, nil, nil)
}
})
ctx = ctx.WithQuery(tt.query)
if tt.exp != nil {
enginetest.TestQueryWithContext(t, ctx, e, harness, tt.query, tt.exp, nil, nil)
}
})
}
}
}
}
@@ -104,6 +104,7 @@ func (d *DoltHarness) resetScripts() []setup.SetupScript {
}
var resetCmds []setup.SetupScript
resetCmds = append(resetCmds, setup.SetupScript{"SET foreign_key_checks=0;"})
for i := range dbs {
db := dbs[i]
resetCmds = append(resetCmds, setup.SetupScript{fmt.Sprintf("use %s", db)})
@@ -138,6 +139,7 @@ func (d *DoltHarness) resetScripts() []setup.SetupScript {
resetCmds = append(resetCmds, setup.SetupScript{"call dreset('--hard', 'head')"})
}
resetCmds = append(resetCmds, setup.SetupScript{"SET foreign_key_checks=1;"})
resetCmds = append(resetCmds, setup.SetupScript{"use mydb"})
return resetCmds
}
@@ -777,6 +777,52 @@ var DoltScripts = []queries.ScriptTest{
},
},
},
{
Name: "test hashof",
SetUpScript: []string{
"CREATE TABLE hashof_test (pk int primary key, c1 int)",
"INSERT INTO hashof_test values (1,1), (2,2), (3,3)",
"CALL DOLT_ADD('hashof_test')",
"CALL DOLT_COMMIT('-a', '-m', 'first commit')",
"SET @Commit1 = (SELECT commit_hash FROM DOLT_LOG() LIMIT 1)",
"INSERT INTO hashof_test values (4,4), (5,5), (6,6)",
"CALL DOLT_COMMIT('-a', '-m', 'second commit')",
"SET @Commit2 = (SELECT commit_hash from DOLT_LOG() LIMIT 1)",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT (hashof(@Commit1) = hashof(@Commit2))",
Expected: []sql.Row{{false}},
},
{
Query: "SELECT (hashof(@Commit1) = hashof('HEAD~1'))",
Expected: []sql.Row{
{true},
},
},
{
Query: "SELECT (hashof(@Commit2) = hashof('HEAD'))",
Expected: []sql.Row{
{true},
},
},
{
Query: "SELECT (hashof(@Commit2) = hashof('main'))",
Expected: []sql.Row{
{true},
},
},
{
Query: "SELECT hashof('non_branch')",
ExpectedErrStr: "invalid ref spec",
},
{
// Test that a short commit is invalid. This may change in the future.
Query: "SELECT hashof(left(@Commit2,30))",
ExpectedErrStr: "invalid ref spec",
},
},
},
}
func makeLargeInsert(sz int) string {
@@ -817,6 +863,20 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{
Query: "SELECT * FROM dolt_diff('main~..main', 'test');",
ExpectedErr: sql.ErrDatabaseAccessDeniedForUser,
},
{
// Without access to the database, dolt_diff_stat should fail with a database access error
User: "tester",
Host: "localhost",
Query: "SELECT * FROM dolt_diff_stat('main~', 'main', 'test');",
ExpectedErr: sql.ErrDatabaseAccessDeniedForUser,
},
{
// Without access to the database, dolt_diff_stat with dots should fail with a database access error
User: "tester",
Host: "localhost",
Query: "SELECT * FROM dolt_diff_stat('main~..main', 'test');",
ExpectedErr: sql.ErrDatabaseAccessDeniedForUser,
},
{
// Without access to the database, dolt_diff_summary should fail with a database access error
User: "tester",
@@ -873,6 +933,34 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{
Query: "SELECT * FROM dolt_diff('main~..main', 'test2');",
ExpectedErr: sql.ErrPrivilegeCheckFailed,
},
{
// With access to the db, but not the table, dolt_diff_stat should fail
User: "tester",
Host: "localhost",
Query: "SELECT * FROM dolt_diff_stat('main~', 'main', 'test2');",
ExpectedErr: sql.ErrPrivilegeCheckFailed,
},
{
// With access to the db, but not the table, dolt_diff_stat with dots should fail
User: "tester",
Host: "localhost",
Query: "SELECT * FROM dolt_diff_stat('main~...main', 'test2');",
ExpectedErr: sql.ErrPrivilegeCheckFailed,
},
{
// With access to the db, dolt_diff_stat over all tables should fail if the user lacks access to any of them
User: "tester",
Host: "localhost",
Query: "SELECT * FROM dolt_diff_stat('main~', 'main');",
ExpectedErr: sql.ErrPrivilegeCheckFailed,
},
{
// With access to the db, dolt_diff_stat with dots over all tables should fail if the user lacks access to any of them
User: "tester",
Host: "localhost",
Query: "SELECT * FROM dolt_diff_stat('main~...main');",
ExpectedErr: sql.ErrPrivilegeCheckFailed,
},
{
// With access to the db, but not the table, dolt_diff_summary should fail
User: "tester",
@@ -943,6 +1031,20 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{
Query: "SELECT COUNT(*) FROM dolt_diff('main~..main', 'test');",
Expected: []sql.Row{{1}},
},
{
// After granting access to the entire db, dolt_diff_stat should work
User: "tester",
Host: "localhost",
Query: "SELECT COUNT(*) FROM dolt_diff_stat('main~', 'main');",
Expected: []sql.Row{{1}},
},
{
// After granting access to the entire db, dolt_diff_stat with dots should work
User: "tester",
Host: "localhost",
Query: "SELECT COUNT(*) FROM dolt_diff_stat('main~...main');",
Expected: []sql.Row{{1}},
},
{
// After granting access to the entire db, dolt_diff_summary should work
User: "tester",
@@ -985,6 +1087,13 @@ var DoltUserPrivTests = []queries.UserPrivilegeTest{
Query: "SELECT * FROM dolt_diff('main~...main', 'test');",
ExpectedErr: sql.ErrDatabaseAccessDeniedForUser,
},
{
// After revoking access, dolt_diff_stat should fail
User: "tester",
Host: "localhost",
Query: "SELECT * FROM dolt_diff_stat('main~', 'main', 'test');",
ExpectedErr: sql.ErrDatabaseAccessDeniedForUser,
},
{
// After revoking access, dolt_diff_summary should fail
User: "tester",
File diff suppressed because it is too large
+6
View File
@@ -68,6 +68,9 @@ func (ms *MemoryStorage) NewViewWithDefaultFormat() ChunkStore {
// Get retrieves the Chunk with the Hash h, returning EmptyChunk if it's not
// present.
func (ms *MemoryStorage) Get(ctx context.Context, h hash.Hash) (Chunk, error) {
if err := ctx.Err(); err != nil {
return Chunk{}, err
}
ms.mu.RLock()
defer ms.mu.RUnlock()
if c, ok := ms.data[h]; ok {
@@ -207,6 +210,9 @@ func (ms *MemoryStoreView) errorIfDangling(ctx context.Context, addrs hash.HashS
}
func (ms *MemoryStoreView) Put(ctx context.Context, c Chunk, getAddrs GetAddrsCb) error {
if err := ctx.Err(); err != nil {
return err
}
addrs, err := getAddrs(ctx, c)
if err != nil {
return err
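// Both hunks above add the same fail-fast guard. The pattern, as a minimal
// sketch (doWork is a hypothetical name): check ctx.Err() before taking any
// locks or doing I/O, so an already-cancelled caller returns immediately.
func doWork(ctx context.Context) error {
	if err := ctx.Err(); err != nil {
		return err // context already cancelled or past its deadline
	}
	// ... acquire locks and do the real work ...
	return nil
}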
-331
View File
@@ -1,331 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package diff
import (
"context"
"fmt"
"sync/atomic"
humanize "github.com/dustin/go-humanize"
"golang.org/x/sync/errgroup"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/datas"
"github.com/dolthub/dolt/go/store/types"
"github.com/dolthub/dolt/go/store/util/status"
)
// Summary prints a summary of the diff between two values to stdout.
func Summary(ctx context.Context, vr1 types.ValueReader, vr2 types.ValueReader, value1, value2 types.Value) {
if is1, err := datas.IsCommit(value1); err != nil {
panic(err)
} else if is1 {
if is2, err := datas.IsCommit(value2); err != nil {
panic(err)
} else if is2 {
fmt.Println("Comparing commit values")
var err error
value1, err = datas.GetCommittedValue(ctx, vr1, value1)
d.PanicIfError(err)
value2, err = datas.GetCommittedValue(ctx, vr2, value2)
d.PanicIfError(err)
}
}
var singular, plural string
if value1.Kind() == value2.Kind() {
switch value1.Kind() {
case types.StructKind:
singular = "field"
plural = "fields"
case types.MapKind:
singular = "entry"
plural = "entries"
default:
singular = "value"
plural = "values"
}
}
eg, ctx := errgroup.WithContext(ctx)
var rp atomic.Value
ch := make(chan diffSummaryProgress)
eg.Go(func() (err error) {
defer close(ch)
defer func() {
if r := recover(); r != nil {
rp.Store(r)
err = fmt.Errorf("panic")
}
}()
err = diffSummary(ctx, ch, value1, value2)
return
})
eg.Go(func() error {
acc := diffSummaryProgress{}
LOOP:
for {
select {
case p, ok := <-ch:
if !ok {
break LOOP
}
acc.Adds += p.Adds
acc.Removes += p.Removes
acc.Changes += p.Changes
acc.NewSize += p.NewSize
acc.OldSize += p.OldSize
if status.WillPrint() {
formatStatus(acc, singular, plural)
}
case <-ctx.Done():
return ctx.Err()
}
}
formatStatus(acc, singular, plural)
status.Done()
return nil
})
if err := eg.Wait(); err != nil {
if r := rp.Load(); r != nil {
panic(r)
}
panic(err)
}
}
type diffSummaryProgress struct {
Adds, Removes, Changes, NewSize, OldSize uint64
}
func diffSummary(ctx context.Context, ch chan diffSummaryProgress, v1, v2 types.Value) error {
if !v1.Equals(v2) {
if ShouldDescend(v1, v2) {
var err error
switch v1.Kind() {
case types.ListKind:
err = diffSummaryList(ctx, ch, v1.(types.List), v2.(types.List))
case types.MapKind:
err = diffSummaryMap(ctx, ch, v1.(types.Map), v2.(types.Map))
case types.SetKind:
err = diffSummarySet(ctx, ch, v1.(types.Set), v2.(types.Set))
case types.StructKind:
err = diffSummaryStructs(ctx, ch, v1.(types.Struct), v2.(types.Struct))
default:
panic("Unrecognized type in diff function")
}
if err != nil {
return err
}
} else {
ch <- diffSummaryProgress{Adds: 1, Removes: 1, NewSize: 1, OldSize: 1}
}
}
return nil
}
func diffSummaryList(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.List) error {
select {
case ch <- diffSummaryProgress{OldSize: v1.Len(), NewSize: v2.Len()}:
case <-ctx.Done():
return ctx.Err()
}
spliceChan := make(chan types.Splice)
eg, ctx := errgroup.WithContext(ctx)
var rp atomic.Value
eg.Go(func() (err error) {
defer close(spliceChan)
defer func() {
if r := recover(); r != nil {
rp.Store(r)
err = fmt.Errorf("panic")
}
}()
return v2.Diff(ctx, v1, spliceChan)
})
eg.Go(func() (err error) {
defer func() {
if r := recover(); r != nil {
rp.Store(r)
err = fmt.Errorf("panic")
}
}()
LOOP:
for {
select {
case splice, ok := <-spliceChan:
if !ok {
break LOOP
}
var summary diffSummaryProgress
if splice.SpRemoved == splice.SpAdded {
summary = diffSummaryProgress{Changes: splice.SpRemoved}
} else {
summary = diffSummaryProgress{Adds: splice.SpAdded, Removes: splice.SpRemoved}
}
select {
case ch <- summary:
case <-ctx.Done():
return ctx.Err()
}
case <-ctx.Done():
return ctx.Err()
}
}
return nil
})
if err := eg.Wait(); err != nil {
if r := rp.Load(); r != nil {
panic(r)
}
return err
}
return nil
}
func diffSummaryMap(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.Map) error {
return diffSummaryValueChanged(ctx, ch, v1.Len(), v2.Len(), func(ctx context.Context, changeChan chan<- types.ValueChanged) error {
return v2.Diff(ctx, v1, changeChan)
})
}
func diffSummarySet(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.Set) error {
return diffSummaryValueChanged(ctx, ch, v1.Len(), v2.Len(), func(ctx context.Context, changeChan chan<- types.ValueChanged) error {
return v2.Diff(ctx, v1, changeChan)
})
}
func diffSummaryStructs(ctx context.Context, ch chan<- diffSummaryProgress, v1, v2 types.Struct) error {
// TODO: Operate on values directly
t1, err := types.TypeOf(v1)
if err != nil {
return err
}
t2, err := types.TypeOf(v2)
if err != nil {
return err
}
size1 := uint64(t1.Desc.(types.StructDesc).Len())
size2 := uint64(t2.Desc.(types.StructDesc).Len())
return diffSummaryValueChanged(ctx, ch, size1, size2, func(ctx context.Context, changeChan chan<- types.ValueChanged) error {
return v2.Diff(ctx, v1, changeChan)
})
}
func diffSummaryValueChanged(ctx context.Context, ch chan<- diffSummaryProgress, oldSize, newSize uint64, f diffFunc) error {
select {
case ch <- diffSummaryProgress{OldSize: oldSize, NewSize: newSize}:
case <-ctx.Done():
return ctx.Err()
}
changeChan := make(chan types.ValueChanged)
eg, ctx := errgroup.WithContext(ctx)
var rp atomic.Value
eg.Go(func() (err error) {
defer close(changeChan)
defer func() {
if r := recover(); r != nil {
rp.Store(r)
err = fmt.Errorf("panic")
}
}()
return f(ctx, changeChan)
})
eg.Go(func() error {
return reportChanges(ctx, ch, changeChan)
})
if err := eg.Wait(); err != nil {
if r := rp.Load(); r != nil {
panic(r)
}
return err
}
return nil
}
func reportChanges(ctx context.Context, ch chan<- diffSummaryProgress, changeChan chan types.ValueChanged) error {
LOOP:
for {
select {
case change, ok := <-changeChan:
if !ok {
break LOOP
}
var summary diffSummaryProgress
switch change.ChangeType {
case types.DiffChangeAdded:
summary = diffSummaryProgress{Adds: 1}
case types.DiffChangeRemoved:
summary = diffSummaryProgress{Removes: 1}
case types.DiffChangeModified:
summary = diffSummaryProgress{Changes: 1}
default:
panic("unknown change type")
}
select {
case ch <- summary:
return nil
case <-ctx.Done():
return ctx.Err()
}
case <-ctx.Done():
return ctx.Err()
}
}
return nil
}
func formatStatus(acc diffSummaryProgress, singular, plural string) {
pluralize := func(singular, plural string, n uint64) string {
var noun string
if n != 1 {
noun = plural
} else {
noun = singular
}
return fmt.Sprintf("%s %s", humanize.Comma(int64(n)), noun)
}
insertions := pluralize("insertion", "insertions", acc.Adds)
deletions := pluralize("deletion", "deletions", acc.Removes)
changes := pluralize("change", "changes", acc.Changes)
oldValues := pluralize(singular, plural, acc.OldSize)
newValues := pluralize(singular, plural, acc.NewSize)
status.Printf("%s (%.2f%%), %s (%.2f%%), %s (%.2f%%), (%s vs %s)", insertions, (float64(100*acc.Adds) / float64(acc.OldSize)), deletions, (float64(100*acc.Removes) / float64(acc.OldSize)), changes, (float64(100*acc.Changes) / float64(acc.OldSize)), oldValues, newValues)
}
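// Worked example (illustrative): with acc = {Adds: 5, Removes: 2, Changes: 10,
// OldSize: 100, NewSize: 103} and map values, formatStatus prints:
//   5 insertions (5.00%), 2 deletions (2.00%), 10 changes (10.00%), (100 entries vs 103 entries)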
+6 -5
View File
@@ -335,27 +335,28 @@ func (s partsByPartNum) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s3p awsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) {
func (s3p awsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) {
plan, err := planRangeCopyConjoin(sources, stats)
if err != nil {
return nil, err
return nil, nil, err
}
if plan.chunkCount == 0 {
return emptyChunkSource{}, nil
return emptyChunkSource{}, nil, nil
}
t1 := time.Now()
name := nameFromSuffixes(plan.suffixes())
err = s3p.executeCompactionPlan(ctx, plan, name.String())
if err != nil {
return nil, err
return nil, nil, err
}
verbose.Logger(ctx).Sugar().Debugf("Compacted table of %d Kb in %s", plan.totalCompressedData/1024, time.Since(t1))
tra := &s3TableReaderAt{&s3ObjectReader{s3: s3p.s3, bucket: s3p.bucket, readRl: s3p.rl, ns: s3p.ns}, name}
return newReaderFromIndexData(ctx, s3p.q, plan.mergedIndex, name, tra, s3BlockSize)
cs, err := newReaderFromIndexData(ctx, s3p.q, plan.mergedIndex, name, tra, s3BlockSize)
return cs, func() {}, err
}
func (s3p awsTablePersister) executeCompactionPlan(ctx context.Context, plan compactionPlan, key string) error {
+5 -5
View File
@@ -381,7 +381,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) {
chunks := smallChunks[:len(smallChunks)-1]
sources := makeSources(s3p, chunks)
src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
require.NoError(t, err)
defer src.close()
for _, s := range sources {
@@ -402,7 +402,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) {
s3p := newPersister(s3svc, ddb)
sources := makeSources(s3p, smallChunks)
src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
require.NoError(t, err)
defer src.close()
for _, s := range sources {
@@ -443,7 +443,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) {
sources[i], err = s3p.Persist(context.Background(), mt, nil, &Stats{})
require.NoError(t, err)
}
src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
require.NoError(t, err)
defer src.close()
for _, s := range sources {
@@ -484,7 +484,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) {
require.NoError(t, err)
sources := chunkSources{cs1, cs2}
src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
require.NoError(t, err)
defer src.close()
for _, s := range sources {
@@ -539,7 +539,7 @@ func TestAWSTablePersisterConjoinAll(t *testing.T) {
require.NoError(t, err)
sources = append(sources, cs)
src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
src, _, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
require.NoError(t, err)
defer src.close()
for _, s := range sources {
+16 -7
View File
@@ -78,7 +78,7 @@ func (bsp *blobstorePersister) Persist(ctx context.Context, mt *memTable, haver
}
// ConjoinAll implements tablePersister.
func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) {
func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) {
var sized []sourceWithSize
for _, src := range sources {
sized = append(sized, sourceWithSize{src, src.currentSize()})
@@ -86,7 +86,7 @@ func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSour
plan, err := planConjoin(sized, stats)
if err != nil {
return nil, err
return nil, nil, err
}
address := nameFromSuffixes(plan.suffixes())
name := address.String()
@@ -101,24 +101,25 @@ func (bsp *blobstorePersister) ConjoinAll(ctx context.Context, sources chunkSour
for _, src := range plan.sources.sws {
sub, err := bsp.getRecordsSubObject(ctx, src.source)
if err != nil {
return nil, err
return nil, nil, err
}
conjoinees = append(conjoinees, sub)
}
// first concatenate all the sub-objects to create a composite sub-object
if _, err = bsp.bs.Concatenate(ctx, name+tableRecordsExt, conjoinees); err != nil {
return nil, err
return nil, nil, err
}
if _, err = blobstore.PutBytes(ctx, bsp.bs, name+tableTailExt, plan.mergedIndex); err != nil {
return nil, err
return nil, nil, err
}
// then concatenate into a final blob
if _, err = bsp.bs.Concatenate(ctx, name, []string{name + tableRecordsExt, name + tableTailExt}); err != nil {
return emptyChunkSource{}, err
return emptyChunkSource{}, nil, err
}
return newBSChunkSource(ctx, bsp.bs, address, plan.chunkCount, bsp.q, stats)
cs, err := newBSChunkSource(ctx, bsp.bs, address, plan.chunkCount, bsp.q, stats)
return cs, func() {}, err
}
func (bsp *blobstorePersister) getRecordsSubObject(ctx context.Context, cs chunkSource) (name string, err error) {
@@ -233,6 +234,14 @@ type bsTableReaderAt struct {
bs blobstore.Blobstore
}
func (bsTRA *bsTableReaderAt) Close() error {
return nil
}
func (bsTRA *bsTableReaderAt) clone() (tableReaderAt, error) {
return bsTRA, nil
}
func (bsTRA *bsTableReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) {
rc, _, err := bsTRA.bs.Get(ctx, bsTRA.key, blobstore.AllRange)
return rc, err
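// The new Close and clone methods above suggest tableReaderAt grew lifecycle
// management in this change. Assumed interface shape, reconstructed from the
// implementations shown in this diff (the positional-read method it must also
// carry is elided because its exact signature does not appear here):
type tableReaderAt interface {
	Close() error
	clone() (tableReaderAt, error)
	Reader(ctx context.Context) (io.ReadCloser, error)
	// ... plus the positional read method used by tableReader ...
}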
+18 -10
View File
@@ -94,6 +94,7 @@ func (c noopConjoiner) chooseConjoinees(sources []tableSpec) (conjoinees, keeper
func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) (manifestContents, error) {
var conjoined tableSpec
var conjoinees, keepers, appendixSpecs []tableSpec
var cleanup cleanupFunc
for {
if conjoinees == nil {
@@ -110,7 +111,7 @@ func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents,
return manifestContents{}, err
}
conjoined, err = conjoinTables(ctx, conjoinees, p, stats)
conjoined, cleanup, err = conjoinTables(ctx, conjoinees, p, stats)
if err != nil {
return manifestContents{}, err
}
@@ -140,11 +141,18 @@ func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents,
}
if newContents.lock == upstream.lock {
cleanup()
return upstream, nil
}
// Optimistic lock failure. Someone else moved to the root, the set of tables, or both out from under us.
// If we can re-use the conjoin we already performed, we want to try again. Currently, we will only do so if ALL conjoinees are still present upstream. If we can't re-use...then someone else almost certainly landed a conjoin upstream. In this case, bail and let clients ask again if they think they still can't proceed.
// Optimistic lock failure. Someone else moved the root, the
// set of tables, or both out from under us. If we can re-use
// the conjoin we already performed, we want to try again.
// Currently, we will only do so if ALL conjoinees are still
// present upstream. If we can't re-use...then someone else
// almost certainly landed a conjoin upstream. In this case,
// bail and let clients ask again if they think they still
// can't proceed.
// If the appendix has changed we simply bail
// and let the client retry
@@ -186,7 +194,7 @@ func conjoin(ctx context.Context, s conjoinStrategy, upstream manifestContents,
}
}
func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister, stats *Stats) (conjoined tableSpec, err error) {
func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister, stats *Stats) (conjoined tableSpec, cleanup cleanupFunc, err error) {
eg, ectx := errgroup.WithContext(ctx)
toConjoin := make(chunkSources, len(conjoinees))
@@ -205,14 +213,14 @@ func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister
}
}()
if err = eg.Wait(); err != nil {
return tableSpec{}, err
return tableSpec{}, nil, err
}
t1 := time.Now()
conjoinedSrc, err := p.ConjoinAll(ctx, toConjoin, stats)
conjoinedSrc, cleanup, err := p.ConjoinAll(ctx, toConjoin, stats)
if err != nil {
return tableSpec{}, err
return tableSpec{}, nil, err
}
defer conjoinedSrc.close()
@@ -221,7 +229,7 @@ func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister
cnt, err := conjoinedSrc.count()
if err != nil {
return tableSpec{}, err
return tableSpec{}, nil, err
}
stats.ChunksPerConjoin.Sample(uint64(cnt))
@@ -229,9 +237,9 @@ func conjoinTables(ctx context.Context, conjoinees []tableSpec, p tablePersister
h := conjoinedSrc.hash()
cnt, err = conjoinedSrc.count()
if err != nil {
return tableSpec{}, err
return tableSpec{}, nil, err
}
return tableSpec{h, cnt}, nil
return tableSpec{h, cnt}, cleanup, nil
}
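// The ownership rule the new return value encodes, as a self-contained sketch
// (buildAndSwap, build, and swap are hypothetical names): the callee hands
// back a cleanup closure, and the caller runs it only once the result is
// durably referenced, or once the whole operation proved to be a no-op.
func buildAndSwap(build func() (string, func(), error), swap func(string) bool) error {
	name, cleanup, err := build()
	if err != nil {
		return err
	}
	if swap(name) {
		cleanup() // the new artifact is live; the sources are now garbage
	}
	return nil
}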
func toSpecs(srcs chunkSources) ([]tableSpec, error) {
+8
View File
@@ -54,6 +54,14 @@ func (t tableNotInDynamoErr) Error() string {
return fmt.Sprintf("NBS table %s not present in DynamoDB table %s", t.nbs, t.dynamo)
}
func (dtra *dynamoTableReaderAt) Close() error {
return nil
}
func (dtra *dynamoTableReaderAt) clone() (tableReaderAt, error) {
return dtra, nil
}
func (dtra *dynamoTableReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) {
data, err := dtra.ddb.ReadTable(ctx, dtra.h, &Stats{})
if err != nil {
-178
View File
@@ -1,178 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package nbs
import (
"os"
"sort"
"sync"
)
func newFDCache(targetSize int) *fdCache {
return &fdCache{targetSize: targetSize, cache: map[string]fdCacheEntry{}}
}
// fdCache ref-counts open file descriptors, but doesn't keep a hard cap on
// the number of open files. Once the cache's target size is exceeded, opening
// a new file causes the cache to try to get the cache back to the target size
// by closing fds with zero refs. If there aren't enough such fds, fdCache
// gives up and tries again next time a caller refs a file.
type fdCache struct {
targetSize int
mu sync.Mutex
cache map[string]fdCacheEntry
}
type fdCacheEntry struct {
refCount uint32
f *os.File
}
// RefFile returns an opened *os.File for the file at |path|, or an error
// indicating why the file could not be opened. If the cache already had an
// entry for |path|, RefFile increments its refcount and returns the cached
// pointer. If not, it opens the file and caches the pointer for others to
// use. If RefFile returns an error, it's guaranteed that no refCounts were
// changed, so it's an error to make a subsequent call to UnrefFile().
// This is intended for clients that hold fds for extremely short periods.
func (fc *fdCache) RefFile(path string) (f *os.File, err error) {
refFile := func() *os.File {
if ce, present := fc.cache[path]; present {
ce.refCount++
fc.cache[path] = ce
return ce.f
}
return nil
}
f = func() *os.File {
fc.mu.Lock()
defer fc.mu.Unlock()
return refFile()
}()
if f != nil {
return f, nil
}
// We very much want this open to happen outside the lock, but that means multiple callers may get here concurrently, so we have to handle the resulting race below.
f, err = os.Open(path)
if err != nil {
return nil, err
}
fc.mu.Lock()
defer fc.mu.Unlock()
if cached := refFile(); cached != nil {
// Someone beat us to it, so close f and return cached fd
f.Close()
return cached, nil
}
// I won the race!
fc.cache[path] = fdCacheEntry{f: f, refCount: 1}
return f, nil
}
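// Typical call pattern for the cache above (readHeader is a hypothetical
// helper): every successful RefFile is paired with exactly one UnrefFile, and
// reads go through ReadAt so the shared fd's offset is never moved.
func readHeader(fc *fdCache, path string) ([]byte, error) {
	f, err := fc.RefFile(path)
	if err != nil {
		return nil, err // per the contract above, do not Unref after an error
	}
	defer fc.UnrefFile(path)
	buf := make([]byte, 8)
	if _, err := f.ReadAt(buf, 0); err != nil {
		return nil, err
	}
	return buf, nil
}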
// UnrefFile reduces the refcount of the entry at |path|. If the cache is over
// |fc.targetSize|, UnrefFile makes a best effort to shrink the cache by dumping
// entries with a zero refcount. If there aren't enough zero refcount entries
// to drop to get the cache back to |fc.targetSize|, the cache will remain
// over |fc.targetSize| until the next call to UnrefFile().
func (fc *fdCache) UnrefFile(path string) error {
fc.mu.Lock()
defer fc.mu.Unlock()
if ce, present := fc.cache[path]; present {
ce.refCount--
fc.cache[path] = ce
}
if len(fc.cache) > fc.targetSize {
// Record the entries we want to drop and delete them after the loop
needed := len(fc.cache) - fc.targetSize
toDrop := make([]string, 0, needed)
for p, ce := range fc.cache {
if ce.refCount != 0 {
continue
}
toDrop = append(toDrop, p)
err := ce.f.Close()
if err != nil {
return err
}
needed--
if needed == 0 {
break
}
}
for _, p := range toDrop {
delete(fc.cache, p)
}
}
return nil
}
// ShrinkCache forcefully removes all file handles with a refcount of zero.
func (fc *fdCache) ShrinkCache() error {
fc.mu.Lock()
defer fc.mu.Unlock()
toDrop := make([]string, 0, len(fc.cache))
for p, ce := range fc.cache {
if ce.refCount != 0 {
continue
}
toDrop = append(toDrop, p)
err := ce.f.Close()
if err != nil {
return err
}
}
for _, p := range toDrop {
delete(fc.cache, p)
}
return nil
}
// Drop dumps the entire cache and closes all currently open files.
func (fc *fdCache) Drop() {
fc.mu.Lock()
defer fc.mu.Unlock()
for _, ce := range fc.cache {
ce.f.Close()
}
fc.cache = map[string]fdCacheEntry{}
}
// reportEntries is meant for testing.
func (fc *fdCache) reportEntries() sort.StringSlice {
fc.mu.Lock()
defer fc.mu.Unlock()
ret := make(sort.StringSlice, 0, len(fc.cache))
for p := range fc.cache {
ret = append(ret, p)
}
sort.Sort(ret)
return ret
}
-136
View File
@@ -1,136 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package nbs
import (
"fmt"
"os"
"path/filepath"
"sort"
"sync"
"testing"
"github.com/dolthub/dolt/go/libraries/utils/file"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFDCache(t *testing.T) {
dir := makeTempDir(t)
defer file.RemoveAll(dir)
paths := [3]string{}
for i := range paths {
name := fmt.Sprintf("file%d", i)
paths[i] = filepath.Join(dir, name)
err := os.WriteFile(paths[i], []byte(name), 0644)
require.NoError(t, err)
}
refNoError := func(fc *fdCache, p string, assert *assert.Assertions) *os.File {
f, err := fc.RefFile(p)
require.NoError(t, err)
assert.NotNil(f)
return f
}
t.Run("ConcurrentOpen", func(t *testing.T) {
assert := assert.New(t)
concurrency := 3
fc := newFDCache(3)
defer fc.Drop()
trigger := make(chan struct{})
wg := sync.WaitGroup{}
for i := 0; i < concurrency; i++ {
wg.Add(1)
go func() {
defer wg.Done()
<-trigger
fc.RefFile(paths[0])
}()
}
close(trigger)
wg.Wait()
present := fc.reportEntries()
if assert.Len(present, 1) {
ce := fc.cache[present[0]]
assert.EqualValues(concurrency, ce.refCount)
}
})
t.Run("NoEvictions", func(t *testing.T) {
assert := assert.New(t)
fc := newFDCache(2)
defer fc.Drop()
f := refNoError(fc, paths[0], assert)
f2 := refNoError(fc, paths[1], assert)
assert.NotEqual(f, f2)
dup := refNoError(fc, paths[0], assert)
assert.Equal(f, dup)
})
t.Run("Evictions", func(t *testing.T) {
assert := assert.New(t)
fc := newFDCache(1)
defer fc.Drop()
f0 := refNoError(fc, paths[0], assert)
f1 := refNoError(fc, paths[1], assert)
assert.NotEqual(f0, f1)
// f0 wasn't evicted, because that doesn't happen until UnrefFile()
dup := refNoError(fc, paths[0], assert)
assert.Equal(f0, dup)
expected := sort.StringSlice(paths[:2])
sort.Sort(expected)
assert.EqualValues(expected, fc.reportEntries())
// Unreffing f1 now should evict it
err := fc.UnrefFile(paths[1])
require.NoError(t, err)
assert.EqualValues(paths[:1], fc.reportEntries())
// Bring f1 back so we can test multiple evictions in a row
f1 = refNoError(fc, paths[1], assert)
assert.NotEqual(f0, f1)
// After adding f3, we should be able to evict both f0 and f1
f2 := refNoError(fc, paths[2], assert)
assert.NotEqual(f0, f2)
assert.NotEqual(f1, f2)
err = fc.UnrefFile(paths[0])
require.NoError(t, err)
err = fc.UnrefFile(paths[0])
require.NoError(t, err)
err = fc.UnrefFile(paths[1])
require.NoError(t, err)
assert.EqualValues(paths[2:], fc.reportEntries())
})
}
+17 -26
View File
@@ -33,20 +33,17 @@ import (
"time"
"github.com/dolthub/dolt/go/libraries/utils/file"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/util/tempfiles"
)
const tempTablePrefix = "nbs_table_"
func newFSTablePersister(dir string, fc *fdCache, q MemoryQuotaProvider) tablePersister {
d.PanicIfTrue(fc == nil)
return &fsTablePersister{dir, fc, q}
func newFSTablePersister(dir string, q MemoryQuotaProvider) tablePersister {
return &fsTablePersister{dir, q}
}
type fsTablePersister struct {
dir string
fc *fdCache
q MemoryQuotaProvider
}
@@ -54,7 +51,7 @@ var _ tablePersister = &fsTablePersister{}
var _ tableFilePersister = &fsTablePersister{}
func (ftp *fsTablePersister) Open(ctx context.Context, name addr, chunkCount uint32, stats *Stats) (chunkSource, error) {
return newFileTableReader(ctx, ftp.dir, name, chunkCount, ftp.q, ftp.fc)
return newFileTableReader(ctx, ftp.dir, name, chunkCount, ftp.q)
}
func (ftp *fsTablePersister) Exists(ctx context.Context, name addr, chunkCount uint32, stats *Stats) (bool, error) {
@@ -154,11 +151,6 @@ func (ftp *fsTablePersister) persistTable(ctx context.Context, name addr, data [
}
newName := filepath.Join(ftp.dir, name.String())
err = ftp.fc.ShrinkCache()
if err != nil {
return nil, err
}
err = file.Rename(tempName, newName)
@@ -169,15 +161,14 @@ func (ftp *fsTablePersister) persistTable(ctx context.Context, name addr, data [
return ftp.Open(ctx, name, chunkCount, stats)
}
func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) {
func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) {
plan, err := planRangeCopyConjoin(sources, stats)
if err != nil {
return emptyChunkSource{}, err
return emptyChunkSource{}, nil, err
}
if plan.chunkCount == 0 {
return emptyChunkSource{}, nil
return emptyChunkSource{}, nil, nil
}
name := nameFromSuffixes(plan.suffixes())
@@ -224,18 +215,24 @@ func (ftp *fsTablePersister) ConjoinAll(ctx context.Context, sources chunkSource
return temp.Name(), nil
}()
if err != nil {
return nil, err
return nil, nil, err
}
err = file.Rename(tempName, filepath.Join(ftp.dir, name.String()))
if err != nil {
return nil, err
return nil, nil, err
}
return ftp.Open(ctx, name, plan.chunkCount, stats)
cs, err := ftp.Open(ctx, name, plan.chunkCount, stats)
if err != nil {
return nil, nil, err
}
return cs, func() {
for _, s := range sources {
file.Remove(filepath.Join(ftp.dir, s.hash().String()))
}
}, nil
}
func (ftp *fsTablePersister) PruneTableFiles(ctx context.Context, contents manifestContents, mtime time.Time) error {
@@ -247,12 +244,6 @@ func (ftp *fsTablePersister) PruneTableFiles(ctx context.Context, contents manif
return err
}
err = ftp.fc.ShrinkCache()
if err != nil {
return err
}
ea := make(gcErrAccum)
for _, info := range fileInfos {
if info.IsDir() {
+17 -118
View File
@@ -35,57 +35,6 @@ import (
"github.com/stretchr/testify/require"
)
func TestFSTableCacheOnOpen(t *testing.T) {
assert := assert.New(t)
dir := makeTempDir(t)
defer file.RemoveAll(dir)
names := []addr{}
cacheSize := 2
fc := newFDCache(cacheSize)
defer fc.Drop()
fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{})
// Create some tables manually, load them into the cache
func() {
for i := 0; i < cacheSize; i++ {
name, err := writeTableData(dir, []byte{byte(i)})
require.NoError(t, err)
names = append(names, name)
}
for _, name := range names {
tr, err := fts.Open(context.Background(), name, 1, nil)
require.NoError(t, err)
defer tr.close()
}
}()
// Tables should still be cached and on disk
for i, name := range names {
src, err := fts.Open(context.Background(), name, 1, nil)
require.NoError(t, err)
defer src.close()
h := computeAddr([]byte{byte(i)})
assert.True(src.has(h))
}
// Kick a table out of the cache
name, err := writeTableData(dir, []byte{0xff})
require.NoError(t, err)
tr, err := fts.Open(context.Background(), name, 1, nil)
require.NoError(t, err)
defer tr.close()
present := fc.reportEntries()
// Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size
assert.Len(present, cacheSize)
err = fc.ShrinkCache()
require.NoError(t, err)
err = removeTables(dir, names...)
require.NoError(t, err)
}
func makeTempDir(t *testing.T) string {
dir, err := os.MkdirTemp("", "")
require.NoError(t, err)
@@ -122,9 +71,7 @@ func TestFSTablePersisterPersist(t *testing.T) {
assert := assert.New(t)
dir := makeTempDir(t)
defer file.RemoveAll(dir)
fc := newFDCache(defaultMaxTables)
defer fc.Drop()
fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{})
fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{})
src, err := persistTableData(fts, testChunks...)
require.NoError(t, err)
@@ -163,9 +110,7 @@ func TestFSTablePersisterPersistNoData(t *testing.T) {
dir := makeTempDir(t)
defer file.RemoveAll(dir)
fc := newFDCache(defaultMaxTables)
defer fc.Drop()
fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{})
fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{})
src, err := fts.Persist(context.Background(), mt, existingTable, &Stats{})
require.NoError(t, err)
@@ -175,41 +120,6 @@ func TestFSTablePersisterPersistNoData(t *testing.T) {
assert.True(os.IsNotExist(err), "%v", err)
}
func TestFSTablePersisterCacheOnPersist(t *testing.T) {
assert := assert.New(t)
dir := makeTempDir(t)
fc := newFDCache(1)
defer fc.Drop()
fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{})
defer file.RemoveAll(dir)
var name addr
func() {
src, err := persistTableData(fts, testChunks...)
require.NoError(t, err)
defer src.close()
name = src.hash()
}()
// Table should still be cached
src, err := fts.Open(context.Background(), name, uint32(len(testChunks)), nil)
require.NoError(t, err)
defer src.close()
assertChunksInReader(testChunks, src, assert)
// Evict |name| from cache
tr, err := persistTableData(fts, []byte{0xff})
require.NoError(t, err)
defer tr.close()
present := fc.reportEntries()
// Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size
assert.Len(present, 1)
err = removeTables(dir, name)
require.NoError(t, err)
}
func TestFSTablePersisterConjoinAll(t *testing.T) {
ctx := context.Background()
assert := assert.New(t)
@@ -218,9 +128,7 @@ func TestFSTablePersisterConjoinAll(t *testing.T) {
dir := makeTempDir(t)
defer file.RemoveAll(dir)
fc := newFDCache(len(sources))
defer fc.Drop()
fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{})
fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{})
for i, c := range testChunks {
randChunk := make([]byte, (i+1)*13)
@@ -237,7 +145,7 @@ func TestFSTablePersisterConjoinAll(t *testing.T) {
}
}()
src, err := fts.ConjoinAll(ctx, sources, &Stats{})
src, _, err := fts.ConjoinAll(ctx, sources, &Stats{})
require.NoError(t, err)
defer src.close()
@@ -251,10 +159,6 @@ func TestFSTablePersisterConjoinAll(t *testing.T) {
defer tr.close()
assertChunksInReader(testChunks, tr, assert)
}
present := fc.reportEntries()
// Since 0 refcount entries are evicted randomly, the only thing we can validate is that fc remains at its target size
assert.Len(present, len(sources))
}
func TestFSTablePersisterConjoinAllDups(t *testing.T) {
@@ -262,29 +166,24 @@ func TestFSTablePersisterConjoinAllDups(t *testing.T) {
assert := assert.New(t)
dir := makeTempDir(t)
defer file.RemoveAll(dir)
fc := newFDCache(defaultMaxTables)
defer fc.Drop()
fts := newFSTablePersister(dir, fc, &UnlimitedQuotaProvider{})
fts := newFSTablePersister(dir, &UnlimitedQuotaProvider{})
reps := 3
sources := make(chunkSources, reps)
for i := 0; i < reps; i++ {
mt := newMemTable(1 << 10)
for _, c := range testChunks {
mt.addChunk(computeAddr(c), c)
}
var err error
sources[i], err = fts.Persist(ctx, mt, nil, &Stats{})
require.NoError(t, err)
mt := newMemTable(1 << 10)
for _, c := range testChunks {
mt.addChunk(computeAddr(c), c)
}
defer func() {
for _, s := range sources {
s.close()
}
}()
src, err := fts.ConjoinAll(ctx, sources, &Stats{})
var err error
sources[0], err = fts.Persist(ctx, mt, nil, &Stats{})
require.NoError(t, err)
sources[1], err = sources[0].clone()
require.NoError(t, err)
sources[2], err = sources[0].clone()
require.NoError(t, err)
src, _, err := fts.ConjoinAll(ctx, sources, &Stats{})
require.NoError(t, err)
defer src.close()
+39 -44
View File
@@ -33,8 +33,7 @@ import (
type fileTableReader struct {
tableReader
fc *fdCache
h addr
h addr
}
const (
@@ -52,16 +51,15 @@ func tableFileExists(ctx context.Context, dir string, h addr) (bool, error) {
return err == nil, err
}
func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint32, q MemoryQuotaProvider, fc *fdCache) (cs chunkSource, err error) {
func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint32, q MemoryQuotaProvider) (cs chunkSource, err error) {
path := filepath.Join(dir, h.String())
var f *os.File
index, sz, err := func() (ti onHeapTableIndex, sz int64, err error) {
// Be careful with how |f| is used below. |RefFile| returns a cached
// os.File pointer so the code needs to use f in a concurrency-safe
// manner. Moving the file offset is BAD.
var f *os.File
f, err = fc.RefFile(path)
f, err = os.Open(path)
if err != nil {
return
}
@@ -103,14 +101,6 @@ func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint
return
}
defer func() {
unrefErr := fc.UnrefFile(path)
if unrefErr != nil && err == nil {
q.ReleaseQuotaBytes(len(b))
err = unrefErr
}
}()
ti, err = parseTableIndex(ctx, b, q)
if err != nil {
q.ReleaseQuotaBytes(len(b))
@@ -120,72 +110,77 @@ func newFileTableReader(ctx context.Context, dir string, h addr, chunkCount uint
return
}()
if err != nil {
if f != nil {
f.Close()
}
return nil, err
}
if chunkCount != index.chunkCount() {
index.Close()
f.Close()
return nil, errors.New("unexpected chunk count")
}
tr, err := newTableReader(index, &cacheReaderAt{path, fc, sz}, fileBlockSize)
tr, err := newTableReader(index, &fileReaderAt{f, path, sz}, fileBlockSize)
if err != nil {
index.Close()
f.Close()
return nil, err
}
return &fileTableReader{
tr,
fc,
h,
}, nil
}
func (mmtr *fileTableReader) hash() addr {
return mmtr.h
func (ftr *fileTableReader) hash() addr {
return ftr.h
}
func (mmtr *fileTableReader) close() error {
return mmtr.tableReader.close()
func (ftr *fileTableReader) close() error {
return ftr.tableReader.close()
}
func (mmtr *fileTableReader) clone() (chunkSource, error) {
tr, err := mmtr.tableReader.clone()
func (ftr *fileTableReader) clone() (chunkSource, error) {
tr, err := ftr.tableReader.clone()
if err != nil {
return &fileTableReader{}, err
}
return &fileTableReader{tr, mmtr.fc, mmtr.h}, nil
return &fileTableReader{tr, ftr.h}, nil
}
type cacheReaderAt struct {
type fileReaderAt struct {
f *os.File
path string
fc *fdCache
sz int64
}
func (cra *cacheReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) {
return io.NopCloser(io.LimitReader(&readerAdapter{cra, 0, ctx}, cra.sz)), nil
func (fra *fileReaderAt) clone() (tableReaderAt, error) {
f, err := os.Open(fra.path)
if err != nil {
return nil, err
}
return &fileReaderAt{
f,
fra.path,
fra.sz,
}, nil
}
func (cra *cacheReaderAt) ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) {
var r io.ReaderAt
func (fra *fileReaderAt) Close() error {
return fra.f.Close()
}
func (fra *fileReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) {
return os.Open(fra.path)
}
func (fra *fileReaderAt) ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) {
t1 := time.Now()
if r, err = cra.fc.RefFile(cra.path); err != nil {
return
}
defer func() {
stats.FileBytesPerRead.Sample(uint64(len(p)))
stats.FileReadLatency.SampleTimeSince(t1)
}()
defer func() {
unrefErr := cra.fc.UnrefFile(cra.path)
if err == nil {
err = unrefErr
}
}()
return r.ReadAt(p, off)
return fra.f.ReadAt(p, off)
}
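The fileReaderAt above replaces the removed fd-cache indirection: each reader owns its *os.File, and clone() opens a fresh descriptor instead of sharing one. ReadAt never moves the file offset, so a single descriptor is safe for concurrent reads, while per-clone descriptors give each reader an independent lifetime. A self-contained sketch of that design choice, using /etc/hosts as a hypothetical stand-in for a table file:
package main
import (
	"fmt"
	"os"
)
func main() {
	// a "base" reader and its "clone" hold independent descriptors
	// for the same path, mirroring fileReaderAt.clone above
	f1, err := os.Open("/etc/hosts")
	if err != nil {
		panic(err)
	}
	f2, err := os.Open("/etc/hosts")
	if err != nil {
		panic(err)
	}
	_ = f1.Close() // closing one reader...
	buf := make([]byte, 8)
	n, err := f2.ReadAt(buf, 0) // ...leaves the other fully usable
	fmt.Println(n, err)
}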
+1 -4
View File
@@ -40,9 +40,6 @@ func TestMmapTableReader(t *testing.T) {
require.NoError(t, err)
defer file.RemoveAll(dir)
fc := newFDCache(1)
defer fc.Drop()
chunks := [][]byte{
[]byte("hello2"),
[]byte("goodbye2"),
@@ -54,7 +51,7 @@ func TestMmapTableReader(t *testing.T) {
err = os.WriteFile(filepath.Join(dir, h.String()), tableData, 0666)
require.NoError(t, err)
trc, err := newFileTableReader(ctx, dir, h, uint32(len(chunks)), &UnlimitedQuotaProvider{}, fc)
trc, err := newFileTableReader(ctx, dir, h, uint32(len(chunks)), &UnlimitedQuotaProvider{})
require.NoError(t, err)
defer trc.close()
assertChunksInReader(chunks, trc, assert)
-231
View File
@@ -1,231 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package nbs
import (
"errors"
"io"
"os"
"path/filepath"
"strings"
"sync"
"github.com/dolthub/dolt/go/libraries/utils/file"
"github.com/dolthub/dolt/go/store/atomicerr"
"github.com/dolthub/dolt/go/store/util/sizecache"
"github.com/dolthub/dolt/go/store/util/tempfiles"
)
type tableCache interface {
checkout(h addr) (io.ReaderAt, error)
checkin(h addr) error
store(h addr, data io.Reader, size uint64) error
}
type fsTableCache struct {
dir string
cache *sizecache.SizeCache
fd *fdCache
}
func newFSTableCache(dir string, cacheSize uint64, maxOpenFds int) (*fsTableCache, error) {
ftc := &fsTableCache{dir: dir, fd: newFDCache(maxOpenFds)}
ftc.cache = sizecache.NewWithExpireCallback(cacheSize, func(elm interface{}) {
ftc.expire(elm.(addr))
})
err := ftc.init(maxOpenFds)
if err != nil {
return nil, err
}
return ftc, nil
}
func (ftc *fsTableCache) init(concurrency int) error {
type finfo struct {
path string
h addr
size uint64
}
infos := make(chan finfo)
errc := make(chan error, 1)
go func() {
isTableFile := func(info os.FileInfo) bool {
return info.Mode().IsRegular() && ValidateAddr(info.Name())
}
isTempTableFile := func(info os.FileInfo) bool {
return info.Mode().IsRegular() && strings.HasPrefix(info.Name(), tempTablePrefix)
}
defer close(errc)
defer close(infos)
// No select needed for this send, since errc is buffered.
errc <- filepath.Walk(ftc.dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if path == ftc.dir {
return nil
}
if isTempTableFile(info) {
// ignore failure to remove temp file
_ = file.Remove(path)
return nil
}
if !isTableFile(info) {
return errors.New(path + " is not a table file; cache dir must contain only table files")
}
ad, err := parseAddr(info.Name())
if err != nil {
return err
}
infos <- finfo{path, ad, uint64(info.Size())}
return nil
})
}()
ae := atomicerr.New()
wg := sync.WaitGroup{}
wg.Add(concurrency)
for i := 0; i < concurrency; i++ {
go func() {
defer wg.Done()
for info := range infos {
if ae.IsSet() {
break
}
ftc.cache.Add(info.h, info.size, true)
_, err := ftc.fd.RefFile(info.path)
if err != nil {
ae.SetIfError(err)
break
}
err = ftc.fd.UnrefFile(info.path)
if err != nil {
ae.SetIfError(err)
break
}
}
}()
}
wg.Wait()
err := <-errc
if err != nil {
return err
}
if err := ae.Get(); err != nil {
return err
}
return nil
}
func (ftc *fsTableCache) checkout(h addr) (io.ReaderAt, error) {
if _, ok := ftc.cache.Get(h); !ok {
return nil, nil
}
fd, err := ftc.fd.RefFile(filepath.Join(ftc.dir, h.String()))
if err != nil {
return nil, err
}
return fd, nil
}
func (ftc *fsTableCache) checkin(h addr) error {
return ftc.fd.UnrefFile(filepath.Join(ftc.dir, h.String()))
}
func (ftc *fsTableCache) store(h addr, data io.Reader, size uint64) error {
path := filepath.Join(ftc.dir, h.String())
tempName, err := func() (name string, ferr error) {
var temp *os.File
temp, ferr = tempfiles.MovableTempFileProvider.NewFile(ftc.dir, tempTablePrefix)
if ferr != nil {
return "", ferr
}
defer func() {
closeErr := temp.Close()
if ferr == nil {
ferr = closeErr
}
}()
_, ferr = io.Copy(temp, data)
if ferr != nil {
return "", ferr
}
return temp.Name(), nil
}()
if err != nil {
return err
}
err = ftc.fd.ShrinkCache()
if err != nil {
return err
}
err = file.Rename(tempName, path)
if err != nil {
return err
}
ftc.cache.Add(h, size, true)
// Prime the file in the fd cache ignore err
if _, err = ftc.fd.RefFile(path); err == nil {
err := ftc.fd.UnrefFile(path)
if err != nil {
return err
}
}
return nil
}
func (ftc *fsTableCache) expire(h addr) error {
return file.Remove(filepath.Join(ftc.dir, h.String()))
}
-140
View File
@@ -1,140 +0,0 @@
// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
package nbs
import (
"bytes"
"io"
"os"
"path/filepath"
"sort"
"testing"
"github.com/dolthub/dolt/go/libraries/utils/file"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFSTableCache(t *testing.T) {
datas := [][]byte{[]byte("hello"), []byte("world"), []byte("goodbye")}
sort.SliceStable(datas, func(i, j int) bool { return len(datas[i]) < len(datas[j]) })
t.Run("ExpireLRU", func(t *testing.T) {
t.Parallel()
dir := makeTempDir(t)
defer file.RemoveAll(dir)
sum := 0
for _, s := range datas[1:] {
sum += len(s)
}
tc, err := newFSTableCache(dir, uint64(sum), len(datas))
require.NoError(t, err)
for _, d := range datas {
err := tc.store(computeAddr(d), bytes.NewReader(d), uint64(len(d)))
require.NoError(t, err)
}
expiredName := computeAddr(datas[0])
r, err := tc.checkout(expiredName)
require.NoError(t, err)
assert.Nil(t, r)
_, fserr := os.Stat(filepath.Join(dir, expiredName.String()))
assert.True(t, os.IsNotExist(fserr))
for _, d := range datas[1:] {
name := computeAddr(d)
r, err := tc.checkout(name)
require.NoError(t, err)
assert.NotNil(t, r)
assertDataInReaderAt(t, d, r)
_, fserr := os.Stat(filepath.Join(dir, name.String()))
assert.False(t, os.IsNotExist(fserr))
}
})
t.Run("Init", func(t *testing.T) {
t.Run("Success", func(t *testing.T) {
t.Parallel()
dir := makeTempDir(t)
defer file.RemoveAll(dir)
assert := assert.New(t)
var names []addr
for i := byte(0); i < 4; i++ {
name := computeAddr([]byte{i})
require.NoError(t, os.WriteFile(filepath.Join(dir, name.String()), nil, 0666))
names = append(names, name)
}
ftc, err := newFSTableCache(dir, 1024, 4)
require.NoError(t, err)
assert.NotNil(ftc)
for _, name := range names {
assert.NotNil(ftc.checkout(name))
}
})
t.Run("BadFile", func(t *testing.T) {
t.Parallel()
dir := makeTempDir(t)
defer file.RemoveAll(dir)
require.NoError(t, os.WriteFile(filepath.Join(dir, "boo"), nil, 0666))
_, err := newFSTableCache(dir, 1024, 4)
assert.Error(t, err)
})
t.Run("ClearTempFile", func(t *testing.T) {
t.Parallel()
dir := makeTempDir(t)
defer file.RemoveAll(dir)
tempFile := filepath.Join(dir, tempTablePrefix+"boo")
require.NoError(t, os.WriteFile(tempFile, nil, 0666))
_, err := newFSTableCache(dir, 1024, 4)
require.NoError(t, err)
_, fserr := os.Stat(tempFile)
assert.True(t, os.IsNotExist(fserr))
})
t.Run("Dir", func(t *testing.T) {
t.Parallel()
dir := makeTempDir(t)
defer file.RemoveAll(dir)
require.NoError(t, os.Mkdir(filepath.Join(dir, "sub"), 0777))
_, err := newFSTableCache(dir, 1024, 4)
assert.Error(t, err)
})
})
}
func assertDataInReaderAt(t *testing.T, data []byte, r io.ReaderAt) {
p := make([]byte, len(data))
n, err := r.ReadAt(p, 0)
require.NoError(t, err)
assert.Equal(t, len(data), n)
assert.Equal(t, data, p)
}
+22 -10
View File
@@ -40,7 +40,7 @@ func UseJournalStore(path string) bool {
if chunkJournalFeatureFlag {
return true
}
ok, err := journalFileExists(filepath.Join(path, chunkJournalAddr))
ok, err := fileExists(filepath.Join(path, chunkJournalAddr))
if err != nil {
panic(err)
}
@@ -78,22 +78,34 @@ func newChunkJournal(ctx context.Context, nbfVers, dir string, m manifest, p *fs
j := &chunkJournal{path: path, backing: m, persister: p}
j.contents.nbfVers = nbfVers
ok, err := journalFileExists(path)
ok, err := fileExists(path)
if err != nil {
return nil, err
} else if ok {
// only open a journalWriter if the journal file exists,
// only bootstrap journalWriter if the journal file exists,
// otherwise we wait to open in case we're cloning
if err = j.openJournal(ctx); err != nil {
if err = j.bootstrapJournalWriter(ctx); err != nil {
return nil, err
}
}
return j, nil
}
func (j *chunkJournal) openJournal(ctx context.Context) (err error) {
// bootstrapJournalWriter initializes the journalWriter, which manages access to the
// journal file for this chunkJournal. The bootstrapping process differs depending
// on whether a journal file exists at startup time.
//
// If a journal file does not exist, we create one and commit a root hash record
// which we read from the manifest file.
//
// If a journal file does exist, we process its records to build up an index of its
// resident chunks. Processing journal records is potentially accelerated by an index
// file (see indexRec). The journal file is the source of truth for the latest root hash.
// As we process journal records, we track the latest root hash record we see and
// update the manifest file with it.
func (j *chunkJournal) bootstrapJournalWriter(ctx context.Context) (err error) {
var ok bool
ok, err = journalFileExists(j.path)
ok, err = fileExists(j.path)
if err != nil {
return err
}
@@ -116,7 +128,7 @@ func (j *chunkJournal) openJournal(ctx context.Context) (err error) {
}
if ok {
// write the current root hash to the journal file
if err = j.wr.writeRootHash(contents.root); err != nil {
if err = j.wr.commitRootHash(contents.root); err != nil {
return
}
j.contents = contents
@@ -185,7 +197,7 @@ func (j *chunkJournal) Persist(ctx context.Context, mt *memTable, haver chunkRea
}
// ConjoinAll implements tablePersister.
func (j *chunkJournal) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) {
func (j *chunkJournal) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) {
return j.persister.ConjoinAll(ctx, sources, stats)
}
@@ -258,7 +270,7 @@ func (j *chunkJournal) Update(ctx context.Context, lastLock addr, next manifestC
}
}
if err := j.wr.writeRootHash(next.root); err != nil {
if err := j.wr.commitRootHash(next.root); err != nil {
return manifestContents{}, err
}
j.contents = next
@@ -300,7 +312,7 @@ func (j *chunkJournal) ParseIfExists(ctx context.Context, stats *Stats, readHook
func (j *chunkJournal) maybeInit(ctx context.Context) (err error) {
if j.wr == nil {
err = j.openJournal(ctx)
err = j.bootstrapJournalWriter(ctx)
}
return
}
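Taken together, these hunks give chunkJournal a lazy-open lifecycle: bootstrap eagerly when a journal file already exists, otherwise defer until first use so a clone can lay down the journal file first. A condensed restatement of that control flow (a sketch, not new API; error handling elided):
// ensureJournal is hypothetical; it collapses the logic of
// newChunkJournal and maybeInit shown above into one place.
func ensureJournal(ctx context.Context, j *chunkJournal) error {
	ok, err := fileExists(j.path)
	if err != nil {
		return err
	}
	if ok {
		// journal file present: build in-memory state now
		return j.bootstrapJournalWriter(ctx)
	}
	// no journal file yet (possibly mid-clone): bootstrapJournalWriter
	// will run later via maybeInit, on first access
	return nil
}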
-23
View File
@@ -25,29 +25,6 @@ import (
"github.com/dolthub/dolt/go/store/hash"
)
// recLookup contains journalRec lookup metadata.
type recLookup struct {
// journalOff is the file offset of the journalRec.
journalOff int64
// recordLen is the length of the journalRec.
recordLen uint32
// payloadOff is the offset of the payload within the
// journalRec, it's used for converting to a Range.
payloadOff uint32
}
// rangeFromLookup converts a recLookup to a Range,
// used when computing GetDownloadLocs.
func rangeFromLookup(l recLookup) Range {
return Range{
// see journalRec for serialization format
Offset: uint64(l.journalOff) + uint64(l.payloadOff),
Length: l.recordLen - (l.payloadOff + journalRecChecksumSz),
}
}
// journalChunkSource is a chunkSource that reads chunks
// from a chunkJournal. Unlike other NBS chunkSources,
// it is not immutable and its set of chunks grows as
+268
View File
@@ -0,0 +1,268 @@
// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nbs
import (
"bufio"
"bytes"
"context"
"encoding/binary"
"fmt"
"io"
"sort"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/hash"
)
// indexRec is a record in a chunk journal index file. Index records
// serve as out-of-band chunk indexes into the chunk journal that allow
// bootstrapping the journal without reading each record in the journal.
//
// Like journalRec, its serialization format uses uint8 tag prefixes
// to identify fields and allow for format evolution.
type indexRec struct {
// index record length
length uint32
// root hash of commit when this index record was written
lastRoot hash.Hash
// file offsets for the region of the journal file
// that |payload| indexes. end points to a root hash
// record in the journal containing |lastRoot|.
// we expect a sequence of index records to cover
// contiguous regions of the journal file.
start, end uint64
// index record kind
kind indexRecKind
// encoded chunk index
payload []byte
// index record crc32 checksum
checksum uint32
}
type indexRecKind uint8
const (
unknownIndexRecKind indexRecKind = 0
tableIndexRecKind indexRecKind = 1
)
type indexRecTag uint8
const (
unknownIndexRecTag indexRecTag = 0
lastRootIndexRecTag indexRecTag = 1
startOffsetIndexRecTag indexRecTag = 2
endOffsetIndexRecTag indexRecTag = 3
kindIndexRecTag indexRecTag = 4
payloadIndexRecTag indexRecTag = 5
)
const (
indexRecTagSz = 1
indexRecLenSz = 4
indexRecKindSz = 1
indexRecLastRootSz = 20
indexRecOffsetSz = 8
indexRecChecksumSz = 4
)
func journalIndexRecordSize(idx []byte) (recordSz uint32) {
recordSz += indexRecLenSz
recordSz += indexRecTagSz + indexRecLastRootSz
recordSz += indexRecTagSz + indexRecOffsetSz
recordSz += indexRecTagSz + indexRecOffsetSz
recordSz += indexRecTagSz + indexRecKindSz
recordSz += indexRecTagSz // payload tag
recordSz += uint32(len(idx))
recordSz += indexRecChecksumSz
return
}
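To make the size computation concrete, here is the arithmetic for a hypothetical 100-byte payload, using the constants defined above:
// worked example (assumes the nbs constants above):
//   length                             =   4
//   lastRoot  tag + value    1 + 20    =  21
//   start     tag + value    1 + 8     =   9
//   end       tag + value    1 + 8     =   9
//   kind      tag + value    1 + 1     =   2
//   payload   tag + bytes    1 + 100   = 101
//   checksum                           =   4
//                                total = 150
sz := journalIndexRecordSize(make([]byte, 100)) // sz == 150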
func writeJournalIndexRecord(buf []byte, root hash.Hash, start, end uint64, idx []byte) (n uint32) {
// length
l := journalIndexRecordSize(idx)
writeUint32(buf[:indexRecLenSz], l)
n += indexRecLenSz
// last root
buf[n] = byte(lastRootIndexRecTag)
n += indexRecTagSz
copy(buf[n:], root[:])
n += indexRecLastRootSz
// start offset
buf[n] = byte(startOffsetIndexRecTag)
n += indexRecTagSz
writeUint64(buf[n:], start)
n += indexRecOffsetSz
// end offset
buf[n] = byte(endOffsetIndexRecTag)
n += indexRecTagSz
writeUint64(buf[n:], end)
n += indexRecOffsetSz
// kind
buf[n] = byte(kindIndexRecTag)
n += indexRecTagSz
buf[n] = byte(tableIndexRecKind)
n += indexRecKindSz
// payload
buf[n] = byte(payloadIndexRecTag)
n += indexRecTagSz
copy(buf[n:], idx)
n += uint32(len(idx))
// checksum
writeUint32(buf[n:], crc(buf[:n]))
n += indexRecChecksumSz
d.PanicIfFalse(l == n)
return
}
func readJournalIndexRecord(buf []byte) (rec indexRec, err error) {
rec.length = readUint32(buf)
buf = buf[indexRecLenSz:]
for len(buf) > indexRecChecksumSz {
tag := indexRecTag(buf[0])
buf = buf[indexRecTagSz:]
switch tag {
case lastRootIndexRecTag:
copy(rec.lastRoot[:], buf)
buf = buf[indexRecLastRootSz:]
case startOffsetIndexRecTag:
rec.start = readUint64(buf)
buf = buf[indexRecOffsetSz:]
case endOffsetIndexRecTag:
rec.end = readUint64(buf)
buf = buf[indexRecOffsetSz:]
case kindIndexRecTag:
rec.kind = indexRecKind(buf[0])
buf = buf[indexRecKindSz:]
case payloadIndexRecTag:
sz := len(buf) - indexRecChecksumSz
rec.payload = buf[:sz]
buf = buf[sz:]
case unknownIndexRecTag:
fallthrough
default:
err = fmt.Errorf("unknown record field tag: %d", tag)
return
}
}
rec.checksum = readUint32(buf[:indexRecChecksumSz])
return
}
func validateIndexRecord(buf []byte) (ok bool) {
if len(buf) > (indexRecLenSz + indexRecChecksumSz) {
off := len(buf) - indexRecChecksumSz
ok = crc(buf[:off]) == readUint32(buf[off:])
}
return
}
// processIndexRecords reads a sequence of index records from |r| and passes them to the callback |cb|.
// While reading records, it asserts that the sequence is well-formed and indexes a contiguous region of the journal file.
func processIndexRecords(ctx context.Context, r io.ReadSeeker, sz int64, cb func(o int64, r indexRec) error) (err error) {
var (
buf []byte
off int64
prev uint64
)
rdr := bufio.NewReader(r)
for off < sz {
// peek to read next record size
if buf, err = rdr.Peek(uint32Size); err != nil {
break
}
l := readUint32(buf)
if int64(l) > sz {
return fmt.Errorf("invalid record size %d for index file of size %d", l, sz)
}
if len(buf) < int(l) {
buf = make([]byte, l)
}
if _, err = io.ReadFull(rdr, buf); err != nil {
break
}
// we do not zero-fill the journal index, so we expect
// only complete records that checksum correctly
if !validateIndexRecord(buf) {
return fmt.Errorf("failed to checksum index record at %d", off)
}
var rec indexRec
if rec, err = readJournalIndexRecord(buf); err != nil {
return err
} else if rec.start != prev {
return fmt.Errorf("index records do not cover contiguous region (%d != %d)", rec.end, prev)
}
if err = cb(off, rec); err != nil {
return err
}
prev = rec.end
off += int64(len(buf))
}
if err == nil && off != sz {
err = fmt.Errorf("failed to process entire journal index (%d < %d)", off, sz)
} else if err == io.EOF {
err = nil
}
return
}
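For orientation, a sketch of how a caller drives processIndexRecords; the journalWriter bootstrap below does essentially this against journal.idx (|idxFile| and |ranges| are hypothetical locals, error handling elided):
info, _ := idxFile.Stat()
err := processIndexRecords(ctx, idxFile, info.Size(), func(o int64, r indexRec) error {
	if r.kind != tableIndexRecKind {
		return fmt.Errorf("unknown index record kind (%d)", r.kind)
	}
	// replay the encoded chunk index into an in-memory map
	for _, l := range deserializeLookups(r.payload) {
		ranges[l.a] = l.r // chunk addr -> byte range in the journal file
	}
	return nil
})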
type lookup struct {
a addr
r Range
}
const lookupSize = addrSize + offsetSize + lengthSize
// serializeLookups serializes |lookups| using the table file chunk index format.
func serializeLookups(lookups []lookup) (index []byte) {
index = make([]byte, len(lookups)*lookupSize)
sort.Slice(lookups, func(i, j int) bool { // sort by addr
return bytes.Compare(lookups[i].a[:], lookups[j].a[:]) < 0
})
buf := index
for _, l := range lookups {
copy(buf, l.a[:])
buf = buf[addrSize:]
binary.BigEndian.PutUint64(buf, l.r.Offset)
buf = buf[offsetSize:]
binary.BigEndian.PutUint32(buf, l.r.Length)
buf = buf[lengthSize:]
}
return
}
func deserializeLookups(index []byte) (lookups []lookup) {
lookups = make([]lookup, len(index)/lookupSize)
for i := range lookups {
copy(lookups[i].a[:], index)
index = index[addrSize:]
lookups[i].r.Offset = binary.BigEndian.Uint64(index)
index = index[offsetSize:]
lookups[i].r.Length = binary.BigEndian.Uint32(index)
index = index[lengthSize:]
}
return
}
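Each serialized lookup is fixed-width: assuming addrSize is the usual 20-byte NBS address (it matches indexRecLastRootSz above), plus the PutUint64 offset and PutUint32 length, lookupSize is 20 + 8 + 4 = 32 bytes, so an index record payload is always a multiple of 32. A round-trip sketch (|someAddr| is hypothetical):
ls := []lookup{{a: someAddr, r: Range{Offset: 1024, Length: 64}}}
buf := serializeLookups(ls)     // len(buf) == len(ls) * lookupSize
back := deserializeLookups(buf) // sorted by addr; equal to ls here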
+205
View File
@@ -0,0 +1,205 @@
// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nbs
import (
"bytes"
"context"
"math/rand"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/hash"
)
func TestRoundTripIndexRecords(t *testing.T) {
t.Run("table index record", func(t *testing.T) {
start := uint64(0)
for i := 0; i < 64; i++ {
end := start + (rand.Uint64() % 1024)
rec, buf := makeTableIndexRecord(start, end)
start = end
assert.Equal(t, rec.length, uint32(len(buf)))
b := make([]byte, rec.length)
n := writeJournalIndexRecord(b, rec.lastRoot, rec.start, rec.end, mustPayload(rec))
assert.Equal(t, n, rec.length)
assert.Equal(t, buf, b)
r, err := readJournalIndexRecord(buf)
assert.NoError(t, err)
assert.Equal(t, rec, r)
}
})
}
func TestUnknownIndexRecordTag(t *testing.T) {
// test behavior encountering unknown tag
buf := makeUnknownTagIndexRecord()
// checksum is ok
ok := validateIndexRecord(buf)
assert.True(t, ok)
// reading record fails
_, err := readJournalIndexRecord(buf)
assert.Error(t, err)
}
func TestProcessIndexRecords(t *testing.T) {
const cnt = 1024
ctx := context.Background()
records := make([]indexRec, cnt)
buffers := make([][]byte, cnt)
index := make([]byte, cnt*1024)
var off uint32
var start uint64
for i := range records {
end := start + (rand.Uint64() % 1024)
r, b := makeTableIndexRecord(start, end)
start = end
off += writeJournalIndexRecord(index[off:], r.lastRoot, r.start, r.end, mustPayload(r))
records[i], buffers[i] = r, b
}
index = index[:off]
var i, sum int
check := func(o int64, r indexRec) (_ error) {
require.True(t, i < cnt)
assert.Equal(t, records[i], r)
assert.Equal(t, sum, int(o))
sum += len(buffers[i])
i++
return
}
err := processIndexRecords(ctx, bytes.NewReader(index), int64(len(index)), check)
assert.Equal(t, cnt, i)
require.NoError(t, err)
i, sum = 0, 0
// write a bogus record to the end and process again
index = appendCorruptIndexRecord(index)
err = processIndexRecords(ctx, bytes.NewReader(index), int64(len(index)), check)
assert.Equal(t, cnt, i)
assert.Error(t, err) // fails to checksum
}
func TestRoundTripLookups(t *testing.T) {
exp := makeLookups(128)
buf := serializeLookups(exp)
act := deserializeLookups(buf)
assert.Equal(t, exp, act)
}
func makeTableIndexRecord(start, end uint64) (indexRec, []byte) {
payload := randBuf(100)
sz := journalIndexRecordSize(payload)
lastRoot := hash.Of([]byte("fake commit"))
var n int
buf := make([]byte, sz)
// length
writeUint32(buf[n:], uint32(len(buf)))
n += indexRecLenSz
// last root
buf[n] = byte(lastRootIndexRecTag)
n += indexRecTagSz
copy(buf[n:], lastRoot[:])
n += len(lastRoot[:])
// start offset
buf[n] = byte(startOffsetIndexRecTag)
n += indexRecTagSz
writeUint64(buf[n:], start)
n += indexRecOffsetSz
// end offset
buf[n] = byte(endOffsetIndexRecTag)
n += indexRecTagSz
writeUint64(buf[n:], end)
n += indexRecOffsetSz
// kind
buf[n] = byte(kindIndexRecTag)
n += indexRecTagSz
buf[n] = byte(tableIndexRecKind)
n += indexRecKindSz
// payload
buf[n] = byte(payloadIndexRecTag)
n += indexRecTagSz
copy(buf[n:], payload)
n += len(payload)
// checksum
c := crc(buf[:len(buf)-indexRecChecksumSz])
writeUint32(buf[len(buf)-indexRecChecksumSz:], c)
r := indexRec{
length: uint32(len(buf)),
lastRoot: lastRoot,
start: start,
end: end,
kind: tableIndexRecKind,
payload: payload,
checksum: c,
}
return r, buf
}
func makeUnknownTagIndexRecord() (buf []byte) {
const fakeTag indexRecTag = 111
_, buf = makeTableIndexRecord(0, 128)
// overwrite recKind
buf[indexRecLenSz] = byte(fakeTag)
// redo checksum
c := crc(buf[:len(buf)-indexRecChecksumSz])
writeUint32(buf[len(buf)-indexRecChecksumSz:], c)
return
}
func appendCorruptIndexRecord(buf []byte) []byte {
tail := make([]byte, journalIndexRecordSize(nil))
rand.Read(tail)
// write a valid size, kind
writeUint32(tail, uint32(len(tail)))
tail[journalRecLenSz] = byte(tableIndexRecKind)
return append(buf, tail...)
}
func mustPayload(rec indexRec) []byte {
d.PanicIfFalse(rec.kind == tableIndexRecKind)
return rec.payload
}
func makeLookups(cnt int) (lookups []lookup) {
lookups = make([]lookup, cnt)
buf := make([]byte, cnt*addrSize)
rand.Read(buf)
var off uint64
for i := range lookups {
copy(lookups[i].a[:], buf)
buf = buf[addrSize:]
lookups[i].r.Offset = off
l := rand.Uint32() % 1024
lookups[i].r.Length = l
off += uint64(l)
}
return
}
+50 -12
View File
@@ -22,6 +22,7 @@ import (
"io"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/hash"
)
// journalRec is a record in a chunk journal. Its serialization format uses
@@ -113,7 +114,7 @@ func rootHashRecordSize() (recordSz int) {
func writeChunkRecord(buf []byte, c CompressedChunk) (n uint32) {
// length
l, _ := chunkRecordSize(c)
writeUint(buf[:journalRecLenSz], l)
writeUint32(buf[:journalRecLenSz], l)
n += journalRecLenSz
// kind
buf[n] = byte(kindJournalRecTag)
@@ -131,7 +132,7 @@ func writeChunkRecord(buf []byte, c CompressedChunk) (n uint32) {
copy(buf[n:], c.FullCompressedChunk)
n += uint32(len(c.FullCompressedChunk))
// checksum
writeUint(buf[n:], crc(buf[:n]))
writeUint32(buf[n:], crc(buf[:n]))
n += journalRecChecksumSz
d.PanicIfFalse(l == n)
return
@@ -140,7 +141,7 @@ func writeChunkRecord(buf []byte, c CompressedChunk) (n uint32) {
func writeRootHashRecord(buf []byte, root addr) (n uint32) {
// length
l := rootHashRecordSize()
writeUint(buf[:journalRecLenSz], uint32(l))
writeUint32(buf[:journalRecLenSz], uint32(l))
n += journalRecLenSz
// kind
buf[n] = byte(kindJournalRecTag)
@@ -154,13 +155,13 @@ func writeRootHashRecord(buf []byte, root addr) (n uint32) {
n += journalRecAddrSz
// empty payload
// checksum
writeUint(buf[n:], crc(buf[:n]))
writeUint32(buf[n:], crc(buf[:n]))
n += journalRecChecksumSz
return
}
func readJournalRecord(buf []byte) (rec journalRec, err error) {
rec.length = readUint(buf)
rec.length = readUint32(buf)
buf = buf[journalRecLenSz:]
for len(buf) > journalRecChecksumSz {
tag := journalRecTag(buf[0])
@@ -183,25 +184,29 @@ func readJournalRecord(buf []byte) (rec journalRec, err error) {
return
}
}
rec.checksum = readUint(buf[:journalRecChecksumSz])
rec.checksum = readUint32(buf[:journalRecChecksumSz])
return
}
func validateJournalRecord(buf []byte) (ok bool) {
if len(buf) > (journalRecLenSz + journalRecChecksumSz) {
off := len(buf) - journalRecChecksumSz
ok = crc(buf[:off]) == readUint(buf[off:])
ok = crc(buf[:off]) == readUint32(buf[off:])
}
return
}
func processJournalRecords(ctx context.Context, r io.ReadSeeker, cb func(o int64, r journalRec) error) (int64, error) {
func processJournalRecords(ctx context.Context, r io.ReadSeeker, off int64, cb func(o int64, r journalRec) error) (int64, error) {
var (
buf []byte
off int64
err error
)
// start processing records from |off|
if _, err = r.Seek(off, io.SeekStart); err != nil {
return 0, err
}
rdr := bufio.NewReaderSize(r, journalWriterBuffSize)
for {
// peek to read next record size
@@ -209,7 +214,7 @@ func processJournalRecords(ctx context.Context, r io.ReadSeeker, cb func(o int64
break
}
l := readUint(buf)
l := readUint32(buf)
if l > journalRecMaxSz {
break
} else if buf, err = rdr.Peek(int(l)); err != nil {
@@ -245,10 +250,43 @@ func processJournalRecords(ctx context.Context, r io.ReadSeeker, cb func(o int64
return off, nil
}
func readUint(buf []byte) uint32 {
func peekRootHashAt(journal io.ReaderAt, offset int64) (root hash.Hash, err error) {
buf := make([]byte, 1024) // assumes len(rec) < 1024
if _, err = journal.ReadAt(buf, offset); err != nil {
return
}
sz := readUint32(buf)
if sz > journalRecMaxSz {
err = fmt.Errorf("invalid root hash record size at %d", offset)
return
}
buf = buf[:sz]
if !validateJournalRecord(buf) {
err = fmt.Errorf("failed to validate root hash record at %d", offset)
return
}
var rec journalRec
if rec, err = readJournalRecord(buf); err != nil {
return
} else if rec.kind != rootHashJournalRecKind {
err = fmt.Errorf("expected root hash record, got kind: %d", rec.kind)
return
}
return hash.Hash(rec.address), nil
}
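peekRootHashAt gives bootstrap code a cheap way to validate an index record against the journal: read a single record header at |offset| and confirm it is the root hash record it claims to be. A minimal call sketch, mirroring the check in bootstrapJournal below (|journalFile| is a hypothetical io.ReaderAt):
h, err := peekRootHashAt(journalFile, int64(rec.end))
if err != nil {
	return err
} else if h != rec.lastRoot {
	return fmt.Errorf("invalid index record hash (%s != %s)", h.String(), rec.lastRoot.String())
}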
func readUint32(buf []byte) uint32 {
return binary.BigEndian.Uint32(buf)
}
func writeUint(buf []byte, u uint32) {
func writeUint32(buf []byte, u uint32) {
binary.BigEndian.PutUint32(buf, u)
}
func readUint64(buf []byte) uint64 {
return binary.BigEndian.Uint64(buf)
}
func writeUint64(buf []byte, u uint64) {
binary.BigEndian.PutUint64(buf, u)
}
+17 -15
View File
@@ -28,7 +28,7 @@ import (
"github.com/dolthub/dolt/go/store/hash"
)
func TestRoundTripRecords(t *testing.T) {
func TestRoundTripJournalRecords(t *testing.T) {
t.Run("chunk record", func(t *testing.T) {
for i := 0; i < 64; i++ {
rec, buf := makeChunkRecord()
@@ -57,16 +57,18 @@ func TestRoundTripRecords(t *testing.T) {
})
}
func TestUnknownTag(t *testing.T) {
func TestUnknownJournalRecordTag(t *testing.T) {
// test behavior encountering unknown tag
buf := makeUnknownTagRecord()
buf := makeUnknownTagJournalRecord()
// checksum is ok
ok := validateJournalRecord(buf)
assert.True(t, ok)
// reading record fails
_, err := readJournalRecord(buf)
assert.Error(t, err)
}
func TestProcessRecords(t *testing.T) {
func TestProcessJournalRecords(t *testing.T) {
const cnt = 1024
ctx := context.Background()
records := make([]journalRec, cnt)
@@ -97,15 +99,15 @@ func TestProcessRecords(t *testing.T) {
return
}
n, err := processJournalRecords(ctx, bytes.NewReader(journal), check)
n, err := processJournalRecords(ctx, bytes.NewReader(journal), 0, check)
assert.Equal(t, cnt, i)
assert.Equal(t, int(off), int(n))
require.NoError(t, err)
i, sum = 0, 0
// write a bogus record to the end and process again
writeCorruptRecord(journal[off:])
n, err = processJournalRecords(ctx, bytes.NewReader(journal), check)
writeCorruptJournalRecord(journal[off:])
n, err = processJournalRecords(ctx, bytes.NewReader(journal), 0, check)
assert.Equal(t, cnt, i)
assert.Equal(t, int(off), int(n))
require.NoError(t, err)
@@ -133,7 +135,7 @@ func makeChunkRecord() (journalRec, []byte) {
var n int
buf := make([]byte, sz)
// length
writeUint(buf[n:], uint32(len(buf)))
writeUint32(buf[n:], uint32(len(buf)))
n += journalRecLenSz
// kind
buf[n] = byte(kindJournalRecTag)
@@ -152,7 +154,7 @@ func makeChunkRecord() (journalRec, []byte) {
n += len(payload)
// checksum
c := crc(buf[:len(buf)-journalRecChecksumSz])
writeUint(buf[len(buf)-journalRecChecksumSz:], c)
writeUint32(buf[len(buf)-journalRecChecksumSz:], c)
r := journalRec{
length: uint32(len(buf)),
@@ -169,7 +171,7 @@ func makeRootHashRecord() (journalRec, []byte) {
var n int
buf := make([]byte, rootHashRecordSize())
// length
writeUint(buf[n:], uint32(len(buf)))
writeUint32(buf[n:], uint32(len(buf)))
n += journalRecLenSz
// kind
buf[n] = byte(kindJournalRecTag)
@@ -183,7 +185,7 @@ func makeRootHashRecord() (journalRec, []byte) {
n += journalRecAddrSz
// checksum
c := crc(buf[:len(buf)-journalRecChecksumSz])
writeUint(buf[len(buf)-journalRecChecksumSz:], c)
writeUint32(buf[len(buf)-journalRecChecksumSz:], c)
r := journalRec{
length: uint32(len(buf)),
kind: rootHashJournalRecKind,
@@ -193,23 +195,23 @@ func makeRootHashRecord() (journalRec, []byte) {
return r, buf
}
func makeUnknownTagRecord() (buf []byte) {
func makeUnknownTagJournalRecord() (buf []byte) {
const fakeTag journalRecTag = 111
_, buf = makeRootHashRecord()
// overwrite recKind
buf[journalRecLenSz] = byte(fakeTag)
// redo checksum
c := crc(buf[:len(buf)-journalRecChecksumSz])
writeUint(buf[len(buf)-journalRecChecksumSz:], c)
writeUint32(buf[len(buf)-journalRecChecksumSz:], c)
return
}
func writeCorruptRecord(buf []byte) (n uint32) {
func writeCorruptJournalRecord(buf []byte) (n uint32) {
n = uint32(rootHashRecordSize())
// fill with random data
rand.Read(buf[:n])
// write a valid size, kind
writeUint(buf, n)
writeUint32(buf, n)
buf[journalRecLenSz] = byte(rootHashJournalRecKind)
return
}
+1 -1
View File
@@ -36,7 +36,7 @@ func makeTestChunkJournal(t *testing.T) *chunkJournal {
m, err := getFileManifest(ctx, dir, syncFlush)
require.NoError(t, err)
q := NewUnlimitedMemQuotaProvider()
p := newFSTablePersister(dir, globalFDCache, q)
p := newFSTablePersister(dir, q)
nbf := types.Format_Default.VersionString()
j, err := newChunkJournal(ctx, nbf, dir, m, p.(*fsTablePersister))
require.NoError(t, err)
+198 -54
View File
@@ -34,6 +34,9 @@ const (
journalWriterBuffSize = 1024 * 1024
chunkJournalAddr = "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv"
journalIndexFileName = "journal.idx"
journalIndexDefaultMaxNovel = 64 * 1024
)
var (
@@ -44,7 +47,7 @@ func isJournalAddr(a addr) bool {
return a == journalAddr
}
func journalFileExists(path string) (bool, error) {
func fileExists(path string) (bool, error) {
var err error
if path, err = filepath.Abs(path); err != nil {
return false, err
@@ -54,7 +57,7 @@ func journalFileExists(path string) (bool, error) {
if errors.Is(err, os.ErrNotExist) {
return false, nil
} else if info.IsDir() {
return true, fmt.Errorf("expected file %s found directory", chunkJournalName)
return true, fmt.Errorf("expected file %s, found directory", path)
}
return true, nil
}
@@ -79,8 +82,7 @@ func openJournalWriter(ctx context.Context, path string) (wr *journalWriter, exi
return &journalWriter{
buf: make([]byte, 0, journalWriterBuffSize),
lookups: make(map[addr]recLookup),
file: f,
journal: f,
path: path,
}, true, nil
}
@@ -119,20 +121,25 @@ func createJournalWriter(ctx context.Context, path string) (wr *journalWriter, e
return &journalWriter{
buf: make([]byte, 0, journalWriterBuffSize),
lookups: make(map[addr]recLookup),
file: f,
journal: f,
path: path,
}, nil
}
type journalWriter struct {
buf []byte
lookups map[addr]recLookup
file *os.File
buf []byte
journal *os.File
off int64
uncmpSz uint64
indexed int64
path string
lock sync.RWMutex
uncmpSz uint64
ranges rangeIndex
index *os.File
maxNovel int
lock sync.RWMutex
}
var _ io.Closer = &journalWriter{}
@@ -142,14 +149,72 @@ var _ io.Closer = &journalWriter{}
func (wr *journalWriter) bootstrapJournal(ctx context.Context) (last hash.Hash, err error) {
wr.lock.Lock()
defer wr.lock.Unlock()
wr.off, err = processJournalRecords(ctx, wr.file, func(o int64, r journalRec) error {
if wr.maxNovel == 0 {
wr.maxNovel = journalIndexDefaultMaxNovel
}
wr.ranges = newRangeIndex()
p := filepath.Join(filepath.Dir(wr.path), journalIndexFileName)
var ok bool
ok, err = fileExists(p)
if err != nil {
return
} else if ok {
wr.index, err = os.OpenFile(p, os.O_RDWR, 0666)
} else {
wr.index, err = os.OpenFile(p, os.O_RDWR|os.O_CREATE, 0666)
}
if err != nil {
return
}
if ok {
var info os.FileInfo
if info, err = wr.index.Stat(); err != nil {
return hash.Hash{}, err
}
err = processIndexRecords(ctx, wr.index, info.Size(), func(o int64, r indexRec) (err error) {
switch r.kind {
case tableIndexRecKind:
// |r.end| is expected to point to a root hash record in |wr.journal|
// containing a hash equal to |r.lastRoot|; validate this here
var h hash.Hash
if h, err = peekRootHashAt(wr.journal, int64(r.end)); err != nil {
return err
} else if h != r.lastRoot {
return fmt.Errorf("invalid index record hash (%s != %s)", h.String(), r.lastRoot.String())
}
// populate range hashmap
for _, l := range deserializeLookups(r.payload) {
wr.ranges.put(l.a, l.r)
}
// record a high-water-mark for the indexed portion of the journal
wr.indexed = int64(r.end)
// todo: uncompressed size
default:
return fmt.Errorf("unknown index record kind (%d)", r.kind)
}
return nil
})
if err != nil {
// todo: issue warning on corrupt index recovery
if err = wr.corruptIndexRecovery(ctx); err != nil {
return
}
}
wr.ranges.flatten()
}
// process the non-indexed portion of the journal starting at |wr.indexed|;
// at minimum, the non-indexed portion will include a root hash record
wr.off, err = processJournalRecords(ctx, wr.journal, wr.indexed, func(o int64, r journalRec) error {
switch r.kind {
case chunkJournalRecKind:
wr.lookups[r.address] = recLookup{
journalOff: o,
recordLen: r.length,
payloadOff: r.payloadOffset(),
}
wr.ranges.put(r.address, Range{
Offset: uint64(o) + uint64(r.payloadOffset()),
Length: uint32(len(r.payload)),
})
wr.uncmpSz += r.uncompressedPayloadSize()
case rootHashJournalRecKind:
last = hash.Hash(r.address)
@@ -164,11 +229,26 @@ func (wr *journalWriter) bootstrapJournal(ctx context.Context) (last hash.Hash,
return
}
// corruptIndexRecovery handles a corrupted or malformed journal index by truncating
// the index file and restarting the journal bootstrapping process without an index.
func (wr *journalWriter) corruptIndexRecovery(ctx context.Context) (err error) {
if _, err = wr.index.Seek(0, io.SeekStart); err != nil {
return
}
if err = wr.index.Truncate(0); err != nil {
return
}
// reset bootstrapping state
wr.off, wr.indexed, wr.uncmpSz = 0, 0, 0
wr.ranges = newRangeIndex()
return
}
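The recovery path preserves a simple invariant: |wr.indexed| splits the journal into an index-covered prefix and a tail that gets rescanned record by record. A sketch of the layout (index recovery never touches journal data):
// journal file: [ 0 ............ wr.indexed ............ wr.off )
//                 \-- covered by journal.idx --/\-- rescanned --/
//
// corruptIndexRecovery truncates journal.idx and resets wr.indexed
// to 0, degrading to a full journal scan; no chunk data is lost.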
// hasAddr returns true if the journal contains a chunk with addr |h|.
func (wr *journalWriter) hasAddr(h addr) (ok bool) {
wr.lock.RLock()
defer wr.lock.RUnlock()
_, ok = wr.lookups[h]
_, ok = wr.ranges.get(h)
return
}
@@ -176,25 +256,15 @@ func (wr *journalWriter) hasAddr(h addr) (ok bool) {
func (wr *journalWriter) getCompressedChunk(h addr) (CompressedChunk, error) {
wr.lock.RLock()
defer wr.lock.RUnlock()
l, ok := wr.lookups[h]
r, ok := wr.ranges.get(h)
if !ok {
return CompressedChunk{}, nil
}
buf := make([]byte, l.recordLen)
if _, err := wr.readAt(buf, l.journalOff); err != nil {
buf := make([]byte, r.Length)
if _, err := wr.readAt(buf, int64(r.Offset)); err != nil {
return CompressedChunk{}, nil
}
rec, err := readJournalRecord(buf)
if err != nil {
return CompressedChunk{}, err
} else if h != rec.address {
err = fmt.Errorf("chunk record hash does not match (%s != %s)",
h.String(), rec.address.String())
return CompressedChunk{}, err
}
return NewCompressedChunk(hash.Hash(h), rec.payload)
return NewCompressedChunk(hash.Hash(h), buf)
}
// getRange returns a Range for the chunk with addr |h|.
@@ -206,11 +276,7 @@ func (wr *journalWriter) getRange(h addr) (rng Range, ok bool, err error) {
}
wr.lock.RLock()
defer wr.lock.RUnlock()
var l recLookup
l, ok = wr.lookups[h]
if ok {
rng = rangeFromLookup(l)
}
rng, ok = wr.ranges.get(h)
return
}
@@ -218,35 +284,53 @@ func (wr *journalWriter) getRange(h addr) (rng Range, ok bool, err error) {
func (wr *journalWriter) writeCompressedChunk(cc CompressedChunk) error {
wr.lock.Lock()
defer wr.lock.Unlock()
l, o := chunkRecordSize(cc)
rec := recLookup{
journalOff: wr.offset(),
recordLen: l,
payloadOff: o,
recordLen, payloadOff := chunkRecordSize(cc)
rng := Range{
Offset: uint64(wr.offset()) + uint64(payloadOff),
Length: uint32(len(cc.FullCompressedChunk)),
}
buf, err := wr.getBytes(int(rec.recordLen))
buf, err := wr.getBytes(int(recordLen))
if err != nil {
return err
}
_ = writeChunkRecord(buf, cc)
wr.lookups[addr(cc.H)] = rec
wr.ranges.put(addr(cc.H), rng)
return nil
}
// writeRootHash commits |root| to the journal and syncs the file to disk.
func (wr *journalWriter) writeRootHash(root hash.Hash) error {
// commitRootHash commits |root| to the journal and syncs the file to disk.
func (wr *journalWriter) commitRootHash(root hash.Hash) error {
wr.lock.Lock()
defer wr.lock.Unlock()
buf, err := wr.getBytes(rootHashRecordSize())
if err != nil {
return err
}
_ = writeRootHashRecord(buf, addr(root))
n := writeRootHashRecord(buf, addr(root))
if err = wr.flush(); err != nil {
return err
}
return wr.file.Sync()
if err = wr.journal.Sync(); err != nil {
return err
}
if wr.ranges.novelCount() > wr.maxNovel {
o := wr.offset() - int64(n) // pre-commit journal offset
err = wr.flushIndexRecord(root, o)
}
return err
}
func (wr *journalWriter) flushIndexRecord(root hash.Hash, end int64) (err error) {
payload := serializeLookups(wr.ranges.novelLookups())
buf := make([]byte, journalIndexRecordSize(payload))
writeJournalIndexRecord(buf, root, uint64(wr.indexed), uint64(end), payload)
if _, err = wr.index.Write(buf); err != nil {
return err
}
wr.ranges.flatten()
// set a new high-water-mark for the indexed portion of the journal
wr.indexed = end
return
}
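Index records are flushed opportunistically from commitRootHash: only after the journal fsync succeeds, and only once enough un-indexed ranges accumulate. With the defaults above (journalIndexDefaultMaxNovel = 64 * 1024 novel chunks) and the 32-byte lookup encoding, each flushed index record carries a payload of roughly 2 MiB. The trigger, as wired above:
if wr.ranges.novelCount() > wr.maxNovel { // default 64 * 1024
	// |n| is the size of the root hash record just written,
	// so |o| is the journal offset of that record
	o := wr.offset() - int64(n)
	err = wr.flushIndexRecord(root, o)
}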
// readAt reads len(p) bytes from the journal at offset |off|.
@@ -262,7 +346,7 @@ func (wr *journalWriter) readAt(p []byte, off int64) (n int, err error) {
bp = p[fread:]
p = p[:fread]
}
if n, err = wr.file.ReadAt(p, off); err != nil {
if n, err = wr.journal.ReadAt(p, off); err != nil {
return 0, err
}
off = 0
@@ -294,7 +378,7 @@ func (wr *journalWriter) getBytes(n int) (buf []byte, err error) {
// flush writes buffered data into the journal file.
func (wr *journalWriter) flush() (err error) {
if _, err = wr.file.WriteAt(wr.buf, wr.off); err != nil {
if _, err = wr.journal.WriteAt(wr.buf, wr.off); err != nil {
return err
}
wr.off += int64(len(wr.buf))
@@ -351,7 +435,7 @@ func (wr *journalWriter) uncompressedSize() uint64 {
func (wr *journalWriter) recordCount() uint32 {
wr.lock.RLock()
defer wr.lock.RUnlock()
return uint32(len(wr.lookups))
return wr.ranges.count()
}
func (wr *journalWriter) Close() (err error) {
@@ -360,11 +444,71 @@ func (wr *journalWriter) Close() (err error) {
if err = wr.flush(); err != nil {
return err
}
if cerr := wr.file.Sync(); cerr != nil {
if cerr := wr.journal.Sync(); cerr != nil {
err = cerr
}
if cerr := wr.file.Close(); cerr != nil {
if cerr := wr.journal.Close(); cerr != nil {
err = cerr
}
return
}
type rangeIndex struct {
novel map[addr]Range
cached map[addr]Range
}
func newRangeIndex() rangeIndex {
return rangeIndex{
novel: make(map[addr]Range),
cached: make(map[addr]Range),
}
}
func (idx rangeIndex) get(a addr) (rng Range, ok bool) {
rng, ok = idx.novel[a]
if !ok {
rng, ok = idx.cached[a]
}
return
}
func (idx rangeIndex) put(a addr, rng Range) {
idx.novel[a] = rng
}
func (idx rangeIndex) iter(cb func(addr, Range)) {
for a, r := range idx.novel {
cb(a, r)
}
for a, r := range idx.cached {
cb(a, r)
}
}
func (idx rangeIndex) count() uint32 {
return uint32(len(idx.novel) + len(idx.cached))
}
func (idx rangeIndex) novelCount() int {
return len(idx.novel)
}
func (idx rangeIndex) novelLookups() (lookups []lookup) {
lookups = make([]lookup, 0, len(idx.novel))
for a, r := range idx.novel {
lookups = append(lookups, lookup{a: a, r: r})
}
return
}
func (idx rangeIndex) flatten() {
if len(idx.cached) == 0 {
idx.cached = idx.novel
} else {
for a, r := range idx.novel {
idx.cached[a] = r
}
}
idx.novel = make(map[addr]Range)
}
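rangeIndex is the bookkeeping that makes index flushes cheap: |novel| holds ranges written since the last index record, |cached| holds ranges already covered by journal.idx, and flatten promotes novel entries to cached after a flush. A lifecycle sketch (|a1|, |r1|, and the flush call are hypothetical):
idx := newRangeIndex()
idx.put(a1, r1) // lands in idx.novel
if idx.novelCount() > maxNovel { // commit-time check, see commitRootHash
	payload := serializeLookups(idx.novelLookups()) // index record payload
	flushIndexRecord(payload)                       // hypothetical flush
	idx.flatten()                                   // novel -> cached
}
rng, ok := idx.get(a1) // still found, now via the cached map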
+187 -70
View File
@@ -16,33 +16,33 @@ package nbs
import (
"context"
"fmt"
"math/rand"
"os"
"path/filepath"
"testing"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/hash"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/dolthub/dolt/go/store/chunks"
"github.com/dolthub/dolt/go/store/hash"
)
type operation struct {
kind opKind
buf []byte
readAt int64
}
func TestJournalWriterReadWrite(t *testing.T) {
type opKind byte
type opKind byte
type operation struct {
kind opKind
buf []byte
readAt int64
}
const (
readOp opKind = iota
writeOp
flushOp
)
const (
readOp opKind = iota
writeOp
flushOp
)
func TestJournalWriter(t *testing.T) {
tests := []struct {
name string
size int
@@ -145,14 +145,13 @@ func TestJournalWriter(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ctx := context.Background()
j, err := createJournalWriter(ctx, newTestFilePath(t))
require.NotNil(t, j)
require.NoError(t, err)
path := newTestFilePath(t)
j := newTestJournalWriter(t, path)
// set specific buffer size
j.buf = make([]byte, 0, test.size)
var off int64
var err error
for i, op := range test.ops {
switch op.kind {
case readOp:
@@ -176,53 +175,56 @@ func TestJournalWriter(t *testing.T) {
}
assert.Equal(t, off, j.offset())
}
assert.NoError(t, j.Close())
})
}
}
func TestJournalWriterWriteCompressedChunk(t *testing.T) {
func newTestJournalWriter(t *testing.T, path string) *journalWriter {
ctx := context.Background()
j, err := createJournalWriter(ctx, newTestFilePath(t))
require.NotNil(t, j)
j, err := createJournalWriter(ctx, path)
require.NoError(t, err)
require.NotNil(t, j)
_, err = j.bootstrapJournal(ctx)
require.NoError(t, err)
return j
}
data := randomCompressedChunks()
func TestJournalWriterWriteCompressedChunk(t *testing.T) {
path := newTestFilePath(t)
j := newTestJournalWriter(t, path)
data := randomCompressedChunks(1024)
for a, cc := range data {
err = j.writeCompressedChunk(cc)
err := j.writeCompressedChunk(cc)
require.NoError(t, err)
l := j.lookups[a]
validateLookup(t, j, l, cc)
r, _ := j.ranges.get(a)
validateLookup(t, j, r, cc)
}
for a, l := range j.lookups {
validateLookup(t, j, l, data[a])
}
require.NoError(t, j.Close())
j.ranges.iter(func(a addr, r Range) {
validateLookup(t, j, r, data[a])
})
}
func TestJournalWriterBootstrap(t *testing.T) {
ctx := context.Background()
path := newTestFilePath(t)
j, err := createJournalWriter(ctx, path)
require.NotNil(t, j)
require.NoError(t, err)
data := randomCompressedChunks()
j := newTestJournalWriter(t, path)
data := randomCompressedChunks(1024)
var last hash.Hash
for _, cc := range data {
err = j.writeCompressedChunk(cc)
err := j.writeCompressedChunk(cc)
require.NoError(t, err)
last = cc.Hash()
}
assert.NoError(t, j.Close())
require.NoError(t, j.commitRootHash(last))
j, _, err = openJournalWriter(ctx, path)
j, _, err := openJournalWriter(ctx, path)
require.NoError(t, err)
_, err = j.bootstrapJournal(ctx)
require.NoError(t, err)
for a, l := range j.lookups {
validateLookup(t, j, l, data[a])
}
j.ranges.iter(func(a addr, r Range) {
validateLookup(t, j, r, data[a])
})
source := journalChunkSource{journal: j}
for a, cc := range data {
@@ -232,56 +234,171 @@ func TestJournalWriterBootstrap(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, ch.Data(), buf)
}
require.NoError(t, j.Close())
}
func validateLookup(t *testing.T, j *journalWriter, l recLookup, cc CompressedChunk) {
b := make([]byte, l.recordLen)
n, err := j.readAt(b, l.journalOff)
func validateLookup(t *testing.T, j *journalWriter, r Range, cc CompressedChunk) {
buf := make([]byte, r.Length)
_, err := j.readAt(buf, int64(r.Offset))
require.NoError(t, err)
assert.Equal(t, int(l.recordLen), n)
rec, err := readJournalRecord(b)
require.NoError(t, err)
assert.Equal(t, hash.Hash(rec.address), cc.Hash())
assert.Equal(t, rec.payload, cc.FullCompressedChunk)
act, err := NewCompressedChunk(cc.H, buf)
assert.NoError(t, err)
assert.Equal(t, cc.FullCompressedChunk, act.FullCompressedChunk)
}
func TestJournalWriterSyncClose(t *testing.T) {
ctx := context.Background()
j, err := createJournalWriter(ctx, newTestFilePath(t))
require.NotNil(t, j)
require.NoError(t, err)
_, err = j.bootstrapJournal(ctx)
require.NoError(t, err)
// close triggers flush
path := newTestFilePath(t)
j := newTestJournalWriter(t, path)
p := []byte("sit")
buf, err := j.getBytes(len(p))
require.NoError(t, err)
copy(buf, p)
err = j.Close()
require.NoError(t, err)
j.flush()
assert.Equal(t, 0, len(j.buf))
assert.Equal(t, 3, int(j.off))
}
func newTestFilePath(t *testing.T) string {
name := fmt.Sprintf("journal%d.log", rand.Intn(65536))
return filepath.Join(t.TempDir(), name)
path, err := os.MkdirTemp("", "")
require.NoError(t, err)
return filepath.Join(path, "journal.log")
}
func randomCompressedChunks() (compressed map[addr]CompressedChunk) {
buf := make([]byte, 1024*1024)
rand.Read(buf)
func TestJournalIndexBootstrap(t *testing.T) {
// potentially indexed region of a journal
type epoch struct {
records map[addr]CompressedChunk
last hash.Hash
}
makeEpoch := func() (e epoch) {
e.records = randomCompressedChunks(64)
for h := range e.records {
e.last = hash.Hash(h)
break
}
return
}
tests := []struct {
name string
epochs []epoch
novel epoch
}{
{
name: "smoke test",
epochs: []epoch{makeEpoch()},
},
{
name: "non-indexed journal",
epochs: nil,
novel: makeEpoch(),
},
{
name: "partially indexed journal",
epochs: []epoch{makeEpoch()},
novel: makeEpoch(),
},
{
name: "multiple index records",
epochs: []epoch{
makeEpoch(),
makeEpoch(),
makeEpoch(),
},
novel: makeEpoch(),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ctx := context.Background()
path := newTestFilePath(t)
j := newTestJournalWriter(t, path)
// setup
epochs := append(test.epochs, test.novel)
for i, e := range epochs {
for _, cc := range e.records {
assert.NoError(t, j.writeCompressedChunk(cc))
if rand.Int()%10 == 0 { // periodic commits
assert.NoError(t, j.commitRootHash(cc.H))
}
}
o := j.offset() // precommit offset
assert.NoError(t, j.commitRootHash(e.last)) // commit |e.last|
if i == len(epochs)-1 {
break // don't index |test.novel|
}
assert.NoError(t, j.flushIndexRecord(e.last, o)) // write index record
}
validateJournal := func(p string, expected []epoch) {
journal, ok, err := openJournalWriter(ctx, p)
require.NoError(t, err)
require.True(t, ok)
// bootstrap journal and validate chunk records
last, err := journal.bootstrapJournal(ctx)
assert.NoError(t, err)
for _, e := range expected {
var act CompressedChunk
for a, exp := range e.records {
act, err = journal.getCompressedChunk(a)
assert.NoError(t, err)
assert.Equal(t, exp, act)
}
}
assert.Equal(t, expected[len(expected)-1].last, last)
}
idxPath := filepath.Join(filepath.Dir(path), journalIndexFileName)
before, err := os.Stat(idxPath)
require.NoError(t, err)
if len(test.epochs) > 0 { // expect index
assert.True(t, before.Size() > 0)
} else {
assert.Equal(t, int64(0), before.Size())
}
// bootstrap journal using index
validateJournal(path, epochs)
// assert journal index unchanged
info, err := os.Stat(idxPath)
require.NoError(t, err)
assert.Equal(t, before.Size(), info.Size())
// bootstrap journal without index
corruptJournalIndex(t, idxPath)
validateJournal(path, epochs)
// assert corrupt index cleaned up
info, err = os.Stat(idxPath)
require.NoError(t, err)
assert.Equal(t, int64(0), info.Size())
})
}
}
func randomCompressedChunks(cnt int) (compressed map[addr]CompressedChunk) {
compressed = make(map[addr]CompressedChunk)
for {
var buf []byte
for i := 0; i < cnt; i++ {
k := rand.Intn(51) + 50
if k >= len(buf) {
return
buf = make([]byte, 64*1024)
rand.Read(buf)
}
c := chunks.NewChunk(buf[:k])
buf = buf[k:]
compressed[addr(c.Hash())] = ChunkToCompressedChunk(c)
}
return
}
func corruptJournalIndex(t *testing.T, path string) {
f, err := os.OpenFile(path, os.O_RDWR, 0666)
require.NoError(t, err)
info, err := f.Stat()
require.NoError(t, err)
buf := make([]byte, 64)
rand.Read(buf)
_, err = f.WriteAt(buf, info.Size()/2)
require.NoError(t, err)
}
+8
View File
@@ -190,6 +190,14 @@ func tableReaderAtFromBytes(b []byte) tableReaderAt {
return tableReaderAtAdapter{bytes.NewReader(b)}
}
func (adapter tableReaderAtAdapter) Close() error {
return nil
}
func (adapter tableReaderAtAdapter) clone() (tableReaderAt, error) {
return adapter, nil
}
func (adapter tableReaderAtAdapter) Reader(ctx context.Context) (io.ReadCloser, error) {
r := *adapter.br
return io.NopCloser(&r), nil
+6 -6
View File
@@ -528,12 +528,12 @@ func (ftp fakeTablePersister) Persist(ctx context.Context, mt *memTable, haver c
return chunkSourceAdapter{cs, name}, nil
}
func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error) {
func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error) {
name, data, chunkCount, err := compactSourcesToBuffer(sources)
if err != nil {
return nil, err
return nil, nil, err
} else if chunkCount == 0 {
return emptyChunkSource{}, nil
return emptyChunkSource{}, func() {}, nil
}
ftp.mu.Lock()
@@ -542,14 +542,14 @@ func (ftp fakeTablePersister) ConjoinAll(ctx context.Context, sources chunkSourc
ti, err := parseTableIndexByCopy(ctx, data, ftp.q)
if err != nil {
return nil, err
return nil, nil, err
}
cs, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
if err != nil {
return nil, err
return nil, nil, err
}
return chunkSourceAdapter{cs, name}, nil
return chunkSourceAdapter{cs, name}, func() {}, nil
}
func compactSourcesToBuffer(sources chunkSources) (name addr, data []byte, chunkCount uint32, err error) {
+8
View File
@@ -60,6 +60,14 @@ type s3svc interface {
PutObjectWithContext(ctx aws.Context, input *s3.PutObjectInput, opts ...request.Option) (*s3.PutObjectOutput, error)
}
func (s3tra *s3TableReaderAt) Close() error {
return nil
}
func (s3tra *s3TableReaderAt) clone() (tableReaderAt, error) {
return s3tra, nil
}
func (s3tra *s3TableReaderAt) Reader(ctx context.Context) (io.ReadCloser, error) {
return s3tra.s3.Reader(ctx, s3tra.h)
}
+5 -5
View File
@@ -68,14 +68,11 @@ const (
var (
cacheOnce = sync.Once{}
makeManifestManager func(manifest) manifestManager
globalFDCache *fdCache
)
var tracer = otel.Tracer("github.com/dolthub/dolt/go/store/nbs")
func makeGlobalCaches() {
globalFDCache = newFDCache(defaultMaxTables)
manifestCache := newManifestCache(defaultManifestCacheSize)
manifestLocks := newManifestLocks()
makeManifestManager = func(m manifest) manifestManager { return manifestManager{m, manifestCache, manifestLocks} }
@@ -479,7 +476,7 @@ func newLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSi
if err != nil {
return nil, err
}
p := newFSTablePersister(dir, globalFDCache, q)
p := newFSTablePersister(dir, q)
c := conjoinStrategy(inlineConjoiner{maxTables})
return newNomsBlockStore(ctx, nbfVerStr, makeManifestManager(m), p, q, c, memTableSize)
@@ -495,7 +492,7 @@ func NewLocalJournalingStore(ctx context.Context, nbfVers, dir string, q MemoryQ
if err != nil {
return nil, err
}
p := newFSTablePersister(dir, globalFDCache, q)
p := newFSTablePersister(dir, q)
journal, err := newChunkJournal(ctx, nbfVers, dir, m, p.(*fsTablePersister))
if err != nil {
@@ -615,6 +612,9 @@ func (nbs *NomsBlockStore) putChunk(ctx context.Context, c chunks.Chunk, getAddr
}
func (nbs *NomsBlockStore) addChunk(ctx context.Context, ch chunks.Chunk, addrs hash.HashSet, checker refCheck) (bool, error) {
if err := ctx.Err(); err != nil {
return false, err
}
nbs.mu.Lock()
defer nbs.mu.Unlock()
nbs.waitForGC()
+20 -7
View File
@@ -58,14 +58,14 @@ func makeTestLocalStore(t *testing.T, maxTableFiles int) (st *NomsBlockStore, no
type fileToData map[string][]byte
func populateLocalStore(t *testing.T, st *NomsBlockStore, numTableFiles int) fileToData {
func writeLocalTableFiles(t *testing.T, st *NomsBlockStore, numTableFiles, seed int) (map[string]int, fileToData) {
ctx := context.Background()
fileToData := make(fileToData, numTableFiles)
fileIDToNumChunks := make(map[string]int)
fileIDToNumChunks := make(map[string]int, numTableFiles)
for i := 0; i < numTableFiles; i++ {
var chunkData [][]byte
for j := 0; j < i+1; j++ {
chunkData = append(chunkData, []byte(fmt.Sprintf("%d:%d", i, j)))
chunkData = append(chunkData, []byte(fmt.Sprintf("%d:%d:%d", i, j, seed)))
}
data, addr, err := buildTable(chunkData)
require.NoError(t, err)
@@ -77,9 +77,14 @@ func populateLocalStore(t *testing.T, st *NomsBlockStore, numTableFiles int) fil
})
require.NoError(t, err)
}
return fileIDToNumChunks, fileToData
}
func populateLocalStore(t *testing.T, st *NomsBlockStore, numTableFiles int) fileToData {
ctx := context.Background()
fileIDToNumChunks, fileToData := writeLocalTableFiles(t, st, numTableFiles, 0)
err := st.AddTableFilesToManifest(ctx, fileIDToNumChunks)
require.NoError(t, err)
return fileToData
}
@@ -190,8 +195,10 @@ func TestNBSPruneTableFiles(t *testing.T) {
numTableFiles := 64
maxTableFiles := 16
st, nomsDir, _ := makeTestLocalStore(t, maxTableFiles)
fileToData := populateLocalStore(t, st, numTableFiles)
defer st.Close()
fileToData := populateLocalStore(t, st, numTableFiles)
_, toDeleteToData := writeLocalTableFiles(t, st, numTableFiles, 32)
// add a chunk and flush to trigger a conjoin
c := chunks.NewChunk([]byte("it's a boy!"))
@@ -212,6 +219,9 @@ func TestNBSPruneTableFiles(t *testing.T) {
// assert some input table files were conjoined
assert.NotEmpty(t, absent)
toDelete := tfSet.findAbsent(toDeleteToData)
assert.Len(t, toDelete, len(toDeleteToData))
currTableFiles := func(dirName string) *set.StrSet {
infos, err := os.ReadDir(dirName)
require.NoError(t, err)
@@ -228,7 +238,7 @@ func TestNBSPruneTableFiles(t *testing.T) {
for _, tf := range sources {
assert.True(t, preGC.Contains(tf.FileID()))
}
for _, fileName := range absent {
for _, fileName := range toDelete {
assert.True(t, preGC.Contains(fileName))
}
@@ -237,11 +247,14 @@ func TestNBSPruneTableFiles(t *testing.T) {
postGC := currTableFiles(nomsDir)
for _, tf := range sources {
assert.True(t, preGC.Contains(tf.FileID()))
assert.True(t, postGC.Contains(tf.FileID()))
}
for _, fileName := range absent {
assert.False(t, postGC.Contains(fileName))
}
for _, fileName := range toDelete {
assert.False(t, postGC.Contains(fileName))
}
infos, err := os.ReadDir(nomsDir)
require.NoError(t, err)
+6 -2
View File
@@ -34,6 +34,8 @@ import (
var errCacheMiss = errors.New("index cache miss")
type cleanupFunc func()
// tablePersister allows interaction with persistent storage. It provides
// primitives for pushing the contents of a memTable to persistent storage,
// opening persistent tables for reading, and conjoining a number of existing
@@ -45,8 +47,10 @@ type tablePersister interface {
Persist(ctx context.Context, mt *memTable, haver chunkReader, stats *Stats) (chunkSource, error)
// ConjoinAll conjoins all chunks in |sources| into a single, new
// chunkSource.
ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, error)
// chunkSource. It returns a |cleanupFunc| which can be called to
// potentially release resources associated with the |sources| once
// they are no longer needed.
ConjoinAll(ctx context.Context, sources chunkSources, stats *Stats) (chunkSource, cleanupFunc, error)
// Open a table named |name|, containing |chunkCount| chunks.
Open(ctx context.Context, name addr, chunkCount uint32, stats *Stats) (chunkSource, error)
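To make the new three-value contract concrete, here is a hedged caller-side sketch. publishConjoined is a hypothetical placeholder; only the return shape and the ordering of cleanup after the new source is in use are implied by the interface comment above.

// Sketch, not the repo's conjoin path; publishConjoined is hypothetical.
cs, cleanup, err := p.ConjoinAll(ctx, sources, stats)
if err != nil {
	return err
}
publishConjoined(cs) // make the conjoined source visible to readers first
cleanup()            // then release any resources still held for |sources|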
+14 -14
View File
@@ -131,6 +131,8 @@ func (ir indexResult) Length() uint32 {
type tableReaderAt interface {
ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error)
Reader(ctx context.Context) (io.ReadCloser, error)
Close() error
clone() (tableReaderAt, error)
}
// tableReader implements get & has queries against a single nbs table. goroutine safe.
@@ -663,7 +665,12 @@ func (tr tableReader) currentSize() uint64 {
}
func (tr tableReader) close() error {
return tr.idx.Close()
err := tr.idx.Close()
if err != nil {
tr.r.Close()
return err
}
return tr.r.Close()
}
func (tr tableReader) clone() (tableReader, error) {
@@ -671,22 +678,15 @@ func (tr tableReader) clone() (tableReader, error) {
if err != nil {
return tableReader{}, err
}
r, err := tr.r.clone()
if err != nil {
idx.Close()
return tableReader{}, err
}
return tableReader{
prefixes: tr.prefixes,
idx: idx,
r: tr.r,
r: r,
blockSize: tr.blockSize,
}, nil
}
type readerAdapter struct {
rat tableReaderAt
off int64
ctx context.Context
}
func (ra *readerAdapter) Read(p []byte) (n int, err error) {
n, err = ra.rat.ReadAtWithStats(ra.ctx, p, ra.off, &Stats{})
ra.off += int64(n)
return
}
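Taken together, the two hunks above change ownership: tableReader.close() now tears down both the index and the reader, so clone() must duplicate tr.r rather than share it. A small lifecycle sketch under the signatures shown:

// Each clone owns an independent tableReaderAt.
c, err := tr.clone()
if err != nil {
	return err
}
defer c.close() // closes the clone's idx and its own reader; tr is unaffected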
+3
View File
@@ -114,6 +114,9 @@ func (ts tableSet) hasMany(addrs []hasRecord) (bool, error) {
}
func (ts tableSet) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) {
if err := ctx.Err(); err != nil {
return nil, err
}
f := func(css chunkSourceSet) ([]byte, error) {
for _, haver := range css {
data, err := haver.get(ctx, h, stats)
-6
View File
@@ -35,12 +35,6 @@ type Map struct {
valDesc val.TupleDesc
}
type DiffSummary struct {
Adds, Removes uint64
Changes, CellChanges uint64
NewSize, OldSize uint64
}
// NewMap creates an empty prolly Tree Map
func NewMap(node tree.Node, ns tree.NodeStore, keyDesc, valDesc val.TupleDesc) Map {
tuples := tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{
-5
View File
@@ -137,7 +137,6 @@ var CopiedNomsFiles []CopiedNomsFile = []CopiedNomsFile{
{Path: "store/diff/patch.go", NomsPath: "go/diff/patch.go", HadCopyrightNotice: true},
{Path: "store/diff/patch_test.go", NomsPath: "go/diff/patch_test.go", HadCopyrightNotice: true},
{Path: "store/diff/print_diff.go", NomsPath: "go/diff/print_diff.go", HadCopyrightNotice: true},
{Path: "store/diff/summary.go", NomsPath: "go/diff/summary.go", HadCopyrightNotice: true},
{Path: "store/hash/base32.go", NomsPath: "go/hash/base32.go", HadCopyrightNotice: true},
{Path: "store/hash/base32_test.go", NomsPath: "go/hash/base32_test.go", HadCopyrightNotice: true},
{Path: "store/hash/hash.go", NomsPath: "go/hash/hash.go", HadCopyrightNotice: true},
@@ -183,15 +182,11 @@ var CopiedNomsFiles []CopiedNomsFile = []CopiedNomsFile{
{Path: "store/nbs/dynamo_manifest_test.go", NomsPath: "go/nbs/dynamo_manifest_test.go", HadCopyrightNotice: true},
{Path: "store/nbs/dynamo_table_reader.go", NomsPath: "go/nbs/dynamo_table_reader.go", HadCopyrightNotice: true},
{Path: "store/nbs/dynamo_table_reader_test.go", NomsPath: "go/nbs/dynamo_table_reader_test.go", HadCopyrightNotice: true},
{Path: "store/nbs/fd_cache.go", NomsPath: "go/nbs/fd_cache.go", HadCopyrightNotice: true},
{Path: "store/nbs/fd_cache_test.go", NomsPath: "go/nbs/fd_cache_test.go", HadCopyrightNotice: true},
{Path: "store/nbs/file_manifest.go", NomsPath: "go/nbs/file_manifest.go", HadCopyrightNotice: true},
{Path: "store/nbs/file_manifest_test.go", NomsPath: "go/nbs/file_manifest_test.go", HadCopyrightNotice: true},
{Path: "store/nbs/file_table_persister.go", NomsPath: "go/nbs/file_table_persister.go", HadCopyrightNotice: true},
{Path: "store/nbs/file_table_persister_test.go", NomsPath: "go/nbs/file_table_persister_test.go", HadCopyrightNotice: true},
{Path: "store/nbs/frag/main.go", NomsPath: "go/nbs/frag/main.go", HadCopyrightNotice: true},
{Path: "store/nbs/fs_table_cache.go", NomsPath: "go/nbs/fs_table_cache.go", HadCopyrightNotice: true},
{Path: "store/nbs/fs_table_cache_test.go", NomsPath: "go/nbs/fs_table_cache_test.go", HadCopyrightNotice: true},
{Path: "store/nbs/manifest.go", NomsPath: "go/nbs/manifest.go", HadCopyrightNotice: true},
{Path: "store/nbs/manifest_cache.go", NomsPath: "go/nbs/manifest_cache.go", HadCopyrightNotice: true},
{Path: "store/nbs/manifest_cache_test.go", NomsPath: "go/nbs/manifest_cache_test.go", HadCopyrightNotice: true},

(4 image files changed; before/after sizes unchanged: 1.6 KiB, 397 KiB, 143 KiB, 320 KiB)

+341
View File
@@ -0,0 +1,341 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
setup() {
setup_common
dolt sql <<SQL
CREATE TABLE test (
pk BIGINT NOT NULL COMMENT 'tag:0',
c1 BIGINT COMMENT 'tag:1',
c2 BIGINT COMMENT 'tag:2',
c3 BIGINT COMMENT 'tag:3',
c4 BIGINT COMMENT 'tag:4',
c5 BIGINT COMMENT 'tag:5',
PRIMARY KEY (pk)
);
SQL
}
teardown() {
assert_feature_version
teardown_common
}
@test "diff-stat: stat/summary comparing working table to last commit" {
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
dolt add test
dolt commit -m "table created"
dolt sql -q "insert into test values (2, 11, 0, 0, 0, 0)"
dolt sql -q "insert into test values (3, 11, 0, 0, 0, 0)"
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "2 Rows Unmodified (100.00%)" ]] || false
[[ "$output" =~ "2 Rows Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "12 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(2 Row Entries vs 4 Row Entries)" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
dolt add test
dolt commit -m "added two rows"
dolt sql -q "replace into test values (0, 11, 0, 0, 0, 6)"
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (25.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "2 Cells Modified (8.33%)" ]] || false
[[ "$output" =~ "(4 Row Entries vs 4 Row Entries)" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
echo "$output"
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
dolt add test
dolt commit -m "modified first row"
dolt sql -q "delete from test where pk = 0"
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "1 Row Deleted (25.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Deleted (25.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(4 Row Entries vs 3 Row Entries)" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
echo "$output"
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
}
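All percentages in these assertions use the old table as the denominator. A quick sanity check of the first block (2 existing rows, 6 columns, 2 rows added) in Go:

package main

import "fmt"

func main() {
	oldRows, cols, added := 2, 6, 2
	// "2 Rows Added (100.00%)": denominator is the old row count
	fmt.Printf("%d Rows Added (%.2f%%)\n", added,
		100*float64(added)/float64(oldRows))
	// "12 Cells Added (100.00%)": 2 new rows x 6 columns over 2x6 old cells
	fmt.Printf("%d Cells Added (%.2f%%)\n", added*cols,
		100*float64(added*cols)/float64(oldRows*cols))
}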
@test "diff-stat: stat/summary comparing row with a deleted cell and an added cell" {
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | added | false | true |" ]] || false
dolt add test
dolt commit -m "create table"
dolt sql -q "insert into test values (0, 1, 2, 3, 4, 5)"
dolt add test
dolt commit -m "put row"
dolt sql -q "replace into test (pk, c1, c3, c4, c5) values (0, 1, 3, 4, 5)"
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "0 Rows Unmodified (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Cell Modified (16.67%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 1 Row Entry)" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
dolt add test
dolt commit -m "row modified"
dolt sql -q "replace into test values (0, 1, 2, 3, 4, 5)"
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "0 Rows Unmodified (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Cell Modified (16.67%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 1 Row Entry)" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
}
@test "diff-stat: stat/summary comparing two branches" {
dolt checkout -b firstbranch
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt add test
dolt commit -m "Added one row"
dolt checkout -b newbranch
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
dolt add test
dolt commit -m "Added another row"
run dolt diff --stat firstbranch newbranch
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 2 Row Entries)" ]] || false
run dolt diff --summary firstbranch newbranch
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
run dolt diff --stat firstbranch..newbranch
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 2 Row Entries)" ]] || false
run dolt diff --summary firstbranch..newbranch
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
}
@test "diff-stat: stat/summary shows correct changes after schema change" {
cat <<DELIM > employees.csv
"id","first name","last name","title","start date","end date"
0,tim,sehn,ceo,"",""
1,aaron,son,founder,"",""
2,brian,hendricks,founder,"",""
DELIM
dolt table import -c -pk=id employees employees.csv
dolt add employees
dolt commit -m "Added employees table with data"
dolt sql -q "alter table employees add city longtext"
dolt sql -q "insert into employees values (3, 'taylor', 'bantle', 'software engineer', '', '', 'Santa Monica')"
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (33.33%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "10 Cells Added (55.56%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| employees | modified | true | true |" ]] || false
dolt sql -q "replace into employees values (0, 'tim', 'sehn', 'ceo', '2 years ago', '', 'Santa Monica')"
dolt diff --stat
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "2 Rows Unmodified (66.67%)" ]] || false
[[ "$output" =~ "1 Row Added (33.33%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (33.33%)" ]] || false
[[ "$output" =~ "10 Cells Added (55.56%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "2 Cells Modified (11.11%)" ]] || false
[[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| employees | modified | true | true |" ]] || false
}
@test "diff-stat: stat/summary gets summaries for all tables with changes" {
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
dolt sql <<SQL
CREATE TABLE employees (
\`id\` varchar(20) NOT NULL,
\`first name\` LONGTEXT,
\`last name\` LONGTEXT,
\`title\` LONGTEXT,
\`start date\` LONGTEXT,
\`end date\` LONGTEXT,
PRIMARY KEY (id)
);
SQL
dolt sql -q "insert into employees values (0, 'tim', 'sehn', 'ceo', '', '')"
dolt add test employees
dolt commit -m "test tables created"
dolt sql -q "insert into test values (2, 11, 0, 0, 0, 0)"
dolt sql -q "insert into employees values (1, 'brian', 'hendriks', 'founder', '', '')"
run dolt diff --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "diff --dolt a/test b/test" ]] || false
[[ "$output" =~ "--- a/test @" ]] || false
[[ "$output" =~ "+++ b/test @" ]] || false
[[ "$output" =~ "diff --dolt a/employees b/employees" ]] || false
[[ "$output" =~ "--- a/employees @" ]] || false
[[ "$output" =~ "+++ b/employees @" ]] || false
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
[[ "$output" =~ "| employees | modified | true | false |" ]] || false
run dolt diff --stat employees
[ "$status" -eq 0 ]
[[ "$output" =~ "diff --dolt a/employees b/employees" ]] || false
[[ "$output" =~ "--- a/employees @" ]] || false
[[ "$output" =~ "+++ b/employees @" ]] || false
run dolt diff --summary employees
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| employees | modified | true | false |" ]] || false
}
@test "diff-stat: two and three dot diff stat/summary" {
dolt checkout main
dolt sql -q 'insert into test values (0,0,0,0,0,0)'
dolt add .
dolt commit -m table
dolt checkout -b branch1
dolt sql -q 'insert into test values (1,1,1,1,1,1)'
dolt add .
dolt commit -m row
dolt checkout main
dolt sql -q 'insert into test values (2,2,2,2,2,2)'
dolt add .
dolt commit -m newrow
run dolt diff main..branch1 --stat
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (50.00%)" ]] || false
[[ "$output" =~ "1 Row Added (50.00%)" ]] || false
[[ "$output" =~ "1 Row Deleted (50.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (50.00%)" ]] || false
[[ "$output" =~ "6 Cells Deleted (50.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(2 Row Entries vs 2 Row Entries)" ]] || false
run dolt diff main..branch1 --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
run dolt diff main...branch1 --stat
echo $output
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 2 Row Entries)" ]] || false
run dolt diff main...branch1 --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "| Table name | Diff type | Data change | Schema change |" ]] || false
[[ "$output" =~ "| test | modified | true | false |" ]] || false
}
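For reference: `main..branch1` diffs the two branch tips directly, while `main...branch1` diffs from the merge base to `branch1`, which is why the row committed on main appears as a deletion only in the two-dot assertions above. A toy sketch of that resolution (resolve and mergeBase are hypothetical stand-ins for Dolt's ref machinery):

package main

import "fmt"

func resolve(ref string) string    { return "hash(" + ref + ")" }
func mergeBase(a, b string) string { return "base(" + a + "," + b + ")" }

func main() {
	for _, threeDot := range []bool{false, true} {
		from, to := resolve("main"), resolve("branch1")
		if threeDot {
			from = mergeBase("main", "branch1") // start at the common ancestor
		}
		fmt.Println("diff", from, "->", to)
	}
}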
@test "diff-stat: diff stat incorrect primary key set change regression test" {
dolt sql -q "create table testdrop (col1 varchar(20), id int primary key, col2 varchar(20))"
dolt add .
dolt sql -q "insert into testdrop values ('test1', 1, 'test2')"
dolt commit -am "Add testdrop table"
dolt sql -q "alter table testdrop drop column col1"
run dolt diff --stat
[ $status -eq 0 ]
[[ $output =~ "1 Row Modified (100.00%)" ]]
}
+1 -220
View File
@@ -361,30 +361,6 @@ SQL
[ "$status" -eq 0 ]
[[ "$output" =~ "+ | 1" ]] || false
[[ ! "$output" =~ "- | 2" ]] || false
# Dots work with --summary
run dolt diff main..branch1 --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (50.00%)" ]] || false
[[ "$output" =~ "1 Row Added (50.00%)" ]] || false
[[ "$output" =~ "1 Row Deleted (50.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (50.00%)" ]] || false
[[ "$output" =~ "6 Cells Deleted (50.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(2 Row Entries vs 2 Row Entries)" ]] || false
run dolt diff main...branch1 --summary
echo $output
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 2 Row Entries)" ]] || false
}
@test "diff: data and schema changes" {
@@ -774,189 +750,6 @@ SQL
[[ "$output" =~ '+ KEY `c2` (`c2`)' ]] || false
}
@test "diff: summary comparing working table to last commit" {
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
dolt add test
dolt commit -m "table created"
dolt sql -q "insert into test values (2, 11, 0, 0, 0, 0)"
dolt sql -q "insert into test values (3, 11, 0, 0, 0, 0)"
dolt diff --summary
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "2 Rows Unmodified (100.00%)" ]] || false
[[ "$output" =~ "2 Rows Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "12 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(2 Row Entries vs 4 Row Entries)" ]] || false
dolt add test
dolt commit -m "added two rows"
dolt sql -q "replace into test values (0, 11, 0, 0, 0, 6)"
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (25.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "2 Cells Modified (8.33%)" ]] || false
[[ "$output" =~ "(4 Row Entries vs 4 Row Entries)" ]] || false
dolt add test
dolt commit -m "modified first row"
dolt sql -q "delete from test where pk = 0"
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (75.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "1 Row Deleted (25.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Deleted (25.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(4 Row Entries vs 3 Row Entries)" ]] || false
}
@test "diff: summary comparing row with a deleted cell and an added cell" {
dolt add test
dolt commit -m "create table"
dolt sql -q "insert into test values (0, 1, 2, 3, 4, 5)"
dolt add test
dolt commit -m "put row"
dolt sql -q "replace into test (pk, c1, c3, c4, c5) values (0, 1, 3, 4, 5)"
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "0 Rows Unmodified (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Cell Modified (16.67%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 1 Row Entry)" ]] || false
dolt add test
dolt commit -m "row modified"
dolt sql -q "replace into test values (0, 1, 2, 3, 4, 5)"
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "0 Rows Unmodified (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Added (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Added (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Cell Modified (16.67%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 1 Row Entry)" ]] || false
}
@test "diff: summary comparing two branches" {
dolt checkout -b firstbranch
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt add test
dolt commit -m "Added one row"
dolt checkout -b newbranch
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
dolt add test
dolt commit -m "Added another row"
run dolt diff --summary firstbranch newbranch
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 2 Row Entries)" ]] || false
run dolt diff --summary firstbranch..newbranch
[ "$status" -eq 0 ]
[[ "$output" =~ "1 Row Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (100.00%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "6 Cells Added (100.00%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(1 Row Entry vs 2 Row Entries)" ]] || false
}
@test "diff: summary shows correct changes after schema change" {
cat <<DELIM > employees.csv
"id","first name","last name","title","start date","end date"
0,tim,sehn,ceo,"",""
1,aaron,son,founder,"",""
2,brian,hendricks,founder,"",""
DELIM
dolt table import -c -pk=id employees employees.csv
dolt add employees
dolt commit -m "Added employees table with data"
dolt sql -q "alter table employees add city longtext"
dolt sql -q "insert into employees values (3, 'taylor', 'bantle', 'software engineer', '', '', 'Santa Monica')"
dolt diff --summary
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "3 Rows Unmodified (100.00%)" ]] || false
[[ "$output" =~ "1 Row Added (33.33%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Rows Modified (0.00%)" ]] || false
[[ "$output" =~ "10 Cells Added (55.56%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "0 Cells Modified (0.00%)" ]] || false
[[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false
dolt sql -q "replace into employees values (0, 'tim', 'sehn', 'ceo', '2 years ago', '', 'Santa Monica')"
dolt diff --summary
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "2 Rows Unmodified (66.67%)" ]] || false
[[ "$output" =~ "1 Row Added (33.33%)" ]] || false
[[ "$output" =~ "0 Rows Deleted (0.00%)" ]] || false
[[ "$output" =~ "1 Row Modified (33.33%)" ]] || false
[[ "$output" =~ "10 Cells Added (55.56%)" ]] || false
[[ "$output" =~ "0 Cells Deleted (0.00%)" ]] || false
[[ "$output" =~ "2 Cells Modified (11.11%)" ]] || false
[[ "$output" =~ "(3 Row Entries vs 4 Row Entries)" ]] || false
}
@test "diff: summary gets summaries for all tables with changes" {
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
dolt sql <<SQL
CREATE TABLE employees (
\`id\` varchar(20) NOT NULL,
\`first name\` LONGTEXT,
\`last name\` LONGTEXT,
\`title\` LONGTEXT,
\`start date\` LONGTEXT,
\`end date\` LONGTEXT,
PRIMARY KEY (id)
);
SQL
dolt sql -q "insert into employees values (0, 'tim', 'sehn', 'ceo', '', '')"
dolt add test employees
dolt commit -m "test tables created"
dolt sql -q "insert into test values (2, 11, 0, 0, 0, 0)"
dolt sql -q "insert into employees values (1, 'brian', 'hendriks', 'founder', '', '')"
run dolt diff --summary
[ "$status" -eq 0 ]
[[ "$output" =~ "diff --dolt a/test b/test" ]] || false
[[ "$output" =~ "--- a/test @" ]] || false
[[ "$output" =~ "+++ b/test @" ]] || false
[[ "$output" =~ "diff --dolt a/employees b/employees" ]] || false
[[ "$output" =~ "--- a/employees @" ]] || false
[[ "$output" =~ "+++ b/employees @" ]] || false
}
@test "diff: with where clause" {
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
@@ -1047,18 +840,6 @@ SQL
[[ "$output" =~ "where pk=4" ]] || false
}
@test "diff: diff summary incorrect primary key set change regression test" {
dolt sql -q "create table testdrop (col1 varchar(20), id int primary key, col2 varchar(20))"
dolt add .
dolt sql -q "insert into testdrop values ('test1', 1, 'test2')"
dolt commit -am "Add testdrop table"
dolt sql -q "alter table testdrop drop column col1"
run dolt diff --summary
[ $status -eq 0 ]
[[ $output =~ "1 Row Modified (100.00%)" ]]
}
@test "diff: with where clause errors" {
dolt sql -q "insert into test values (0, 0, 0, 0, 0, 0)"
dolt sql -q "insert into test values (1, 1, 1, 1, 1, 1)"
@@ -1704,7 +1485,7 @@ EOF
[ $status -eq 0 ]
[[ ! "$output" =~ "| 1" ]] || false
run dolt diff --summary
run dolt diff --stat
[ $status -eq 0 ]
[[ ! "$output" =~ "1 Row Modified" ]] || false
}
+7 -7
View File
@@ -127,7 +127,7 @@ teardown() {
dolt add .
dolt commit --allow-empty -m "create tables from doltdump"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -158,7 +158,7 @@ teardown() {
dolt add .
dolt commit --allow-empty -m "create tables from doltdump"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -500,7 +500,7 @@ SQL
dolt add .
dolt commit --allow-empty -m "create tables from doltdump"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -526,7 +526,7 @@ SQL
dolt add .
dolt commit --allow-empty -m "create tables from doltdump"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -640,7 +640,7 @@ SQL
dolt add .
dolt commit --allow-empty -m "create tables from doltdump"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -665,7 +665,7 @@ SQL
dolt add .
dolt commit --allow-empty -m "create tables from doltdump"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -768,7 +768,7 @@ SQL
dolt add .
dolt commit --allow-empty -m "create tables from dump files"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -304,6 +304,11 @@ skip_if_chunk_journal() {
# leave data in the working set
dolt sql -q "INSERT INTO test VALUES ($(($NUM_COMMITS+1))),($(($NUM_COMMITS+2))),($(($NUM_COMMITS+3)));"
# write a garbage file which looks like an old table file
for i in `seq 0 100`; do
dolt --help >> .dolt/noms/b0f6n6b1ej7a9ovalt0rr80bsentq807
done
BEFORE=$(du -c .dolt/noms/ | grep total | sed 's/[^0-9]*//g')
run dolt gc --shallow
[ "$status" -eq 0 ]
@@ -324,7 +324,7 @@ SQL
dolt add .
dolt commit --allow-empty -m "update table from parquet file"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
@@ -440,7 +440,7 @@ DELIM
dolt add .
dolt commit --allow-empty -m "update table from parquet file"
run dolt diff --summary main new_branch
run dolt diff --stat main new_branch
[ "$status" -eq 0 ]
[[ "$output" = "" ]] || false
}
+2 -2
View File
@@ -204,14 +204,14 @@ SQL
[[ "${#lines[@]}" = "13" ]] || false
}
@test "keyless: diff --summary" {
@test "keyless: diff --stat" {
dolt sql <<SQL
DELETE FROM keyless WHERE c0 = 0;
INSERT INTO keyless VALUES (8,8);
UPDATE keyless SET c1 = 9 WHERE c0 = 1;
SQL
run dolt diff --summary
run dolt diff --stat
[ $status -eq 0 ]
[[ "$output" =~ "3 Rows Added" ]] || false
[[ "$output" =~ "3 Rows Deleted" ]] || false
+8 -1
View File
@@ -41,6 +41,11 @@ SQL
run dolt sql -q "SELECT count(*) FROM dolt_commits" -r csv
[ $status -eq 0 ]
[[ "$output" =~ "3" ]] || false
dolt checkout dolt_migrated_commits
run dolt sql -q "SELECT count(*) FROM dolt_commit_mapping" -r csv
[ $status -eq 0 ]
[[ "$output" =~ "2" ]] || false
}
@@ -62,12 +67,14 @@ SQL
pushd db_one
dolt migrate
[[ $(cat ./.dolt/noms/manifest | cut -f 2 -d :) = "$TARGET_NBF" ]] || false
dolt branch -D dolt_migrated_commits
ONE=$(dolt branch -av)
popd
pushd db_two
dolt migrate
[[ $(cat ./.dolt/noms/manifest | cut -f 2 -d :) = "$TARGET_NBF" ]] || false
dolt branch -D dolt_migrated_commits
TWO=$(dolt branch -av)
popd
@@ -123,7 +130,7 @@ SQL
run dolt sql -q "SELECT count(*) FROM dolt_commits" -r csv
[ $status -eq 0 ]
[[ "$output" =~ "5" ]] || false
[[ "$output" =~ "6" ]] || false
}
@test "migrate: tag and working set" {
@@ -334,15 +334,15 @@ teardown() {
[[ "$output" =~ '+ `val` int NOT NULL,' ]] || false
[[ "$output" =~ '+ PRIMARY KEY (`pk`,`val`)' ]] || false
# Make sure there is not a data diff or summary diff
# Make sure there is not a data diff or stat diff
dolt diff --data
run dolt diff --data
[ "$status" -eq 0 ]
[[ "$output" =~ "Primary key sets differ between revisions for table t, skipping data diff" ]] || false
run dolt diff --summary
run dolt diff --stat
[ "$status" -eq 1 ]
[[ "$output" =~ "failed to compute diff summary for table t: primary key set changed" ]] || false
[[ "$output" =~ "failed to compute diff stat for table t: primary key set changed" ]] || false
dolt add .

Some files were not shown because too many files have changed in this diff.