Files
dolt/integration-tests/bats/archive.bats
2025-10-15 11:54:12 -07:00

552 lines
17 KiB
Bash
Executable File

#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
load $BATS_TEST_DIRNAME/helper/data-generation.bash
# Per-test fixture: run the shared setup, create the test table, then execute
# the SQL text printed by insert_statement against the fresh repository.
setup() {
    setup_common
    create_test_table

    # insert_statement only prints SQL; dolt is what actually executes it.
    dolt sql -q "$(insert_statement)"
}
# Per-test cleanup: stop any background remotesrv a test left behind, then run
# the feature-version assertion and the shared teardown.
teardown() {
    if [[ -n "$remotesrv_pid" ]]; then
        kill "$remotesrv_pid"
        # Reap the server; its exit status after being killed is not interesting.
        wait "$remotesrv_pid" || true
        remotesrv_pid=""
    fi

    assert_feature_version
    teardown_common
}
# For reasons unknown, lambda fails on this test about 10% of the time. It seems to be something having to do with
# the IO subsystem of lambda. The output of the `dolt archive` command is truncated occasionally. Doesn't ever happen
# on regular hosts.
# bats test_tags=no_lambda
@test "archive: too few chunks" {
    # `dolt archive` must succeed and report, on exactly two output lines,
    # that there were not enough chunks to build an archive.
    dolt sql -q "$(update_statement)"
    dolt gc --archive-level 0

    run dolt archive
    [[ "$status" -eq 0 ]] || false

    # Count matching lines case-insensitively.
    skip_notices="$(grep -ci 'Not enough chunks to build archive.*skipping' <<<"$output")"
    [[ "$skip_notices" -eq 2 ]] || false
}
@test "archive: single archive oldgen" {
    # After running the mutations-and-gc SQL, oldgen must hold exactly one
    # *darc file and the database must still accept writes.
    dolt sql -q "$(mutations_and_gc_statement 1)"

    # List every file so the layout shows up in the log of a failing run.
    find . -type f

    darc_count=$(find .dolt/noms/oldgen -name "*darc" | wc -l | sed 's/[ \t]//g')
    [[ "$darc_count" -eq 1 ]] || false

    # Ensure updates continue to work.
    dolt sql -q "$(update_statement)"
}
@test "archive: single archive cloned in newgen" {
    # Push to a file remote, clone it back, and expect the clone to contain
    # exactly one *darc file while still accepting writes.
    dolt sql -q "$(mutations_and_gc_statement 0)"

    mkdir remote
    dolt remote add origin file://remote
    dolt push origin main

    dolt clone file://remote cloned
    cd cloned

    darc_count=$(find . -name "*darc" | wc -l | sed 's/[ \t]//g')
    [[ "$darc_count" -eq 1 ]] || false

    # Ensure updates continue to work.
    dolt sql -q "$(update_statement)"
}
@test "archive: multi archive newgen then revert" {
    # Builds two archives inside a clone via two push/fetch rounds, then checks
    # that `dolt archive --revert` leaves no *darc files and fsck still passes.

    # Getting multiple table files in `newgen` is a little gross.
    dolt sql -q "$(mutations_and_gc_statement 0)"

    mkdir remote
    dolt remote add origin file://remote
    dolt push origin main

    dolt clone file://remote cloned
    cd cloned
    dolt archive --purge
    darc_count=$(find . -name "*darc" | wc -l | sed 's/[ \t]//g')
    [[ "$darc_count" -eq 1 ]] || false

    # Second round: more changes in the source repo, pushed and then fetched.
    cd ..
    dolt sql -q "$(mutations_and_gc_statement 0)"
    dolt push origin main

    cd cloned
    dolt fetch
    dolt archive --purge
    darc_count=$(find . -name "*darc" | wc -l | sed 's/[ \t]//g')
    [[ "$darc_count" -eq 2 ]] || false

    dolt archive --revert
    darc_count=$(find . -name "*darc" | wc -l | sed 's/[ \t]//g')
    [[ "$darc_count" -eq 0 ]] || false

    dolt fsck
}
@test "archive: multiple archives" {
    # Three mutation-and-gc rounds must leave three *darc files in oldgen, and
    # the full history must still be readable afterwards.
    dolt sql -q "$(mutations_and_gc_statement 1)"
    dolt sql -q "$(mutations_and_gc_statement 1)"
    dolt sql -q "$(mutations_and_gc_statement 1)"

    darc_count=$(find .dolt/noms/oldgen -name "*darc" | wc -l | sed 's/[ \t]//g')
    [[ "$darc_count" -eq 3 ]] || false

    # dolt log --stat will load every single chunk.
    log_line_count=$(dolt log --stat --oneline | wc -l | sed 's/[ \t]//g')
    [[ "$log_line_count" -eq 186 ]] || false
}
@test "archive: archive multiple times" {
    # Running `dolt archive` after each of two mutation rounds must end with
    # two *darc files in the repository.
    dolt sql -q "$(mutations_and_gc_statement 0)"
    dolt archive

    dolt sql -q "$(mutations_and_gc_statement 0)"
    dolt archive

    darc_count=$(find . -name "*darc" | wc -l | sed 's/[ \t]//g')
    [[ "$darc_count" -eq 2 ]] || false
}
@test "archive: archive --revert (fast)" {
    # Archive, revert immediately, then confirm the whole history is readable.
    dolt sql -q "$(mutations_and_gc_statement 0)"
    dolt archive
    dolt archive --revert

    # dolt log --stat will load every single chunk. 66 manually verified.
    log_line_count=$(dolt log --stat --oneline | wc -l | sed 's/[ \t]//g')
    [[ "$log_line_count" -eq 66 ]] || false
}
@test "archive: archive --revert (rebuild)" {
    # Archive, revert, then confirm the whole history is readable.
    # NOTE(review): these steps are the same as in the "(fast)" variant of this
    # test; confirm whether a step that forces the rebuild path is missing.
    dolt sql -q "$(mutations_and_gc_statement 0)"
    dolt archive
    dolt archive --revert

    # dolt log --stat will load every single chunk. 66 manually verified.
    log_line_count=$(dolt log --stat --oneline | wc -l | sed 's/[ \t]//g')
    [[ "$log_line_count" -eq 66 ]] || false
}
@test "archive: archive --purge" {
    # After `dolt archive --purge`, the table file found in oldgen beforehand
    # must no longer exist on disk.
    dolt sql -q "$(mutations_and_gc_statement 0)"

    # find impl differences by platform makes this a pain, so the file name is
    # filtered in awk: a 32-character basename drawn from [a-v0-9]. The length
    # test already pins the width, so the regex uses `+` instead of an `{32}`
    # interval expression, which not every awk implementation supports.
    tablefile=$(find .dolt/noms/oldgen -type f -print | awk -F/ 'length($NF) == 32 && $NF ~ /^[a-v0-9]+$/')
    [ -e "$tablefile" ] # extreme paranoia. make sure it exists before.

    dolt archive --purge

    # Ensure the table file is gone.
    [ ! -e "$tablefile" ]
}
@test "archive: can clone archived repository" {
    # Serves the archived fixture repo over remotesrv, clones it, verifies the
    # data, then serves that clone and clones it a second time.
    mkdir -p remote/.dolt
    mkdir cloned

    # Copy the archive test repo to remote directory. The directory variable is
    # quoted so a checkout path containing whitespace still works; the glob
    # stays outside the quotes so it is still expanded.
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    cd ../cloned
    dolt clone "http://localhost:$port/test-org/test-repo" repo1
    cd repo1

    # Verify we can read data
    run dolt sql -q 'select sum(i) from tbl;'
    [[ "$status" -eq 0 ]] || false
    [[ "$output" =~ "138075" ]] || false # i = 1 - 525, sum is 138075

    # Stop the first server and clear the pid so teardown does not kill it again.
    kill "$remotesrv_pid"
    wait "$remotesrv_pid" || :
    remotesrv_pid=""

    ## The above test is the setup for the next test - so we'll stick both in here.
    ## This tests cloning from a clone. Archive files are generally in oldgen, but not the case with a fresh clone.
    cd ../../
    mkdir clone2

    cd cloned/repo1 # start the server using the clone from above.
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    cd ../../clone2
    run dolt clone "http://localhost:$port/test-org/test-repo" repo2
    [ "$status" -eq 0 ]
    cd repo2

    run dolt sql -q 'select sum(i) from tbl;'
    [[ "$status" -eq 0 ]] || false
    [[ "$output" =~ "138075" ]] || false # i = 1 - 525, sum is 138075
}
@test "archive: can clone repository with mixed types" {
    # Adds un-archived data on top of the archived fixture repo and checks that
    # a clone over remotesrv sees both the old and the new rows.
    mkdir -p remote/.dolt
    mkdir cloned

    # Copy the archive test repo to remote directory (variable quoted so paths
    # containing whitespace work; the glob stays unquoted).
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    # Insert data (commits automatically), but don't gc/archive yet. Want to make sure we can still clone it.
    dolt sql -q "$(insert_statement)"

    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    cd ../cloned
    run dolt clone "http://localhost:$port/test-org/test-repo" repo1
    [ "$status" -eq 0 ]
    cd repo1

    # verify new data is there.
    run dolt sql -q 'select sum(i) from tbl;'
    [[ "$status" -eq 0 ]] || false
    [[ "$output" =~ "151525" ]] || false # i = 1 - 550, sum is 151525
}
@test "archive: can fetch chunks from an archived repo" {
    # Clones the fixture repo, archives new commits on the remote while the
    # server is stopped, then fetches them and checks the clone's integrity.
    mkdir -p remote/.dolt
    mkdir cloned

    # Copy the archive test repo to remote directory (variable quoted so paths
    # containing whitespace work; the glob stays unquoted).
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    cd ../cloned
    dolt clone "http://localhost:$port/test-org/test-repo" repo1

    # Fetch when there are no changes.
    cd repo1
    dolt fetch

    ## update the remote repo directly. Need to run the archive command when the server is stopped.
    ## This will result in archived files on the remote, which we will need to read chunks from when we fetch.
    cd ../../remote
    kill "$remotesrv_pid"
    wait "$remotesrv_pid" || :
    remotesrv_pid=""

    dolt sql -q "$(mutations_and_gc_statement)"
    dolt archive

    # Restart the server on the same port; the clone's remote URL points at it.
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    cd ../cloned/repo1
    dolt fetch

    run dolt status
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Your branch is behind 'origin/main' by 20 commits, and can be fast-forwarded" ]] || false

    # Verify the repo has integrity.
    dolt fsck
}
@test "archive: backup and restore" {
    # Syncs the archived fixture repo to a file backup, restores it into a new
    # directory, and checks the restored data.

    # cp the repository from the test dir (variable quoted so paths containing
    # whitespace work; the glob stays unquoted).
    mkdir -p original/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* original/.dolt

    cd original
    dolt backup add bac1 file://../bac1
    dolt backup sync bac1

    cd ..
    dolt backup restore file://./bac1 restored
    cd restored

    # Verify we can read data
    run dolt sql -q 'select sum(i) from tbl;'
    [[ "$status" -eq 0 ]] || false
    [[ "$output" =~ "138075" ]] || false # i = 1 - 525, sum is 138075
}
@test "archive: mixed compression types" {
    # Pushes a repo that holds both archived and newly gc'd storage to a bare
    # remotesrv, clones it back, and checks the clone with fsck.
    port=$( definePORT )

    # run a bare server.
    mkdir remotesrv
    cd remotesrv
    remotesrv --http-port "$port" --grpc-port "$port" &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false
    cd ..

    # Copy the archive test repo to remote directory (variable quoted so paths
    # containing whitespace work; the glob stays unquoted).
    mkdir -p repo/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* repo/.dolt
    cd repo

    # Make some new commits. update_statement only prints SQL, so it has to be
    # handed to `dolt sql -q` (as the other tests in this file do) to take effect.
    dolt sql -q "$(update_statement)"

    # Get everything into a non-journal form. Repository has mixed storage types now.
    dolt gc

    # Push to the bare server. This test sets no streamer-related environment
    # variable, so whatever the current default is applies.
    dolt remote add origin "http://localhost:$port/test-org/test-repo"
    dolt push origin main

    cd ..
    dolt clone "http://localhost:$port/test-org/test-repo" repo2
    cd repo2
    dolt fsck
}
@test "archive: large push remote without archive default produces no new archives" {
    # With DOLT_ARCHIVE_PULL_STREAMER=0, pushing the large fixture clone must
    # land on the remote as snappy chunks in a classic table file.
    export DOLT_ARCHIVE_PULL_STREAMER=0

    # Fixture paths are quoted so a checkout path containing whitespace works;
    # the globs stay unquoted so they are still expanded.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false
    cd ..

    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/large_clone/* clone/.dolt

    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as snappy chunks
    ## in a classic table file. multiline regex - no quotes - to match this text:
    # Table File Metadata:
    # Snappy Chunk Count: 1609
    [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]1609[[:space:]] ]] || false
}
@test "archive: small push remote without archive default produces no new archives" {
    # With DOLT_ARCHIVE_PULL_STREAMER=0, pushing the small fixture clone must
    # land on the remote as snappy chunks in a classic table file.
    export DOLT_ARCHIVE_PULL_STREAMER=0

    # Fixture paths are quoted so a checkout path containing whitespace works;
    # the globs stay unquoted so they are still expanded.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false
    cd ..

    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/small_clone/* clone/.dolt

    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as snappy chunks
    ## in a classic table file. multiline regex - no quotes - to match this text:
    # Table File Metadata:
    # Snappy Chunk Count: 9
    [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]9[[:space:]] ]] || false
}
@test "archive: large push remote with archive default produces new archive with converted snappy chunks" {
    # With DOLT_ARCHIVE_PULL_STREAMER unset, pushing the large fixture clone
    # must land on the remote as zStd chunks in an archive file.
    unset DOLT_ARCHIVE_PULL_STREAMER

    # Fixture paths are quoted so a checkout path containing whitespace works;
    # the globs stay unquoted so they are still expanded.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false
    cd ..

    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/large_clone/* clone/.dolt

    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as zStd chunks
    ## in an archive file. multiline regex - no quotes - to match this text:
    # Archive Metadata:
    # Format Version: 3
    # Snappy Chunk Count: 0 (bytes: 0)
    # ZStd Chunk Count: 1609
    [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]3[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]0.*ZStd[[:space:]]Chunk[[:space:]]Count:[[:space:]]1609 ]] || false
}
@test "archive: small push remote with archive default produces archive with snappy chunks" {
    # With DOLT_ARCHIVE_PULL_STREAMER unset, pushing the small fixture clone
    # must land on the remote as snappy chunks inside an archive file.
    unset DOLT_ARCHIVE_PULL_STREAMER

    # Fixture paths are quoted so a checkout path containing whitespace works;
    # the globs stay unquoted so they are still expanded.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false
    cd ..

    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/small_clone/* clone/.dolt

    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as snappy chunks
    ## in an archive file. multiline regex - no quotes - to match this text:
    # Archive Metadata:
    # Format Version: 3
    # Snappy Chunk Count: 9
    [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]3[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]9[[:space:]] ]] || false
}
@test "archive: fetch into empty database with archive default" {
    # Fetching from the archived fixture repo with default settings must store
    # the fetched content as zStd chunks in an archive file.

    # Clear any inherited value so the default really applies, matching the
    # other "archive default" tests in this file.
    unset DOLT_ARCHIVE_PULL_STREAMER

    # Fixture path is quoted so a checkout path containing whitespace works;
    # the glob stays unquoted so it is still expanded.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    cd ../
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt fetch r1

    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content was fetched from the remote into an archive file. Note that since
    ## the remote is all archive, the chunks end up as zStd as well.
    ## multiline regex - no quotes - to match this text:
    # Archive Metadata:
    # Format Version: 3
    # Snappy Chunk Count: 0 (bytes: 0)
    # ZStd Chunk Count: 260
    [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]3[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]0.*ZStd[[:space:]]Chunk[[:space:]]Count:[[:space:]]260 ]] || false

    dolt fsck
}
@test "archive: fetch into empty database with archive disabled" {
    # With DOLT_ARCHIVE_PULL_STREAMER=0, fetching from the archived fixture
    # repo must store the content as snappy chunks in a classic table file.
    export DOLT_ARCHIVE_PULL_STREAMER=0

    # Fixture path is quoted so a checkout path containing whitespace works;
    # the glob stays unquoted so it is still expanded.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt

    cd remote
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    cd ../
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt fetch r1

    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content was fetched from the remote into a table file. Note that since
    ## the remote is all archive, the chunks are translated into the snappy format
    ## multiline regex - no quotes - to match this text:
    # Table File Metadata:
    # Snappy Chunk Count: 260
    [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]260[[:space:]] ]] || false

    dolt fsck
}
@test "archive: read legacy v1 database" {
    # The v1 fixture repo must pass `dolt fsck`.
    mkdir -p original/.dolt

    # Variable quoted so a checkout path containing whitespace works; the glob
    # stays unquoted so it is still expanded.
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/v1/* original/.dolt

    cd original
    dolt fsck
}
@test "archive: read legacy v2 database" {
    # The v2 fixture repo must pass `dolt fsck`.
    mkdir -p original/.dolt

    # Variable quoted so a checkout path containing whitespace works; the glob
    # stays unquoted so it is still expanded.
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/v2/* original/.dolt

    cd original
    dolt fsck
}
@test "archive: can mmap archive index" {
    # Checks the mmap_archive_indexes config: with it unset or false, a query
    # run under DOLT_TEST_ASSERT_NO_IN_MEMORY_ARCHIVE_INDEX=1 must fail with the
    # expected message; with it true, the same query must succeed.
    mkdir -p remote/.dolt

    # Copy the archive test repo to remote directory (variable quoted so paths
    # containing whitespace work; the glob stays unquoted).
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt
    cd remote

    # When mmap_archive_indexes is not set in the config, or is set to false,
    # setting DOLT_TEST_ASSERT_NO_IN_MEMORY_ARCHIVE_INDEX should result in an error
    run env DOLT_TEST_ASSERT_NO_IN_MEMORY_ARCHIVE_INDEX=1 dolt sql -q 'select sum(i) from tbl;'
    [ "$status" -ne 0 ]
    echo "$output" # shown in the log if an assertion below fails
    [[ $output =~ "attempted to load archive index into memory but DOLT_TEST_ASSERT_NO_IN_MEMORY_ARCHIVE_INDEX was set" ]] || false

    dolt config --local --set "mmap_archive_indexes" false
    run env DOLT_TEST_ASSERT_NO_IN_MEMORY_ARCHIVE_INDEX=1 dolt sql -q 'select sum(i) from tbl;'
    [ "$status" -ne 0 ]
    [[ $output =~ "attempted to load archive index into memory but DOLT_TEST_ASSERT_NO_IN_MEMORY_ARCHIVE_INDEX was set" ]] || false

    dolt config --local --set "mmap_archive_indexes" true

    # Verify we can read data
    run env DOLT_TEST_ASSERT_NO_IN_MEMORY_ARCHIVE_INDEX=1 dolt sql -q 'select sum(i) from tbl;'
    echo "$output"
    [[ "$status" -eq 0 ]] || false
    [[ "$output" =~ "138075" ]] || false # i = 1 - 525, sum is 138075
}