Tests for mixed format pushes and fetches

This commit is contained in:
Neil Macneale IV
2025-03-17 15:24:50 -07:00
parent 6e3fe5ff47
commit d12f777e5c
20 changed files with 324 additions and 2 deletions

View File

@@ -0,0 +1,93 @@
This directory contains dolt database files which are used as part of the
archive.bats tests.
Each directory is a .dolt directory which can be copied into place
for a given test.
Eg:
$ mkdir -p repo/.dolt
$ cp -R $BATS_TEST_DIRNAME/archive-test-repos/base/* repo/.dolt
$ cd repo
$ dolt sql
The directories are as follows:
--------------------------------------------------------------------------
base: This was the first repo created. Its contents are all archive files.
$ dolt admin storage
Storage Artifact:
ID: 8p5e2m6skovfdjlh4jg3llr8sfvu384l
Path: /Users/neil/Documents/data_dir_1/db1/base/.dolt/noms/8p5e2m6skovfdjlh4jg3llr8sfvu384l
Table File Metadata:
Snappy Chunk Count: 2 (bytes: 382)
Storage Artifact:
ID: 29o8a3uevcpr15tilcemb3s438edmoog
Path: /Users/neil/Documents/data_dir_1/db1/base/.dolt/noms/oldgen/29o8a3uevcpr15tilcemb3s438edmoog.darc
Archive Metadata:
Format Version: 1
Snappy Chunk Count: 0 (bytes: 0)
ZStd Chunk Count: 125 (bytes: 51318)
Dictionary Count: 1 (bytes: 2620)
Storage Artifact:
ID: dnu4lr5j8sstbj5usbld7alsnuj5nf23
Path: /Users/neil/Documents/data_dir_1/db1/base/.dolt/noms/oldgen/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc
Archive Metadata:
Format Version: 1
Snappy Chunk Count: 0 (bytes: 0)
ZStd Chunk Count: 139 (bytes: 105419)
Dictionary Count: 1 (bytes: 2641)
--------------------------------------------------------------------------
large_clone: a full clone of base, with a bunch of mutations on it. It was
then garbage collected to ensure all objects were in Snappy format.
This repo has more than 1000 new chunks in it, which is important
because when we push with archive as the default, all snappy
objects will be converted into zstd compressed objects.
# clones from base.
$ dolt clone http://localhost:$port/test-org/test-repo clone1
$ cd clone1
$ dolt sql -q "$(mutations_and_gc_statement)" # see archive.bats
... repeated ...
$ dolt sql -q "$(mutations_and_gc_statement)"
$ dolt gc --full
$ dolt admin storage
Storage Artifact:
ID: p6d0rsovtipdk6r2b1kh9qrndif41703
Path: /Users/neil/Documents/data_dir_1/db1/clone1/.dolt/noms/p6d0rsovtipdk6r2b1kh9qrndif41703
Table File Metadata:
Snappy Chunk Count: 2 (bytes: 386)
Storage Artifact:
ID: b5j6kmj2m68sukiu22ouoke7u9281a06
Path: /Users/neil/Documents/data_dir_1/db1/clone1/.dolt/noms/oldgen/b5j6kmj2m68sukiu22ouoke7u9281a06
Table File Metadata:
Snappy Chunk Count: 1873 (bytes: 4119222)
--------------------------------------------------------------------------
small_clone: a full clone of base with a very small number of mutations on it.
Similar to large_clone (clone1), it was garbage collected to convert chunks into
the snappy format. There are only a few new chunks though, so
pushing with archives enabled will result in archive files which
have snappy objects in them.
$ dolt admin storage
Storage Artifact:
ID: sv0o2e33pel8caor7979s4rk227raink
Path: /Users/neil/Documents/data_dir_1/db1/clone2/.dolt/noms/sv0o2e33pel8caor7979s4rk227raink
Table File Metadata:
Snappy Chunk Count: 2 (bytes: 386)
Storage Artifact:
ID: c5j9u4ced6eg6cnegk0mgdglc3t04air
Path: /Users/neil/Documents/data_dir_1/db1/clone2/.dolt/noms/oldgen/c5j9u4ced6eg6cnegk0mgdglc3t04air
Table File Metadata:
Snappy Chunk Count: 273 (bytes: 377979)

View File

@@ -0,0 +1 @@
5:__DOLT__:kbcpgpu62rcq5tuoqv4e449g74u2ojua:jnld0tef9m9tebcsmnbdetqhl6mv6hjt:mf7fscamh7fkbnjhca5no0mdlo7cibth:p6d0rsovtipdk6r2b1kh9qrndif41703:2

View File

@@ -0,0 +1 @@
5:__DOLT__:od2romsk6nrmnqrtpdjgl7qesoe4jn51:00000000000000000000000000000000:is478auoug5331bfupvb1filajskf8l3:b5j6kmj2m68sukiu22ouoke7u9281a06:1873

View File

@@ -0,0 +1,20 @@
{
"head": "refs/heads/main",
"remotes": {
"origin": {
"name": "origin",
"url": "http://localhost:4635/test-org/test-repo",
"fetch_specs": [
"refs/heads/*:refs/remotes/origin/*"
],
"params": {}
}
},
"backups": {},
"branches": {
"main": {
"head": "refs/heads/main",
"remote": "origin"
}
}
}

View File

@@ -0,0 +1 @@
5:__DOLT__:857q4ttsgr7aki83d08cqm17o1i0cd16:iss8tc03aq3fu4adsnebv7pm9osfjtrt:gpabdsepm9jndanjjudc87lmedd9aoqu:sv0o2e33pel8caor7979s4rk227raink:2

View File

@@ -0,0 +1 @@
5:__DOLT__:i2j8dbkj85h75e618a585k92sijd0n59:00000000000000000000000000000000:i63ieuasltd86b6bqsgvc53jtsp2dcte:c5j9u4ced6eg6cnegk0mgdglc3t04air:273

View File

@@ -0,0 +1,20 @@
{
"head": "refs/heads/main",
"remotes": {
"origin": {
"name": "origin",
"url": "http://localhost:3903/test-org/test-repo",
"fetch_specs": [
"refs/heads/*:refs/remotes/origin/*"
],
"params": {}
}
},
"backups": {},
"branches": {
"main": {
"head": "refs/heads/main",
"remote": "origin"
}
}
}

View File

@@ -151,8 +151,7 @@ mutations_and_gc_statement() {
[[ "$remotesrv_pid" -gt 0 ]] || false
cd ../cloned
run dolt clone http://localhost:$port/test-org/test-repo repo1
[ "$status" -eq 0 ]
dolt clone http://localhost:$port/test-org/test-repo repo1
cd repo1
# Verify we can read data
@@ -310,4 +309,190 @@ mutations_and_gc_statement() {
dolt fsck
}
@test "archive: large push remote without archive default produces no new archives" {
    # Scenario: push the large_clone fixture to a remote seeded from the base
    # fixture while DOLT_ARCHIVE_PULL_STREAMER is unset, then inspect the
    # remote's storage report.
    unset DOLT_ARCHIVE_PULL_STREAMER

    # Seed the remote from the checked-in fixture. The directory variable is
    # quoted so a checkout path containing spaces does not split the cp
    # arguments; the glob stays outside the quotes so it still expands.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt
    cd remote

    # Serve the remote over a single port for both HTTP and gRPC.
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    # NOTE(review): remotesrv_pid is presumably consumed by a teardown that is
    # not visible in this chunk - keep the variable name as is.
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    # Build the client repo from the large_clone fixture and push it.
    cd ..
    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/large_clone/* clone/.dolt
    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    # Inspect what the push produced on the remote side.
    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as snappy chunks
    ## in a classic table file. multiline regex - no quotes - to match this text:
    #   Table File Metadata:
    #     Snappy Chunk Count: 1609
    [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]1609[[:space:]] ]] || false
}
@test "archive: small push remote without archive default produces no new archives" {
    # Scenario: push the small_clone fixture to a remote seeded from the base
    # fixture while DOLT_ARCHIVE_PULL_STREAMER is unset, then inspect the
    # remote's storage report.
    unset DOLT_ARCHIVE_PULL_STREAMER

    # Seed the remote from the checked-in fixture. The directory variable is
    # quoted so a checkout path containing spaces does not split the cp
    # arguments; the glob stays outside the quotes so it still expands.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt
    cd remote

    # Serve the remote over a single port for both HTTP and gRPC.
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    # NOTE(review): remotesrv_pid is presumably consumed by a teardown that is
    # not visible in this chunk - keep the variable name as is.
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    # Build the client repo from the small_clone fixture and push it.
    cd ..
    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/small_clone/* clone/.dolt
    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    # Inspect what the push produced on the remote side.
    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as snappy chunks
    ## in a classic table file. multiline regex - no quotes - to match this text:
    #   Table File Metadata:
    #     Snappy Chunk Count: 9
    [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]9[[:space:]] ]] || false
}
@test "archive: large push remote with archive default produces new archive with converted snappy chunks" {
    # Scenario: push the large_clone fixture to a remote seeded from the base
    # fixture with DOLT_ARCHIVE_PULL_STREAMER=1 exported, then inspect the
    # remote's storage report.
    export DOLT_ARCHIVE_PULL_STREAMER=1

    # Seed the remote from the checked-in fixture. The directory variable is
    # quoted so a checkout path containing spaces does not split the cp
    # arguments; the glob stays outside the quotes so it still expands.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt
    cd remote

    # Serve the remote over a single port for both HTTP and gRPC. The server
    # is started after the export above, so it inherits the variable too.
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    # NOTE(review): remotesrv_pid is presumably consumed by a teardown that is
    # not visible in this chunk - keep the variable name as is.
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    # Build the client repo from the large_clone fixture and push it.
    cd ..
    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/large_clone/* clone/.dolt
    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    # Inspect what the push produced on the remote side.
    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as zStd chunks
    ## in an archive file. multiline regex - no quotes - to match this text:
    #   Archive Metadata:
    #     Format Version: 2
    #     Snappy Chunk Count: 0 (bytes: 0)
    #     ZStd Chunk Count: 1609
    [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]2[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]0.*ZStd[[:space:]]Chunk[[:space:]]Count:[[:space:]]1609 ]] || false
}
@test "archive: small push remote with archive default produces archive with snappy chunks" {
    # Scenario: push the small_clone fixture to a remote seeded from the base
    # fixture with DOLT_ARCHIVE_PULL_STREAMER=1 exported, then inspect the
    # remote's storage report.
    export DOLT_ARCHIVE_PULL_STREAMER=1

    # Seed the remote from the checked-in fixture. The directory variable is
    # quoted so a checkout path containing spaces does not split the cp
    # arguments; the glob stays outside the quotes so it still expands.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt
    cd remote

    # Serve the remote over a single port for both HTTP and gRPC. The server
    # is started after the export above, so it inherits the variable too.
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    # NOTE(review): remotesrv_pid is presumably consumed by a teardown that is
    # not visible in this chunk - keep the variable name as is.
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    # Build the client repo from the small_clone fixture and push it.
    cd ..
    mkdir -p clone/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/small_clone/* clone/.dolt
    cd clone
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt push r1 HEAD:main

    # Inspect what the push produced on the remote side.
    cd ../remote
    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content pushed to the remote all landed as snappy chunks
    ## in an archive file. multiline regex - no quotes - to match this text:
    #   Archive Metadata:
    #     Format Version: 2
    #     Snappy Chunk Count: 9
    [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]2[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]9[[:space:]] ]] || false
}
@test "archive: fetch into empty database with archive default" {
    # Scenario: fetch from a remote seeded from the base fixture with
    # DOLT_ARCHIVE_PULL_STREAMER=1 set for the fetch command only, then inspect
    # the local storage report and run fsck.

    # Seed the remote from the checked-in fixture. The directory variable is
    # quoted so a checkout path containing spaces does not split the cp
    # arguments; the glob stays outside the quotes so it still expands.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt
    cd remote

    # Serve the remote over a single port for both HTTP and gRPC.
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    # NOTE(review): remotesrv_pid is presumably consumed by a teardown that is
    # not visible in this chunk - keep the variable name as is.
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    # Back in the directory the test started in.
    # NOTE(review): assumes setup left an empty dolt database here - confirm.
    cd ../
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    # The variable is scoped to this one command rather than exported.
    DOLT_ARCHIVE_PULL_STREAMER=1 dolt fetch r1

    run dolt admin storage
    [ "$status" -eq 0 ]

    ## This output indicates that the new content was fetched from the remote into an archive file. Note that since
    ## the remote is all archive, the chunks end up as zStd as well.
    ## multiline regex - no quotes - to match this text:
    #   Archive Metadata:
    #     Format Version: 2
    #     Snappy Chunk Count: 0 (bytes: 0)
    #     ZStd Chunk Count: 260
    [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]2[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]0.*ZStd[[:space:]]Chunk[[:space:]]Count:[[:space:]]260 ]] || false

    # Consistency check on the database that received the fetch.
    dolt fsck
}
@test "archive: fetch into empty database with archive disabled" {
    # Scenario: fetch from a remote seeded from the base fixture while
    # DOLT_ARCHIVE_PULL_STREAMER is unset, then inspect the local storage
    # report and run fsck.
    unset DOLT_ARCHIVE_PULL_STREAMER

    # Seed the remote from the checked-in fixture. The directory variable is
    # quoted so a checkout path containing spaces does not split the cp
    # arguments; the glob stays outside the quotes so it still expands.
    mkdir -p remote/.dolt
    cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt
    cd remote

    # Serve the remote over a single port for both HTTP and gRPC.
    port=$( definePORT )
    remotesrv --http-port "$port" --grpc-port "$port" --repo-mode &
    # NOTE(review): remotesrv_pid is presumably consumed by a teardown that is
    # not visible in this chunk - keep the variable name as is.
    remotesrv_pid=$!
    [[ "$remotesrv_pid" -gt 0 ]] || false

    # Back in the directory the test started in.
    # NOTE(review): assumes setup left an empty dolt database here - confirm.
    cd ../
    dolt remote add r1 "http://localhost:$port/test-org/test-repo"
    dolt fetch r1

    run dolt admin storage
    [ "$status" -eq 0 ]

    # Dump the storage report so it is at hand when diagnosing a failed
    # assertion below.
    echo "------------------"
    echo "$output"
    echo "------------------"

    ## This output indicates that the new content was fetched from the remote into a table file. Note that since
    ## the remote is all archive, the chunks are translated into the snappy format
    ## multiline regex - no quotes - to match this text:
    #   Table File Metadata:
    #     Snappy Chunk Count: 260
    [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]260[[:space:]] ]] || false

    # Consistency check on the database that received the fetch.
    dolt fsck
}