diff --git a/integration-tests/bats/archive-test-repos/README.txt b/integration-tests/bats/archive-test-repos/README.txt new file mode 100644 index 0000000000..fb3ad64e10 --- /dev/null +++ b/integration-tests/bats/archive-test-repos/README.txt @@ -0,0 +1,93 @@ + +This directory contains dolt database files which are used as part of the +archive.bats tests. + +Each directory is a .dolt directory which can be copied into place +for a given test. + +Eg: + +$ mkdir -p repo/.dolt +$ cp -R $BATS_TEST_DIRNAME/archive-test-repos/base/* repo/.dolt +$ cd repo +$ dolt sql + +The directories are as follows: +-------------------------------------------------------------------------- +base: This was the first repo created. Its contents are all archive files. + +$ dolt admin storage +Storage Artifact: + ID: 8p5e2m6skovfdjlh4jg3llr8sfvu384l + Path: /Users/neil/Documents/data_dir_1/db1/base/.dolt/noms/8p5e2m6skovfdjlh4jg3llr8sfvu384l + Table File Metadata: + Snappy Chunk Count: 2 (bytes: 382) + +Storage Artifact: + ID: 29o8a3uevcpr15tilcemb3s438edmoog + Path: /Users/neil/Documents/data_dir_1/db1/base/.dolt/noms/oldgen/29o8a3uevcpr15tilcemb3s438edmoog.darc + Archive Metadata: + Format Version: 1 + Snappy Chunk Count: 0 (bytes: 0) + ZStd Chunk Count: 125 (bytes: 51318) + Dictionary Count: 1 (bytes: 2620) + +Storage Artifact: + ID: dnu4lr5j8sstbj5usbld7alsnuj5nf23 + Path: /Users/neil/Documents/data_dir_1/db1/base/.dolt/noms/oldgen/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc + Archive Metadata: + Format Version: 1 + Snappy Chunk Count: 0 (bytes: 0) + ZStd Chunk Count: 139 (bytes: 105419) + Dictionary Count: 1 (bytes: 2641) + + +-------------------------------------------------------------------------- +large_clone: a full clone of base, with a bunch of mutations on it.
It was + then garbage collected to ensure all objects were in Snappy format. + + This repo has more than 1000 new chunks in it, which is important + because when we push with archive as the default, all snappy + objects will be converted into zstd compressed objects. + +# clones from base. +$ dolt clone http://localhost:$port/test-org/test-repo clone1 +$ cd clone1 +$ dolt sql -q "$(mutations_and_gc_statement)" # see archive.bats +... repeated ... +$ dolt sql -q "$(mutations_and_gc_statement)" +$ dolt gc --full + +$ dolt admin storage +Storage Artifact: + ID: p6d0rsovtipdk6r2b1kh9qrndif41703 + Path: /Users/neil/Documents/data_dir_1/db1/clone1/.dolt/noms/p6d0rsovtipdk6r2b1kh9qrndif41703 + Table File Metadata: + Snappy Chunk Count: 2 (bytes: 386) + +Storage Artifact: + ID: b5j6kmj2m68sukiu22ouoke7u9281a06 + Path: /Users/neil/Documents/data_dir_1/db1/clone1/.dolt/noms/oldgen/b5j6kmj2m68sukiu22ouoke7u9281a06 + Table File Metadata: + Snappy Chunk Count: 1873 (bytes: 4119222) + +-------------------------------------------------------------------------- +small_clone: a full clone of base with a very small number of mutations on it. + + Similar to large_clone, it was garbage collected to convert chunks into + the snappy format. There are only a few new chunks though, so + pushing with archives enabled will result in archive files which + have snappy objects in them.
+ +$ dolt admin storage +Storage Artifact: + ID: sv0o2e33pel8caor7979s4rk227raink + Path: /Users/neil/Documents/data_dir_1/db1/clone2/.dolt/noms/sv0o2e33pel8caor7979s4rk227raink + Table File Metadata: + Snappy Chunk Count: 2 (bytes: 386) + +Storage Artifact: + ID: c5j9u4ced6eg6cnegk0mgdglc3t04air + Path: /Users/neil/Documents/data_dir_1/db1/clone2/.dolt/noms/oldgen/c5j9u4ced6eg6cnegk0mgdglc3t04air + Table File Metadata: + Snappy Chunk Count: 273 (bytes: 377979) diff --git a/integration-tests/bats/archive-test-repos/large_clone/noms/29o8a3uevcpr15tilcemb3s438edmoog.darc b/integration-tests/bats/archive-test-repos/large_clone/noms/29o8a3uevcpr15tilcemb3s438edmoog.darc new file mode 100644 index 0000000000..1b938008b8 Binary files /dev/null and b/integration-tests/bats/archive-test-repos/large_clone/noms/29o8a3uevcpr15tilcemb3s438edmoog.darc differ diff --git a/integration-tests/bats/archive-test-repos/large_clone/noms/LOCK b/integration-tests/bats/archive-test-repos/large_clone/noms/LOCK new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integration-tests/bats/archive-test-repos/large_clone/noms/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc b/integration-tests/bats/archive-test-repos/large_clone/noms/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc new file mode 100644 index 0000000000..c13a757ab4 Binary files /dev/null and b/integration-tests/bats/archive-test-repos/large_clone/noms/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc differ diff --git a/integration-tests/bats/archive-test-repos/large_clone/noms/manifest b/integration-tests/bats/archive-test-repos/large_clone/noms/manifest new file mode 100644 index 0000000000..df4b3fe292 --- /dev/null +++ b/integration-tests/bats/archive-test-repos/large_clone/noms/manifest @@ -0,0 +1 @@ +5:__DOLT__:kbcpgpu62rcq5tuoqv4e449g74u2ojua:jnld0tef9m9tebcsmnbdetqhl6mv6hjt:mf7fscamh7fkbnjhca5no0mdlo7cibth:p6d0rsovtipdk6r2b1kh9qrndif41703:2 \ No newline at end of file diff --git 
a/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/LOCK b/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/LOCK new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/b5j6kmj2m68sukiu22ouoke7u9281a06 b/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/b5j6kmj2m68sukiu22ouoke7u9281a06 new file mode 100644 index 0000000000..6ce6dade11 Binary files /dev/null and b/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/b5j6kmj2m68sukiu22ouoke7u9281a06 differ diff --git a/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/manifest b/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/manifest new file mode 100644 index 0000000000..a3c348538a --- /dev/null +++ b/integration-tests/bats/archive-test-repos/large_clone/noms/oldgen/manifest @@ -0,0 +1 @@ +5:__DOLT__:od2romsk6nrmnqrtpdjgl7qesoe4jn51:00000000000000000000000000000000:is478auoug5331bfupvb1filajskf8l3:b5j6kmj2m68sukiu22ouoke7u9281a06:1873 \ No newline at end of file diff --git a/integration-tests/bats/archive-test-repos/large_clone/noms/p6d0rsovtipdk6r2b1kh9qrndif41703 b/integration-tests/bats/archive-test-repos/large_clone/noms/p6d0rsovtipdk6r2b1kh9qrndif41703 new file mode 100644 index 0000000000..dd1e84f92a Binary files /dev/null and b/integration-tests/bats/archive-test-repos/large_clone/noms/p6d0rsovtipdk6r2b1kh9qrndif41703 differ diff --git a/integration-tests/bats/archive-test-repos/large_clone/repo_state.json b/integration-tests/bats/archive-test-repos/large_clone/repo_state.json new file mode 100755 index 0000000000..8139359c7e --- /dev/null +++ b/integration-tests/bats/archive-test-repos/large_clone/repo_state.json @@ -0,0 +1,20 @@ +{ + "head": "refs/heads/main", + "remotes": { + "origin": { + "name": "origin", + "url": "http://localhost:4635/test-org/test-repo", + "fetch_specs": [ + "refs/heads/*:refs/remotes/origin/*" + ], + "params": {} + } 
+ }, + "backups": {}, + "branches": { + "main": { + "head": "refs/heads/main", + "remote": "origin" + } + } +} \ No newline at end of file diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/29o8a3uevcpr15tilcemb3s438edmoog.darc b/integration-tests/bats/archive-test-repos/small_clone/noms/29o8a3uevcpr15tilcemb3s438edmoog.darc new file mode 100644 index 0000000000..1b938008b8 Binary files /dev/null and b/integration-tests/bats/archive-test-repos/small_clone/noms/29o8a3uevcpr15tilcemb3s438edmoog.darc differ diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/LOCK b/integration-tests/bats/archive-test-repos/small_clone/noms/LOCK new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc b/integration-tests/bats/archive-test-repos/small_clone/noms/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc new file mode 100644 index 0000000000..c13a757ab4 Binary files /dev/null and b/integration-tests/bats/archive-test-repos/small_clone/noms/dnu4lr5j8sstbj5usbld7alsnuj5nf23.darc differ diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/manifest b/integration-tests/bats/archive-test-repos/small_clone/noms/manifest new file mode 100644 index 0000000000..b45e40d6a6 --- /dev/null +++ b/integration-tests/bats/archive-test-repos/small_clone/noms/manifest @@ -0,0 +1 @@ +5:__DOLT__:857q4ttsgr7aki83d08cqm17o1i0cd16:iss8tc03aq3fu4adsnebv7pm9osfjtrt:gpabdsepm9jndanjjudc87lmedd9aoqu:sv0o2e33pel8caor7979s4rk227raink:2 \ No newline at end of file diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/LOCK b/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/LOCK new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/c5j9u4ced6eg6cnegk0mgdglc3t04air 
b/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/c5j9u4ced6eg6cnegk0mgdglc3t04air new file mode 100644 index 0000000000..d2e9114495 Binary files /dev/null and b/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/c5j9u4ced6eg6cnegk0mgdglc3t04air differ diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/manifest b/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/manifest new file mode 100644 index 0000000000..7f72cc4298 --- /dev/null +++ b/integration-tests/bats/archive-test-repos/small_clone/noms/oldgen/manifest @@ -0,0 +1 @@ +5:__DOLT__:i2j8dbkj85h75e618a585k92sijd0n59:00000000000000000000000000000000:i63ieuasltd86b6bqsgvc53jtsp2dcte:c5j9u4ced6eg6cnegk0mgdglc3t04air:273 \ No newline at end of file diff --git a/integration-tests/bats/archive-test-repos/small_clone/noms/sv0o2e33pel8caor7979s4rk227raink b/integration-tests/bats/archive-test-repos/small_clone/noms/sv0o2e33pel8caor7979s4rk227raink new file mode 100644 index 0000000000..2b4985b7e2 Binary files /dev/null and b/integration-tests/bats/archive-test-repos/small_clone/noms/sv0o2e33pel8caor7979s4rk227raink differ diff --git a/integration-tests/bats/archive-test-repos/small_clone/repo_state.json b/integration-tests/bats/archive-test-repos/small_clone/repo_state.json new file mode 100755 index 0000000000..fb2d495896 --- /dev/null +++ b/integration-tests/bats/archive-test-repos/small_clone/repo_state.json @@ -0,0 +1,20 @@ +{ + "head": "refs/heads/main", + "remotes": { + "origin": { + "name": "origin", + "url": "http://localhost:3903/test-org/test-repo", + "fetch_specs": [ + "refs/heads/*:refs/remotes/origin/*" + ], + "params": {} + } + }, + "backups": {}, + "branches": { + "main": { + "head": "refs/heads/main", + "remote": "origin" + } + } +} \ No newline at end of file diff --git a/integration-tests/bats/archive.bats b/integration-tests/bats/archive.bats index 44e28c9c59..3ed6ff0cb6 100755 --- a/integration-tests/bats/archive.bats +++ 
b/integration-tests/bats/archive.bats @@ -151,8 +151,7 @@ mutations_and_gc_statement() { [[ "$remotesrv_pid" -gt 0 ]] || false cd ../cloned - run dolt clone http://localhost:$port/test-org/test-repo repo1 - [ "$status" -eq 0 ] + dolt clone http://localhost:$port/test-org/test-repo repo1 cd repo1 # Verify we can read data @@ -310,4 +309,190 @@ mutations_and_gc_statement() { dolt fsck +} + +@test "archive: large push remote without archive default produces no new archives" { + unset DOLT_ARCHIVE_PULL_STREAMER + + mkdir -p remote/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt + cd remote + + port=$( definePORT ) + remotesrv --http-port $port --grpc-port $port --repo-mode & + remotesrv_pid=$! + [[ "$remotesrv_pid" -gt 0 ]] || false + + cd .. + mkdir -p clone/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/large_clone/* clone/.dolt + cd clone + dolt remote add r1 http://localhost:$port/test-org/test-repo + dolt push r1 HEAD:main + + cd ../remote + run dolt admin storage + [ "$status" -eq 0 ] + + ## This output indicates that the new content pushed to the remote all landed as snappy chunks + ## in a classic table file. multiline regex - no quotes - to match this text: + # Table File Metadata: + # Snappy Chunk Count: 1609 + [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]1609[[:space:]] ]] || false +} + +@test "archive: small push remote without archive default produces no new archives" { + unset DOLT_ARCHIVE_PULL_STREAMER + + mkdir -p remote/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt + cd remote + + port=$( definePORT ) + remotesrv --http-port $port --grpc-port $port --repo-mode & + remotesrv_pid=$! + [[ "$remotesrv_pid" -gt 0 ]] || false + + cd ..
+ mkdir -p clone/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/small_clone/* clone/.dolt + cd clone + dolt remote add r1 http://localhost:$port/test-org/test-repo + dolt push r1 HEAD:main + + cd ../remote + run dolt admin storage + [ "$status" -eq 0 ] + + ## This output indicates that the new content pushed to the remote all landed as snappy chunks + ## in a classic table file. multiline regex - no quotes - to match this text: + # Table File Metadata: + # Snappy Chunk Count: 9 + [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]9[[:space:]] ]] || false +} + +@test "archive: large push remote with archive default produces new archive with converted snappy chunks" { + export DOLT_ARCHIVE_PULL_STREAMER=1 + + mkdir -p remote/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt + cd remote + + port=$( definePORT ) + remotesrv --http-port $port --grpc-port $port --repo-mode & + remotesrv_pid=$! + [[ "$remotesrv_pid" -gt 0 ]] || false + + cd .. + mkdir -p clone/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/large_clone/* clone/.dolt + cd clone + dolt remote add r1 http://localhost:$port/test-org/test-repo + dolt push r1 HEAD:main + + cd ../remote + run dolt admin storage + [ "$status" -eq 0 ] + + ## This output indicates that the new content pushed to the remote all landed as zStd chunks + ## in an archive file.
multiline regex - no quotes - to match this text: + # Archive Metadata: + # Format Version: 2 + # Snappy Chunk Count: 0 (bytes: 0) + # ZStd Chunk Count: 1609 + [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]2[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]0.*ZStd[[:space:]]Chunk[[:space:]]Count:[[:space:]]1609[[:space:]] ]] || false +} + +@test "archive: small push remote with archive default produces archive with snappy chunks" { + export DOLT_ARCHIVE_PULL_STREAMER=1 + + mkdir -p remote/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt + cd remote + + port=$( definePORT ) + remotesrv --http-port $port --grpc-port $port --repo-mode & + remotesrv_pid=$! + [[ "$remotesrv_pid" -gt 0 ]] || false + + cd .. + mkdir -p clone/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/small_clone/* clone/.dolt + cd clone + dolt remote add r1 http://localhost:$port/test-org/test-repo + dolt push r1 HEAD:main + + cd ../remote + run dolt admin storage + [ "$status" -eq 0 ] + + ## This output indicates that the new content pushed to the remote all landed as snappy chunks + ## in an archive file. multiline regex - no quotes - to match this text: + # Archive Metadata: + # Format Version: 2 + # Snappy Chunk Count: 9 + [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]2[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]9[[:space:]] ]] || false +} + +@test "archive: fetch into empty database with archive default" { + mkdir -p remote/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt + cd remote + + port=$( definePORT ) + remotesrv --http-port $port --grpc-port $port --repo-mode & + remotesrv_pid=$!
+ [[ "$remotesrv_pid" -gt 0 ]] || false + + cd ../ + dolt remote add r1 http://localhost:$port/test-org/test-repo + DOLT_ARCHIVE_PULL_STREAMER=1 dolt fetch r1 + + run dolt admin storage + [ "$status" -eq 0 ] + + ## This output indicates that the new content was fetched from the remote into an archive file. Note that since + ## the remote is all archive, the chunks end up as zStd as well. + ## multiline regex - no quotes - to match this text: + # Archive Metadata: + # Format Version: 2 + # Snappy Chunk Count: 0 (bytes: 0) + # ZStd Chunk Count: 260 + [[ $output =~ Archive[[:space:]]Metadata:[[:space:]]*Format[[:space:]]Version:[[:space:]]2[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]0.*ZStd[[:space:]]Chunk[[:space:]]Count:[[:space:]]260[[:space:]] ]] || false + + dolt fsck +} + +@test "archive: fetch into empty database with archive disabled" { + unset DOLT_ARCHIVE_PULL_STREAMER + + mkdir -p remote/.dolt + cp -R "$BATS_TEST_DIRNAME"/archive-test-repos/base/* remote/.dolt + cd remote + + port=$( definePORT ) + remotesrv --http-port $port --grpc-port $port --repo-mode & + remotesrv_pid=$! + [[ "$remotesrv_pid" -gt 0 ]] || false + + cd ../ + dolt remote add r1 http://localhost:$port/test-org/test-repo + dolt fetch r1 + + run dolt admin storage + [ "$status" -eq 0 ] + + echo "------------------" + echo "$output" + echo "------------------" + + + ## This output indicates that the new content was fetched from the remote into a table file. Note that since + ## the remote is all archive, the chunks are translated into the snappy format + ## multiline regex - no quotes - to match this text: + # Table File Metadata: + # Snappy Chunk Count: 260 + [[ $output =~ Table[[:space:]]File[[:space:]]Metadata:[[:space:]]*Snappy[[:space:]]Chunk[[:space:]]Count:[[:space:]]260[[:space:]] ]] || false + + dolt fsck } \ No newline at end of file