diff --git a/go/cmd/dolt/commands/archive.go b/go/cmd/dolt/commands/archive.go
index e4a525fc4c..11cd70183a 100644
--- a/go/cmd/dolt/commands/archive.go
+++ b/go/cmd/dolt/commands/archive.go
@@ -54,6 +54,7 @@ table files into archives. Currently, for safety, table files are left in place.
 
 const groupChunksFlag = "group-chunks"
 const revertFlag = "revert"
+const purgeFlag = "purge"
 
 // Description returns a description of the command
 func (cmd ArchiveCmd) Description() string {
@@ -71,9 +72,7 @@ func (cmd ArchiveCmd) ArgParser() *argparser.ArgParser {
 	ap := argparser.NewArgParserWithMaxArgs(cmd.Name(), 0)
 	ap.SupportsFlag(groupChunksFlag, "", "Attempt to group chunks. This will produce smaller archives, but can take much longer to build.")
 	ap.SupportsFlag(revertFlag, "", "Return to unpurged table files, or rebuilt table files from archives")
-	/* TODO: Implement these flags
-	ap.SupportsFlag("purge", "", "remove table files after archiving")
-	*/
+	ap.SupportsFlag(purgeFlag, "", "remove table files after archiving")
 	return ap
 }
 func (cmd ArchiveCmd) Hidden() bool {
@@ -137,7 +136,9 @@ func (cmd ArchiveCmd) Exec(ctx context.Context, commandStr string, args []string
 		}
 	}
 
-	err = nbs.BuildArchive(ctx, cs, &groupings, progress)
+	purge := apr.Contains(purgeFlag)
+
+	err = nbs.BuildArchive(ctx, cs, &groupings, purge, progress)
 	if err != nil {
 		cli.PrintErrln(err)
 		return 1
diff --git a/go/store/nbs/archive_build.go b/go/store/nbs/archive_build.go
index 675110c918..a784e68e2a 100644
--- a/go/store/nbs/archive_build.go
+++ b/go/store/nbs/archive_build.go
@@ -114,7 +114,7 @@ func UnArchive(ctx context.Context, cs chunks.ChunkStore, smd StorageMetadata, p
 	return nil
 }
 
-func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRelations, progress chan interface{}) (err error) {
+func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRelations, purge bool, progress chan interface{}) (err error) {
 	// Currently, we don't have any stats to report. Required for calls to the lower layers tho.
 	var stats Stats
 
@@ -165,12 +165,15 @@ func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRel
 			return fmt.Errorf("No tables found to archive. Run 'dolt gc' first")
 		}
 
+		cleanup := make([]hash.Hash, 0, len(swapMap))
+
 		//NM4 TODO: This code path must only be run on an offline database. We should add a check for that.
 		specs, err := gs.oldGen.tables.toSpecs()
 		newSpecs := make([]tableSpec, 0, len(specs))
 		for _, spec := range specs {
 			if newSpec, exists := swapMap[spec.name]; exists {
 				newSpecs = append(newSpecs, tableSpec{newSpec, spec.chunkCount})
+				cleanup = append(cleanup, spec.name)
 			} else {
 				newSpecs = append(newSpecs, spec)
 			}
@@ -179,6 +182,17 @@ func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRel
 		if err != nil {
 			return err
 		}
+
+		if purge && len(cleanup) > 0 {
+			for _, h := range cleanup {
+				tf := filepath.Join(outPath, h.String())
+				err = os.Remove(tf)
+				if err != nil {
+					return err
+				}
+			}
+		}
+
 	} else {
 		return errors.New("Modern DB Expected")
 	}
diff --git a/integration-tests/bats/archive.bats b/integration-tests/bats/archive.bats
index 3ed6ff0cb6..e898f6973a 100755
--- a/integration-tests/bats/archive.bats
+++ b/integration-tests/bats/archive.bats
@@ -128,7 +128,6 @@ mutations_and_gc_statement() {
 @test "archive: archive --revert (rebuild)" {
   dolt sql -q "$(mutations_and_gc_statement)"
   dolt archive
-  dolt gc # This will delete the unused table files.
   dolt archive --revert
 
   # dolt log --stat will load every single chunk. 66 manually verified.
@@ -136,6 +135,18 @@ mutations_and_gc_statement() {
   [ "$commits" -eq "66" ]
 }
 
+@test "archive: archive --purge" {
+  dolt sql -q "$(mutations_and_gc_statement)"
+
+  tablefile=$(find ./.dolt/noms/oldgen -type f -regex '.*/[a-v0-9]\{32\}')
+
+  [ -e "$tablefile" ] # extreme paranoia. make sure it exists before.
+  dolt archive --purge
+  # Ensure the table file is gone.
+  [ ! -e "$tablefile" ]
+}
+
+
 @test "archive: can clone archived repository" {
   mkdir -p remote/.dolt
   mkdir cloned