Add the archive --purge flag to clean up table files

This commit is contained in:
Neil Macneale IV
2025-04-08 15:28:57 -07:00
parent 4f8c0213a1
commit 8d2ffa595f
3 changed files with 32 additions and 6 deletions

View File

@@ -54,6 +54,7 @@ table files into archives. Currently, for safety, table files are left in place.
const groupChunksFlag = "group-chunks"
const revertFlag = "revert"
const purgeFlag = "purge"
// Description returns a description of the command
func (cmd ArchiveCmd) Description() string {
@@ -71,9 +72,7 @@ func (cmd ArchiveCmd) ArgParser() *argparser.ArgParser {
ap := argparser.NewArgParserWithMaxArgs(cmd.Name(), 0)
ap.SupportsFlag(groupChunksFlag, "", "Attempt to group chunks. This will produce smaller archives, but can take much longer to build.")
ap.SupportsFlag(revertFlag, "", "Return to unpurged table files, or rebuild table files from archives")
/* TODO: Implement these flags
ap.SupportsFlag("purge", "", "remove table files after archiving")
*/
ap.SupportsFlag(purgeFlag, "", "remove table files after archiving")
return ap
}
func (cmd ArchiveCmd) Hidden() bool {
@@ -137,7 +136,9 @@ func (cmd ArchiveCmd) Exec(ctx context.Context, commandStr string, args []string
}
}
err = nbs.BuildArchive(ctx, cs, &groupings, progress)
purge := apr.Contains(purgeFlag)
err = nbs.BuildArchive(ctx, cs, &groupings, purge, progress)
if err != nil {
cli.PrintErrln(err)
return 1

View File

@@ -114,7 +114,7 @@ func UnArchive(ctx context.Context, cs chunks.ChunkStore, smd StorageMetadata, p
return nil
}
func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRelations, progress chan interface{}) (err error) {
func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRelations, purge bool, progress chan interface{}) (err error) {
// Currently, we don't have any stats to report. Required for calls to the lower layers tho.
var stats Stats
@@ -165,12 +165,15 @@ func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRel
return fmt.Errorf("No tables found to archive. Run 'dolt gc' first")
}
cleanup := make([]hash.Hash, 0, len(swapMap))
//NM4 TODO: This code path must only be run on an offline database. We should add a check for that.
specs, err := gs.oldGen.tables.toSpecs()
newSpecs := make([]tableSpec, 0, len(specs))
for _, spec := range specs {
if newSpec, exists := swapMap[spec.name]; exists {
newSpecs = append(newSpecs, tableSpec{newSpec, spec.chunkCount})
cleanup = append(cleanup, spec.name)
} else {
newSpecs = append(newSpecs, spec)
}
@@ -179,6 +182,17 @@ func BuildArchive(ctx context.Context, cs chunks.ChunkStore, dagGroups *ChunkRel
if err != nil {
return err
}
if purge && len(cleanup) > 0 {
for _, h := range cleanup {
tf := filepath.Join(outPath, h.String())
err = os.Remove(tf)
if err != nil {
return err
}
}
}
} else {
return errors.New("Modern DB Expected")
}

View File

@@ -128,7 +128,6 @@ mutations_and_gc_statement() {
@test "archive: archive --revert (rebuild)" {
dolt sql -q "$(mutations_and_gc_statement)"
dolt archive
dolt gc # This will delete the unused table files.
dolt archive --revert
# dolt log --stat will load every single chunk. 66 manually verified.
@@ -136,6 +135,18 @@ mutations_and_gc_statement() {
[ "$commits" -eq "66" ]
}
@test "archive: archive --purge" {
dolt sql -q "$(mutations_and_gc_statement)"
tablefile=$(find ./.dolt/noms/oldgen -type f -regex '.*/[a-v0-9]\{32\}')
[ -e "$tablefile" ] # extreme paranoia. make sure it exists before.
dolt archive --purge
# Ensure the table file is gone.
[ ! -e "$tablefile" ]
}
@test "archive: can clone archived repository" {
mkdir -p remote/.dolt
mkdir cloned