mirror of https://github.com/dolthub/dolt.git
Make metadata code aware of newgen archives
@@ -15,6 +15,7 @@
 package nbs
 
 import (
+    "errors"
     "os"
     "path/filepath"
 
@@ -22,34 +23,38 @@ import (
     "github.com/dolthub/dolt/go/store/hash"
 )
 
-type StorageType int
-
-const (
-    Journal StorageType = iota
-    TableFileNewGen
-    TableFileOldGen
-    Archive
-)
-
 type ArchiveMetadata struct {
     originalTableFileId string
 }
 
+type TableFileFormat int
+
+const (
+    TypeNoms TableFileFormat = iota
+    TypeArchive
+)
+
 type StorageArtifact struct {
-    id          hash.Hash
-    path        string
-    storageType StorageType
-    arcMetadata *ArchiveMetadata
+    // ID of the storage artifact. This is used in the manifest to identify the artifact, but it is not the file name,
+    // as archives have a suffix.
+    id hash.Hash
+    // path to the storage artifact.
+    path string
+    // storageType is the type of the storage artifact.
+    storageType TableFileFormat
+    // arcMetadata is additional metadata for archive files. It is only set for storageType == TypeArchive.
+    arcMetadata *ArchiveMetadata
 }
 
 type StorageMetadata struct {
+    // root is the path to storage. Specifically, it contains a .dolt directory.
     root      string
     artifacts []StorageArtifact
 }
 
 func (sm *StorageMetadata) ArchiveFilesPresent() bool {
     for _, artifact := range sm.artifacts {
-        if artifact.storageType == Archive {
+        if artifact.storageType == TypeArchive {
             return true
         }
     }
@@ -60,7 +65,7 @@ func (sm *StorageMetadata) ArchiveFilesPresent() bool {
 func (sm *StorageMetadata) RevertMap() map[hash.Hash]hash.Hash {
     revertMap := make(map[hash.Hash]hash.Hash)
     for _, artifact := range sm.artifacts {
-        if artifact.storageType == Archive {
+        if artifact.storageType == TypeArchive {
             md := artifact.arcMetadata
             revertMap[artifact.id] = hash.Parse(md.originalTableFileId)
         }
@@ -68,6 +73,8 @@ func (sm *StorageMetadata) RevertMap() map[hash.Hash]hash.Hash {
     return revertMap
 }
 
+// oldGenTableExists returns true if the table file exists in the oldgen directory. This is a file system check for
+// a table file we have no record of, but may be useful in the process of reverting an archive operation.
 func (sm *StorageMetadata) oldGenTableExists(id hash.Hash) (bool, error) {
     path := filepath.Join(sm.root, ".dolt", "noms", "oldgen", id.String())
     _, err := os.Stat(path)
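Before the GetStorageMetadata hunks below, a quick illustration of what RevertMap yields. This is a minimal in-package sketch, not part of the commit: the hash strings, the path, and the test name are invented, and it would have to live in a _test.go file inside package nbs because it touches unexported fields.

package nbs

import (
    "testing"

    "github.com/dolthub/dolt/go/store/hash"
)

// TestRevertMapSketch is illustrative only; all values below are made up.
func TestRevertMapSketch(t *testing.T) {
    originalID := "0123456789abcdefghijklmnopqrstuv" // hypothetical original table file id
    archiveID := hash.Parse("fedcba9876543210fedcba9876543210")

    sm := StorageMetadata{
        root: "/tmp/repo",
        artifacts: []StorageArtifact{{
            id:          archiveID,
            path:        "/tmp/repo/.dolt/noms/oldgen/0123456789abcdefghijklmnopqrstuv.darc",
            storageType: TypeArchive,
            arcMetadata: &ArchiveMetadata{originalTableFileId: originalID},
        }},
    }

    // Each archive id maps back to the table file it was built from.
    revert := sm.RevertMap()
    if revert[archiveID] != hash.Parse(originalID) {
        t.Fatal("expected archive id to map back to its original table file id")
    }
}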
@@ -88,20 +95,9 @@ func GetStorageMetadata(path string) (StorageMetadata, error) {
         return StorageMetadata{}, err
     }
 
-    // TODO: new gen and journal information in storage metadata will be useful in the future.
-    // newGen := filepath.Join(path, ".dolt", "noms")
-    // newgenManifest := filepath.Join(newGen, "manifest")
-
-    oldgen := filepath.Join(path, ".dolt", "noms", "oldgen")
-    oldgenManifest := filepath.Join(oldgen, "manifest")
-
-    // If there is no oldgen manifest, then GC has never been run. Which is fine. We just don't have any oldgen.
-    if _, err := os.Stat(oldgenManifest); err != nil {
-        return StorageMetadata{}, nil
-    }
-
-    // create an io.Reader for the manifest file
-    manifestReader, err := os.Open(oldgenManifest)
+    newGen := filepath.Join(path, ".dolt", "noms")
+    newgenManifest := filepath.Join(newGen, "manifest")
+    manifestReader, err := os.Open(newgenManifest)
     if err != nil {
         return StorageMetadata{}, err
     }
@@ -116,53 +112,90 @@ func GetStorageMetadata(path string) (StorageMetadata, error) {
     // for each table in the manifest, get the table spec
     for i := 0; i < manifest.NumTableSpecs(); i++ {
         tableSpecInfo := manifest.GetTableSpecInfo(i)
 
-        // If the oldgen/name exists, it's not an archive. If it exists with a .darc suffix, then it's an archive.
-        tfName := tableSpecInfo.GetName()
-        fullPath := filepath.Join(oldgen, tfName)
-        _, err := os.Stat(fullPath)
-        if err == nil {
-            // exists. Not an archive.
-            artifacts = append(artifacts, StorageArtifact{
-                id:          hash.Parse(tfName),
-                path:        fullPath,
-                storageType: TableFileOldGen,
-            })
-        } else if os.IsNotExist(err) {
-            arcName := tfName + ".darc"
-            arcPath := filepath.Join(oldgen, arcName)
-            _, err := os.Stat(arcPath)
-            if err == nil {
-                // reader for the path. State. call
-                reader, fileSize, err := openReader(arcPath)
-                if err != nil {
-                    return StorageMetadata{}, err
-                }
-
-                arcMetadata, err := newArchiveMetadata(reader, fileSize)
-                if err != nil {
-                    return StorageMetadata{}, err
-                }
-
-                artifacts = append(artifacts, StorageArtifact{
-                    id:          hash.Parse(tfName),
-                    path:        arcPath,
-                    storageType: Archive,
-                    arcMetadata: arcMetadata,
-                })
-            } else {
-                // any error is bad here. If the files don't exist, then the manifest is no good.
-                return StorageMetadata{}, err
-            }
-        } else {
-            // some other error.
+        artifact, err := buildArtifact(tableSpecInfo, newGen)
+        if err != nil {
             return StorageMetadata{}, err
         }
+        artifacts = append(artifacts, artifact)
+    }
+
+    oldgen := filepath.Join(newGen, "oldgen")
+    oldgenManifest := filepath.Join(oldgen, "manifest")
+
+    // If there is no oldgen manifest, then GC has never been run. Which is fine. We just don't have any oldgen.
+    if _, err := os.Stat(oldgenManifest); err != nil {
+        return StorageMetadata{path, artifacts}, nil
+    }
+
+    manifestReader, err = os.Open(oldgenManifest)
+    if err != nil {
+        return StorageMetadata{}, err
+    }
+    manifest, err = ParseManifest(manifestReader)
+    if err != nil {
+        return StorageMetadata{}, err
+    }
+
+    for i := 0; i < manifest.NumTableSpecs(); i++ {
+        tableSpecInfo := manifest.GetTableSpecInfo(i)
+
+        artifact, err := buildArtifact(tableSpecInfo, oldgen)
+        if err != nil {
+            return StorageMetadata{}, err
+        }
+        artifacts = append(artifacts, artifact)
     }
 
     return StorageMetadata{path, artifacts}, nil
 }
 
+func buildArtifact(info TableSpecInfo, genPath string) (StorageArtifact, error) {
+    tfName := info.GetName()
+
+    // This code is going to be removed as soon as backup supports archives.
+    archive := false
+    fullPath := filepath.Join(genPath, tfName)
+
+    _, err := os.Stat(fullPath)
+    if err != nil {
+        if errors.Is(err, os.ErrNotExist) {
+            fullPath = filepath.Join(genPath, tfName+ArchiveFileSuffix)
+        } else {
+            return StorageArtifact{}, err
+        }
+        _, err = os.Stat(fullPath)
+        if err != nil {
+            return StorageArtifact{}, err
+        }
+        archive = true
+    }
+
+    if !archive {
+        return StorageArtifact{
+            id:          hash.Parse(tfName),
+            path:        fullPath,
+            storageType: TypeNoms,
+        }, nil
+    } else {
+        reader, fileSize, err := openReader(fullPath)
+        if err != nil {
+            return StorageArtifact{}, err
+        }
+
+        arcMetadata, err := newArchiveMetadata(reader, fileSize)
+        if err != nil {
+            return StorageArtifact{}, err
+        }
+
+        return StorageArtifact{
+            id:          hash.Parse(tfName),
+            path:        fullPath,
+            storageType: TypeArchive,
+            arcMetadata: arcMetadata,
+        }, nil
+    }
+}
+
 func validateDir(path string) error {
     info, err := os.Stat(path)
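That closes out the Go changes. As a rough sketch of how the exported pieces fit together after this commit, here is a hypothetical external caller; the repository path is a placeholder and this wrapper is not part of dolt:

package main

import (
    "fmt"
    "log"

    "github.com/dolthub/dolt/go/store/nbs"
)

func main() {
    // Hypothetical repository root; GetStorageMetadata expects it to contain a .dolt directory.
    md, err := nbs.GetStorageMetadata("/path/to/repo")
    if err != nil {
        log.Fatal(err)
    }

    // With this commit, archives are detected in newgen as well as oldgen.
    if md.ArchiveFilesPresent() {
        // RevertMap pairs each archive id with the table file it was built from,
        // which is what reverting an archive operation would need.
        for archiveID, originalID := range md.RevertMap() {
            fmt.Printf("archive %s was built from table file %s\n", archiveID, originalID)
        }
    }
}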
@@ -183,6 +183,16 @@ mutations_and_gc_statement() {
     run dolt sql -q 'select sum(i) from tbl;'
     [[ "$status" -eq 0 ]] || false
     [[ "$output" =~ "138075" ]] || false # i = 1 - 525, sum is 138075
+
+    ## Temporary check. We want to ensure that backup will give an error, even when
+    ## there are archives in newgen.
+    mkdir ../backup
+    dolt backup add bac1 file://../backup
+
+    run dolt backup sync bac1
+    [ "$status" -eq 1 ]
+    [[ "$output" =~ "error: archive files present" ]] || false
 }
 
 @test "archive: can clone repository with mixed types" {
@@ -235,7 +245,7 @@ mutations_and_gc_statement() {
     dolt fetch
 
     ## update the remote repo directly. Need to run the archive command when the server is stopped.
-    ## This will result in achived files on the remote, which we will need to read chunks from when we fetch.
+    ## This will result in archived files on the remote, which we will need to read chunks from when we fetch.
     cd ../../remote
     kill $remotesrv_pid
     wait $remotesrv_pid || :
@@ -248,7 +258,6 @@ mutations_and_gc_statement() {
     [[ "$remotesrv_pid" -gt 0 ]] || false
 
     cd ../cloned/repo1
-
     run dolt fetch
     [ "$status" -eq 0 ]
 
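The "error: archive files present" assertion above presumably corresponds to a guard that consults storage metadata before syncing a backup. Below is a minimal sketch of that kind of check, assuming the GetStorageMetadata API from the Go changes; this is not dolt's actual backup code, and the package and function names are invented.

package backupguard // hypothetical package, for illustration only

import (
    "errors"

    "github.com/dolthub/dolt/go/store/nbs"
)

// refuseBackupIfArchived returns the error the bats test expects to see when
// archive files are present in the repository being backed up.
func refuseBackupIfArchived(repoRoot string) error {
    md, err := nbs.GetStorageMetadata(repoRoot)
    if err != nil {
        return err
    }
    if md.ArchiveFilesPresent() {
        return errors.New("error: archive files present")
    }
    return nil
}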