From d655c8140a6ef8ab2a9aae3f0ba905afc435ed48 Mon Sep 17 00:00:00 2001 From: jkoberg Date: Fri, 31 May 2024 17:45:17 +0200 Subject: [PATCH] feat(ocis): finally unit tests for backup consistency Signed-off-by: jkoberg --- ocis/pkg/backup/backup.go | 48 ++-------- ocis/pkg/backup/backup_test.go | 169 +++++++++++++++++++++++++++++++++ ocis/pkg/backup/provider.go | 116 +++++++++++++++------- 3 files changed, 257 insertions(+), 76 deletions(-) create mode 100644 ocis/pkg/backup/backup_test.go diff --git a/ocis/pkg/backup/backup.go b/ocis/pkg/backup/backup.go index 8ca84fbbdb..37a57c71e8 100644 --- a/ocis/pkg/backup/backup.go +++ b/ocis/pkg/backup/backup.go @@ -3,10 +3,8 @@ package backup import ( "fmt" - "io/fs" "os" "regexp" - "strings" ) // Inconsistency describes the type of inconsistency @@ -65,19 +63,19 @@ func NewConsistency() *Consistency { func CheckProviderConsistency(storagepath string, lbs ListBlobstore) error { fsys := os.DirFS(storagepath) - nodes, links, blobs, quit, err := NewProvider(fsys, storagepath, lbs).ProduceData() - if err != nil { + p := NewProvider(fsys, storagepath, lbs) + if err := p.ProduceData(); err != nil { return err } c := NewConsistency() - c.GatherData(nodes, links, blobs, quit) + c.GatherData(p.Nodes, p.Links, p.Blobs, p.Quit) return c.PrintResults(storagepath) } // GatherData gathers and evaluates data produced by the DataProvider -func (c *Consistency) GatherData(nodes chan NodeData, links chan LinkData, blobs chan BlobData, quit chan struct{}) { +func (c *Consistency) GatherData(nodes <-chan NodeData, links <-chan LinkData, blobs <-chan BlobData, quit <-chan struct{}) { c.gatherData(nodes, links, blobs, quit) for n := range c.Nodes { @@ -96,7 +94,7 @@ func (c *Consistency) GatherData(nodes chan NodeData, links chan LinkData, blobs } } -func (c *Consistency) gatherData(nodes chan NodeData, links chan LinkData, blobs chan BlobData, quit chan struct{}) { +func (c *Consistency) gatherData(nodes <-chan NodeData, links <-chan LinkData, blobs <-chan BlobData, quit <-chan struct{}) { for { select { case n := <-nodes: @@ -107,8 +105,7 @@ func (c *Consistency) gatherData(nodes chan NodeData, links chan LinkData, blobs // is it linked? if _, ok := c.LinkedNodes[n.NodePath]; ok { deleteInconsistency(c.LinkedNodes, n.NodePath) - deleteInconsistency(c.Nodes, n.NodePath) - } else if requiresSymlink(n.NodePath) { + } else if n.RequiresSymlink { c.Nodes[n.NodePath] = c.Nodes[n.NodePath] } // does it have a blob? 
@@ -176,41 +173,8 @@ func (c *Consistency) PrintResults(discpath string) error { } -func requiresSymlink(path string) bool { - spaceID, nodeID := getIDsFromPath(path) - if nodeID != "" && spaceID != "" && (spaceID == nodeID || _versionRegex.MatchString(nodeID)) { - return false - } - - return true -} - -func (c *DataProvider) filesExist(path string) bool { - check := func(p string) bool { - _, err := fs.Stat(c.fsys, p) - return err == nil - } - return check(path) && check(path+".mpk") -} - func deleteInconsistency(incs map[string][]Inconsistency, path string) { if len(incs[path]) == 0 { delete(incs, path) } } - -func getIDsFromPath(path string) (string, string) { - rawIDs := strings.Split(path, "/nodes/") - if len(rawIDs) != 2 { - return "", "" - } - - s := strings.Split(rawIDs[0], "/spaces/") - if len(s) != 2 { - return "", "" - } - - spaceID := strings.Replace(s[1], "/", "", -1) - nodeID := strings.Replace(rawIDs[1], "/", "", -1) - return spaceID, nodeID -} diff --git a/ocis/pkg/backup/backup_test.go b/ocis/pkg/backup/backup_test.go new file mode 100644 index 0000000000..6aff566f37 --- /dev/null +++ b/ocis/pkg/backup/backup_test.go @@ -0,0 +1,169 @@ +package backup_test + +import ( + "testing" + + "github.com/owncloud/ocis/v2/ocis/pkg/backup" + "github.com/test-go/testify/require" +) + +func TestGatherData(t *testing.T) { + testcases := []struct { + Name string + Events []interface{} + Expected *backup.Consistency + }{ + { + Name: "no symlinks - no blobs", + Events: []interface{}{ + nodeData("nodepath", "blobpath", true), + }, + Expected: consistency(func(c *backup.Consistency) { + node(c, "nodepath", backup.InconsistencySymlinkMissing) + blobReference(c, "blobpath", backup.InconsistencyBlobMissing) + }), + }, + { + Name: "symlink not required - no blobs", + Events: []interface{}{ + nodeData("nodepath", "blobpath", false), + }, + Expected: consistency(func(c *backup.Consistency) { + blobReference(c, "blobpath", backup.InconsistencyBlobMissing) + }), + }, + { + Name: "no inconsistencies", + Events: []interface{}{ + nodeData("nodepath", "blobpath", true), + linkData("linkpath", "nodepath"), + blobData("blobpath"), + }, + Expected: consistency(func(c *backup.Consistency) { + }), + }, + { + Name: "orphaned blob", + Events: []interface{}{ + nodeData("nodepath", "blobpath", true), + linkData("linkpath", "nodepath"), + blobData("blobpath"), + blobData("anotherpath"), + }, + Expected: consistency(func(c *backup.Consistency) { + blob(c, "anotherpath", backup.InconsistencyBlobOrphaned) + }), + }, + { + Name: "missing node", + Events: []interface{}{ + linkData("linkpath", "nodepath"), + blobData("blobpath"), + }, + Expected: consistency(func(c *backup.Consistency) { + linkedNode(c, "nodepath", backup.InconsistencyNodeMissing) + blob(c, "blobpath", backup.InconsistencyBlobOrphaned) + }), + }, + { + Name: "corrupt metadata", + Events: []interface{}{ + nodeData("nodepath", "blobpath", true, backup.InconsistencyMetadataMissing), + linkData("linkpath", "nodepath"), + blobData("blobpath"), + }, + Expected: consistency(func(c *backup.Consistency) { + node(c, "nodepath", backup.InconsistencyMetadataMissing) + }), + }, + { + Name: "corrupt metadata, no blob", + Events: []interface{}{ + nodeData("nodepath", "blobpath", true, backup.InconsistencyMetadataMissing), + linkData("linkpath", "nodepath"), + }, + Expected: consistency(func(c *backup.Consistency) { + node(c, "nodepath", backup.InconsistencyMetadataMissing) + blobReference(c, "blobpath", backup.InconsistencyBlobMissing) + }), + }, + } + + for _, tc := 
range testcases { + nodes := make(chan backup.NodeData) + links := make(chan backup.LinkData) + blobs := make(chan backup.BlobData) + quit := make(chan struct{}) + + go func() { + for _, ev := range tc.Events { + switch e := ev.(type) { + case backup.NodeData: + nodes <- e + case backup.LinkData: + links <- e + case backup.BlobData: + blobs <- e + } + } + quit <- struct{}{} + close(nodes) + close(links) + close(blobs) + close(quit) + }() + + c := backup.NewConsistency() + c.GatherData(nodes, links, blobs, quit) + + require.Equal(t, tc.Expected.Nodes, c.Nodes) + require.Equal(t, tc.Expected.LinkedNodes, c.LinkedNodes) + require.Equal(t, tc.Expected.Blobs, c.Blobs) + require.Equal(t, tc.Expected.BlobReferences, c.BlobReferences) + } + +} + +func nodeData(nodePath, blobPath string, requiresSymlink bool, incs ...backup.Inconsistency) backup.NodeData { + return backup.NodeData{ + NodePath: nodePath, + BlobPath: blobPath, + RequiresSymlink: requiresSymlink, + Inconsistencies: incs, + } +} + +func linkData(linkPath, nodePath string) backup.LinkData { + return backup.LinkData{ + LinkPath: linkPath, + NodePath: nodePath, + } +} + +func blobData(blobPath string) backup.BlobData { + return backup.BlobData{ + BlobPath: blobPath, + } +} + +func consistency(f func(*backup.Consistency)) *backup.Consistency { + c := backup.NewConsistency() + f(c) + return c +} + +func node(c *backup.Consistency, path string, inc ...backup.Inconsistency) { + c.Nodes[path] = inc +} + +func linkedNode(c *backup.Consistency, path string, inc ...backup.Inconsistency) { + c.LinkedNodes[path] = inc +} + +func blob(c *backup.Consistency, path string, inc ...backup.Inconsistency) { + c.Blobs[path] = inc +} + +func blobReference(c *backup.Consistency, path string, inc ...backup.Inconsistency) { + c.BlobReferences[path] = inc +} diff --git a/ocis/pkg/backup/provider.go b/ocis/pkg/backup/provider.go index 917be0ad27..7f3e98529a 100644 --- a/ocis/pkg/backup/provider.go +++ b/ocis/pkg/backup/provider.go @@ -6,6 +6,7 @@ import ( "io/fs" "os" "path/filepath" + "strings" "sync" "github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/node" @@ -20,6 +21,11 @@ type ListBlobstore interface { // DataProvider provides data for the consistency check type DataProvider struct { + Nodes chan NodeData + Links chan LinkData + Blobs chan BlobData + Quit chan struct{} + fsys fs.FS discpath string lbs ListBlobstore @@ -29,6 +35,7 @@ type DataProvider struct { type NodeData struct { NodePath string BlobPath string + RequiresSymlink bool Inconsistencies []Inconsistency } @@ -46,6 +53,11 @@ type BlobData struct { // NewProvider creates a new DataProvider object func NewProvider(fsys fs.FS, discpath string, lbs ListBlobstore) *DataProvider { return &DataProvider{ + Nodes: make(chan NodeData), + Links: make(chan LinkData), + Blobs: make(chan BlobData), + Quit: make(chan struct{}), + fsys: fsys, discpath: discpath, lbs: lbs, @@ -53,65 +65,62 @@ func NewProvider(fsys fs.FS, discpath string, lbs ListBlobstore) *DataProvider { } // ProduceData produces data for the consistency check -func (c *DataProvider) ProduceData() (chan NodeData, chan LinkData, chan BlobData, chan struct{}, error) { - dirs, err := fs.Glob(c.fsys, "spaces/*/*/nodes/*/*/*/*") +// Spawns 4 go-routines at the moment. If needed, this can be optimized. 
+func (dp *DataProvider) ProduceData() error { + dirs, err := fs.Glob(dp.fsys, "spaces/*/*/nodes/*/*/*/*") if err != nil { - return nil, nil, nil, nil, err + return err } if len(dirs) == 0 { - return nil, nil, nil, nil, errors.New("no backup found. Double check storage path") + return errors.New("no backup found. Double check storage path") } - nodes := make(chan NodeData) - links := make(chan LinkData) - blobs := make(chan BlobData) - quit := make(chan struct{}) wg := sync.WaitGroup{} // crawl spaces wg.Add(1) go func() { for _, d := range dirs { - entries, err := fs.ReadDir(c.fsys, d) + entries, err := fs.ReadDir(dp.fsys, d) if err != nil { fmt.Println("error reading dir", err) continue } if len(entries) == 0 { - fmt.Println("empty dir", filepath.Join(c.discpath, d)) + fmt.Println("empty dir", filepath.Join(dp.discpath, d)) continue } for _, e := range entries { switch { case e.IsDir(): - ls, err := fs.ReadDir(c.fsys, filepath.Join(d, e.Name())) + ls, err := fs.ReadDir(dp.fsys, filepath.Join(d, e.Name())) if err != nil { fmt.Println("error reading dir", err) continue } for _, l := range ls { - linkpath := filepath.Join(c.discpath, d, e.Name(), l.Name()) + linkpath := filepath.Join(dp.discpath, d, e.Name(), l.Name()) r, _ := os.Readlink(linkpath) - nodePath := filepath.Join(c.discpath, d, e.Name(), r) - links <- LinkData{LinkPath: linkpath, NodePath: nodePath} + nodePath := filepath.Join(dp.discpath, d, e.Name(), r) + dp.Links <- LinkData{LinkPath: linkpath, NodePath: nodePath} } fallthrough case filepath.Ext(e.Name()) == "" || _versionRegex.MatchString(e.Name()) || _trashRegex.MatchString(e.Name()): - np := filepath.Join(c.discpath, d, e.Name()) + np := filepath.Join(dp.discpath, d, e.Name()) var inc []Inconsistency - if !c.filesExist(filepath.Join(d, e.Name())) { + if !dp.filesExist(filepath.Join(d, e.Name())) { inc = append(inc, InconsistencyFilesMissing) } - bp, i := c.getBlobPath(filepath.Join(d, e.Name())) + bp, i := dp.getBlobPath(filepath.Join(d, e.Name())) if i != "" { inc = append(inc, i) } - nodes <- NodeData{NodePath: np, BlobPath: bp, Inconsistencies: inc} + dp.Nodes <- NodeData{NodePath: np, BlobPath: bp, RequiresSymlink: requiresSymlink(np), Inconsistencies: inc} } } } @@ -121,15 +130,15 @@ func (c *DataProvider) ProduceData() (chan NodeData, chan LinkData, chan BlobDat // crawl trash wg.Add(1) go func() { - linkpaths, err := fs.Glob(c.fsys, "spaces/*/*/trash/*/*/*/*/*") + linkpaths, err := fs.Glob(dp.fsys, "spaces/*/*/trash/*/*/*/*/*") if err != nil { fmt.Println("error reading trash", err) } for _, l := range linkpaths { - linkpath := filepath.Join(c.discpath, l) + linkpath := filepath.Join(dp.discpath, l) r, _ := os.Readlink(linkpath) - p := filepath.Join(c.discpath, l, "..", r) - links <- LinkData{LinkPath: linkpath, NodePath: p} + p := filepath.Join(dp.discpath, l, "..", r) + dp.Links <- LinkData{LinkPath: linkpath, NodePath: p} } wg.Done() }() @@ -137,13 +146,13 @@ func (c *DataProvider) ProduceData() (chan NodeData, chan LinkData, chan BlobDat // crawl blobstore wg.Add(1) go func() { - bs, err := c.lbs.List() + bs, err := dp.lbs.List() if err != nil { fmt.Println("error listing blobs", err) } for _, bn := range bs { - blobs <- BlobData{BlobPath: c.lbs.Path(bn)} + dp.Blobs <- BlobData{BlobPath: dp.lbs.Path(bn)} } wg.Done() }() @@ -151,18 +160,14 @@ func (c *DataProvider) ProduceData() (chan NodeData, chan LinkData, chan BlobDat // wait for all crawlers to finish go func() { wg.Wait() - quit <- struct{}{} - close(nodes) - close(links) - close(blobs) - close(quit) + 
dp.quit() }() - return nodes, links, blobs, quit, nil + return nil } -func (c *DataProvider) getBlobPath(path string) (string, Inconsistency) { - b, err := fs.ReadFile(c.fsys, path+".mpk") +func (dp *DataProvider) getBlobPath(path string) (string, Inconsistency) { + b, err := fs.ReadFile(dp.fsys, path+".mpk") if err != nil { return "", InconsistencyFilesMissing } @@ -172,10 +177,53 @@ func (c *DataProvider) getBlobPath(path string) (string, Inconsistency) { return "", InconsistencyMalformedFile } + // FIXME: how to check if metadata is complete? + if bid := m["user.ocis.blobid"]; string(bid) != "" { - spaceID, _ := getIDsFromPath(filepath.Join(c.discpath, path)) - return c.lbs.Path(&node.Node{BlobID: string(bid), SpaceID: spaceID}), "" + spaceID, _ := getIDsFromPath(filepath.Join(dp.discpath, path)) + return dp.lbs.Path(&node.Node{BlobID: string(bid), SpaceID: spaceID}), "" } return "", "" } + +func (dp *DataProvider) filesExist(path string) bool { + check := func(p string) bool { + _, err := fs.Stat(dp.fsys, p) + return err == nil + } + return check(path) && check(path+".mpk") +} + +func (dp *DataProvider) quit() { + dp.Quit <- struct{}{} + close(dp.Nodes) + close(dp.Links) + close(dp.Blobs) + close(dp.Quit) +} + +func requiresSymlink(path string) bool { + spaceID, nodeID := getIDsFromPath(path) + if nodeID != "" && spaceID != "" && (spaceID == nodeID || _versionRegex.MatchString(nodeID)) { + return false + } + + return true +} + +func getIDsFromPath(path string) (string, string) { + rawIDs := strings.Split(path, "/nodes/") + if len(rawIDs) != 2 { + return "", "" + } + + s := strings.Split(rawIDs[0], "/spaces/") + if len(s) != 2 { + return "", "" + } + + spaceID := strings.Replace(s[1], "/", "", -1) + nodeID := strings.Replace(rawIDs[1], "/", "", -1) + return spaceID, nodeID +}
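
Usage after this patch: the channels now live on the DataProvider itself, so the call-site wiring collapses to exactly what CheckProviderConsistency above does. A minimal sketch, assuming the caller already has a storagepath string and an lbs ListBlobstore:

    fsys := os.DirFS(storagepath)

    p := backup.NewProvider(fsys, storagepath, lbs)
    if err := p.ProduceData(); err != nil { // spawns the crawler goroutines
        return err
    }

    c := backup.NewConsistency()
    // Blocks until the provider signals completion on p.Quit,
    // then evaluates the gathered maps.
    c.GatherData(p.Nodes, p.Links, p.Blobs, p.Quit)

    return c.PrintResults(storagepath)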
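
The GatherData parameters also tighten from chan to <-chan. That is a compile-time guarantee rather than cosmetics: a bidirectional channel converts implicitly to receive-only at the call site, but the consumer can no longer send on or close the producer's channels. A standalone illustration, not code from the patch:

    nodes := make(chan backup.NodeData)
    var ro <-chan backup.NodeData = nodes // chan T converts implicitly to <-chan T
    _ = ro
    // close(ro) or a send on ro would be compile errors on a receive-only channel,
    // so only DataProvider.quit() can tear the channels down.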
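
The new tests drive GatherData directly over hand-built channels, so they never need a blobstore. If ProduceData itself ever grows a test, a throwaway double along these lines should satisfy ListBlobstore; stubBlobstore is a hypothetical name, and the List() ([]*node.Node, error) shape is inferred from the call sites in provider.go (dp.lbs.List() is ranged over and its elements are passed to dp.lbs.Path):

    // stubBlobstore is a hypothetical ListBlobstore test double; its
    // method set is inferred from how dp.lbs is used in provider.go.
    type stubBlobstore struct {
        nodes []*node.Node      // blobs the fake store "contains"
        paths map[string]string // BlobID -> path returned by Path
    }

    func (s *stubBlobstore) List() ([]*node.Node, error) { return s.nodes, nil }

    func (s *stubBlobstore) Path(n *node.Node) string { return s.paths[n.BlobID] }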