Merge pull request #9238 from kobergj/OcisConsistencyCheck

Add command to check ocis backup consistency
This commit is contained in:
kobergj
2024-06-12 15:17:11 +02:00
committed by GitHub
6 changed files with 685 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
Enhancement: Add command to check ocis backup consistency
Adds a command that checks the consistency of an ocis backup.
https://github.com/owncloud/ocis/pull/9238

View File

@@ -217,3 +217,13 @@ BACKUP RECOMMENDED/OMITABLE. This folder contains custom web assets. Can be spec
When using an external IDP, IDM, NATS, or blobstore, their data needs to be backed up separately. Refer to the documentation of your IDP/IDM/NATS/blobstore for backup details.
## Backup Consistency Command
Infinite Scale now allows checking an existing backup for consistency. Use the command:
```bash
ocis backup consistency -p "<path-to-base-folder>"
```
`path-to-base-folder` needs to be replaced with the storage provider's base path. This should be the same as the value of `STORAGE_USERS_OCIS_ROOT`.
Use the `-b s3ng` option when using an external (s3) blobstore. Note: When using this flag, the path to the blobstore must be configured via environment variables or a yaml file to match the configuration of the original instance. Consistency checks for blobstores other than `ocis` and `s3ng` are not supported at the moment.

173
ocis/pkg/backup/backup.go Normal file
View File

@@ -0,0 +1,173 @@
// Package backup contains ocis backup functionality.
package backup
import (
"fmt"
"os"
"regexp"
)
// Inconsistency describes the type of inconsistency
type Inconsistency string

// The inconsistency values are immutable markers, so declare them as typed
// constants rather than vars to prevent accidental reassignment.
const (
	// InconsistencyBlobMissing is an inconsistency where a blob is missing in the blobstore
	InconsistencyBlobMissing Inconsistency = "blob missing"
	// InconsistencyBlobOrphaned is an inconsistency where a blob in the blobstore has no reference
	InconsistencyBlobOrphaned Inconsistency = "blob orphaned"
	// InconsistencyNodeMissing is an inconsistency where a symlink points to a non-existing node
	InconsistencyNodeMissing Inconsistency = "node missing"
	// InconsistencyMetadataMissing is an inconsistency where a node is missing metadata
	InconsistencyMetadataMissing Inconsistency = "metadata missing"
	// InconsistencySymlinkMissing is an inconsistency where a node is missing a symlink
	InconsistencySymlinkMissing Inconsistency = "symlink missing"
	// InconsistencyFilesMissing is an inconsistency where a node is missing metadata files like .mpk or .mlock
	InconsistencyFilesMissing Inconsistency = "files missing"
	// InconsistencyMalformedFile is an inconsistency where a node has a malformed metadata file
	InconsistencyMalformedFile Inconsistency = "malformed file"
)

var (
	// regex to determine if a node is trashed or versioned.
	// 9113a718-8285-4b32-9042-f930f1a58ac2.REV.2024-05-22T07:32:53.89969726Z
	_versionRegex = regexp.MustCompile(`\.REV\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+Z$`)
	// 9113a718-8285-4b32-9042-f930f1a58ac2.T.2024-05-23T08:25:20.006571811Z <- this HAS a symlink
	_trashRegex = regexp.MustCompile(`\.T\.[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+Z$`)
)
// Consistency holds the node and blob data of a storage provider
type Consistency struct {
	// Storing the data like this might take a lot of memory
	// we might need to optimize this if we run into memory issues

	// Nodes holds node paths together with the inconsistencies found for them.
	Nodes map[string][]Inconsistency
	// LinkedNodes holds node paths that were seen as symlink targets.
	LinkedNodes map[string][]Inconsistency
	// BlobReferences holds blob paths referenced by node metadata.
	BlobReferences map[string][]Inconsistency
	// Blobs holds blob paths found in the blobstore.
	Blobs map[string][]Inconsistency

	// lookup helpers used when printing results
	nodeToLink map[string]string
	blobToNode map[string]string
}

// NewConsistency creates a new Consistency object
func NewConsistency() *Consistency {
	c := &Consistency{}
	c.Nodes = map[string][]Inconsistency{}
	c.LinkedNodes = map[string][]Inconsistency{}
	c.BlobReferences = map[string][]Inconsistency{}
	c.Blobs = map[string][]Inconsistency{}
	c.nodeToLink = map[string]string{}
	c.blobToNode = map[string]string{}
	return c
}
// CheckProviderConsistency checks the consistency of a space
func CheckProviderConsistency(storagepath string, lbs ListBlobstore) error {
	// crawl the decomposedfs (and, when lbs is non-nil, the blobstore) ...
	provider := NewProvider(os.DirFS(storagepath), storagepath, lbs)
	if err := provider.ProduceData(); err != nil {
		return err
	}

	// ... then consume the produced events and report the findings
	consistency := NewConsistency()
	consistency.GatherData(provider.Events)
	return consistency.PrintResults(storagepath)
}
// GatherData gathers and evaluates data produced by the DataProvider
//
// Events arrive in arbitrary order (nodes, symlinks and blobs are produced by
// independent crawlers), so a node may be seen before or after its symlink,
// and a blob before or after the node referencing it. The maps therefore hold
// "still unmatched" placeholder entries that are removed again once the
// counterpart shows up. Whatever remains after the channel is closed is
// reported as a real inconsistency.
func (c *Consistency) GatherData(events <-chan interface{}) {
	for ev := range events {
		switch d := ev.(type) {
		case NodeData:
			// does it have inconsistencies?
			if len(d.Inconsistencies) != 0 {
				c.Nodes[d.NodePath] = append(c.Nodes[d.NodePath], d.Inconsistencies...)
			}
			// is it linked?
			if _, ok := c.LinkedNodes[d.NodePath]; ok {
				// the symlink was seen earlier - the pair is resolved
				deleteInconsistency(c.LinkedNodes, d.NodePath)
			} else if d.RequiresSymlink && c.Nodes[d.NodePath] == nil {
				// remember the node as a placeholder until its symlink arrives
				c.Nodes[d.NodePath] = []Inconsistency{}
			}
			// does it have a blob?
			if d.BlobPath != "" {
				if _, ok := c.Blobs[d.BlobPath]; ok {
					// the blob was seen earlier - the pair is resolved
					deleteInconsistency(c.Blobs, d.BlobPath)
				} else {
					// remember the reference until the blob itself arrives
					c.BlobReferences[d.BlobPath] = []Inconsistency{}
					c.blobToNode[d.BlobPath] = d.NodePath
				}
			}
		case LinkData:
			// does it have a node?
			if _, ok := c.Nodes[d.NodePath]; ok {
				// only deletes the placeholder; nodes with recorded
				// inconsistencies stay in the map
				deleteInconsistency(c.Nodes, d.NodePath)
			} else {
				c.LinkedNodes[d.NodePath] = []Inconsistency{}
				c.nodeToLink[d.NodePath] = d.LinkPath
			}
		case BlobData:
			// does it have a reference?
			if _, ok := c.BlobReferences[d.BlobPath]; ok {
				deleteInconsistency(c.BlobReferences, d.BlobPath)
			} else {
				c.Blobs[d.BlobPath] = []Inconsistency{}
			}
		}
	}

	// everything left over could not be matched - record the final verdicts
	for n := range c.Nodes {
		if len(c.Nodes[n]) == 0 {
			// node was seen but its symlink never arrived
			c.Nodes[n] = append(c.Nodes[n], InconsistencySymlinkMissing)
		}
	}
	for l := range c.LinkedNodes {
		// a symlink was seen but its target node never arrived
		c.LinkedNodes[l] = append(c.LinkedNodes[l], InconsistencyNodeMissing)
	}
	for b := range c.Blobs {
		// the blob exists in the blobstore but no node references it
		c.Blobs[b] = append(c.Blobs[b], InconsistencyBlobOrphaned)
	}
	for b := range c.BlobReferences {
		// a node references this blob but it never appeared in the blobstore
		c.BlobReferences[b] = append(c.BlobReferences[b], InconsistencyBlobMissing)
	}
}
// PrintResults prints the results of the evaluation
func (c *Consistency) PrintResults(discpath string) error {
	// a section header is only printed when the section has entries
	header := func(count int, title string) {
		if count != 0 {
			fmt.Println("\n🚨 " + title + ":")
		}
	}

	header(len(c.Nodes), "Inconsistent Nodes")
	for path, incs := range c.Nodes {
		fmt.Printf("\t👉 %v\tpath: %s\n", incs, path)
	}

	header(len(c.LinkedNodes), "Inconsistent Links")
	for nodePath, incs := range c.LinkedNodes {
		fmt.Printf("\t👉 %v\tpath: %s\n\t\t\t\tmissing node:%s\n", incs, c.nodeToLink[nodePath], nodePath)
	}

	header(len(c.Blobs), "Inconsistent Blobs")
	for blobPath, incs := range c.Blobs {
		fmt.Printf("\t👉 %v\tblob: %s\n", incs, blobPath)
	}

	header(len(c.BlobReferences), "Inconsistent BlobReferences")
	for blobPath, incs := range c.BlobReferences {
		fmt.Printf("\t👉 %v\tblob: %s\n\t\t\t\treferencing node:%s\n", incs, blobPath, c.blobToNode[blobPath])
	}

	// all four maps empty means the backup is consistent
	if len(c.Nodes)+len(c.LinkedNodes)+len(c.Blobs)+len(c.BlobReferences) == 0 {
		fmt.Printf("💚 No inconsistency found. The backup in '%s' seems to be valid.\n", discpath)
	}
	return nil
}
// deleteInconsistency removes the entry for path when it carries no recorded
// inconsistencies, i.e. when it was only an "unmatched so far" placeholder.
func deleteInconsistency(incs map[string][]Inconsistency, path string) {
	if entries, ok := incs[path]; !ok || len(entries) == 0 {
		delete(incs, path)
	}
}

View File

@@ -0,0 +1,162 @@
package backup_test
import (
"testing"
"github.com/owncloud/ocis/v2/ocis/pkg/backup"
"github.com/test-go/testify/require"
)
// TestGatherData verifies that GatherData correctly matches nodes, symlinks
// and blobs arriving in arbitrary order and reports the expected
// inconsistencies for each scenario.
func TestGatherData(t *testing.T) {
	testcases := []struct {
		Name     string
		Events   []interface{}
		Expected *backup.Consistency
	}{
		{
			Name: "no symlinks - no blobs",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true),
			},
			Expected: consistency(func(c *backup.Consistency) {
				node(c, "nodepath", backup.InconsistencySymlinkMissing)
				blobReference(c, "blobpath", backup.InconsistencyBlobMissing)
			}),
		},
		{
			Name: "symlink not required - no blobs",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", false),
			},
			Expected: consistency(func(c *backup.Consistency) {
				blobReference(c, "blobpath", backup.InconsistencyBlobMissing)
			}),
		},
		{
			Name: "no inconsistencies",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true),
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
			}),
		},
		{
			Name: "orphaned blob",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true),
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
				blobData("anotherpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				blob(c, "anotherpath", backup.InconsistencyBlobOrphaned)
			}),
		},
		{
			Name: "missing node",
			Events: []interface{}{
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				linkedNode(c, "nodepath", backup.InconsistencyNodeMissing)
				blob(c, "blobpath", backup.InconsistencyBlobOrphaned)
			}),
		},
		{
			Name: "corrupt metadata",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true, backup.InconsistencyMetadataMissing),
				linkData("linkpath", "nodepath"),
				blobData("blobpath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				node(c, "nodepath", backup.InconsistencyMetadataMissing)
			}),
		},
		{
			Name: "corrupt metadata, no blob",
			Events: []interface{}{
				nodeData("nodepath", "blobpath", true, backup.InconsistencyMetadataMissing),
				linkData("linkpath", "nodepath"),
			},
			Expected: consistency(func(c *backup.Consistency) {
				node(c, "nodepath", backup.InconsistencyMetadataMissing)
				blobReference(c, "blobpath", backup.InconsistencyBlobMissing)
			}),
		},
	}
	for _, tc := range testcases {
		tc := tc // pin the loop variable for the subtest closure (pre-Go 1.22)
		// run each case as a named subtest so failures are attributable
		t.Run(tc.Name, func(t *testing.T) {
			events := make(chan interface{})
			go func() {
				// the previous type-switch was redundant: sending the
				// interface value directly preserves the concrete type
				for _, ev := range tc.Events {
					events <- ev
				}
				close(events)
			}()

			c := backup.NewConsistency()
			c.GatherData(events)

			require.Equal(t, tc.Expected.Nodes, c.Nodes)
			require.Equal(t, tc.Expected.LinkedNodes, c.LinkedNodes)
			require.Equal(t, tc.Expected.Blobs, c.Blobs)
			require.Equal(t, tc.Expected.BlobReferences, c.BlobReferences)
		})
	}
}
// nodeData builds a backup.NodeData event for the test cases.
func nodeData(nodePath, blobPath string, requiresSymlink bool, incs ...backup.Inconsistency) backup.NodeData {
	return backup.NodeData{
		NodePath:        nodePath,
		BlobPath:        blobPath,
		RequiresSymlink: requiresSymlink,
		Inconsistencies: incs,
	}
}

// linkData builds a backup.LinkData event for the test cases.
func linkData(linkPath, nodePath string) backup.LinkData {
	return backup.LinkData{
		LinkPath: linkPath,
		NodePath: nodePath,
	}
}

// blobData builds a backup.BlobData event for the test cases.
func blobData(blobPath string) backup.BlobData {
	return backup.BlobData{
		BlobPath: blobPath,
	}
}
// consistency builds an expected *backup.Consistency by applying f to a
// freshly initialized object.
func consistency(f func(*backup.Consistency)) *backup.Consistency {
	c := backup.NewConsistency()
	f(c)
	return c
}

// node records the expected inconsistencies for a node path.
func node(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
	c.Nodes[path] = inc
}

// linkedNode records the expected inconsistencies for a symlinked node path.
func linkedNode(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
	c.LinkedNodes[path] = inc
}

// blob records the expected inconsistencies for a blob path.
func blob(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
	c.Blobs[path] = inc
}

// blobReference records the expected inconsistencies for a referenced blob path.
func blobReference(c *backup.Consistency, path string, inc ...backup.Inconsistency) {
	c.BlobReferences[path] = inc
}

237
ocis/pkg/backup/provider.go Normal file
View File

@@ -0,0 +1,237 @@
package backup
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"sync"
"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/node"
"github.com/shamaton/msgpack/v2"
)
// ListBlobstore required to check blob consistency
type ListBlobstore interface {
	// List returns all blobs in the blobstore.
	List() ([]*node.Node, error)
	// Path returns the blobstore path for the given node.
	Path(node *node.Node) string
}

// DataProvider provides data for the consistency check
type DataProvider struct {
	// Events receives NodeData, LinkData and BlobData while crawling.
	// It is closed once all crawlers are done.
	Events chan interface{}

	fsys      fs.FS         // filesystem rooted at discpath
	discpath  string        // base path of the decomposedfs on disc
	lbs       ListBlobstore // may be nil; see skipBlobs
	skipBlobs bool          // true when no blobstore was given - blob checks are skipped
}

// NodeData holds data about the nodes
type NodeData struct {
	NodePath        string          // path of the node on disc
	BlobPath        string          // blobstore path of the referenced blob; "" when none
	RequiresSymlink bool            // whether a symlink to this node is expected
	Inconsistencies []Inconsistency // inconsistencies detected while reading the node
}

// LinkData about the symlinks
type LinkData struct {
	LinkPath string // path of the symlink itself
	NodePath string // path the symlink resolves to
}

// BlobData about the blobs in the blobstore
type BlobData struct {
	BlobPath string // path of the blob in the blobstore
}

// NewProvider creates a new DataProvider object.
// Passing a nil ListBlobstore disables all blob consistency checks.
func NewProvider(fsys fs.FS, discpath string, lbs ListBlobstore) *DataProvider {
	return &DataProvider{
		Events:    make(chan interface{}),
		fsys:      fsys,
		discpath:  discpath,
		lbs:       lbs,
		skipBlobs: lbs == nil,
	}
}
// ProduceData produces data for the consistency check
// Spawns 4 go-routines at the moment. If needed, this can be optimized.
//
// Results are sent to dp.Events; the channel is closed once all crawlers have
// finished. Because the channel is unbuffered, a consumer must drain
// dp.Events or the crawler goroutines block forever.
func (dp *DataProvider) ProduceData() error {
	// node directories live under spaces/<xx>/<rest-of-spaceid>/nodes/ at a fixed depth
	dirs, err := fs.Glob(dp.fsys, "spaces/*/*/nodes/*/*/*/*")
	if err != nil {
		return err
	}
	if len(dirs) == 0 {
		return errors.New("no backup found. Double check storage path")
	}

	wg := sync.WaitGroup{}

	// crawl spaces
	wg.Add(1)
	go func() {
		for _, d := range dirs {
			dp.evaluateNodeDir(d)
		}
		wg.Done()
	}()

	// crawl trash
	wg.Add(1)
	go func() {
		dp.evaluateTrashDir()
		wg.Done()
	}()

	// crawl blobstore
	if !dp.skipBlobs {
		wg.Add(1)
		go func() {
			bs, err := dp.lbs.List()
			if err != nil {
				// best effort: report the error and emit whatever was listed
				fmt.Println("error listing blobs", err)
			}
			for _, bn := range bs {
				dp.Events <- BlobData{BlobPath: dp.lbs.Path(bn)}
			}
			wg.Done()
		}()
	}

	// wait for all crawlers to finish, then close the Events channel
	go func() {
		wg.Wait()
		dp.quit()
	}()
	return nil
}
// getBlobPath reads the node's .mpk metadata file and resolves the blobstore
// path of the blob it references. The returned Inconsistency is "" when no
// problem was found; both return values are empty when blob checks are
// skipped or the node references no blob.
func (dp *DataProvider) getBlobPath(path string) (string, Inconsistency) {
	if dp.skipBlobs {
		return "", ""
	}

	b, err := fs.ReadFile(dp.fsys, path+".mpk")
	if err != nil {
		return "", InconsistencyFilesMissing
	}

	m := map[string][]byte{}
	if err := msgpack.Unmarshal(b, &m); err != nil {
		return "", InconsistencyMalformedFile
	}

	// FIXME: how to check if metadata is complete?
	if bid := m["user.ocis.blobid"]; string(bid) != "" {
		// the node references a blob - map blob id + space id to its blobstore path
		spaceID, _ := getIDsFromPath(filepath.Join(dp.discpath, path))
		return dp.lbs.Path(&node.Node{BlobID: string(bid), SpaceID: spaceID}), ""
	}

	// no blob id in the metadata - nothing to check
	return "", ""
}
// evaluateNodeDir emits a NodeData event for every node in the given
// directory and a LinkData event for every symlink found in its
// subdirectories.
func (dp *DataProvider) evaluateNodeDir(d string) {
	// d is something like spaces/a8/e5d981-41e4-4468-b532-258d5fb457d3/nodes/2d/08/8d/24
	// we could have multiple nodes under this, but we are only interested in one file per node - the one with "" extension
	entries, err := fs.ReadDir(dp.fsys, d)
	if err != nil {
		fmt.Println("error reading dir", err)
		return
	}
	if len(entries) == 0 {
		fmt.Println("empty dir", filepath.Join(dp.discpath, d))
		return
	}

	for _, e := range entries {
		switch {
		case e.IsDir():
			// a directory entry: its children are symlinks pointing at other nodes
			ls, err := fs.ReadDir(dp.fsys, filepath.Join(d, e.Name()))
			if err != nil {
				fmt.Println("error reading dir", err)
				continue
			}
			for _, l := range ls {
				linkpath := filepath.Join(dp.discpath, d, e.Name(), l.Name())
				// Readlink errors are deliberately ignored; on error r is ""
				// and the resulting NodePath will not resolve to a node,
				// which is later reported by the consistency check
				r, _ := os.Readlink(linkpath)
				nodePath := filepath.Join(dp.discpath, d, e.Name(), r)
				dp.Events <- LinkData{LinkPath: linkpath, NodePath: nodePath}
			}
			// NOTE: fallthrough executes the next case body unconditionally
			// (its condition is NOT re-evaluated), so a NodeData event is
			// emitted for the directory entry itself as well
			fallthrough
		case filepath.Ext(e.Name()) == "" || _versionRegex.MatchString(e.Name()) || _trashRegex.MatchString(e.Name()):
			// plain nodes (no extension), version nodes (.REV.) and trash nodes (.T.)
			np := filepath.Join(dp.discpath, d, e.Name())

			var inc []Inconsistency
			if !dp.filesExist(filepath.Join(d, e.Name())) {
				inc = append(inc, InconsistencyFilesMissing)
			}
			bp, i := dp.getBlobPath(filepath.Join(d, e.Name()))
			if i != "" {
				inc = append(inc, i)
			}

			dp.Events <- NodeData{NodePath: np, BlobPath: bp, RequiresSymlink: requiresSymlink(np), Inconsistencies: inc}
		}
	}
}
// evaluateTrashDir emits a LinkData event for every symlink found in the
// trash directories of all spaces.
func (dp *DataProvider) evaluateTrashDir() {
	links, err := fs.Glob(dp.fsys, "spaces/*/*/trash/*/*/*/*/*")
	if err != nil {
		fmt.Println("error reading trash", err)
	}
	for _, entry := range links {
		lp := filepath.Join(dp.discpath, entry)
		// Readlink errors are deliberately ignored; a broken target is
		// surfaced later by the consistency check
		target, _ := os.Readlink(lp)
		dp.Events <- LinkData{
			LinkPath: lp,
			NodePath: filepath.Join(dp.discpath, entry, "..", target),
		}
	}
}
// filesExist reports whether both the node file itself and its .mpk metadata
// file are present on disc.
func (dp *DataProvider) filesExist(path string) bool {
	for _, required := range []string{path, path + ".mpk"} {
		if _, err := fs.Stat(dp.fsys, required); err != nil {
			return false
		}
	}
	return true
}
// quit closes the Events channel, signalling consumers that all data has been
// produced.
func (dp *DataProvider) quit() {
	close(dp.Events)
}
// requiresSymlink reports whether the node at path is expected to be the
// target of a symlink. Nodes whose id equals the space id and version nodes
// (matching the .REV. pattern) do not need one.
func requiresSymlink(path string) bool {
	spaceID, nodeID := getIDsFromPath(path)
	switch {
	case spaceID == "" || nodeID == "":
		// ids could not be extracted - keep the conservative default
		return true
	case spaceID == nodeID:
		return false
	case _versionRegex.MatchString(nodeID):
		return false
	default:
		return true
	}
}
// getIDsFromPath extracts the space id and node id from a decomposedfs path
// of the form .../spaces/<spaceid-segments>/nodes/<nodeid-segments>.
// It returns two empty strings when the path does not match that shape.
func getIDsFromPath(path string) (string, string) {
	nodeSplit := strings.Split(path, "/nodes/")
	if len(nodeSplit) != 2 {
		return "", ""
	}
	spaceSplit := strings.Split(nodeSplit[0], "/spaces/")
	if len(spaceSplit) != 2 {
		return "", ""
	}
	// the ids are sharded into path segments on disc - join them back together
	return strings.ReplaceAll(spaceSplit[1], "/", ""),
		strings.ReplaceAll(nodeSplit[1], "/", "")
}

View File

@@ -0,0 +1,98 @@
package command
import (
"errors"
"fmt"
ocisbs "github.com/cs3org/reva/v2/pkg/storage/fs/ocis/blobstore"
s3bs "github.com/cs3org/reva/v2/pkg/storage/fs/s3ng/blobstore"
"github.com/owncloud/ocis/v2/ocis-pkg/config"
"github.com/owncloud/ocis/v2/ocis-pkg/config/configlog"
"github.com/owncloud/ocis/v2/ocis-pkg/config/parser"
"github.com/owncloud/ocis/v2/ocis/pkg/backup"
"github.com/owncloud/ocis/v2/ocis/pkg/register"
"github.com/urfave/cli/v2"
)
// BackupCommand is the entrypoint for the backup command
func BackupCommand(cfg *config.Config) *cli.Command {
	return &cli.Command{
		Name:  "backup",
		Usage: "ocis backup functionality",
		Subcommands: []*cli.Command{
			ConsistencyCommand(cfg),
		},
		// parse the ocis config before any subcommand runs so subcommands
		// can rely on a fully populated cfg
		Before: func(c *cli.Context) error {
			return configlog.ReturnError(parser.ParseConfig(cfg, true))
		},
		// bare `ocis backup` does nothing by itself - point users at the docs
		Action: func(_ *cli.Context) error {
			fmt.Println("Read the docs")
			return nil
		},
	}
}
// ConsistencyCommand is the entrypoint for the consistency Command
func ConsistencyCommand(cfg *config.Config) *cli.Command {
	return &cli.Command{
		Name:  "consistency",
		Usage: "check backup consistency",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:     "basepath",
				Aliases:  []string{"p"},
				Usage:    "the basepath of the decomposedfs (e.g. /var/tmp/ocis/storage/users)",
				Required: true,
			},
			&cli.StringFlag{
				Name:    "blobstore",
				Aliases: []string{"b"},
				Usage:   "the blobstore type. Can be (none, ocis, s3ng). Default ocis",
				Value:   "ocis",
			},
		},
		Action: func(c *cli.Context) error {
			basePath := c.String("basepath")
			// NOTE(review): this check looks unreachable since the flag is
			// declared Required above - confirm whether it can be dropped
			if basePath == "" {
				fmt.Println("basepath is required")
				return cli.ShowCommandHelp(c, "consistency")
			}

			// select the blobstore implementation used to list blobs;
			// "none" skips blob consistency checks entirely
			var (
				bs  backup.ListBlobstore
				err error
			)
			switch c.String("blobstore") {
			case "s3ng":
				// s3ng reads its connection details from the parsed config
				bs, err = s3bs.New(
					cfg.StorageUsers.Drivers.S3NG.Endpoint,
					cfg.StorageUsers.Drivers.S3NG.Region,
					cfg.StorageUsers.Drivers.S3NG.Bucket,
					cfg.StorageUsers.Drivers.S3NG.AccessKey,
					cfg.StorageUsers.Drivers.S3NG.SecretKey,
					s3bs.Options{},
				)
			case "ocis":
				bs, err = ocisbs.New(basePath)
			case "none":
				bs = nil
			default:
				err = errors.New("blobstore type not supported")
			}
			if err != nil {
				fmt.Println(err)
				return err
			}

			if err := backup.CheckProviderConsistency(basePath, bs); err != nil {
				fmt.Println(err)
				return err
			}
			return nil
		},
	}
}
// init registers the backup command with the ocis command registry.
func init() {
	register.AddCommand(BackupCommand)
}