// Package indexer provides symlink-based indexer for on-disk document-directories. package indexer import ( "context" "fmt" "path" "strings" "github.com/owncloud/ocis/ocis-pkg/sync" "github.com/CiscoM31/godata" "github.com/iancoleman/strcase" "github.com/owncloud/ocis/ocis-pkg/indexer/config" "github.com/owncloud/ocis/ocis-pkg/indexer/errors" "github.com/owncloud/ocis/ocis-pkg/indexer/index" _ "github.com/owncloud/ocis/ocis-pkg/indexer/index/cs3" // to populate index _ "github.com/owncloud/ocis/ocis-pkg/indexer/index/disk" // to populate index "github.com/owncloud/ocis/ocis-pkg/indexer/option" "github.com/owncloud/ocis/ocis-pkg/indexer/registry" ) // Indexer is a facade to configure and query over multiple indices. type Indexer struct { config *config.Config indices typeMap mu sync.NamedRWMutex } // IdxAddResult represents the result of an Add call on an index type IdxAddResult struct { Field, Value string } // CreateIndexer creates a new Indexer. func CreateIndexer(cfg *config.Config) *Indexer { return &Indexer{ config: cfg, indices: typeMap{}, mu: sync.NewNamedRWMutex(), } } // Reset takes care of deleting all indices from storage and from the internal map of indices func (i *Indexer) Reset() error { for j := range i.indices { for _, indices := range i.indices[j].IndicesByField { for _, idx := range indices { err := idx.Delete() if err != nil { return err } } } delete(i.indices, j) } return nil } // AddIndex adds a new index to the indexer receiver. func (i *Indexer) AddIndex(t interface{}, indexBy, pkName, entityDirName, indexType string, bound *option.Bound, caseInsensitive bool) error { f := registry.IndexConstructorRegistry[i.config.Repo.Backend][indexType] var idx index.Index if i.config.Repo.Backend == "cs3" { idx = f( option.CaseInsensitive(caseInsensitive), option.WithEntity(t), option.WithBounds(bound), option.WithTypeName(getTypeFQN(t)), option.WithIndexBy(indexBy), option.WithDataURL(i.config.Repo.CS3.DataURL), option.WithDataPrefix(i.config.Repo.CS3.DataPrefix), option.WithJWTSecret(i.config.Repo.CS3.JWTSecret), option.WithProviderAddr(i.config.Repo.CS3.ProviderAddr), option.WithServiceUser(i.config.ServiceUser), ) } else { idx = f( option.CaseInsensitive(caseInsensitive), option.WithEntity(t), option.WithBounds(bound), option.WithTypeName(getTypeFQN(t)), option.WithIndexBy(indexBy), option.WithFilesDir(path.Join(i.config.Repo.Disk.Path, entityDirName)), option.WithDataDir(i.config.Repo.Disk.Path), ) } i.indices.addIndex(getTypeFQN(t), pkName, idx) return idx.Init() } // Add a new entry to the indexer func (i *Indexer) Add(t interface{}) ([]IdxAddResult, error) { typeName := getTypeFQN(t) i.mu.Lock(typeName) defer i.mu.Unlock(typeName) var results []IdxAddResult if fields, ok := i.indices[typeName]; ok { for _, indices := range fields.IndicesByField { for _, idx := range indices { pkVal := valueOf(t, fields.PKFieldName) idxByVal := valueOf(t, idx.IndexBy()) value, err := idx.Add(pkVal, idxByVal) if err != nil { return []IdxAddResult{}, err } if value == "" { continue } results = append(results, IdxAddResult{Field: idx.IndexBy(), Value: value}) } } } return results, nil } // FindBy finds a value on an index by field and value. func (i *Indexer) FindBy(t interface{}, field string, val string) ([]string, error) { typeName := getTypeFQN(t) i.mu.RLock(typeName) defer i.mu.RUnlock(typeName) resultPaths := make([]string, 0) if fields, ok := i.indices[typeName]; ok { for _, idx := range fields.IndicesByField[strcase.ToCamel(field)] { idxVal := val res, err := idx.Lookup(idxVal) if err != nil { if errors.IsNotFoundErr(err) { continue } if err != nil { return nil, err } } resultPaths = append(resultPaths, res...) } } result := make([]string, 0, len(resultPaths)) for _, v := range resultPaths { result = append(result, path.Base(v)) } return result, nil } // Delete deletes all indexed fields of a given type t on the Indexer. func (i *Indexer) Delete(t interface{}) error { typeName := getTypeFQN(t) i.mu.Lock(typeName) defer i.mu.Unlock(typeName) if fields, ok := i.indices[typeName]; ok { for _, indices := range fields.IndicesByField { for _, idx := range indices { pkVal := valueOf(t, fields.PKFieldName) idxByVal := valueOf(t, idx.IndexBy()) if err := idx.Remove(pkVal, idxByVal); err != nil { return err } } } } return nil } // FindByPartial allows for glob search across all indexes. func (i *Indexer) FindByPartial(t interface{}, field string, pattern string) ([]string, error) { typeName := getTypeFQN(t) i.mu.RLock(typeName) defer i.mu.RUnlock(typeName) resultPaths := make([]string, 0) if fields, ok := i.indices[typeName]; ok { for _, idx := range fields.IndicesByField[strcase.ToCamel(field)] { res, err := idx.Search(pattern) if err != nil { if errors.IsNotFoundErr(err) { continue } if err != nil { return nil, err } } resultPaths = append(resultPaths, res...) } } result := make([]string, 0, len(resultPaths)) for _, v := range resultPaths { result = append(result, path.Base(v)) } return result, nil } // Update updates all indexes on a value to a value . func (i *Indexer) Update(from, to interface{}) error { typeNameFrom := getTypeFQN(from) i.mu.Lock(typeNameFrom) defer i.mu.Unlock(typeNameFrom) if typeNameTo := getTypeFQN(to); typeNameFrom != typeNameTo { return fmt.Errorf("update types do not match: from %v to %v", typeNameFrom, typeNameTo) } if fields, ok := i.indices[typeNameFrom]; ok { for fName, indices := range fields.IndicesByField { oldV := valueOf(from, fName) newV := valueOf(to, fName) pkVal := valueOf(from, fields.PKFieldName) for _, idx := range indices { if oldV == newV { continue } if oldV == "" { if _, err := idx.Add(pkVal, newV); err != nil { return err } continue } if newV == "" { if err := idx.Remove(pkVal, oldV); err != nil { return err } continue } if err := idx.Update(pkVal, oldV, newV); err != nil { return err } } } } return nil } // Query parses an OData query into something our indexer.Index understands and resolves it. func (i *Indexer) Query(ctx context.Context, t interface{}, q string) ([]string, error) { query, err := godata.ParseFilterString(ctx, q) if err != nil { return nil, err } tree := newQueryTree() if err := buildTreeFromOdataQuery(query.Tree, &tree); err != nil { return nil, err } results := make([]string, 0) if err := i.resolveTree(t, &tree, &results); err != nil { return nil, err } return results, nil } // t is used to infer the indexed field names. When building an index search query, field names have to respect Golang // conventions and be in PascalCase. For a better overview on this contemplate reading the reflection package under the // indexer directory. Traversal of the tree happens in a pre-order fashion. // TODO implement logic for `and` operators. func (i *Indexer) resolveTree(t interface{}, tree *queryTree, partials *[]string) error { if partials == nil { return fmt.Errorf("return value cannot be nil: partials") } if tree.left != nil { _ = i.resolveTree(t, tree.left, partials) } if tree.right != nil { _ = i.resolveTree(t, tree.right, partials) } // by the time we're here we reached a leaf node. if tree.token != nil { switch tree.token.filterType { case "FindBy": operand, err := sanitizeInput(tree.token.operands) if err != nil { return err } r, err := i.FindBy(t, operand.field, operand.value) if err != nil { return err } *partials = append(*partials, r...) case "FindByPartial": operand, err := sanitizeInput(tree.token.operands) if err != nil { return err } r, err := i.FindByPartial(t, operand.field, fmt.Sprintf("%v*", operand.value)) if err != nil { return err } *partials = append(*partials, r...) default: return fmt.Errorf("unsupported filter: %v", tree.token.filterType) } } *partials = dedup(*partials) return nil } type indexerTuple struct { field, value string } // sanitizeInput returns a tuple of fieldName + value to be applied on indexer.Index filters. func sanitizeInput(operands []string) (*indexerTuple, error) { if len(operands) != 2 { return nil, fmt.Errorf("invalid number of operands for filter function: got %v expected 2", len(operands)) } // field names are Go public types and by design they are in PascalCase, therefore we need to adhere to this rules. // for further information on this have a look at the reflection package. f := strcase.ToCamel(operands[0]) // remove single quotes from value. v := strings.ReplaceAll(operands[1], "'", "") return &indexerTuple{ field: f, value: v, }, nil } // buildTreeFromOdataQuery builds an indexer.queryTree out of a GOData ParseNode. The purpose of this intermediate tree // is to transform godata operators and functions into supported operations on our index. At the time of this writing // we only support `FindBy` and `FindByPartial` queries as these are the only implemented filters on indexer.Index(es). func buildTreeFromOdataQuery(root *godata.ParseNode, tree *queryTree) error { if root.Token.Type == godata.ExpressionTokenFunc { // i.e "startswith", "contains" switch root.Token.Value { case "startswith": token := token{ operator: root.Token.Value, filterType: "FindByPartial", // TODO sanitize the number of operands it the expected one. operands: []string{ root.Children[0].Token.Value, // field name, i.e: Name root.Children[1].Token.Value, // field value, i.e: Jac }, } tree.insert(&token) default: return fmt.Errorf("operation not supported") } } if root.Token.Type == godata.ExpressionTokenLogical { switch root.Token.Value { case "or": tree.insert(&token{operator: root.Token.Value}) for _, child := range root.Children { if err := buildTreeFromOdataQuery(child, tree.left); err != nil { return err } } case "eq": tree.insert(&token{ operator: root.Token.Value, filterType: "FindBy", operands: []string{ root.Children[0].Token.Value, root.Children[1].Token.Value, }, }) for _, child := range root.Children { if err := buildTreeFromOdataQuery(child, tree.left); err != nil { return err } } default: return fmt.Errorf("operator not supported") } } return nil }