diff --git a/accounts/go.mod b/accounts/go.mod index a13239f3cd..25e1d64a9a 100644 --- a/accounts/go.mod +++ b/accounts/go.mod @@ -3,6 +3,7 @@ module github.com/owncloud/ocis/accounts go 1.13 require ( + github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 github.com/cs3org/go-cs3apis v0.0.0-20200810113633-b00aca449666 github.com/cs3org/reva v1.2.2-0.20200924071957-e6676516e61e github.com/go-chi/chi v4.1.2+incompatible diff --git a/accounts/go.sum b/accounts/go.sum index 07f8c42a24..98d42798a8 100644 --- a/accounts/go.sum +++ b/accounts/go.sum @@ -50,6 +50,8 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/Masterminds/goutils v1.1.0 h1:zukEsf/1JZwCMgHiK3GZftabmxiCw4apj3a28RPBiVg= github.com/Masterminds/goutils v1.1.0/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= diff --git a/accounts/pkg/indexer/helper.go b/accounts/pkg/indexer/helper.go new file mode 100644 index 0000000000..8591beb1d7 --- /dev/null +++ b/accounts/pkg/indexer/helper.go @@ -0,0 +1,15 @@ +package indexer + +// dedup removes duplicate values in given slice +func dedup(s []string) []string { + for i := 0; i < len(s); i++ { + for i2 := i + 1; i2 < len(s); i2++ { + if s[i] == s[i2] { + // delete + s = append(s[:i2], s[i2+1:]...) + i2-- + } + } + } + return s +} diff --git a/accounts/pkg/indexer/indexer.go b/accounts/pkg/indexer/indexer.go index 2c23ffe20e..33207bc4d7 100644 --- a/accounts/pkg/indexer/indexer.go +++ b/accounts/pkg/indexer/indexer.go @@ -4,7 +4,9 @@ package indexer import ( "fmt" "path" + "strings" + "github.com/CiscoM31/godata" "github.com/iancoleman/strcase" "github.com/owncloud/ocis/accounts/pkg/config" "github.com/owncloud/ocis/accounts/pkg/indexer/errors" @@ -236,3 +238,151 @@ func (i Indexer) Update(from, to interface{}) error { return nil } + +// Query parses an OData query into something our indexer.Index understands and resolves it. +func (i Indexer) Query(t interface{}, q string) ([]string, error) { + query, err := godata.ParseFilterString(q) + if err != nil { + return nil, err + } + + tree := newQueryTree() + if err := buildTreeFromOdataQuery(query.Tree, &tree); err != nil { + return nil, err + } + + results := make([]string, 0) + if err := i.resolveTree(t, &tree, &results); err != nil { + return nil, err + } + + return results, nil +} + +// t is used to infer the indexed field names. When building an index search query, field names have to respect Golang +// conventions and be in PascalCase. For a better overview on this contemplate reading the reflection package under the +// indexer directory. Traversal of the tree happens in a pre-order fashion. +// TODO implement logic for `and` operators. +func (i Indexer) resolveTree(t interface{}, tree *queryTree, partials *[]string) error { + if partials == nil { + return fmt.Errorf("return value cannot be nil: partials") + } + + if tree.left != nil { + _ = i.resolveTree(t, tree.left, partials) + } + + if tree.right != nil { + _ = i.resolveTree(t, tree.right, partials) + } + + // by the time we're here we reached a leaf node. + if tree.token != nil { + switch tree.token.filterType { + case "FindBy": + operand, err := sanitizeInput(tree.token.operands) + if err != nil { + return err + } + + r, err := i.FindBy(t, operand.field, operand.value) + if err != nil { + return err + } + + *partials = append(*partials, r...) + case "FindByPartial": + operand, err := sanitizeInput(tree.token.operands) + if err != nil { + return err + } + + r, err := i.FindByPartial(t, operand.field, fmt.Sprintf("%v*", operand.value)) + if err != nil { + return err + } + + *partials = append(*partials, r...) + default: + return fmt.Errorf("unsupported filter: %v", tree.token.filterType) + } + } + + *partials = dedup(*partials) + return nil +} + +type indexerTuple struct { + field, value string +} + +// sanitizeInput returns a tuple of fieldName + value to be applied on indexer.Index filters. +func sanitizeInput(operands []string) (*indexerTuple, error) { + if len(operands) != 2 { + return nil, fmt.Errorf("invalid number of operands for filter function: got %v expected 2", len(operands)) + } + + // field names are Go public types and by design they are in PascalCase, therefore we need to adhere to this rules. + // for further information on this have a look at the reflection package. + f := strcase.ToCamel(operands[0]) + + // remove single quotes from value. + v := strings.ReplaceAll(operands[1], "'", "") + return &indexerTuple{ + field: f, + value: v, + }, nil +} + +// buildTreeFromOdataQuery builds an indexer.queryTree out of a GOData ParseNode. The purpose of this intermediate tree +// is to transform godata operators and functions into supported operations on our index. At the time of this writing +// we only support `FindBy` and `FindByPartial` queries as these are the only implemented filters on indexer.Index(es). +func buildTreeFromOdataQuery(root *godata.ParseNode, tree *queryTree) error { + if root.Token.Type == godata.FilterTokenFunc { // i.e "startswith", "contains" + switch root.Token.Value { + case "startswith": + token := token{ + operator: root.Token.Value, + filterType: "FindByPartial", + // TODO sanitize the number of operands it the expected one. + operands: []string{ + root.Children[0].Token.Value, // field name, i.e: Name + root.Children[1].Token.Value, // field value, i.e: Jac + }, + } + + tree.insert(&token) + default: + return fmt.Errorf("operation not supported") + } + } + + if root.Token.Type == godata.FilterTokenLogical { + switch root.Token.Value { + case "or": + tree.insert(&token{operator: root.Token.Value}) + for _, child := range root.Children { + if err := buildTreeFromOdataQuery(child, tree.left); err != nil { + return err + } + } + case "eq": + tree.insert(&token{ + operator: root.Token.Value, + filterType: "FindBy", + operands: []string{ + root.Children[0].Token.Value, + root.Children[1].Token.Value, + }, + }) + for _, child := range root.Children { + if err := buildTreeFromOdataQuery(child, tree.left); err != nil { + return err + } + } + default: + return fmt.Errorf("operator not supported") + } + } + return nil +} diff --git a/accounts/pkg/indexer/indexer_test.go b/accounts/pkg/indexer/indexer_test.go index 942adddf23..54d59ecf82 100644 --- a/accounts/pkg/indexer/indexer_test.go +++ b/accounts/pkg/indexer/indexer_test.go @@ -251,18 +251,51 @@ func TestIndexer_Disk_UpdateWithNonUniqueIndex(t *testing.T) { _ = os.RemoveAll(dataDir) } -//func createCs3Indexer() *Indexer { -// return CreateIndexer(&config.Config{ -// Repo: config.Repo{ -// CS3: config.CS3{ -// ProviderAddr: "0.0.0.0:9215", -// DataURL: "http://localhost:9216", -// DataPrefix: "data", -// JWTSecret: "Pive-Fumkiu4", -// }, -// }, -// }) -//} +func TestQueryDiskImpl(t *testing.T) { + dataDir, err := WriteIndexTestData(Data, "ID", "") + assert.NoError(t, err) + indexer := createDiskIndexer(dataDir) + + err = indexer.AddIndex(&Account{}, "OnPremisesSamAccountName", "ID", "accounts", "non_unique", nil, false) + assert.NoError(t, err) + + err = indexer.AddIndex(&Account{}, "Mail", "ID", "accounts", "non_unique", nil, false) + assert.NoError(t, err) + + err = indexer.AddIndex(&Account{}, "ID", "ID", "accounts", "non_unique", nil, false) + assert.NoError(t, err) + + acc := Account{ + ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2", + Mail: "spooky@skeletons.org", + OnPremisesSamAccountName: "MrDootDoot", + } + + _, err = indexer.Add(acc) + assert.NoError(t, err) + + r, err := indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "mail eq 'spooky@skeletons.org'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot' or mail eq 'spooky@skeletons.org'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "startswith(on_premises_sam_account_name,'MrDoo')") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + r, err = indexer.Query(&Account{}, "id eq 'ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2' or on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets. + assert.NoError(t, err) + assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r) + + _ = os.RemoveAll(dataDir) +} func createDiskIndexer(dataDir string) *Indexer { return CreateIndexer(&config.Config{ diff --git a/accounts/pkg/indexer/query_tree.go b/accounts/pkg/indexer/query_tree.go new file mode 100644 index 0000000000..1177cc0853 --- /dev/null +++ b/accounts/pkg/indexer/query_tree.go @@ -0,0 +1,40 @@ +package indexer + +type queryTree struct { + token *token + root bool + left *queryTree + right *queryTree +} + +// token to be resolved by the index +type token struct { + operator string // original OData operator. i.e: 'startswith', `or`, `and`. + filterType string // equivalent operator from OData -> indexer i.e FindByPartial or FindBy. + operands []string +} + +// newQueryTree constructs a new tree with a root node. +func newQueryTree() queryTree { + return queryTree{ + root: true, + } +} + +// insert populates first the LHS of the tree first, if this is not possible it fills the RHS. +func (t *queryTree) insert(tkn *token) { + if t != nil && t.root { + t.left = &queryTree{token: tkn} + return + } + + if t.left == nil { + t.left = &queryTree{token: tkn} + return + } + + if t.left != nil && t.right == nil { + t.right = &queryTree{token: tkn} + return + } +} diff --git a/accounts/pkg/indexer/test/data.go b/accounts/pkg/indexer/test/data.go index 98b2916a36..d36552f651 100644 --- a/accounts/pkg/indexer/test/data.go +++ b/accounts/pkg/indexer/test/data.go @@ -19,6 +19,13 @@ type Pet struct { UID int } +// Account mocks an ocis account. +type Account struct { + ID string + OnPremisesSamAccountName string + Mail string +} + // Data mock data. var Data = map[string][]interface{}{ "users": { @@ -33,6 +40,9 @@ var Data = map[string][]interface{}{ Pet{ID: "goefe-789", Kind: "Hog", Color: "Green", Name: "Dicky"}, Pet{ID: "xadaf-189", Kind: "Hog", Color: "Green", Name: "Ricky"}, }, + "accounts": { + Account{ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2", Mail: "spooky@skeletons.org", OnPremisesSamAccountName: "MrDootDoot"}, + }, } // WriteIndexTestData writes mock data to disk. diff --git a/accounts/pkg/service/v0/accounts.go b/accounts/pkg/service/v0/accounts.go index 45021bbe8e..c26d861102 100644 --- a/accounts/pkg/service/v0/accounts.go +++ b/accounts/pkg/service/v0/accounts.go @@ -197,83 +197,7 @@ func (s Service) ListAccounts(ctx context.Context, in *proto.ListAccountsRequest } func (s Service) findAccountsByQuery(ctx context.Context, query string) ([]string, error) { - var searchResults []string - var err error - - // TODO: more explicit queries have to be on top - var onPremOrMailQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)' or mail eq '(.*)'$`) - match := onPremOrMailQuery.FindStringSubmatch(query) - if len(match) == 3 { - resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*") - if err != nil { - return nil, err - } - resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[2]+"*") - if err != nil { - return nil, err - } - - searchResults = append(resSam, resMail...) - return unique(searchResults), nil - } - - // startswith(on_premises_sam_account_name,'mar') or startswith(display_name,'mar') or startswith(mail,'mar') - var searchQuery = regexp.MustCompile(`^startswith\(on_premises_sam_account_name,'(.*)'\) or startswith\(display_name,'(.*)'\) or startswith\(mail,'(.*)'\)$`) - match = searchQuery.FindStringSubmatch(query) - if len(match) == 4 { - resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*") - if err != nil { - return nil, err - } - resDisp, err := s.index.FindByPartial(&proto.Account{}, "DisplayName", match[2]+"*") - if err != nil { - return nil, err - } - resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[3]+"*") - if err != nil { - return nil, err - } - - searchResults = append(resSam, append(resDisp, resMail...)...) - return unique(searchResults), nil - } - - // id eq 'marie' or on_premises_sam_account_name eq 'marie' - // id eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c' or on_premises_sam_account_name eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c' - var idOrQuery = regexp.MustCompile(`^id eq '(.*)' or on_premises_sam_account_name eq '(.*)'$`) - match = idOrQuery.FindStringSubmatch(query) - if len(match) == 3 { - qID, qSam := match[1], match[2] - tmp := &proto.Account{} - err = s.repo.LoadAccount(ctx, qID, tmp) - if err != nil && !storage.IsNotFoundErr(err) { - return nil, err - } - searchResults, err = s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", qSam) - if err != nil { - return nil, err - } - - if tmp.Id != "" { - searchResults = append(searchResults, tmp.Id) - } - - return unique(searchResults), nil - } - - var onPremQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)'$`) // TODO how is ' escaped in the password? - match = onPremQuery.FindStringSubmatch(query) - if len(match) == 2 { - return s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", match[1]) - } - - var mailQuery = regexp.MustCompile(`^mail eq '(.*)'$`) - match = mailQuery.FindStringSubmatch(query) - if len(match) == 2 { - return s.index.FindBy(&proto.Account{}, "Mail", match[1]) - } - - return nil, merrors.BadRequest(s.id, "unsupported query %s", query) + return s.index.Query(&proto.Account{}, query) } // GetAccount implements the AccountsServiceHandler interface @@ -762,18 +686,6 @@ func (s Service) debugLogAccount(a *proto.Account) *zerolog.Event { }) } -func unique(strSlice []string) []string { - keys := make(map[string]bool) - list := []string{} - for _, entry := range strSlice { - if _, value := keys[entry]; !value { - keys[entry] = true - list = append(list, entry) - } - } - return list -} - func (s Service) accountExists(ctx context.Context, username, mail, id string) (exists bool, err error) { var ids []string ids, err = s.index.FindBy(&proto.Account{}, "preferred_name", username) diff --git a/ocis/go.sum b/ocis/go.sum index e2d77c9646..eeb1656e84 100644 --- a/ocis/go.sum +++ b/ocis/go.sum @@ -55,6 +55,10 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc h1:W4GbdgLvIYP02W1CFpqQbMC6fxEfKHYNlmVIt3QmByk= +github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY= +github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E= github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e h1:Bqtt5C+uVk+vH/t5dmB47uDCTwxw16EYHqvJnmY2aQc= github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e/go.mod h1:njRCDrl+1RQ/A/+KVU8Ho2EWAxUSkohOWczdW3dzDG0=