Merge pull request #767 from owncloud/ocis-697

Support OData for search queries on the Indexer
This commit is contained in:
Jörn Friedrich Dreyer
2020-11-02 14:30:24 +01:00
committed by GitHub
10 changed files with 272 additions and 101 deletions

View File

@@ -3,6 +3,7 @@ module github.com/owncloud/ocis/accounts
go 1.13
require (
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06
github.com/cs3org/go-cs3apis v0.0.0-20200810113633-b00aca449666
github.com/cs3org/reva v1.2.2-0.20200924071957-e6676516e61e
github.com/go-chi/chi v4.1.2+incompatible

View File

@@ -50,6 +50,8 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E=
github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
github.com/Masterminds/goutils v1.1.0 h1:zukEsf/1JZwCMgHiK3GZftabmxiCw4apj3a28RPBiVg=
github.com/Masterminds/goutils v1.1.0/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=

View File

@@ -0,0 +1,15 @@
package indexer
// dedup removes duplicate values in given slice
func dedup(s []string) []string {
for i := 0; i < len(s); i++ {
for i2 := i + 1; i2 < len(s); i2++ {
if s[i] == s[i2] {
// delete
s = append(s[:i2], s[i2+1:]...)
i2--
}
}
}
return s
}

View File

@@ -4,7 +4,9 @@ package indexer
import (
"fmt"
"path"
"strings"
"github.com/CiscoM31/godata"
"github.com/iancoleman/strcase"
"github.com/owncloud/ocis/accounts/pkg/config"
"github.com/owncloud/ocis/accounts/pkg/indexer/errors"
@@ -236,3 +238,151 @@ func (i Indexer) Update(from, to interface{}) error {
return nil
}
// Query parses an OData query into something our indexer.Index understands and resolves it.
func (i Indexer) Query(t interface{}, q string) ([]string, error) {
query, err := godata.ParseFilterString(q)
if err != nil {
return nil, err
}
tree := newQueryTree()
if err := buildTreeFromOdataQuery(query.Tree, &tree); err != nil {
return nil, err
}
results := make([]string, 0)
if err := i.resolveTree(t, &tree, &results); err != nil {
return nil, err
}
return results, nil
}
// t is used to infer the indexed field names. When building an index search query, field names have to respect Golang
// conventions and be in PascalCase. For a better overview on this contemplate reading the reflection package under the
// indexer directory. Traversal of the tree happens in a pre-order fashion.
// TODO implement logic for `and` operators.
func (i Indexer) resolveTree(t interface{}, tree *queryTree, partials *[]string) error {
if partials == nil {
return fmt.Errorf("return value cannot be nil: partials")
}
if tree.left != nil {
_ = i.resolveTree(t, tree.left, partials)
}
if tree.right != nil {
_ = i.resolveTree(t, tree.right, partials)
}
// by the time we're here we reached a leaf node.
if tree.token != nil {
switch tree.token.filterType {
case "FindBy":
operand, err := sanitizeInput(tree.token.operands)
if err != nil {
return err
}
r, err := i.FindBy(t, operand.field, operand.value)
if err != nil {
return err
}
*partials = append(*partials, r...)
case "FindByPartial":
operand, err := sanitizeInput(tree.token.operands)
if err != nil {
return err
}
r, err := i.FindByPartial(t, operand.field, fmt.Sprintf("%v*", operand.value))
if err != nil {
return err
}
*partials = append(*partials, r...)
default:
return fmt.Errorf("unsupported filter: %v", tree.token.filterType)
}
}
*partials = dedup(*partials)
return nil
}
type indexerTuple struct {
field, value string
}
// sanitizeInput returns a tuple of fieldName + value to be applied on indexer.Index filters.
func sanitizeInput(operands []string) (*indexerTuple, error) {
if len(operands) != 2 {
return nil, fmt.Errorf("invalid number of operands for filter function: got %v expected 2", len(operands))
}
// field names are Go public types and by design they are in PascalCase, therefore we need to adhere to this rules.
// for further information on this have a look at the reflection package.
f := strcase.ToCamel(operands[0])
// remove single quotes from value.
v := strings.ReplaceAll(operands[1], "'", "")
return &indexerTuple{
field: f,
value: v,
}, nil
}
// buildTreeFromOdataQuery builds an indexer.queryTree out of a GOData ParseNode. The purpose of this intermediate tree
// is to transform godata operators and functions into supported operations on our index. At the time of this writing
// we only support `FindBy` and `FindByPartial` queries as these are the only implemented filters on indexer.Index(es).
func buildTreeFromOdataQuery(root *godata.ParseNode, tree *queryTree) error {
if root.Token.Type == godata.FilterTokenFunc { // i.e "startswith", "contains"
switch root.Token.Value {
case "startswith":
token := token{
operator: root.Token.Value,
filterType: "FindByPartial",
// TODO sanitize the number of operands it the expected one.
operands: []string{
root.Children[0].Token.Value, // field name, i.e: Name
root.Children[1].Token.Value, // field value, i.e: Jac
},
}
tree.insert(&token)
default:
return fmt.Errorf("operation not supported")
}
}
if root.Token.Type == godata.FilterTokenLogical {
switch root.Token.Value {
case "or":
tree.insert(&token{operator: root.Token.Value})
for _, child := range root.Children {
if err := buildTreeFromOdataQuery(child, tree.left); err != nil {
return err
}
}
case "eq":
tree.insert(&token{
operator: root.Token.Value,
filterType: "FindBy",
operands: []string{
root.Children[0].Token.Value,
root.Children[1].Token.Value,
},
})
for _, child := range root.Children {
if err := buildTreeFromOdataQuery(child, tree.left); err != nil {
return err
}
}
default:
return fmt.Errorf("operator not supported")
}
}
return nil
}

View File

@@ -251,18 +251,51 @@ func TestIndexer_Disk_UpdateWithNonUniqueIndex(t *testing.T) {
_ = os.RemoveAll(dataDir)
}
//func createCs3Indexer() *Indexer {
// return CreateIndexer(&config.Config{
// Repo: config.Repo{
// CS3: config.CS3{
// ProviderAddr: "0.0.0.0:9215",
// DataURL: "http://localhost:9216",
// DataPrefix: "data",
// JWTSecret: "Pive-Fumkiu4",
// },
// },
// })
//}
func TestQueryDiskImpl(t *testing.T) {
dataDir, err := WriteIndexTestData(Data, "ID", "")
assert.NoError(t, err)
indexer := createDiskIndexer(dataDir)
err = indexer.AddIndex(&Account{}, "OnPremisesSamAccountName", "ID", "accounts", "non_unique", nil, false)
assert.NoError(t, err)
err = indexer.AddIndex(&Account{}, "Mail", "ID", "accounts", "non_unique", nil, false)
assert.NoError(t, err)
err = indexer.AddIndex(&Account{}, "ID", "ID", "accounts", "non_unique", nil, false)
assert.NoError(t, err)
acc := Account{
ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2",
Mail: "spooky@skeletons.org",
OnPremisesSamAccountName: "MrDootDoot",
}
_, err = indexer.Add(acc)
assert.NoError(t, err)
r, err := indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "mail eq 'spooky@skeletons.org'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot' or mail eq 'spooky@skeletons.org'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "startswith(on_premises_sam_account_name,'MrDoo')") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "id eq 'ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2' or on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
_ = os.RemoveAll(dataDir)
}
func createDiskIndexer(dataDir string) *Indexer {
return CreateIndexer(&config.Config{

View File

@@ -0,0 +1,40 @@
package indexer
type queryTree struct {
token *token
root bool
left *queryTree
right *queryTree
}
// token to be resolved by the index
type token struct {
operator string // original OData operator. i.e: 'startswith', `or`, `and`.
filterType string // equivalent operator from OData -> indexer i.e FindByPartial or FindBy.
operands []string
}
// newQueryTree constructs a new tree with a root node.
func newQueryTree() queryTree {
return queryTree{
root: true,
}
}
// insert populates first the LHS of the tree first, if this is not possible it fills the RHS.
func (t *queryTree) insert(tkn *token) {
if t != nil && t.root {
t.left = &queryTree{token: tkn}
return
}
if t.left == nil {
t.left = &queryTree{token: tkn}
return
}
if t.left != nil && t.right == nil {
t.right = &queryTree{token: tkn}
return
}
}

View File

@@ -19,6 +19,13 @@ type Pet struct {
UID int
}
// Account mocks an ocis account.
type Account struct {
ID string
OnPremisesSamAccountName string
Mail string
}
// Data mock data.
var Data = map[string][]interface{}{
"users": {
@@ -33,6 +40,9 @@ var Data = map[string][]interface{}{
Pet{ID: "goefe-789", Kind: "Hog", Color: "Green", Name: "Dicky"},
Pet{ID: "xadaf-189", Kind: "Hog", Color: "Green", Name: "Ricky"},
},
"accounts": {
Account{ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2", Mail: "spooky@skeletons.org", OnPremisesSamAccountName: "MrDootDoot"},
},
}
// WriteIndexTestData writes mock data to disk.

View File

@@ -197,83 +197,7 @@ func (s Service) ListAccounts(ctx context.Context, in *proto.ListAccountsRequest
}
func (s Service) findAccountsByQuery(ctx context.Context, query string) ([]string, error) {
var searchResults []string
var err error
// TODO: more explicit queries have to be on top
var onPremOrMailQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)' or mail eq '(.*)'$`)
match := onPremOrMailQuery.FindStringSubmatch(query)
if len(match) == 3 {
resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*")
if err != nil {
return nil, err
}
resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[2]+"*")
if err != nil {
return nil, err
}
searchResults = append(resSam, resMail...)
return unique(searchResults), nil
}
// startswith(on_premises_sam_account_name,'mar') or startswith(display_name,'mar') or startswith(mail,'mar')
var searchQuery = regexp.MustCompile(`^startswith\(on_premises_sam_account_name,'(.*)'\) or startswith\(display_name,'(.*)'\) or startswith\(mail,'(.*)'\)$`)
match = searchQuery.FindStringSubmatch(query)
if len(match) == 4 {
resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*")
if err != nil {
return nil, err
}
resDisp, err := s.index.FindByPartial(&proto.Account{}, "DisplayName", match[2]+"*")
if err != nil {
return nil, err
}
resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[3]+"*")
if err != nil {
return nil, err
}
searchResults = append(resSam, append(resDisp, resMail...)...)
return unique(searchResults), nil
}
// id eq 'marie' or on_premises_sam_account_name eq 'marie'
// id eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c' or on_premises_sam_account_name eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c'
var idOrQuery = regexp.MustCompile(`^id eq '(.*)' or on_premises_sam_account_name eq '(.*)'$`)
match = idOrQuery.FindStringSubmatch(query)
if len(match) == 3 {
qID, qSam := match[1], match[2]
tmp := &proto.Account{}
err = s.repo.LoadAccount(ctx, qID, tmp)
if err != nil && !storage.IsNotFoundErr(err) {
return nil, err
}
searchResults, err = s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", qSam)
if err != nil {
return nil, err
}
if tmp.Id != "" {
searchResults = append(searchResults, tmp.Id)
}
return unique(searchResults), nil
}
var onPremQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)'$`) // TODO how is ' escaped in the password?
match = onPremQuery.FindStringSubmatch(query)
if len(match) == 2 {
return s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", match[1])
}
var mailQuery = regexp.MustCompile(`^mail eq '(.*)'$`)
match = mailQuery.FindStringSubmatch(query)
if len(match) == 2 {
return s.index.FindBy(&proto.Account{}, "Mail", match[1])
}
return nil, merrors.BadRequest(s.id, "unsupported query %s", query)
return s.index.Query(&proto.Account{}, query)
}
// GetAccount implements the AccountsServiceHandler interface
@@ -762,18 +686,6 @@ func (s Service) debugLogAccount(a *proto.Account) *zerolog.Event {
})
}
func unique(strSlice []string) []string {
keys := make(map[string]bool)
list := []string{}
for _, entry := range strSlice {
if _, value := keys[entry]; !value {
keys[entry] = true
list = append(list, entry)
}
}
return list
}
func (s Service) accountExists(ctx context.Context, username, mail, id string) (exists bool, err error) {
var ids []string
ids, err = s.index.FindBy(&proto.Account{}, "preferred_name", username)

View File

@@ -32,6 +32,10 @@ func (s Service) RebuildIndex(ctx context.Context, request *proto.RebuildIndexRe
// recreateContainers adds all indices to the indexer that we have for this service.
func recreateContainers(idx *indexer.Indexer, cfg *config.Config) error {
// Accounts
if err := idx.AddIndex(&proto.Account{}, "Id", "Id", "accounts", "non_unique", nil, true); err != nil {
return err
}
if err := idx.AddIndex(&proto.Account{}, "DisplayName", "Id", "accounts", "non_unique", nil, true); err != nil {
return err
}

View File

@@ -55,6 +55,10 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc h1:W4GbdgLvIYP02W1CFpqQbMC6fxEfKHYNlmVIt3QmByk=
github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E=
github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e h1:Bqtt5C+uVk+vH/t5dmB47uDCTwxw16EYHqvJnmY2aQc=
github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e/go.mod h1:njRCDrl+1RQ/A/+KVU8Ho2EWAxUSkohOWczdW3dzDG0=