first implementation of our own query tree and tree resolution

added godata dep

use correct godata dependency, enhance tests

refactor around indexer search tree

tests reflecting currently used queries

fix static checker

fix linter

replace old regexp with a calll to indexer.Query

fix linter

remove offenses
This commit is contained in:
A.Unger
2020-10-28 11:59:08 +01:00
parent f3fb12b3bd
commit c72e212c91
9 changed files with 268 additions and 101 deletions

View File

@@ -3,6 +3,7 @@ module github.com/owncloud/ocis/accounts
go 1.13
require (
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06
github.com/cs3org/go-cs3apis v0.0.0-20200810113633-b00aca449666
github.com/cs3org/reva v1.2.2-0.20200924071957-e6676516e61e
github.com/go-chi/chi v4.1.2+incompatible

View File

@@ -50,6 +50,8 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E=
github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
github.com/Masterminds/goutils v1.1.0 h1:zukEsf/1JZwCMgHiK3GZftabmxiCw4apj3a28RPBiVg=
github.com/Masterminds/goutils v1.1.0/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=

View File

@@ -0,0 +1,15 @@
package indexer
// dedup removes duplicate values in given slice
func dedup(s []string) []string {
for i := 0; i < len(s); i++ {
for i2 := i + 1; i2 < len(s); i2++ {
if s[i] == s[i2] {
// delete
s = append(s[:i2], s[i2+1:]...)
i2--
}
}
}
return s
}

View File

@@ -4,7 +4,9 @@ package indexer
import (
"fmt"
"path"
"strings"
"github.com/CiscoM31/godata"
"github.com/iancoleman/strcase"
"github.com/owncloud/ocis/accounts/pkg/config"
"github.com/owncloud/ocis/accounts/pkg/indexer/errors"
@@ -236,3 +238,151 @@ func (i Indexer) Update(from, to interface{}) error {
return nil
}
// Query parses an OData query into something our indexer.Index understands and resolves it.
func (i Indexer) Query(t interface{}, q string) ([]string, error) {
query, err := godata.ParseFilterString(q)
if err != nil {
return nil, err
}
tree := newQueryTree()
if err := buildTreeFromOdataQuery(query.Tree, &tree); err != nil {
return nil, err
}
results := make([]string, 0)
if err := i.resolveTree(t, &tree, &results); err != nil {
return nil, err
}
return results, nil
}
// t is used to infer the indexed field names. When building an index search query, field names have to respect Golang
// conventions and be in PascalCase. For a better overview on this contemplate reading the reflection package under the
// indexer directory. Traversal of the tree happens in a pre-order fashion.
// TODO implement logic for `and` operators.
func (i Indexer) resolveTree(t interface{}, tree *queryTree, partials *[]string) error {
if partials == nil {
return fmt.Errorf("return value cannot be nil: partials")
}
if tree.left != nil {
_ = i.resolveTree(t, tree.left, partials)
}
if tree.right != nil {
_ = i.resolveTree(t, tree.right, partials)
}
// by the time we're here we reached a leaf node.
if tree.token != nil {
switch tree.token.filterType {
case "FindBy":
operand, err := sanitizeInput(tree.token.operands)
if err != nil {
return err
}
r, err := i.FindBy(t, operand.field, operand.value)
if err != nil {
return err
}
*partials = append(*partials, r...)
case "FindByPartial":
operand, err := sanitizeInput(tree.token.operands)
if err != nil {
return err
}
r, err := i.FindByPartial(t, operand.field, fmt.Sprintf("%v*", operand.value))
if err != nil {
return err
}
*partials = append(*partials, r...)
default:
return fmt.Errorf("unsupported filter: %v", tree.token.filterType)
}
}
*partials = dedup(*partials)
return nil
}
type indexerTuple struct {
field, value string
}
// sanitizeInput returns a tuple of fieldName + value to be applied on indexer.Index filters.
func sanitizeInput(operands []string) (*indexerTuple, error) {
if len(operands) != 2 {
return nil, fmt.Errorf("invalid number of operands for filter function: got %v expected 2", len(operands))
}
// field names are Go public types and by design they are in PascalCase, therefore we need to adhere to this rules.
// for further information on this have a look at the reflection package.
f := strcase.ToCamel(operands[0])
// remove single quotes from value.
v := strings.ReplaceAll(operands[1], "'", "")
return &indexerTuple{
field: f,
value: v,
}, nil
}
// buildTreeFromOdataQuery builds an indexer.queryTree out of a GOData ParseNode. The purpose of this intermediate tree
// is to transform godata operators and functions into supported operations on our index. At the time of this writing
// we only support `FindBy` and `FindByPartial` queries as these are the only implemented filters on indexer.Index(es).
func buildTreeFromOdataQuery(root *godata.ParseNode, tree *queryTree) error {
if root.Token.Type == godata.FilterTokenFunc { // i.e "startswith", "contains"
switch root.Token.Value {
case "startswith":
token := token{
operator: root.Token.Value,
filterType: "FindByPartial",
// TODO sanitize the number of operands it the expected one.
operands: []string{
root.Children[0].Token.Value, // field name, i.e: Name
root.Children[1].Token.Value, // field value, i.e: Jac
},
}
tree.insert(&token)
default:
return fmt.Errorf("operation not supported")
}
}
if root.Token.Type == godata.FilterTokenLogical {
switch root.Token.Value {
case "or":
tree.insert(&token{operator: root.Token.Value})
for _, child := range root.Children {
if err := buildTreeFromOdataQuery(child, tree.left); err != nil {
return err
}
}
case "eq":
tree.insert(&token{
operator: root.Token.Value,
filterType: "FindBy",
operands: []string{
root.Children[0].Token.Value,
root.Children[1].Token.Value,
},
})
for _, child := range root.Children {
if err := buildTreeFromOdataQuery(child, tree.left); err != nil {
return err
}
}
default:
return fmt.Errorf("operator not supported")
}
}
return nil
}

View File

@@ -251,18 +251,51 @@ func TestIndexer_Disk_UpdateWithNonUniqueIndex(t *testing.T) {
_ = os.RemoveAll(dataDir)
}
//func createCs3Indexer() *Indexer {
// return CreateIndexer(&config.Config{
// Repo: config.Repo{
// CS3: config.CS3{
// ProviderAddr: "0.0.0.0:9215",
// DataURL: "http://localhost:9216",
// DataPrefix: "data",
// JWTSecret: "Pive-Fumkiu4",
// },
// },
// })
//}
func TestQueryDiskImpl(t *testing.T) {
dataDir, err := WriteIndexTestData(Data, "ID", "")
assert.NoError(t, err)
indexer := createDiskIndexer(dataDir)
err = indexer.AddIndex(&Account{}, "OnPremisesSamAccountName", "ID", "accounts", "non_unique", nil, false)
assert.NoError(t, err)
err = indexer.AddIndex(&Account{}, "Mail", "ID", "accounts", "non_unique", nil, false)
assert.NoError(t, err)
err = indexer.AddIndex(&Account{}, "ID", "ID", "accounts", "non_unique", nil, false)
assert.NoError(t, err)
acc := Account{
ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2",
Mail: "spooky@skeletons.org",
OnPremisesSamAccountName: "MrDootDoot",
}
_, err = indexer.Add(acc)
assert.NoError(t, err)
r, err := indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "mail eq 'spooky@skeletons.org'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "on_premises_sam_account_name eq 'MrDootDoot' or mail eq 'spooky@skeletons.org'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "startswith(on_premises_sam_account_name,'MrDoo')") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
r, err = indexer.Query(&Account{}, "id eq 'ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2' or on_premises_sam_account_name eq 'MrDootDoot'") // this query will match both pets.
assert.NoError(t, err)
assert.Equal(t, []string{"ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2"}, r)
_ = os.RemoveAll(dataDir)
}
func createDiskIndexer(dataDir string) *Indexer {
return CreateIndexer(&config.Config{

View File

@@ -0,0 +1,40 @@
package indexer
type queryTree struct {
token *token
root bool
left *queryTree
right *queryTree
}
// token to be resolved by the index
type token struct {
operator string // original OData operator. i.e: 'startswith', `or`, `and`.
filterType string // equivalent operator from OData -> indexer i.e FindByPartial or FindBy.
operands []string
}
// newQueryTree constructs a new tree with a root node.
func newQueryTree() queryTree {
return queryTree{
root: true,
}
}
// insert populates first the LHS of the tree first, if this is not possible it fills the RHS.
func (t *queryTree) insert(tkn *token) {
if t != nil && t.root {
t.left = &queryTree{token: tkn}
return
}
if t.left == nil {
t.left = &queryTree{token: tkn}
return
}
if t.left != nil && t.right == nil {
t.right = &queryTree{token: tkn}
return
}
}

View File

@@ -19,6 +19,13 @@ type Pet struct {
UID int
}
// Account mocks an ocis account.
type Account struct {
ID string
OnPremisesSamAccountName string
Mail string
}
// Data mock data.
var Data = map[string][]interface{}{
"users": {
@@ -33,6 +40,9 @@ var Data = map[string][]interface{}{
Pet{ID: "goefe-789", Kind: "Hog", Color: "Green", Name: "Dicky"},
Pet{ID: "xadaf-189", Kind: "Hog", Color: "Green", Name: "Ricky"},
},
"accounts": {
Account{ID: "ba5b6e54-e29d-4b2b-8cc4-0a0b958140d2", Mail: "spooky@skeletons.org", OnPremisesSamAccountName: "MrDootDoot"},
},
}
// WriteIndexTestData writes mock data to disk.

View File

@@ -197,83 +197,7 @@ func (s Service) ListAccounts(ctx context.Context, in *proto.ListAccountsRequest
}
func (s Service) findAccountsByQuery(ctx context.Context, query string) ([]string, error) {
var searchResults []string
var err error
// TODO: more explicit queries have to be on top
var onPremOrMailQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)' or mail eq '(.*)'$`)
match := onPremOrMailQuery.FindStringSubmatch(query)
if len(match) == 3 {
resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*")
if err != nil {
return nil, err
}
resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[2]+"*")
if err != nil {
return nil, err
}
searchResults = append(resSam, resMail...)
return unique(searchResults), nil
}
// startswith(on_premises_sam_account_name,'mar') or startswith(display_name,'mar') or startswith(mail,'mar')
var searchQuery = regexp.MustCompile(`^startswith\(on_premises_sam_account_name,'(.*)'\) or startswith\(display_name,'(.*)'\) or startswith\(mail,'(.*)'\)$`)
match = searchQuery.FindStringSubmatch(query)
if len(match) == 4 {
resSam, err := s.index.FindByPartial(&proto.Account{}, "OnPremisesSamAccountName", match[1]+"*")
if err != nil {
return nil, err
}
resDisp, err := s.index.FindByPartial(&proto.Account{}, "DisplayName", match[2]+"*")
if err != nil {
return nil, err
}
resMail, err := s.index.FindByPartial(&proto.Account{}, "Mail", match[3]+"*")
if err != nil {
return nil, err
}
searchResults = append(resSam, append(resDisp, resMail...)...)
return unique(searchResults), nil
}
// id eq 'marie' or on_premises_sam_account_name eq 'marie'
// id eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c' or on_premises_sam_account_name eq 'f7fbf8c8-139b-4376-b307-cf0a8c2d0d9c'
var idOrQuery = regexp.MustCompile(`^id eq '(.*)' or on_premises_sam_account_name eq '(.*)'$`)
match = idOrQuery.FindStringSubmatch(query)
if len(match) == 3 {
qID, qSam := match[1], match[2]
tmp := &proto.Account{}
err = s.repo.LoadAccount(ctx, qID, tmp)
if err != nil && !storage.IsNotFoundErr(err) {
return nil, err
}
searchResults, err = s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", qSam)
if err != nil {
return nil, err
}
if tmp.Id != "" {
searchResults = append(searchResults, tmp.Id)
}
return unique(searchResults), nil
}
var onPremQuery = regexp.MustCompile(`^on_premises_sam_account_name eq '(.*)'$`) // TODO how is ' escaped in the password?
match = onPremQuery.FindStringSubmatch(query)
if len(match) == 2 {
return s.index.FindBy(&proto.Account{}, "OnPremisesSamAccountName", match[1])
}
var mailQuery = regexp.MustCompile(`^mail eq '(.*)'$`)
match = mailQuery.FindStringSubmatch(query)
if len(match) == 2 {
return s.index.FindBy(&proto.Account{}, "Mail", match[1])
}
return nil, merrors.BadRequest(s.id, "unsupported query %s", query)
return s.index.Query(&proto.Account{}, query)
}
// GetAccount implements the AccountsServiceHandler interface
@@ -762,18 +686,6 @@ func (s Service) debugLogAccount(a *proto.Account) *zerolog.Event {
})
}
func unique(strSlice []string) []string {
keys := make(map[string]bool)
list := []string{}
for _, entry := range strSlice {
if _, value := keys[entry]; !value {
keys[entry] = true
list = append(list, entry)
}
}
return list
}
func (s Service) accountExists(ctx context.Context, username, mail, id string) (exists bool, err error) {
var ids []string
ids, err = s.index.FindBy(&proto.Account{}, "preferred_name", username)

View File

@@ -55,6 +55,10 @@ github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzS
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc h1:W4GbdgLvIYP02W1CFpqQbMC6fxEfKHYNlmVIt3QmByk=
github.com/CiscoM31/godata v0.0.0-20200914000433-6585b72239dc/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06 h1:FKxVU/j9Dd8Je0YkVkm8Fxpz9zIeN21SEkcbzA6NWgY=
github.com/CiscoM31/godata v0.0.0-20201003040028-eadcd34e7f06/go.mod h1:tjaihnMBH6p5DVnGBksDQQHpErbrLvb9ek6cEWuyc7E=
github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e h1:Bqtt5C+uVk+vH/t5dmB47uDCTwxw16EYHqvJnmY2aQc=
github.com/GeertJohan/yubigo v0.0.0-20190917122436-175bc097e60e/go.mod h1:njRCDrl+1RQ/A/+KVU8Ho2EWAxUSkohOWczdW3dzDG0=