Merge pull request #74 from butonic/fix-document-type-mapping

fix index mapping
This commit is contained in:
Jörn Friedrich Dreyer
2020-07-31 11:51:53 +02:00
committed by GitHub
5 changed files with 74 additions and 16 deletions

View File

@@ -0,0 +1,5 @@
Enhancement: Add simple user listing UI
We added an extension for ocis-web that shows a simple list of all existing users.
https://github.com/owncloud/ocis-accounts/pull/51

View File

@@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"strconv"
"strings"
"github.com/CiscoM31/godata"
"github.com/blevesearch/bleve"
@@ -35,9 +36,11 @@ func recursiveBuildQuery(n *godata.ParseNode) (query.Query, error) {
if n.Children[1].Token.Type != godata.FilterTokenString {
return nil, errors.New("startswith expected a string as the second param")
}
q := bleve.NewTermQuery(n.Children[1].Token.Value)
q := bleve.NewPrefixQuery(n.Children[1].Token.Value)
q.SetField(n.Children[0].Token.Value)
return q, nil
// TODO contains as regex?
// TODO endswith as regex?
default:
return nil, godata.NotImplementedError(n.Token.Value + " is not implemented.")
}
@@ -52,11 +55,20 @@ func recursiveBuildQuery(n *godata.ParseNode) (query.Query, error) {
return nil, errors.New("equality expected a literal on the lhs")
}
if n.Children[1].Token.Type == godata.FilterTokenString {
// string tokens are enclosed with 'some string'
// ' is escaped as ''
// TODO unescape '' as '
// http://docs.oasis-open.org/odata/odata/v4.01/cs01/part2-url-conventions/odata-v4.01-cs01-part2-url-conventions.html#sec_URLComponents
q := bleve.NewTermQuery(n.Children[1].Token.Value[1 : len(n.Children[1].Token.Value)-1])
// for escape rules see http://docs.oasis-open.org/odata/odata/v4.01/cs01/part2-url-conventions/odata-v4.01-cs01-part2-url-conventions.html#sec_URLComponents
// remove enclosing ' of string tokens (looks like 'some ol'' string')
value := n.Children[1].Token.Value[1 : len(n.Children[1].Token.Value)-1]
// unescape '' as '
unascaped := strings.ReplaceAll(value, "''", "'")
// use a match query, so the field mapping, e.g. lowercase is applied to the value
// remember we defined the field mapping for `preferred_name` to be lowercase
// a term query like `preferred_name eq 'Artur'` would use `Artur` to search in the index and come up empty
// a match query will apply the field mapping (lowercasing `Artur` to `artur`) before doing the search
// TODO there is a mismatch between the LDAP and odata filters:
// - LDAP matching rules depend on the attribute: see https://ldapwiki.com/wiki/MatchingRule
// - odata has functions like `startswith`, `contains`, `tolower`, `toupper`, `matchesPattern` and many more: see http://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part1-protocol.html#sec_BuiltinQueryFunctions
// - ocis-glauth should do the mapping between LDAP and odata filter
q := bleve.NewMatchQuery(unascaped)
q.SetField(n.Children[0].Token.Value)
return q, nil
} else if n.Children[1].Token.Type == godata.FilterTokenInteger {

View File

@@ -48,7 +48,10 @@ func (s Service) indexAccounts(path string) (err error) {
return
}
for _, file := range list {
_ = s.indexAccount(file.Name())
err = s.indexAccount(file.Name())
if err != nil {
s.log.Error().Err(err).Str("file", file.Name()).Msg("could not index account")
}
}
return

View File

@@ -30,7 +30,10 @@ func (s Service) indexGroups(path string) (err error) {
return
}
for _, file := range list {
_ = s.indexGroup(file.Name())
err = s.indexGroup(file.Name())
if err != nil {
s.log.Error().Err(err).Str("file", file.Name()).Msg("could not index account")
}
}
return

View File

@@ -10,8 +10,13 @@ import (
"strings"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/custom"
"github.com/blevesearch/bleve/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/analysis/analyzer/simple"
"github.com/blevesearch/bleve/analysis/analyzer/standard"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
"github.com/owncloud/ocis-accounts/pkg/config"
"github.com/owncloud/ocis-accounts/pkg/proto/v0"
"github.com/owncloud/ocis-pkg/v2/log"
@@ -213,9 +218,11 @@ func New(opts ...Option) (s *Service, err error) {
// keep all symbols in terms to allow exact matching, e.g. emails
indexMapping.DefaultAnalyzer = keyword.Name
// TODO don't bother to store fields as we will load the account from disk
//groupsFieldMapping := bleve.NewTextFieldMapping()
//blogMapping.AddFieldMappingsAt("memberOf", nameFieldMapping)
// TODO index groups and accounts as different types!
// Reusable mapping for text
standardTextFieldMapping := bleve.NewTextFieldMapping()
standardTextFieldMapping.Analyzer = standard.Name
standardTextFieldMapping.Store = false
// Reusable mapping for text, uses english stop word removal
simpleTextFieldMapping := bleve.NewTextFieldMapping()
@@ -227,13 +234,33 @@ func New(opts ...Option) (s *Service, err error) {
keywordFieldMapping.Analyzer = keyword.Name
keywordFieldMapping.Store = false
// Reusable mapping for lowercase text
err = indexMapping.AddCustomAnalyzer("lowercase",
map[string]interface{}{
"type": custom.Name,
"tokenizer": unicode.Name,
"token_filters": []string{
lowercase.Name,
},
})
if err != nil {
return nil, err
}
lowercaseTextFieldMapping := bleve.NewTextFieldMapping()
lowercaseTextFieldMapping.Analyzer = "lowercase"
lowercaseTextFieldMapping.Store = true
// accounts
accountMapping := bleve.NewDocumentMapping()
indexMapping.AddDocumentMapping("account", accountMapping)
// Text
accountMapping.AddFieldMappingsAt("display_name", simpleTextFieldMapping)
accountMapping.AddFieldMappingsAt("description", simpleTextFieldMapping)
accountMapping.AddFieldMappingsAt("display_name", standardTextFieldMapping)
accountMapping.AddFieldMappingsAt("description", standardTextFieldMapping)
// Lowercase
accountMapping.AddFieldMappingsAt("on_premises_sam_account_name", lowercaseTextFieldMapping)
accountMapping.AddFieldMappingsAt("preferred_name", lowercaseTextFieldMapping)
// Keywords
accountMapping.AddFieldMappingsAt("mail", keywordFieldMapping)
@@ -243,10 +270,18 @@ func New(opts ...Option) (s *Service, err error) {
indexMapping.AddDocumentMapping("group", groupMapping)
// Text
groupMapping.AddFieldMappingsAt("display_name", simpleTextFieldMapping)
groupMapping.AddFieldMappingsAt("description", simpleTextFieldMapping)
groupMapping.AddFieldMappingsAt("display_name", standardTextFieldMapping)
groupMapping.AddFieldMappingsAt("description", standardTextFieldMapping)
indexMapping.TypeField = "bleve_type"
// Lowercase
groupMapping.AddFieldMappingsAt("on_premises_sam_account_name", lowercaseTextFieldMapping)
// Tell blevesearch how to determine the type of the structs that are indexed.
// The referenced field needs to match the struct field exactly and it must be public.
// See pkg/proto/v0/bleve.go for how we wrap the generated Account and Group to add a
// BleveType property which is indexed as `bleve_type` so we can also distinguish the
// documents in the index by querying for that property.
indexMapping.TypeField = "BleveType"
s = &Service{
id: cfg.GRPC.Namespace + "." + cfg.Server.Name,