Change filename search to be case-insensitive

This commit is contained in:
André Duffeck
2022-05-10 15:49:34 +02:00
parent e91318458d
commit f7e8e48a1d
2 changed files with 50 additions and 7 deletions

View File

@@ -26,7 +26,10 @@ import (
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
"github.com/blevesearch/bleve/v2/mapping"
"google.golang.org/protobuf/types/known/timestamppb"
@@ -57,7 +60,11 @@ type Index struct {
// NewPersisted returns a new instance of Index with the data being persisted in the given directory
func NewPersisted(path string) (*Index, error) {
bi, err := bleve.New(path, BuildMapping())
mapping, err := BuildMapping()
if err != nil {
return nil, err
}
bi, err := bleve.New(path, mapping)
if err != nil {
return nil, err
}
@@ -204,7 +211,7 @@ func (i *Index) Search(ctx context.Context, req *searchsvc.SearchIndexRequest) (
deletedQuery := bleve.NewBoolFieldQuery(false)
deletedQuery.SetField("Deleted")
query := bleve.NewConjunctionQuery(
bleve.NewQueryStringQuery("Name:"+req.Query),
bleve.NewQueryStringQuery("Name:"+strings.ToLower(req.Query)),
deletedQuery, // Skip documents that have been marked as deleted
bleve.NewQueryStringQuery("RootID:"+req.Ref.ResourceId.StorageId+"!"+req.Ref.ResourceId.OpaqueId), // Limit search to the space
bleve.NewQueryStringQuery("Path:"+utils.MakeRelativePath(path.Join(req.Ref.Path, "/"))+"*"), // Limit search to this directory in the space
@@ -232,10 +239,29 @@ func (i *Index) Search(ctx context.Context, req *searchsvc.SearchIndexRequest) (
}
// BuildMapping builds a bleve index mapping which can be used for indexing.
//
// The "Name" field is analyzed with a custom analyzer registered under the
// name "lowercaseKeyword" (the `single` tokenizer followed by the `lowercase`
// token filter); `keyword` is set as the index's default analyzer. An error
// is returned when registering the custom analyzer fails.
func BuildMapping() mapping.IndexMapping {
// NOTE(review): the signature above and the bare `return indexMapping` further
// down look like the pre-change lines shown by the commit view next to their
// replacements — confirm against the actual file.
func BuildMapping() (mapping.IndexMapping, error) {
// Field mapping for "Name"; it refers to the custom analyzer by name, which is
// registered on the index mapping below.
NameMapping := bleve.NewTextFieldMapping()
NameMapping.Analyzer = "lowercaseKeyword"
// Document mapping that carries the "Name" field mapping; it is installed as
// the index's default mapping below.
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("Name", NameMapping)
indexMapping := bleve.NewIndexMapping()
indexMapping.DefaultAnalyzer = keyword.Name
return indexMapping
indexMapping.DefaultMapping = docMapping
// Register the analyzer referenced by NameMapping.Analyzer. Presumably this
// yields one lowercased token per name, matching the lowercased query built
// in Search — confirm against the bleve analyzer documentation.
err := indexMapping.AddCustomAnalyzer("lowercaseKeyword",
map[string]interface{}{
"type": custom.Name,
"tokenizer": single.Name,
"token_filters": []string{
lowercase.Name,
},
})
if err != nil {
return nil, err
}
return indexMapping, nil
}
func toEntity(ref *sprovider.Reference, ri *sprovider.ResourceInfo) *indexDocument {

View File

@@ -37,7 +37,7 @@ var _ = Describe("Index", func() {
StorageId: "storageid",
OpaqueId: "someopaqueid",
},
Path: "foo.pdf",
Path: "Foo.pdf",
Size: 12345,
Type: sprovider.ResourceType_RESOURCE_TYPE_FILE,
MimeType: "application/pdf",
@@ -92,8 +92,10 @@ var _ = Describe("Index", func() {
)
BeforeEach(func() {
var err error
bleveIndex, err = bleve.NewMemOnly(index.BuildMapping())
mapping, err := index.BuildMapping()
Expect(err).ToNot(HaveOccurred())
bleveIndex, err = bleve.NewMemOnly(mapping)
Expect(err).ToNot(HaveOccurred())
i, err = index.New(bleveIndex)
@@ -201,6 +203,21 @@ var _ = Describe("Index", func() {
}
})
It("is case-insensitive", func() {
	// Searching with the fixture's exact casing only ("Foo*") would also
	// succeed against a case-sensitive index, so it proves nothing on its
	// own. Run the same search with several casings of the query; each one
	// must find the same single document.
	for _, query := range []string{"Foo*", "foo*", "FOO*", "fOo*"} {
		res, err := i.Search(ctx, &searchsvc.SearchIndexRequest{
			Ref: &searchmsg.Reference{
				ResourceId: &searchmsg.ResourceID{
					StorageId: ref.ResourceId.StorageId,
					OpaqueId:  ref.ResourceId.OpaqueId,
				},
			},
			Query: query,
		})
		// The trailing arguments annotate a failure with the offending query.
		Expect(err).ToNot(HaveOccurred(), "query %q", query)
		Expect(res).ToNot(BeNil(), "query %q", query)
		Expect(len(res.Matches)).To(Equal(1), "query %q", query)
	}
})
Context("and an additional file in a subdirectory", func() {
var (
nestedRef *sprovider.Reference