diff --git a/extensions/search/pkg/search/index/index.go b/extensions/search/pkg/search/index/index.go index 5f84f69ee7..23cd9b2935 100644 --- a/extensions/search/pkg/search/index/index.go +++ b/extensions/search/pkg/search/index/index.go @@ -26,7 +26,10 @@ import ( "time" "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/token/lowercase" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" "github.com/blevesearch/bleve/v2/mapping" "google.golang.org/protobuf/types/known/timestamppb" @@ -57,7 +60,11 @@ type Index struct { // NewPersisted returns a new instance of Index with the data being persisted in the given directory func NewPersisted(path string) (*Index, error) { - bi, err := bleve.New(path, BuildMapping()) + mapping, err := BuildMapping() + if err != nil { + return nil, err + } + bi, err := bleve.New(path, mapping) if err != nil { return nil, err } @@ -204,7 +211,7 @@ func (i *Index) Search(ctx context.Context, req *searchsvc.SearchIndexRequest) ( deletedQuery := bleve.NewBoolFieldQuery(false) deletedQuery.SetField("Deleted") query := bleve.NewConjunctionQuery( - bleve.NewQueryStringQuery("Name:"+req.Query), + bleve.NewQueryStringQuery("Name:"+strings.ToLower(req.Query)), deletedQuery, // Skip documents that have been marked as deleted bleve.NewQueryStringQuery("RootID:"+req.Ref.ResourceId.StorageId+"!"+req.Ref.ResourceId.OpaqueId), // Limit search to the space bleve.NewQueryStringQuery("Path:"+utils.MakeRelativePath(path.Join(req.Ref.Path, "/"))+"*"), // Limit search to this directory in the space @@ -232,10 +239,29 @@ func (i *Index) Search(ctx context.Context, req *searchsvc.SearchIndexRequest) ( } // BuildMapping builds a bleve index mapping which can be used for indexing -func BuildMapping() mapping.IndexMapping { +func BuildMapping() (mapping.IndexMapping, error) { + NameMapping := bleve.NewTextFieldMapping() + NameMapping.Analyzer = "lowercaseKeyword" + + docMapping := bleve.NewDocumentMapping() + docMapping.AddFieldMappingsAt("Name", NameMapping) + indexMapping := bleve.NewIndexMapping() indexMapping.DefaultAnalyzer = keyword.Name - return indexMapping + indexMapping.DefaultMapping = docMapping + err := indexMapping.AddCustomAnalyzer("lowercaseKeyword", + map[string]interface{}{ + "type": custom.Name, + "tokenizer": single.Name, + "token_filters": []string{ + lowercase.Name, + }, + }) + if err != nil { + return nil, err + } + + return indexMapping, nil } func toEntity(ref *sprovider.Reference, ri *sprovider.ResourceInfo) *indexDocument { diff --git a/extensions/search/pkg/search/index/index_test.go b/extensions/search/pkg/search/index/index_test.go index 7727041861..7c46c64b22 100644 --- a/extensions/search/pkg/search/index/index_test.go +++ b/extensions/search/pkg/search/index/index_test.go @@ -37,7 +37,7 @@ var _ = Describe("Index", func() { StorageId: "storageid", OpaqueId: "someopaqueid", }, - Path: "foo.pdf", + Path: "Foo.pdf", Size: 12345, Type: sprovider.ResourceType_RESOURCE_TYPE_FILE, MimeType: "application/pdf", @@ -92,8 +92,10 @@ var _ = Describe("Index", func() { ) BeforeEach(func() { - var err error - bleveIndex, err = bleve.NewMemOnly(index.BuildMapping()) + mapping, err := index.BuildMapping() + Expect(err).ToNot(HaveOccurred()) + + bleveIndex, err = bleve.NewMemOnly(mapping) Expect(err).ToNot(HaveOccurred()) i, err = index.New(bleveIndex) @@ -201,6 +203,21 @@ var _ = Describe("Index", func() { } }) + It("is case-insensitive", func() { + res, err := i.Search(ctx, &searchsvc.SearchIndexRequest{ + Ref: &searchmsg.Reference{ + ResourceId: &searchmsg.ResourceID{ + StorageId: ref.ResourceId.StorageId, + OpaqueId: ref.ResourceId.OpaqueId, + }, + }, + Query: "Foo*", + }) + Expect(err).ToNot(HaveOccurred()) + Expect(res).ToNot(BeNil()) + Expect(len(res.Matches)).To(Equal(1)) + }) + Context("and an additional file in a subdirectory", func() { var ( nestedRef *sprovider.Reference