From c6c718eb419b67fd6c5d3b19cddb7d3a00a66a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Thu, 12 May 2022 10:57:06 +0200 Subject: [PATCH] Allow for searching by metadata like "Size:<100" --- .../search/pkg/search/index/index_test.go | 211 ++++++++++-------- .../pkg/search/provider/searchprovider.go | 12 +- .../search/provider/searchprovider_test.go | 9 + extensions/webdav/pkg/service/v0/search.go | 2 +- 4 files changed, 136 insertions(+), 98 deletions(-) diff --git a/extensions/search/pkg/search/index/index_test.go b/extensions/search/pkg/search/index/index_test.go index af7a3fe507..582df6f4f6 100644 --- a/extensions/search/pkg/search/index/index_test.go +++ b/extensions/search/pkg/search/index/index_test.go @@ -128,120 +128,139 @@ var _ = Describe("Index", func() { }) Describe("Search", func() { - It("finds files with spaces in the filename", func() { - ri.Path = "Foo oo.pdf" - ref.Path = "./" + ri.Path - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(ref.ResourceId, `Name:foo\ o*`, 1) - }) - - It("finds files by digits in the filename", func() { - ri.Path = "12345.pdf" - ref.Path = "./" + ri.Path - err := i.Add(ref, ri) - Expect(err).ToNot(HaveOccurred()) - - assertDocCount(ref.ResourceId, `Name:1234*`, 1) - }) - - Context("with a file in the root of the space", func() { + Context("by other fields than filename", func() { JustBeforeEach(func() { err := i.Add(ref, ri) Expect(err).ToNot(HaveOccurred()) }) - It("scopes the search to the specified space", func() { - resourceId := &sprovider.ResourceId{ - StorageId: "differentstorageid", - OpaqueId: "differentopaqueid", - } - assertDocCount(resourceId, `Name:foo.pdf`, 0) + It("finds files by size", func() { + assertDocCount(ref.ResourceId, `Size:12345`, 1) + assertDocCount(ref.ResourceId, `Size:>1000`, 1) + assertDocCount(ref.ResourceId, `Size:<100000`, 1) + + assertDocCount(ref.ResourceId, `Size:12344`, 0) + assertDocCount(ref.ResourceId, `Size:<1000`, 0) + assertDocCount(ref.ResourceId, `Size:>100000`, 0) + }) + }) + + Context("by filename", func() { + It("finds files with spaces in the filename", func() { + ri.Path = "Foo oo.pdf" + ref.Path = "./" + ri.Path + err := i.Add(ref, ri) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(ref.ResourceId, `Name:foo\ o*`, 1) }) - It("limits the search to the specified fields", func() { - assertDocCount(ref.ResourceId, "Name:*"+ref.ResourceId.OpaqueId+"*", 0) + It("finds files by digits in the filename", func() { + ri.Path = "12345.pdf" + ref.Path = "./" + ri.Path + err := i.Add(ref, ri) + Expect(err).ToNot(HaveOccurred()) + + assertDocCount(ref.ResourceId, `Name:1234*`, 1) }) - It("returns all desired fields", func() { - matches := assertDocCount(ref.ResourceId, "Name:foo.pdf", 1) - match := matches[0] - Expect(match.Entity.Ref.ResourceId.OpaqueId).To(Equal(ref.ResourceId.OpaqueId)) - Expect(match.Entity.Ref.Path).To(Equal(ref.Path)) - Expect(match.Entity.Id.OpaqueId).To(Equal(ri.Id.OpaqueId)) - Expect(match.Entity.Name).To(Equal(ri.Path)) - Expect(match.Entity.Size).To(Equal(ri.Size)) - Expect(match.Entity.Type).To(Equal(uint64(ri.Type))) - Expect(match.Entity.MimeType).To(Equal(ri.MimeType)) - Expect(match.Entity.Deleted).To(BeFalse()) - Expect(uint64(match.Entity.LastModifiedTime.AsTime().Unix())).To(Equal(ri.Mtime.Seconds)) - }) - - It("finds files by name, prefix or substring match", func() { - queries := []string{"foo.pdf", "foo*", "*oo.p*"} - for _, query := range queries { - matches := assertDocCount(ref.ResourceId, query, 1) - Expect(matches[0].Entity.Ref.ResourceId.OpaqueId).To(Equal(ref.ResourceId.OpaqueId)) - Expect(matches[0].Entity.Ref.Path).To(Equal(ref.Path)) - Expect(matches[0].Entity.Id.OpaqueId).To(Equal(ri.Id.OpaqueId)) - Expect(matches[0].Entity.Name).To(Equal(ri.Path)) - Expect(matches[0].Entity.Size).To(Equal(ri.Size)) - } - }) - - It("uses a lower-case index", func() { - assertDocCount(ref.ResourceId, "Name:foo*", 1) - assertDocCount(ref.ResourceId, "Name:Foo*", 0) - }) - - Context("and an additional file in a subdirectory", func() { - var ( - nestedRef *sprovider.Reference - nestedRI *sprovider.ResourceInfo - ) - - BeforeEach(func() { - nestedRef = &sprovider.Reference{ - ResourceId: &sprovider.ResourceId{ - StorageId: "storageid", - OpaqueId: "rootopaqueid", - }, - Path: "./nested/nestedpdf.pdf", - } - nestedRI = &sprovider.ResourceInfo{ - Id: &sprovider.ResourceId{ - StorageId: "storageid", - OpaqueId: "nestedopaqueid", - }, - Path: "nestedpdf.pdf", - Size: 12345, - } - err := i.Add(nestedRef, nestedRI) + Context("with a file in the root of the space", func() { + JustBeforeEach(func() { + err := i.Add(ref, ri) Expect(err).ToNot(HaveOccurred()) }) - It("finds files living deeper in the tree by filename, prefix or substring match", func() { - queries := []string{"nestedpdf.pdf", "nested*", "*tedpdf.*"} + It("scopes the search to the specified space", func() { + resourceId := &sprovider.ResourceId{ + StorageId: "differentstorageid", + OpaqueId: "differentopaqueid", + } + assertDocCount(resourceId, `Name:foo.pdf`, 0) + }) + + It("limits the search to the specified fields", func() { + assertDocCount(ref.ResourceId, "Name:*"+ref.ResourceId.OpaqueId+"*", 0) + }) + + It("returns all desired fields", func() { + matches := assertDocCount(ref.ResourceId, "Name:foo.pdf", 1) + match := matches[0] + Expect(match.Entity.Ref.ResourceId.OpaqueId).To(Equal(ref.ResourceId.OpaqueId)) + Expect(match.Entity.Ref.Path).To(Equal(ref.Path)) + Expect(match.Entity.Id.OpaqueId).To(Equal(ri.Id.OpaqueId)) + Expect(match.Entity.Name).To(Equal(ri.Path)) + Expect(match.Entity.Size).To(Equal(ri.Size)) + Expect(match.Entity.Type).To(Equal(uint64(ri.Type))) + Expect(match.Entity.MimeType).To(Equal(ri.MimeType)) + Expect(match.Entity.Deleted).To(BeFalse()) + Expect(uint64(match.Entity.LastModifiedTime.AsTime().Unix())).To(Equal(ri.Mtime.Seconds)) + }) + + It("finds files by name, prefix or substring match", func() { + queries := []string{"foo.pdf", "foo*", "*oo.p*"} for _, query := range queries { - assertDocCount(ref.ResourceId, query, 1) + matches := assertDocCount(ref.ResourceId, query, 1) + Expect(matches[0].Entity.Ref.ResourceId.OpaqueId).To(Equal(ref.ResourceId.OpaqueId)) + Expect(matches[0].Entity.Ref.Path).To(Equal(ref.Path)) + Expect(matches[0].Entity.Id.OpaqueId).To(Equal(ri.Id.OpaqueId)) + Expect(matches[0].Entity.Name).To(Equal(ri.Path)) + Expect(matches[0].Entity.Size).To(Equal(ri.Size)) } }) - It("does not find the higher levels when limiting the searched directory", func() { - res, err := i.Search(ctx, &searchsvc.SearchIndexRequest{ - Ref: &searchmsg.Reference{ - ResourceId: &searchmsg.ResourceID{ - StorageId: ref.ResourceId.StorageId, - OpaqueId: ref.ResourceId.OpaqueId, + It("uses a lower-case index", func() { + assertDocCount(ref.ResourceId, "Name:foo*", 1) + assertDocCount(ref.ResourceId, "Name:Foo*", 0) + }) + + Context("and an additional file in a subdirectory", func() { + var ( + nestedRef *sprovider.Reference + nestedRI *sprovider.ResourceInfo + ) + + BeforeEach(func() { + nestedRef = &sprovider.Reference{ + ResourceId: &sprovider.ResourceId{ + StorageId: "storageid", + OpaqueId: "rootopaqueid", }, - Path: "./nested/", - }, - Query: "Name:foo.pdf", + Path: "./nested/nestedpdf.pdf", + } + nestedRI = &sprovider.ResourceInfo{ + Id: &sprovider.ResourceId{ + StorageId: "storageid", + OpaqueId: "nestedopaqueid", + }, + Path: "nestedpdf.pdf", + Size: 12345, + } + err := i.Add(nestedRef, nestedRI) + Expect(err).ToNot(HaveOccurred()) + }) + + It("finds files living deeper in the tree by filename, prefix or substring match", func() { + queries := []string{"nestedpdf.pdf", "nested*", "*tedpdf.*"} + for _, query := range queries { + assertDocCount(ref.ResourceId, query, 1) + } + }) + + It("does not find the higher levels when limiting the searched directory", func() { + res, err := i.Search(ctx, &searchsvc.SearchIndexRequest{ + Ref: &searchmsg.Reference{ + ResourceId: &searchmsg.ResourceID{ + StorageId: ref.ResourceId.StorageId, + OpaqueId: ref.ResourceId.OpaqueId, + }, + Path: "./nested/", + }, + Query: "Name:foo.pdf", + }) + Expect(err).ToNot(HaveOccurred()) + Expect(res).ToNot(BeNil()) + Expect(len(res.Matches)).To(Equal(0)) }) - Expect(err).ToNot(HaveOccurred()) - Expect(res).ToNot(BeNil()) - Expect(len(res.Matches)).To(Equal(0)) }) }) }) diff --git a/extensions/search/pkg/search/provider/searchprovider.go b/extensions/search/pkg/search/provider/searchprovider.go index 2c6fc46079..233d12f195 100644 --- a/extensions/search/pkg/search/provider/searchprovider.go +++ b/extensions/search/pkg/search/provider/searchprovider.go @@ -125,7 +125,7 @@ func (p *Provider) Search(ctx context.Context, req *searchsvc.SearchRequest) (*s _, rootStorageID := storagespace.SplitStorageID(space.Root.StorageId) res, err := p.indexClient.Search(ctx, &searchsvc.SearchIndexRequest{ - Query: "Name:" + strings.ReplaceAll(strings.ToLower(req.Query), " ", `\ `), + Query: formatQuery(req.Query), Ref: &searchmsg.Reference{ ResourceId: &searchmsg.ResourceID{ StorageId: space.Root.StorageId, @@ -217,3 +217,13 @@ func (p *Provider) logDocCount() { } p.logger.Debug().Interface("count", c).Msg("new document count") } + +func formatQuery(q string) string { + query := q + if strings.Contains(q, ":") { + return q // Sophisticated field based search + } + + // this is a basic filename search + return "Name:*" + strings.ReplaceAll(strings.ToLower(query), " ", `\ `) + "*" +} diff --git a/extensions/search/pkg/search/provider/searchprovider_test.go b/extensions/search/pkg/search/provider/searchprovider_test.go index 2f6e2a2abf..288a559a4b 100644 --- a/extensions/search/pkg/search/provider/searchprovider_test.go +++ b/extensions/search/pkg/search/provider/searchprovider_test.go @@ -155,6 +155,15 @@ var _ = Describe("Searchprovider", func() { })) }) + It("does not mess with field-based searches", func() { + p.Search(ctx, &searchsvc.SearchRequest{ + Query: "Size:<10", + }) + indexClient.AssertCalled(GinkgoT(), "Search", mock.Anything, mock.MatchedBy(func(req *searchsvc.SearchIndexRequest) bool { + return req.Query == "Size:<10" + })) + }) + It("escapes special characters", func() { p.Search(ctx, &searchsvc.SearchRequest{ Query: "Foo oo.pdf", diff --git a/extensions/webdav/pkg/service/v0/search.go b/extensions/webdav/pkg/service/v0/search.go index e90382984f..96288df10b 100644 --- a/extensions/webdav/pkg/service/v0/search.go +++ b/extensions/webdav/pkg/service/v0/search.go @@ -43,7 +43,7 @@ func (g Webdav) Search(w http.ResponseWriter, r *http.Request) { ctx := revactx.ContextSetToken(r.Context(), t) ctx = metadata.Set(ctx, revactx.TokenHeader, t) rsp, err := g.searchClient.Search(ctx, &searchsvc.SearchRequest{ - Query: "*" + rep.SearchFiles.Search.Pattern + "*", + Query: rep.SearchFiles.Search.Pattern, }) if err != nil { e := merrors.Parse(err.Error())