mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-02-13 15:39:50 -06:00
* feat(search): introduce search query package. With the increasing complexity of how we organize our resources, the search must also be able to find them using entity properties. The query package provides the necessary functionality to do this. This makes it possible to search for resources via KQL; the Microsoft spec is largely covered and can be used for this. In the current state, the legacy query language is still used; in a future update it will be deprecated and KQL will become the standard.
374 lines
10 KiB
Go
374 lines
10 KiB
Go
package engine
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"math"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/blevesearch/bleve/v2"
|
|
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
|
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
|
|
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
|
"github.com/blevesearch/bleve/v2/analysis/token/porter"
|
|
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
|
|
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
|
"github.com/blevesearch/bleve/v2/mapping"
|
|
bleveQuery "github.com/blevesearch/bleve/v2/search/query"
|
|
storageProvider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
|
|
"google.golang.org/protobuf/types/known/timestamppb"
|
|
|
|
"github.com/cs3org/reva/v2/pkg/storagespace"
|
|
"github.com/cs3org/reva/v2/pkg/utils"
|
|
|
|
searchMessage "github.com/owncloud/ocis/v2/protogen/gen/ocis/messages/search/v0"
|
|
searchService "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0"
|
|
"github.com/owncloud/ocis/v2/services/search/pkg/content"
|
|
"github.com/owncloud/ocis/v2/services/search/pkg/query"
|
|
)
|
|
|
|
// Bleve represents a search engine which utilizes bleve to search and store resources.
|
|
type Bleve struct {
|
|
index bleve.Index
|
|
query query.Creator[bleveQuery.Query]
|
|
}
|
|
|
|
// NewBleveIndex returns a new bleve index
|
|
// given path must exist.
|
|
func NewBleveIndex(root string) (bleve.Index, error) {
|
|
destination := filepath.Join(root, "bleve")
|
|
index, err := bleve.Open(destination)
|
|
if errors.Is(bleve.ErrorIndexPathDoesNotExist, err) {
|
|
m, err := BuildBleveMapping()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
index, err = bleve.New(destination, m)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return index, nil
|
|
}
|
|
|
|
return index, err
|
|
}
|
|
|
|
// NewBleveEngine creates a new Bleve instance
|
|
func NewBleveEngine(index bleve.Index, qbc query.Creator[bleveQuery.Query]) *Bleve {
|
|
return &Bleve{
|
|
index: index,
|
|
query: qbc,
|
|
}
|
|
}
|
|
|
|
// BuildBleveMapping builds a bleve index mapping which can be used for indexing
|
|
func BuildBleveMapping() (mapping.IndexMapping, error) {
|
|
nameMapping := bleve.NewTextFieldMapping()
|
|
nameMapping.Analyzer = "lowercaseKeyword"
|
|
|
|
lowercaseMapping := bleve.NewTextFieldMapping()
|
|
lowercaseMapping.IncludeInAll = false
|
|
lowercaseMapping.Analyzer = "lowercaseKeyword"
|
|
|
|
fulltextFieldMapping := bleve.NewTextFieldMapping()
|
|
fulltextFieldMapping.Analyzer = "fulltext"
|
|
fulltextFieldMapping.IncludeInAll = false
|
|
|
|
docMapping := bleve.NewDocumentMapping()
|
|
docMapping.AddFieldMappingsAt("Name", nameMapping)
|
|
docMapping.AddFieldMappingsAt("Tags", lowercaseMapping)
|
|
docMapping.AddFieldMappingsAt("Content", fulltextFieldMapping)
|
|
|
|
indexMapping := bleve.NewIndexMapping()
|
|
indexMapping.DefaultAnalyzer = keyword.Name
|
|
indexMapping.DefaultMapping = docMapping
|
|
err := indexMapping.AddCustomAnalyzer("lowercaseKeyword",
|
|
map[string]interface{}{
|
|
"type": custom.Name,
|
|
"tokenizer": single.Name,
|
|
"token_filters": []string{
|
|
lowercase.Name,
|
|
},
|
|
},
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
err = indexMapping.AddCustomAnalyzer("fulltext",
|
|
map[string]interface{}{
|
|
"type": custom.Name,
|
|
"tokenizer": unicode.Name,
|
|
"token_filters": []string{
|
|
lowercase.Name,
|
|
porter.Name,
|
|
},
|
|
},
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return indexMapping, nil
|
|
}
|
|
|
|
// Search executes a search request operation within the index.
|
|
// Returns a SearchIndexResponse object or an error.
|
|
func (b *Bleve) Search(_ context.Context, sir *searchService.SearchIndexRequest) (*searchService.SearchIndexResponse, error) {
|
|
createdQuery, err := b.query.Create(sir.Query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
q := bleve.NewConjunctionQuery(
|
|
// Skip documents that have been marked as deleted
|
|
&bleveQuery.BoolFieldQuery{
|
|
Bool: false,
|
|
FieldVal: "Deleted",
|
|
},
|
|
createdQuery,
|
|
)
|
|
|
|
if sir.Ref != nil {
|
|
q.Conjuncts = append(
|
|
q.Conjuncts,
|
|
&bleveQuery.TermQuery{
|
|
FieldVal: "RootID",
|
|
Term: storagespace.FormatResourceID(
|
|
storageProvider.ResourceId{
|
|
StorageId: sir.Ref.GetResourceId().GetStorageId(),
|
|
SpaceId: sir.Ref.GetResourceId().GetSpaceId(),
|
|
OpaqueId: sir.Ref.GetResourceId().GetOpaqueId(),
|
|
},
|
|
),
|
|
},
|
|
)
|
|
}
|
|
|
|
bleveReq := bleve.NewSearchRequest(q)
|
|
bleveReq.Highlight = bleve.NewHighlight()
|
|
|
|
switch {
|
|
case sir.PageSize == -1:
|
|
bleveReq.Size = math.MaxInt
|
|
case sir.PageSize == 0:
|
|
bleveReq.Size = 200
|
|
default:
|
|
bleveReq.Size = int(sir.PageSize)
|
|
}
|
|
|
|
bleveReq.Fields = []string{"*"}
|
|
res, err := b.index.Search(bleveReq)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
matches := make([]*searchMessage.Match, 0, len(res.Hits))
|
|
totalMatches := res.Total
|
|
for _, hit := range res.Hits {
|
|
if sir.Ref != nil {
|
|
path := strings.TrimSuffix(getFieldValue[string](hit.Fields, "Path"), "/")
|
|
relRefPath := utils.MakeRelativePath(sir.Ref.Path)
|
|
if relRefPath != "." && !strings.HasPrefix(path, relRefPath+"/") {
|
|
totalMatches--
|
|
continue
|
|
}
|
|
}
|
|
|
|
rootID, err := storagespace.ParseID(getFieldValue[string](hit.Fields, "RootID"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
rID, err := storagespace.ParseID(getFieldValue[string](hit.Fields, "ID"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pID, _ := storagespace.ParseID(getFieldValue[string](hit.Fields, "ParentID"))
|
|
match := &searchMessage.Match{
|
|
Score: float32(hit.Score),
|
|
Entity: &searchMessage.Entity{
|
|
Ref: &searchMessage.Reference{
|
|
ResourceId: resourceIDtoSearchID(rootID),
|
|
Path: getFieldValue[string](hit.Fields, "Path"),
|
|
},
|
|
Id: resourceIDtoSearchID(rID),
|
|
Name: getFieldValue[string](hit.Fields, "Name"),
|
|
ParentId: resourceIDtoSearchID(pID),
|
|
Size: uint64(getFieldValue[float64](hit.Fields, "Size")),
|
|
Type: uint64(getFieldValue[float64](hit.Fields, "Type")),
|
|
MimeType: getFieldValue[string](hit.Fields, "MimeType"),
|
|
Deleted: getFieldValue[bool](hit.Fields, "Deleted"),
|
|
Tags: getFieldSliceValue[string](hit.Fields, "Tags"),
|
|
Highlights: getFragmentValue(hit.Fragments, "Content", 0),
|
|
},
|
|
}
|
|
|
|
if mtime, err := time.Parse(time.RFC3339, getFieldValue[string](hit.Fields, "Mtime")); err == nil {
|
|
match.Entity.LastModifiedTime = ×tamppb.Timestamp{Seconds: mtime.Unix(), Nanos: int32(mtime.Nanosecond())}
|
|
}
|
|
|
|
matches = append(matches, match)
|
|
}
|
|
|
|
return &searchService.SearchIndexResponse{
|
|
Matches: matches,
|
|
TotalMatches: int32(totalMatches),
|
|
}, nil
|
|
}
|
|
|
|
// Upsert indexes or stores Resource data fields.
|
|
func (b *Bleve) Upsert(id string, r Resource) error {
|
|
return b.index.Index(id, r)
|
|
}
|
|
|
|
// Move updates the resource location and all of its necessary fields.
|
|
func (b *Bleve) Move(id string, parentid string, target string) error {
|
|
r, err := b.getResource(id)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
currentPath := r.Path
|
|
nextPath := utils.MakeRelativePath(target)
|
|
|
|
r, err = b.updateEntity(id, func(r *Resource) {
|
|
r.Path = nextPath
|
|
r.Name = path.Base(nextPath)
|
|
r.ParentID = parentid
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if r.Type == uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) {
|
|
q := bleve.NewConjunctionQuery(
|
|
bleve.NewQueryStringQuery("RootID:"+r.RootID),
|
|
bleve.NewQueryStringQuery("Path:"+escapeQuery(currentPath+"/*")),
|
|
)
|
|
bleveReq := bleve.NewSearchRequest(q)
|
|
bleveReq.Size = math.MaxInt
|
|
bleveReq.Fields = []string{"*"}
|
|
res, err := b.index.Search(bleveReq)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, h := range res.Hits {
|
|
_, err := b.updateEntity(h.ID, func(r *Resource) {
|
|
r.Path = strings.Replace(r.Path, currentPath, nextPath, 1)
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Delete marks the resource as deleted.
|
|
// The resource object will stay in the bleve index,
|
|
// instead of removing the resource it just marks it as deleted!
|
|
// can be undone
|
|
func (b *Bleve) Delete(id string) error {
|
|
return b.setDeleted(id, true)
|
|
}
|
|
|
|
// Restore is the counterpart to Delete.
|
|
// It restores the resource which makes it available again.
|
|
func (b *Bleve) Restore(id string) error {
|
|
return b.setDeleted(id, false)
|
|
}
|
|
|
|
// Purge removes a resource from the index, irreversible operation.
|
|
func (b *Bleve) Purge(id string) error {
|
|
return b.index.Delete(id)
|
|
}
|
|
|
|
// DocCount returns the number of resources in the index.
|
|
func (b *Bleve) DocCount() (uint64, error) {
|
|
return b.index.DocCount()
|
|
}
|
|
|
|
func (b *Bleve) getResource(id string) (*Resource, error) {
|
|
req := bleve.NewSearchRequest(bleve.NewDocIDQuery([]string{id}))
|
|
req.Fields = []string{"*"}
|
|
res, err := b.index.Search(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if res.Hits.Len() == 0 {
|
|
return nil, errors.New("entity not found")
|
|
}
|
|
|
|
fields := res.Hits[0].Fields
|
|
|
|
return &Resource{
|
|
ID: getFieldValue[string](fields, "ID"),
|
|
RootID: getFieldValue[string](fields, "RootID"),
|
|
Path: getFieldValue[string](fields, "Path"),
|
|
ParentID: getFieldValue[string](fields, "ParentID"),
|
|
Type: uint64(getFieldValue[float64](fields, "Type")),
|
|
Deleted: getFieldValue[bool](fields, "Deleted"),
|
|
Document: content.Document{
|
|
Name: getFieldValue[string](fields, "Name"),
|
|
Title: getFieldValue[string](fields, "Title"),
|
|
Size: uint64(getFieldValue[float64](fields, "Size")),
|
|
Mtime: getFieldValue[string](fields, "Mtime"),
|
|
MimeType: getFieldValue[string](fields, "MimeType"),
|
|
Content: getFieldValue[string](fields, "Content"),
|
|
Tags: getFieldSliceValue[string](fields, "Tags"),
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
func (b *Bleve) updateEntity(id string, mutateFunc func(r *Resource)) (*Resource, error) {
|
|
it, err := b.getResource(id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
mutateFunc(it)
|
|
|
|
return it, b.index.Index(it.ID, it)
|
|
}
|
|
|
|
func (b *Bleve) setDeleted(id string, deleted bool) error {
|
|
it, err := b.updateEntity(id, func(r *Resource) {
|
|
r.Deleted = deleted
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if it.Type == uint64(storageProvider.ResourceType_RESOURCE_TYPE_CONTAINER) {
|
|
q := bleve.NewConjunctionQuery(
|
|
bleve.NewQueryStringQuery("RootID:"+it.RootID),
|
|
bleve.NewQueryStringQuery("Path:"+escapeQuery(it.Path+"/*")),
|
|
)
|
|
bleveReq := bleve.NewSearchRequest(q)
|
|
bleveReq.Size = math.MaxInt
|
|
bleveReq.Fields = []string{"*"}
|
|
res, err := b.index.Search(bleveReq)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, h := range res.Hits {
|
|
_, err := b.updateEntity(h.ID, func(r *Resource) {
|
|
r.Deleted = deleted
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|