build(deps): bump github.com/blevesearch/bleve/v2 from 2.3.10 to 2.4.0
Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.3.10 to 2.4.0.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.3.10...v2.4.0)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
commit 68e4e81870 (parent 8f432c4cdd), committed by Ralf Haferkamp
+3 -2
@@ -22,7 +22,8 @@ A modern text indexing library in go
 * Conjunction, Disjunction, Boolean (must/should/must_not)
 * Term Range, Numeric Range, Date Range
 * [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
-* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
+* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
+* [Vector Search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
 * [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) Scoring
 * Query time boosting
 * Search result match highlighting with document fragments
@@ -101,7 +102,7 @@ Use "bleve [command] --help" for more information about a command.

 Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages:

-Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)
+Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Polish (pl), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)

 ## Text Analysis Wizard
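Note: the feature list above maps directly onto bleve's top-level API. A minimal sketch of the query-string path (index path, document and field names are illustrative):

    package main

    import (
        "fmt"

        "github.com/blevesearch/bleve/v2"
    )

    func main() {
        // create a new index with the default mapping (illustrative path)
        idx, err := bleve.New("example.bleve", bleve.NewIndexMapping())
        if err != nil {
            panic(err)
        }
        defer idx.Close()

        // index a document
        _ = idx.Index("doc1", map[string]interface{}{"name": "bleve vector search"})

        // the query string syntax listed above
        q := bleve.NewQueryStringQuery("name:vector")
        res, err := idx.Search(bleve.NewSearchRequest(q))
        if err != nil {
            panic(err)
        }
        fmt.Println(res.Total, "matching documents")
    }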
+145
@@ -0,0 +1,145 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package document

import (
    "fmt"
    "reflect"

    "github.com/blevesearch/bleve/v2/size"
    index "github.com/blevesearch/bleve_index_api"
)

var reflectStaticSizeVectorField int

func init() {
    var f VectorField
    reflectStaticSizeVectorField = int(reflect.TypeOf(f).Size())
}

const DefaultVectorIndexingOptions = index.IndexField

type VectorField struct {
    name                    string
    dims                    int    // Dimensionality of the vector
    similarity              string // Similarity metric to use for scoring
    options                 index.FieldIndexingOptions
    value                   []float32
    numPlainTextBytes       uint64
    vectorIndexOptimizedFor string // Optimization applied to this index.
}

func (n *VectorField) Size() int {
    return reflectStaticSizeVectorField + size.SizeOfPtr +
        len(n.name) +
        int(numBytesFloat32s(n.value))
}

func (n *VectorField) Name() string {
    return n.name
}

func (n *VectorField) ArrayPositions() []uint64 {
    return nil
}

func (n *VectorField) Options() index.FieldIndexingOptions {
    return n.options
}

func (n *VectorField) NumPlainTextBytes() uint64 {
    return n.numPlainTextBytes
}

func (n *VectorField) AnalyzedLength() int {
    // vectors aren't analyzed
    return 0
}

func (n *VectorField) EncodedFieldType() byte {
    return 'v'
}

func (n *VectorField) AnalyzedTokenFrequencies() index.TokenFrequencies {
    // vectors aren't analyzed
    return nil
}

func (n *VectorField) Analyze() {
    // vectors aren't analyzed
}

func (n *VectorField) Value() []byte {
    return nil
}

func (n *VectorField) GoString() string {
    return fmt.Sprintf("&document.VectorField{Name:%s, Options: %s, "+
        "Value: %+v}", n.name, n.options, n.value)
}

// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorField(name string, arrayPositions []uint64,
    vector []float32, dims int, similarity, vectorIndexOptimizedFor string) *VectorField {
    return NewVectorFieldWithIndexingOptions(name, arrayPositions,
        vector, dims, similarity, vectorIndexOptimizedFor,
        DefaultVectorIndexingOptions)
}

// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
    vector []float32, dims int, similarity, vectorIndexOptimizedFor string,
    options index.FieldIndexingOptions) *VectorField {
    options = options | DefaultVectorIndexingOptions

    return &VectorField{
        name:                    name,
        dims:                    dims,
        similarity:              similarity,
        options:                 options,
        value:                   vector,
        numPlainTextBytes:       numBytesFloat32s(vector),
        vectorIndexOptimizedFor: vectorIndexOptimizedFor,
    }
}

func numBytesFloat32s(value []float32) uint64 {
    return uint64(len(value) * size.SizeOfFloat32)
}

// -----------------------------------------------------------------------------
// Following methods help in implementing the bleve_index_api's VectorField
// interface.

func (n *VectorField) Vector() []float32 {
    return n.value
}

func (n *VectorField) Dims() int {
    return n.dims
}

func (n *VectorField) Similarity() string {
    return n.similarity
}

func (n *VectorField) IndexOptimizedFor() string {
    return n.vectorIndexOptimizedFor
}
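Note: a minimal sketch of constructing one of these fields (requires building with the vectors tag; the similarity metric and optimization strings below are assumed placeholder values, not confirmed by this diff):

    // a 4-dimensional embedding; real vectors are typically much wider
    vec := []float32{0.12, 0.34, 0.56, 0.78}

    // arrayPositions is accepted but unused, per the comment in the file
    f := document.NewVectorField("embedding", nil, vec, len(vec),
        "l2_norm", "recall") // assumed similarity and optimization names

    fmt.Println(f.Name(), f.Dims())    // embedding 4
    fmt.Println(f.NumPlainTextBytes()) // 16 = 4 floats x 4 bytes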
+12 -10
@@ -26,6 +26,7 @@ const (
 	ErrorUnknownIndexType
 	ErrorEmptyID
 	ErrorIndexReadInconsistency
+	ErrorTwoPhaseSearchInconsistency
 )

 // Error represents a more strongly typed bleve error for detecting
@@ -37,14 +38,15 @@ func (e Error) Error() string {
 }

 var errorMessages = map[Error]string{
-	ErrorIndexPathExists:        "cannot create new index, path already exists",
-	ErrorIndexPathDoesNotExist:  "cannot open index, path does not exist",
-	ErrorIndexMetaMissing:       "cannot open index, metadata missing",
-	ErrorIndexMetaCorrupt:       "cannot open index, metadata corrupt",
-	ErrorIndexClosed:            "index is closed",
-	ErrorAliasMulti:             "cannot perform single index operation on multiple index alias",
-	ErrorAliasEmpty:             "cannot perform operation on empty alias",
-	ErrorUnknownIndexType:       "unknown index type",
-	ErrorEmptyID:                "document ID cannot be empty",
-	ErrorIndexReadInconsistency: "index read inconsistency detected",
+	ErrorIndexPathExists:             "cannot create new index, path already exists",
+	ErrorIndexPathDoesNotExist:       "cannot open index, path does not exist",
+	ErrorIndexMetaMissing:            "cannot open index, metadata missing",
+	ErrorIndexMetaCorrupt:            "cannot open index, metadata corrupt",
+	ErrorIndexClosed:                 "index is closed",
+	ErrorAliasMulti:                  "cannot perform single index operation on multiple index alias",
+	ErrorAliasEmpty:                  "cannot perform operation on empty alias",
+	ErrorUnknownIndexType:            "unknown index type",
+	ErrorEmptyID:                     "document ID cannot be empty",
+	ErrorIndexReadInconsistency:      "index read inconsistency detected",
+	ErrorTwoPhaseSearchInconsistency: "2-phase search failed, likely due to an overlapping topology change",
 }
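Note: since Error is a typed constant whose Error() method reads this table, the sentinel values compare directly; a small sketch (the index path is illustrative):

    _, err := bleve.Open("/does/not/exist.bleve")
    if err == bleve.ErrorIndexPathDoesNotExist {
        // prints: cannot open index, path does not exist
        fmt.Println(err)
    }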
+12 -29
@@ -18,6 +18,8 @@ import (
 	"reflect"
 	"strconv"
 	"strings"
+
+	"github.com/blevesearch/bleve/v2/util"
 )

 // ExtractGeoPoint takes an arbitrary interface{} and tries it's best to
@@ -61,12 +63,12 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 			first := thingVal.Index(0)
 			if first.CanInterface() {
 				firstVal := first.Interface()
-				lon, foundLon = extractNumericVal(firstVal)
+				lon, foundLon = util.ExtractNumericValFloat64(firstVal)
 			}
 			second := thingVal.Index(1)
 			if second.CanInterface() {
 				secondVal := second.Interface()
-				lat, foundLat = extractNumericVal(secondVal)
+				lat, foundLat = util.ExtractNumericValFloat64(secondVal)
 			}
 		}
 	}
@@ -105,12 +107,12 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 	// is it a map
 	if l, ok := thing.(map[string]interface{}); ok {
 		if lval, ok := l["lon"]; ok {
-			lon, foundLon = extractNumericVal(lval)
+			lon, foundLon = util.ExtractNumericValFloat64(lval)
 		} else if lval, ok := l["lng"]; ok {
-			lon, foundLon = extractNumericVal(lval)
+			lon, foundLon = util.ExtractNumericValFloat64(lval)
 		}
 		if lval, ok := l["lat"]; ok {
-			lat, foundLat = extractNumericVal(lval)
+			lat, foundLat = util.ExtractNumericValFloat64(lval)
 		}
 	}

@@ -121,19 +123,19 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 		if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
 			if thingVal.Field(i).CanInterface() {
 				fieldVal := thingVal.Field(i).Interface()
-				lon, foundLon = extractNumericVal(fieldVal)
+				lon, foundLon = util.ExtractNumericValFloat64(fieldVal)
 			}
 		}
 		if strings.HasPrefix(strings.ToLower(fieldName), "lng") {
 			if thingVal.Field(i).CanInterface() {
 				fieldVal := thingVal.Field(i).Interface()
-				lon, foundLon = extractNumericVal(fieldVal)
+				lon, foundLon = util.ExtractNumericValFloat64(fieldVal)
 			}
 		}
 		if strings.HasPrefix(strings.ToLower(fieldName), "lat") {
 			if thingVal.Field(i).CanInterface() {
 				fieldVal := thingVal.Field(i).Interface()
-				lat, foundLat = extractNumericVal(fieldVal)
+				lat, foundLat = util.ExtractNumericValFloat64(fieldVal)
 			}
 		}
 	}
@@ -157,25 +159,6 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 	return lon, lat, foundLon && foundLat
 }

-// extract numeric value (if possible) and returns a float64
-func extractNumericVal(v interface{}) (float64, bool) {
-	val := reflect.ValueOf(v)
-	if !val.IsValid() {
-		return 0, false
-	}
-	typ := val.Type()
-	switch typ.Kind() {
-	case reflect.Float32, reflect.Float64:
-		return val.Float(), true
-	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-		return float64(val.Int()), true
-	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
-		return float64(val.Uint()), true
-	}
-
-	return 0, false
-}
-
 // various support interfaces which can be used to find lat/lon
 type loner interface {
 	Lon() float64
@@ -209,12 +192,12 @@ func extractCoordinates(thing interface{}) []float64 {
 	first := thingVal.Index(0)
 	if first.CanInterface() {
 		firstVal := first.Interface()
-		lon, foundLon = extractNumericVal(firstVal)
+		lon, foundLon = util.ExtractNumericValFloat64(firstVal)
 	}
 	second := thingVal.Index(1)
 	if second.CanInterface() {
 		secondVal := second.Interface()
-		lat, foundLat = extractNumericVal(secondVal)
+		lat, foundLat = util.ExtractNumericValFloat64(secondVal)
 	}

 	if !foundLon || !foundLat {
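Note: all of these call sites feed ExtractGeoPoint, which accepts several point encodings; a quick illustration of the shapes it understands (success is false when either coordinate cannot be coerced to a float64):

    // slice form: [lon, lat]
    lon, lat, ok := geo.ExtractGeoPoint([]interface{}{-122.33, 47.61})

    // map form with "lon"/"lng" and "lat" keys
    lon, lat, ok = geo.ExtractGeoPoint(map[string]interface{}{"lng": -122.33, "lat": 47.61})

    // struct form with lon*/lat* prefixed field names
    type point struct{ Lon, Lat float64 }
    lon, lat, ok = geo.ExtractGeoPoint(point{-122.33, 47.61})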
+21 -2
@@ -30,6 +30,7 @@ type segmentIntroduction struct {
 	obsoletes map[uint64]*roaring.Bitmap
 	ids       []string
 	internal  map[string][]byte
+	stats     *fieldStats

 	applied   chan error
 	persisted chan error
@@ -146,7 +147,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
 		newss := &SegmentSnapshot{
 			id:         root.segment[i].id,
 			segment:    root.segment[i].segment,
+			stats:      root.segment[i].stats,
 			cachedDocs: root.segment[i].cachedDocs,
+			cachedMeta: root.segment[i].cachedMeta,
 			creator:    root.segment[i].creator,
 		}

@@ -154,7 +157,11 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
 		if root.segment[i].deleted == nil {
 			newss.deleted = delta
 		} else {
-			newss.deleted = roaring.Or(root.segment[i].deleted, delta)
+			if delta.IsEmpty() {
+				newss.deleted = root.segment[i].deleted
+			} else {
+				newss.deleted = roaring.Or(root.segment[i].deleted, delta)
+			}
 		}
 		if newss.deleted.IsEmpty() {
 			newss.deleted = nil
@@ -188,7 +195,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
 	newSegmentSnapshot := &SegmentSnapshot{
 		id:         next.id,
 		segment:    next.data, // take ownership of next.data's ref-count
+		stats:      next.stats,
 		cachedDocs: &cachedDocs{cache: nil},
+		cachedMeta: &cachedMeta{meta: nil},
 		creator:    "introduceSegment",
 	}
 	newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
@@ -275,7 +284,9 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
 				id:         segmentSnapshot.id,
 				segment:    replacement,
 				deleted:    segmentSnapshot.deleted,
+				stats:      segmentSnapshot.stats,
 				cachedDocs: segmentSnapshot.cachedDocs,
+				cachedMeta: segmentSnapshot.cachedMeta,
 				creator:    "introducePersist",
 				mmaped:     1,
 			}
@@ -374,7 +385,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 				id:         root.segment[i].id,
 				segment:    root.segment[i].segment,
 				deleted:    root.segment[i].deleted,
+				stats:      root.segment[i].stats,
 				cachedDocs: root.segment[i].cachedDocs,
+				cachedMeta: root.segment[i].cachedMeta,
 				creator:    root.segment[i].creator,
 			})
 			root.segment[i].segment.AddRef()
@@ -394,7 +407,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 			}
 		}
 	}
-
 	// before the newMerge introduction, need to clean the newly
 	// merged segment wrt the current root segments, hence
 	// applying the obsolete segment contents to newly merged segment
@@ -415,12 +427,19 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 	if nextMerge.new != nil &&
 		nextMerge.new.Count() > newSegmentDeleted.GetCardinality() {

+		stats := newFieldStats()
+		if fsr, ok := nextMerge.new.(segment.FieldStatsReporter); ok {
+			fsr.UpdateFieldStats(stats)
+		}
+
 		// put new segment at end
 		newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
 			id:         nextMerge.id,
 			segment:    nextMerge.new, // take ownership for nextMerge.new's ref-count
 			deleted:    newSegmentDeleted,
+			stats:      stats,
 			cachedDocs: &cachedDocs{cache: nil},
+			cachedMeta: &cachedMeta{meta: nil},
 			creator:    "introduceMerge",
 			mmaped:     nextMerge.mmaped,
 		})
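Note: the IsEmpty guard introduced above exists because roaring.Or always allocates a fresh bitmap; when a batch deleted nothing in a segment, the existing bitmap can be shared as-is. A standalone sketch of the pattern:

    old := roaring.BitmapOf(1, 2, 3)
    delta := roaring.New() // no new deletions in this batch

    var deleted *roaring.Bitmap
    if delta.IsEmpty() {
        deleted = old // reuse the existing bitmap, no allocation
    } else {
        deleted = roaring.Or(old, delta) // union into a new bitmap
    }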
+4 -4
@@ -290,7 +290,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,

 	atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))

-	oldMap := make(map[uint64]*SegmentSnapshot)
+	oldMap := make(map[uint64]*SegmentSnapshot, len(task.Segments))
 	newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
 	segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
 	docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
@@ -357,7 +357,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
 			totalBytesRead := seg.BytesRead() + prevBytesReadTotal
 			seg.ResetBytesRead(totalBytesRead)

-			oldNewDocNums = make(map[uint64][]uint64)
+			oldNewDocNums = make(map[uint64][]uint64, len(newDocNums))
 			for i, segNewDocNums := range newDocNums {
 				oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
 			}
@@ -485,8 +485,8 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,

 	sm := &segmentMerge{
 		id:            newSegmentID,
-		old:           make(map[uint64]*SegmentSnapshot),
-		oldNewDocNums: make(map[uint64][]uint64),
+		old:           make(map[uint64]*SegmentSnapshot, len(sbsIndexes)),
+		oldNewDocNums: make(map[uint64][]uint64, len(sbsIndexes)),
 		new:           seg,
 		notifyCh:      make(chan *mergeTaskIntroStatus),
 	}
+2 -1
@@ -16,10 +16,11 @@ package scorch

 import (
 	"fmt"
+	"sync/atomic"

 	"github.com/RoaringBitmap/roaring"
 	index "github.com/blevesearch/bleve_index_api"
 	segment "github.com/blevesearch/scorch_segment_api/v2"
-	"sync/atomic"
 )

 var OptimizeConjunction = true
+187
@@ -0,0 +1,187 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package scorch

import (
    "context"
    "fmt"
    "sync"
    "sync/atomic"

    "github.com/blevesearch/bleve/v2/search"
    index "github.com/blevesearch/bleve_index_api"
    segment_api "github.com/blevesearch/scorch_segment_api/v2"
)

type OptimizeVR struct {
    ctx       context.Context
    snapshot  *IndexSnapshot
    totalCost uint64
    // maps field to vector readers
    vrs map[string][]*IndexSnapshotVectorReader
}

// This setting _MUST_ only be changed during init and not after.
var BleveMaxKNNConcurrency = 10

func (o *OptimizeVR) invokeSearcherEndCallback() {
    if o.ctx != nil {
        if cb := o.ctx.Value(search.SearcherEndCallbackKey); cb != nil {
            if cbF, ok := cb.(search.SearcherEndCallbackFn); ok {
                if o.totalCost > 0 {
                    // notify the callback that the searcher creation etc. is finished
                    // and report back the total cost for it to track and take actions
                    // appropriately.
                    _ = cbF(o.totalCost)
                }
            }
        }
    }
}

func (o *OptimizeVR) Finish() error {
    // for each field, get the vector index --> invoke the zap func.
    // for each VR, populate postings list and iterators
    // by passing the obtained vector index and getting similar vectors.
    // defer close index - just once.
    var errorsM sync.Mutex
    var errors []error

    defer o.invokeSearcherEndCallback()

    wg := sync.WaitGroup{}
    semaphore := make(chan struct{}, BleveMaxKNNConcurrency)
    // Launch goroutines to get vector index for each segment
    for i, seg := range o.snapshot.segment {
        if sv, ok := seg.segment.(segment_api.VectorSegment); ok {
            wg.Add(1)
            semaphore <- struct{}{} // Acquire a semaphore slot
            go func(index int, segment segment_api.VectorSegment, origSeg *SegmentSnapshot) {
                defer func() {
                    <-semaphore // Release the semaphore slot
                    wg.Done()
                }()
                for field, vrs := range o.vrs {
                    vecIndex, err := segment.InterpretVectorIndex(field)
                    if err != nil {
                        errorsM.Lock()
                        errors = append(errors, err)
                        errorsM.Unlock()
                        return
                    }

                    // update the vector index size as a meta value in the segment snapshot
                    vectorIndexSize := vecIndex.Size()
                    origSeg.cachedMeta.updateMeta(field, vectorIndexSize)
                    for _, vr := range vrs {
                        // for each VR, populate postings list and iterators
                        // by passing the obtained vector index and getting similar vectors.
                        pl, err := vecIndex.Search(vr.vector, vr.k, origSeg.deleted)
                        if err != nil {
                            errorsM.Lock()
                            errors = append(errors, err)
                            errorsM.Unlock()
                            go vecIndex.Close()
                            return
                        }

                        atomic.AddUint64(&o.snapshot.parent.stats.TotKNNSearches, uint64(1))

                        // postings and iterators are already alloc'ed when
                        // IndexSnapshotVectorReader is created
                        vr.postings[index] = pl
                        vr.iterators[index] = pl.Iterator(vr.iterators[index])
                    }
                    go vecIndex.Close()
                }
            }(i, sv, seg)
        }
    }
    wg.Wait()
    close(semaphore)
    if len(errors) > 0 {
        return errors[0]
    }
    return nil
}

func (s *IndexSnapshotVectorReader) VectorOptimize(ctx context.Context,
    octx index.VectorOptimizableContext) (index.VectorOptimizableContext, error) {

    if s.snapshot.parent.segPlugin.Version() < VectorSearchSupportedSegmentVersion {
        return nil, fmt.Errorf("vector search not supported for this index, "+
            "index's segment version %v, supported segment version for vector search %v",
            s.snapshot.parent.segPlugin.Version(), VectorSearchSupportedSegmentVersion)
    }

    if octx == nil {
        octx = &OptimizeVR{snapshot: s.snapshot,
            vrs: make(map[string][]*IndexSnapshotVectorReader),
        }
    }

    o, ok := octx.(*OptimizeVR)
    if !ok {
        return octx, nil
    }
    o.ctx = ctx

    if o.snapshot != s.snapshot {
        o.invokeSearcherEndCallback()
        return nil, fmt.Errorf("tried to optimize KNN across different snapshots")
    }

    // for every searcher creation, consult the segment snapshot to see
    // what's the vector index size and since you're anyways going
    // to use this vector index to perform the search etc. as part of the Finish()
    // perform a check as to whether we allow the searcher creation (the downstream)
    // Finish() logic to even occur or not.
    var sumVectorIndexSize uint64
    for _, seg := range o.snapshot.segment {
        vecIndexSize := seg.cachedMeta.fetchMeta(s.field)
        if vecIndexSize != nil {
            sumVectorIndexSize += vecIndexSize.(uint64)
        }
    }

    if o.ctx != nil {
        if cb := o.ctx.Value(search.SearcherStartCallbackKey); cb != nil {
            if cbF, ok := cb.(search.SearcherStartCallbackFn); ok {
                err := cbF(sumVectorIndexSize)
                if err != nil {
                    // it's important to invoke the end callback at this point since
                    // if the earlier searchers of this optimize struct were successful
                    // the cost corresponding to it would be incremented and if the
                    // current searcher fails the check then we end up erroring out
                    // the overall optimized searcher creation, the cost needs to be
                    // handled appropriately.
                    o.invokeSearcherEndCallback()
                    return nil, err
                }
            }
        }
    }

    // total cost is essentially the sum of the vector indexes' size across all the
    // searchers - all of them end up reading and maintaining a vector index.
    // misaccounting this value would end up calling the "end" callback with a value
    // not equal to the value passed to "start" callback.
    o.totalCost += sumVectorIndexSize
    o.vrs[s.field] = append(o.vrs[s.field], s)
    return o, nil
}
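Note: BleveMaxKNNConcurrency bounds the number of in-flight per-segment vector searches via a buffered channel used as a counting semaphore. Distilled to its essentials (jobs and maxConcurrency are placeholders):

    sem := make(chan struct{}, maxConcurrency) // capacity = max parallel workers
    var wg sync.WaitGroup
    for _, job := range jobs { // jobs is a []func()
        wg.Add(1)
        sem <- struct{}{} // blocks once maxConcurrency goroutines are running
        go func(j func()) {
            defer func() { <-sem; wg.Done() }() // release the slot
            j()
        }(job)
    }
    wg.Wait()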
+29 -1
@@ -17,6 +17,7 @@ package scorch
 import (
 	"bytes"
 	"encoding/binary"
+	"encoding/json"
 	"fmt"
 	"io"
 	"log"
@@ -424,6 +425,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
 			id:      newSegmentID,
 			segment: segment.segment,
 			deleted: nil, // nil since merging handled deletions
+			stats:   nil,
 		})
 		break
 	}
@@ -602,6 +604,18 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
 				return nil, nil, err
 			}
 		}
+
+		// store segment stats
+		if segmentSnapshot.stats != nil {
+			b, err := json.Marshal(segmentSnapshot.stats.Fetch())
+			if err != nil {
+				return nil, nil, err
+			}
+			err = snapshotSegmentBucket.Put(boltStatsKey, b)
+			if err != nil {
+				return nil, nil, err
+			}
+		}
 	}

 	return filenames, newSegmentPaths, nil
@@ -634,7 +648,7 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
 	// the newly populated boltdb snapshotBucket above
 	if len(newSegmentPaths) > 0 {
 		// now try to open all the new snapshots
-		newSegments := make(map[uint64]segment.Segment)
+		newSegments := make(map[uint64]segment.Segment, len(newSegmentPaths))
 		defer func() {
 			for _, s := range newSegments {
 				if s != nil {
@@ -704,6 +718,7 @@ var boltMetaDataKey = []byte{'m'}
 var boltMetaDataSegmentTypeKey = []byte("type")
 var boltMetaDataSegmentVersionKey = []byte("version")
 var boltMetaDataTimeStamp = []byte("timeStamp")
+var boltStatsKey = []byte("stats")
 var TotBytesWrittenKey = []byte("TotBytesWritten")

 func (s *Scorch) loadFromBolt() error {
@@ -858,6 +873,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
 	rv := &SegmentSnapshot{
 		segment:    segment,
 		cachedDocs: &cachedDocs{cache: nil},
+		cachedMeta: &cachedMeta{meta: nil},
 	}
 	deletedBytes := segmentBucket.Get(boltDeletedKey)
 	if deletedBytes != nil {
@@ -872,6 +888,18 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
 			rv.deleted = deletedBitmap
 		}
 	}
+	statBytes := segmentBucket.Get(boltStatsKey)
+	if statBytes != nil {
+		var statsMap map[string]map[string]uint64
+
+		err := json.Unmarshal(statBytes, &statsMap)
+		stats := &fieldStats{statMap: statsMap}
+		if err != nil {
+			_ = segment.Close()
+			return nil, fmt.Errorf("error reading stat bytes: %v", err)
+		}
+		rv.stats = stats
+	}

 	return rv, nil
 }
+73 -3
@@ -428,6 +428,8 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {

 	var newSegment segment.Segment
 	var bufBytes uint64
+	stats := newFieldStats()
+
 	if len(analysisResults) > 0 {
 		newSegment, bufBytes, err = s.segPlugin.New(analysisResults)
 		if err != nil {
@@ -438,11 +440,14 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
 				segB.BytesWritten())
 		}
 		atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes)
+		if fsr, ok := newSegment.(segment.FieldStatsReporter); ok {
+			fsr.UpdateFieldStats(stats)
+		}
 	} else {
 		atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
 	}

-	err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback())
+	err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback(), stats)
 	if err != nil {
 		if newSegment != nil {
 			_ = newSegment.Close()
@@ -462,15 +467,15 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
 }

 func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
-	internalOps map[string][]byte, persistedCallback index.BatchCallback) error {
+	internalOps map[string][]byte, persistedCallback index.BatchCallback, stats *fieldStats) error {

 	// new introduction
 	introduction := &segmentIntroduction{
 		id:                atomic.AddUint64(&s.nextSegmentID, 1),
 		data:              newSegment,
 		ids:               ids,
-		obsoletes:         make(map[uint64]*roaring.Bitmap),
 		internal:          internalOps,
+		stats:             stats,
 		applied:           make(chan error),
 		persistedCallback: persistedCallback,
 	}
@@ -487,6 +492,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,

 	defer func() { _ = root.DecRef() }()

+	introduction.obsoletes = make(map[uint64]*roaring.Bitmap, len(root.segment))
+
 	for _, seg := range root.segment {
 		delta, err := seg.segment.DocNumbers(ids)
 		if err != nil {
@@ -617,6 +624,8 @@ func (s *Scorch) StatsMap() map[string]interface{} {
 	m["index_time"] = m["TotIndexTime"]
 	m["term_searchers_started"] = m["TotTermSearchersStarted"]
 	m["term_searchers_finished"] = m["TotTermSearchersFinished"]
+	m["knn_searches"] = m["TotKNNSearches"]
+
 	m["num_bytes_read_at_query_time"] = m["TotBytesReadAtQueryTime"]
 	m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
 	m["num_bytes_written_at_index_time"] = m["TotBytesWrittenAtIndexTime"]
@@ -638,6 +647,20 @@ func (s *Scorch) StatsMap() map[string]interface{} {
 	m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"]
 	m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"]

+	// calculate the aggregate of all the segment's field stats
+	aggFieldStats := newFieldStats()
+	for _, segmentSnapshot := range indexSnapshot.Segments() {
+		if segmentSnapshot.stats != nil {
+			aggFieldStats.Aggregate(segmentSnapshot.stats)
+		}
+	}
+
+	aggFieldStatsMap := aggFieldStats.Fetch()
+	for statName, stats := range aggFieldStatsMap {
+		for fieldName, val := range stats {
+			m["field:"+fieldName+":"+statName] = val
+		}
+	}
 	return m
 }

@@ -762,3 +785,50 @@ func parseToInteger(i interface{}) (int, error) {
 		return 0, fmt.Errorf("expects int or float64 value")
 	}
 }

+// Holds Zap's field level stats at a segment level
+type fieldStats struct {
+	// StatName -> FieldName -> value
+	statMap map[string]map[string]uint64
+}
+
+// Add the data into the map after checking if the statname is valid
+func (fs *fieldStats) Store(statName, fieldName string, value uint64) {
+	if _, exists := fs.statMap[statName]; !exists {
+		fs.statMap[statName] = make(map[string]uint64)
+	}
+	fs.statMap[statName][fieldName] = value
+}
+
+// Combine the given stats map with the existing map
+func (fs *fieldStats) Aggregate(stats segment.FieldStats) {
+
+	statMap := stats.Fetch()
+	if statMap == nil {
+		return
+	}
+	for statName, statMap := range statMap {
+		if _, exists := fs.statMap[statName]; !exists {
+			fs.statMap[statName] = make(map[string]uint64)
+		}
+		for fieldName, val := range statMap {
+			if _, exists := fs.statMap[statName][fieldName]; !exists {
+				fs.statMap[statName][fieldName] = 0
+			}
+			fs.statMap[statName][fieldName] += val
+		}
+	}
+}
+
+// Returns the stats map
+func (fs *fieldStats) Fetch() map[string]map[string]uint64 {
+	return fs.statMap
+}
+
+// Initializes an empty stats map
+func newFieldStats() *fieldStats {
+	rv := &fieldStats{
+		statMap: map[string]map[string]uint64{},
+	}
+	return rv
+}
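Note: the aggregation loop above is a per-key sum across segments. Given the StatName -> FieldName -> value layout, a tiny worked example (these are package-internal types, shown purely for illustration; the stat name is a placeholder):

    a := newFieldStats()
    a.Store("num_vectors", "embedding", 100) // from segment 1

    b := newFieldStats()
    b.Store("num_vectors", "embedding", 50) // from segment 2

    a.Aggregate(b) // *fieldStats satisfies segment.FieldStats via Fetch()
    // a.Fetch() now reports {"num_vectors": {"embedding": 150}},
    // surfaced in StatsMap() as m["field:embedding:num_vectors"] = 150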
+3 -1
@@ -28,6 +28,7 @@ import (
 	zapv13 "github.com/blevesearch/zapx/v13"
 	zapv14 "github.com/blevesearch/zapx/v14"
 	zapv15 "github.com/blevesearch/zapx/v15"
+	zapv16 "github.com/blevesearch/zapx/v16"
 )

 // SegmentPlugin represents the essential functions required by a package to plug in
@@ -73,7 +74,8 @@ var defaultSegmentPlugin SegmentPlugin

 func init() {
 	ResetSegmentPlugins()
-	RegisterSegmentPlugin(&zapv15.ZapPlugin{}, true)
+	RegisterSegmentPlugin(&zapv16.ZapPlugin{}, true)
+	RegisterSegmentPlugin(&zapv15.ZapPlugin{}, false)
 	RegisterSegmentPlugin(&zapv14.ZapPlugin{}, false)
 	RegisterSegmentPlugin(&zapv13.ZapPlugin{}, false)
 	RegisterSegmentPlugin(&zapv12.ZapPlugin{}, false)
+158
@@ -0,0 +1,158 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package scorch

import (
    "bytes"
    "context"
    "fmt"
    "reflect"

    "github.com/blevesearch/bleve/v2/size"
    index "github.com/blevesearch/bleve_index_api"
    segment_api "github.com/blevesearch/scorch_segment_api/v2"
)

const VectorSearchSupportedSegmentVersion = 16

var reflectStaticSizeIndexSnapshotVectorReader int

func init() {
    var istfr IndexSnapshotVectorReader
    reflectStaticSizeIndexSnapshotVectorReader = int(reflect.TypeOf(istfr).Size())
}

type IndexSnapshotVectorReader struct {
    vector        []float32
    field         string
    k             int64
    snapshot      *IndexSnapshot
    postings      []segment_api.VecPostingsList
    iterators     []segment_api.VecPostingsIterator
    segmentOffset int
    currPosting   segment_api.VecPosting
    currID        index.IndexInternalID
    ctx           context.Context
}

func (i *IndexSnapshotVectorReader) Size() int {
    sizeInBytes := reflectStaticSizeIndexSnapshotVectorReader + size.SizeOfPtr +
        len(i.vector) + len(i.field) + len(i.currID)

    for _, entry := range i.postings {
        sizeInBytes += entry.Size()
    }

    for _, entry := range i.iterators {
        sizeInBytes += entry.Size()
    }

    if i.currPosting != nil {
        sizeInBytes += i.currPosting.Size()
    }

    return sizeInBytes
}

func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) (
    *index.VectorDoc, error) {
    rv := preAlloced
    if rv == nil {
        rv = &index.VectorDoc{}
    }

    for i.segmentOffset < len(i.iterators) {
        next, err := i.iterators[i.segmentOffset].Next()
        if err != nil {
            return nil, err
        }
        if next != nil {
            // make segment number into global number by adding offset
            globalOffset := i.snapshot.offsets[i.segmentOffset]
            nnum := next.Number()
            rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset)
            rv.Score = float64(next.Score())

            i.currID = rv.ID
            i.currPosting = next

            return rv, nil
        }
        i.segmentOffset++
    }

    return nil, nil
}

func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID,
    preAlloced *index.VectorDoc) (*index.VectorDoc, error) {

    if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
        i2, err := i.snapshot.VectorReader(i.ctx, i.vector, i.field, i.k)
        if err != nil {
            return nil, err
        }
        // close the current term field reader before replacing it with a new one
        _ = i.Close()
        *i = *(i2.(*IndexSnapshotVectorReader))
    }

    num, err := docInternalToNumber(ID)
    if err != nil {
        return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
    }
    segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
    if segIndex >= len(i.snapshot.segment) {
        return nil, fmt.Errorf("computed segment index %d out of bounds %d",
            segIndex, len(i.snapshot.segment))
    }
    // skip directly to the target segment
    i.segmentOffset = segIndex
    next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
    if err != nil {
        return nil, err
    }
    if next == nil {
        // we jumped directly to the segment that should have contained it
        // but it wasn't there, so reuse Next() which should correctly
        // get the next hit after it (we moved i.segmentOffset)
        return i.Next(preAlloced)
    }

    if preAlloced == nil {
        preAlloced = &index.VectorDoc{}
    }
    preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+
        i.snapshot.offsets[segIndex])
    i.currID = preAlloced.ID
    i.currPosting = next
    return preAlloced, nil
}

func (i *IndexSnapshotVectorReader) Count() uint64 {
    var rv uint64
    for _, posting := range i.postings {
        rv += posting.Count()
    }
    return rv
}

func (i *IndexSnapshotVectorReader) Close() error {
    // TODO Consider if any scope of recycling here.
    return nil
}
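Note: the offset arithmetic in Next() and Advance() converts per-segment document numbers into snapshot-global ones; a worked example of the mapping (segment sizes are illustrative):

    // snapshot with three segments of 1000, 1500 and 2000 docs:
    offsets := []uint64{0, 1000, 2500}

    // local doc 42 in segment index 2 maps to global doc number 2542
    global := uint64(42) + offsets[2]
    fmt.Println(global) // 2542

    // segmentIndexAndLocalDocNumFromGlobal performs the inverse lookup:
    // global 2542 falls in segment 2, local doc 42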
+30
@@ -39,6 +39,9 @@ type SegmentSnapshot struct {
 	segment segment.Segment
 	deleted *roaring.Bitmap
 	creator string
+	stats   *fieldStats
+
+	cachedMeta *cachedMeta

 	cachedDocs *cachedDocs
 }
@@ -282,3 +285,30 @@ func (c *cachedDocs) visitDoc(localDocNum uint64,

 	c.m.Unlock()
 }

+// the purpose of the cachedMeta is to simply allow the user of this type to record
+// and cache certain meta data information (specific to the segment) that can be
+// used across calls to save compute on the same.
+// for example searcher creations on the same index snapshot can use this struct
+// to help and fetch the backing index size information which can be used in
+// memory usage calculation thereby deciding whether to allow a query or not.
+type cachedMeta struct {
+	m    sync.RWMutex
+	meta map[string]interface{}
+}
+
+func (c *cachedMeta) updateMeta(field string, val interface{}) {
+	c.m.Lock()
+	if c.meta == nil {
+		c.meta = make(map[string]interface{})
+	}
+	c.meta[field] = val
+	c.m.Unlock()
+}
+
+func (c *cachedMeta) fetchMeta(field string) (rv interface{}) {
+	c.m.RLock()
+	rv = c.meta[field]
+	c.m.RUnlock()
+	return rv
+}
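Note: a small sketch of the intended use, mirroring how Finish() records vector index sizes and VectorOptimize() reads them back (field name and size are illustrative):

    var cm cachedMeta

    // writer side: record the per-field vector index size once
    cm.updateMeta("embedding", uint64(64<<20)) // e.g. a 64 MiB vector index

    // reader side: later searcher creations reuse the cached figure
    if v := cm.fetchMeta("embedding"); v != nil {
        sizeBytes := v.(uint64)
        _ = sizeBytes // feed into memory-admission checks
    }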
+48
@@ -0,0 +1,48 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package scorch

import (
    "context"

    index "github.com/blevesearch/bleve_index_api"
    segment_api "github.com/blevesearch/scorch_segment_api/v2"
)

func (is *IndexSnapshot) VectorReader(ctx context.Context, vector []float32,
    field string, k int64) (
    index.VectorReader, error) {

    rv := &IndexSnapshotVectorReader{
        vector:   vector,
        field:    field,
        k:        k,
        snapshot: is,
    }

    if rv.postings == nil {
        rv.postings = make([]segment_api.VecPostingsList, len(is.segment))
    }
    if rv.iterators == nil {
        rv.iterators = make([]segment_api.VecPostingsIterator, len(is.segment))
    }

    // initialize postings and iterators within the OptimizeVR's Finish()

    return rv, nil
}
+2
@@ -51,6 +51,8 @@ type Stats struct {
 	TotTermSearchersStarted  uint64
 	TotTermSearchersFinished uint64

+	TotKNNSearches uint64
+
 	TotEventTriggerStarted   uint64
 	TotEventTriggerCompleted uint64
+281 -39
@@ -21,6 +21,8 @@ import (

 	"github.com/blevesearch/bleve/v2/mapping"
 	"github.com/blevesearch/bleve/v2/search"
+	"github.com/blevesearch/bleve/v2/search/collector"
+	"github.com/blevesearch/bleve/v2/search/query"
 	index "github.com/blevesearch/bleve_index_api"
 )

@@ -160,13 +162,92 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
 	if len(i.indexes) < 1 {
 		return nil, ErrorAliasEmpty
 	}
+	if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
+		// since preSearchKey is set, it means that the request
+		// is being executed as part of a preSearch, which
+		// indicates that this index alias is set as an Index
+		// in another alias, so we need to do a preSearch search
+		// and NOT a real search
+		return preSearchDataSearch(ctx, req, i.indexes...)
+	}
+
+	// at this point we know we are doing a real search
+	// either after a preSearch is done, or directly
+	// on the alias
+
+	// check if request has preSearchData which would indicate that the
+	// request has already been preSearched and we can skip the
+	// preSearch step now, we call an optional function to
+	// redistribute the preSearchData to the individual indexes
+	// if necessary
+	var preSearchData map[string]map[string]interface{}
+	if req.PreSearchData != nil {
+		if requestHasKNN(req) {
+			var err error
+			preSearchData, err = redistributeKNNPreSearchData(req, i.indexes)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}

 	// short circuit the simple case
 	if len(i.indexes) == 1 {
+		if preSearchData != nil {
+			req.PreSearchData = preSearchData[i.indexes[0].Name()]
+		}
 		return i.indexes[0].SearchInContext(ctx, req)
 	}

-	return MultiSearch(ctx, req, i.indexes...)
+	// at this stage we know we have multiple indexes
+	// check if preSearchData needs to be gathered from all indexes
+	// before executing the query
+	var err error
+	// only perform preSearch if
+	//  - the request does not already have preSearchData
+	//  - the request requires preSearch
+	var preSearchDuration time.Duration
+	var sr *SearchResult
+	if req.PreSearchData == nil && preSearchRequired(req) {
+		searchStart := time.Now()
+		preSearchResult, err := preSearch(ctx, req, i.indexes...)
+		if err != nil {
+			return nil, err
+		}
+		// check if the preSearch result has any errors and if so
+		// return the search result as is without executing the query
+		// so that the errors are not lost
+		if preSearchResult.Status.Failed > 0 || len(preSearchResult.Status.Errors) > 0 {
+			return preSearchResult, nil
+		}
+		// finalize the preSearch result now
+		finalizePreSearchResult(req, preSearchResult)
+
+		// if there are no errors, then merge the data in the preSearch result
+		// and construct the preSearchData to be used in the actual search
+		// if the request is satisfied by the preSearch result, then we can
+		// directly return the preSearch result as the final result
+		if requestSatisfiedByPreSearch(req) {
+			sr = finalizeSearchResult(req, preSearchResult)
+			// no need to run the 2nd phase MultiSearch(..)
+		} else {
+			preSearchData, err = constructPreSearchData(req, preSearchResult, i.indexes)
+			if err != nil {
+				return nil, err
+			}
+		}
+		preSearchDuration = time.Since(searchStart)
+	}
+
+	// check if search result was generated as part of preSearch itself
+	if sr == nil {
+		sr, err = MultiSearch(ctx, req, preSearchData, i.indexes...)
+		if err != nil {
+			return nil, err
+		}
+	}
+	sr.Took += preSearchDuration
+	return sr, nil
 }

 func (i *indexAliasImpl) Fields() ([]string, error) {
@@ -429,22 +510,8 @@ func (i *indexAliasImpl) Swap(in, out []Index) {
 // the actual final results.
 // Perhaps that part needs to be optional,
 // could be slower in remote usages.
-func createChildSearchRequest(req *SearchRequest) *SearchRequest {
-	rv := SearchRequest{
-		Query:            req.Query,
-		Size:             req.Size + req.From,
-		From:             0,
-		Highlight:        req.Highlight,
-		Fields:           req.Fields,
-		Facets:           req.Facets,
-		Explain:          req.Explain,
-		Sort:             req.Sort.Copy(),
-		IncludeLocations: req.IncludeLocations,
-		Score:            req.Score,
-		SearchAfter:      req.SearchAfter,
-		SearchBefore:     req.SearchBefore,
-	}
-	return &rv
+func createChildSearchRequest(req *SearchRequest, preSearchData map[string]interface{}) *SearchRequest {
+	return copySearchRequest(req, preSearchData)
 }

 type asyncSearchResult struct {
@@ -453,9 +520,195 @@ type asyncSearchResult struct {
 	Err error
 }

+func preSearchRequired(req *SearchRequest) bool {
+	return requestHasKNN(req)
+}
+
+func preSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {
+	// create a dummy request with a match none query
+	// since we only care about the preSearchData in PreSearch
+	dummyRequest := &SearchRequest{
+		Query: query.NewMatchNoneQuery(),
+	}
+	newCtx := context.WithValue(ctx, search.PreSearchKey, true)
+	if requestHasKNN(req) {
+		addKnnToDummyRequest(dummyRequest, req)
+	}
+	return preSearchDataSearch(newCtx, dummyRequest, indexes...)
+}
+
+// if the request is satisfied by just the preSearch result,
+// finalize the result and return it directly without
+// performing multi search
+func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *SearchResult {
+	if preSearchResult == nil {
+		return nil
+	}
+
+	// global values across all hits irrespective of pagination settings
+	preSearchResult.Total = uint64(preSearchResult.Hits.Len())
+	maxScore := float64(0)
+	for i, hit := range preSearchResult.Hits {
+		// since we are now using the preSearch result as the final result
+		// we can discard the indexNames from the hits as they are no longer
+		// relevant.
+		hit.IndexNames = nil
+		if hit.Score > maxScore {
+			maxScore = hit.Score
+		}
+		hit.HitNumber = uint64(i)
+	}
+	preSearchResult.MaxScore = maxScore
+	// now apply pagination settings
+	var reverseQueryExecution bool
+	if req.SearchBefore != nil {
+		reverseQueryExecution = true
+		req.Sort.Reverse()
+		req.SearchAfter = req.SearchBefore
+	}
+	if req.SearchAfter != nil {
+		preSearchResult.Hits = collector.FilterHitsBySearchAfter(preSearchResult.Hits, req.Sort, req.SearchAfter)
+	}
+	preSearchResult.Hits = hitsInCurrentPage(req, preSearchResult.Hits)
+	if reverseQueryExecution {
+		// reverse the sort back to the original
+		req.Sort.Reverse()
+		// resort using the original order
+		mhs := newSearchHitSorter(req.Sort, preSearchResult.Hits)
+		req.SortFunc()(mhs)
+		req.SearchAfter = nil
+	}
+
+	if req.Explain {
+		preSearchResult.Request = req
+	}
+	return preSearchResult
+}
+
+func requestSatisfiedByPreSearch(req *SearchRequest) bool {
+	if requestHasKNN(req) && isKNNrequestSatisfiedByPreSearch(req) {
+		return true
+	}
+	return false
+}
+
+func constructPreSearchData(req *SearchRequest, preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) {
+	mergedOut := make(map[string]map[string]interface{}, len(indexes))
+	for _, index := range indexes {
+		mergedOut[index.Name()] = make(map[string]interface{})
+	}
+	var err error
+	if requestHasKNN(req) {
+		mergedOut, err = constructKnnPreSearchData(mergedOut, preSearchResult, indexes)
+		if err != nil {
+			return nil, err
+		}
+	}
+	return mergedOut, nil
+}
+
+func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {
+	asyncResults := make(chan *asyncSearchResult, len(indexes))
+	// run search on each index in separate go routine
+	var waitGroup sync.WaitGroup
+	var searchChildIndex = func(in Index, childReq *SearchRequest) {
+		rv := asyncSearchResult{Name: in.Name()}
+		rv.Result, rv.Err = in.SearchInContext(ctx, childReq)
+		asyncResults <- &rv
+		waitGroup.Done()
+	}
+	waitGroup.Add(len(indexes))
+	for _, in := range indexes {
+		go searchChildIndex(in, createChildSearchRequest(req, nil))
+	}
+	// on another go routine, close after finished
+	go func() {
+		waitGroup.Wait()
+		close(asyncResults)
+	}()
+	// the final search result to be returned after combining the preSearch results
+	var sr *SearchResult
+	// the preSearch result processor
+	var prp preSearchResultProcessor
+	// error map
+	indexErrors := make(map[string]error)
+	for asr := range asyncResults {
+		if asr.Err == nil {
+			// a valid preSearch result
+			if prp == nil {
+				// first valid preSearch result
+				// create a new preSearch result processor
+				prp = createPreSearchResultProcessor(req)
+			}
+			prp.add(asr.Result, asr.Name)
+			if sr == nil {
+				// first result
+				sr = &SearchResult{
+					Status: asr.Result.Status,
+					Cost:   asr.Result.Cost,
+				}
+			} else {
+				// merge with previous
+				sr.Status.Merge(asr.Result.Status)
+				sr.Cost += asr.Result.Cost
+			}
+		} else {
+			indexErrors[asr.Name] = asr.Err
+		}
+	}
+	// handle case where no results were successful
+	if sr == nil {
+		sr = &SearchResult{
+			Status: &SearchStatus{
+				Errors: make(map[string]error),
+			},
+		}
+	}
+	// in preSearch, partial results are not allowed as it can lead to
+	// the real search giving incorrect results, and hence the search
+	// result is not populated with any of the processed data from
+	// the preSearch result processor if there are any errors
+	// or the preSearch result status has any failures
+	if len(indexErrors) > 0 || sr.Status.Failed > 0 {
+		if sr.Status.Errors == nil {
+			sr.Status.Errors = make(map[string]error)
+		}
+		for indexName, indexErr := range indexErrors {
+			sr.Status.Errors[indexName] = indexErr
+			sr.Status.Total++
+			sr.Status.Failed++
+		}
+	} else {
+		prp.finalize(sr)
+	}
+	return sr, nil
+}
+
+// hitsInCurrentPage returns the hits in the current page
+// using the From and Size parameters in the request
+func hitsInCurrentPage(req *SearchRequest, hits []*search.DocumentMatch) []*search.DocumentMatch {
+	sortFunc := req.SortFunc()
+	// sort all hits with the requested order
+	if len(req.Sort) > 0 {
+		sorter := newSearchHitSorter(req.Sort, hits)
+		sortFunc(sorter)
+	}
+	// now skip over the correct From
+	if req.From > 0 && len(hits) > req.From {
+		hits = hits[req.From:]
+	} else if req.From > 0 {
+		hits = search.DocumentMatchCollection{}
+	}
+	// now trim to the correct size
+	if req.Size > 0 && len(hits) > req.Size {
+		hits = hits[0:req.Size]
+	}
+	return hits
+}
+
 // MultiSearch executes a SearchRequest across multiple Index objects,
 // then merges the results. The indexes must honor any ctx deadline.
-func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {
+func MultiSearch(ctx context.Context, req *SearchRequest, preSearchData map[string]map[string]interface{}, indexes ...Index) (*SearchResult, error) {

 	searchStart := time.Now()
 	asyncResults := make(chan *asyncSearchResult, len(indexes))
@@ -480,7 +733,11 @@ func MultiSearch(ctx context.Context, req *SearchRequest, preSearchData map[stri
 	waitGroup.Add(len(indexes))
 	for _, in := range indexes {
-		go searchChildIndex(in, createChildSearchRequest(req))
+		var payload map[string]interface{}
+		if preSearchData != nil {
+			payload = preSearchData[in.Name()]
+		}
+		go searchChildIndex(in, createChildSearchRequest(req, payload))
 	}

 	// on another go routine, close after finished
@@ -518,24 +775,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, preSearchData map[stri
 		}
 	}

-	sortFunc := req.SortFunc()
-	// sort all hits with the requested order
-	if len(req.Sort) > 0 {
-		sorter := newSearchHitSorter(req.Sort, sr.Hits)
-		sortFunc(sorter)
-	}
-
-	// now skip over the correct From
-	if req.From > 0 && len(sr.Hits) > req.From {
-		sr.Hits = sr.Hits[req.From:]
-	} else if req.From > 0 {
-		sr.Hits = search.DocumentMatchCollection{}
-	}
-
-	// now trim to the correct size
-	if req.Size > 0 && len(sr.Hits) > req.Size {
-		sr.Hits = sr.Hits[0:req.Size]
-	}
+	sr.Hits = hitsInCurrentPage(req, sr.Hits)

 	// fix up facets
 	for name, fr := range req.Facets {
@@ -547,14 +787,16 @@ func MultiSearch(ctx context.Context, req *SearchRequest, preSearchData map[stri
 		req.Sort.Reverse()
 		// resort using the original order
 		mhs := newSearchHitSorter(req.Sort, sr.Hits)
-		sortFunc(mhs)
+		req.SortFunc()(mhs)
 		// reset request
 		req.SearchBefore = req.SearchAfter
 		req.SearchAfter = nil
 	}

 	// fix up original request
-	sr.Request = req
+	if req.Explain {
+		sr.Request = req
+	}
 	searchDuration := time.Since(searchStart)
 	sr.Took = searchDuration
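Note: hitsInCurrentPage applies From/Size after the global merge sort, so pagination is computed over the combined hit list rather than per child index. A worked example of the slicing (plain ints stand in for []*search.DocumentMatch):

    hits := make([]int, 25) // merged, sorted result set of 25 hits
    from, size := 10, 10
    if from > 0 && len(hits) > from {
        hits = hits[from:] // drop the first 10 hits, 15 remain
    } else if from > 0 {
        hits = nil // page starts past the end, empty page
    }
    if size > 0 && len(hits) > size {
        hits = hits[:size] // keep one page: hits 10..19 of the merged order
    }
    fmt.Println(len(hits)) // 10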
+77
-17
@@ -433,6 +433,25 @@ func memNeededForSearch(req *SearchRequest,
|
||||
return uint64(estimate)
|
||||
}
|
||||
|
||||
func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader index.IndexReader) (*SearchResult, error) {
|
||||
var knnHits []*search.DocumentMatch
|
||||
var err error
|
||||
if requestHasKNN(req) {
|
||||
knnHits, err = i.runKnnCollector(ctx, req, reader, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &SearchResult{
|
||||
Status: &SearchStatus{
|
||||
Total: 1,
|
||||
Successful: 1,
|
||||
},
|
||||
Hits: knnHits,
|
||||
}, nil
|
||||
}
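
A hedged sketch of the two-phase flow this enables (caller code is hypothetical): phase one marks the context with search.PreSearchKey so only the KNN portion runs, and phase two hands the merged hits back to each child index via req.PreSearchData.

package main

import (
	"context"
	"log"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/search"
)

func main() {
	idx, err := bleve.Open("example.bleve") // hypothetical path
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()

	// the KNN part of the request would be populated behind the vectors build tag
	req := bleve.NewSearchRequest(bleve.NewMatchNoneQuery())
	ctx := context.WithValue(context.Background(), search.PreSearchKey, true)
	preRes, err := idx.SearchInContext(ctx, req) // phase one: global KNN hits only
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("preSearch returned %d hits", len(preRes.Hits))
}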

// SearchInContext executes a search request operation within the provided
// Context. Returns a SearchResult object or an error.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
@@ -445,6 +464,25 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
		return nil, ErrorIndexClosed
	}

	// open a reader for this search
	indexReader, err := i.i.Reader()
	if err != nil {
		return nil, fmt.Errorf("error opening index reader %v", err)
	}
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
		preSearchResult, err := i.preSearch(ctx, req, indexReader)
		if err != nil {
			return nil, err
		}
		return preSearchResult, nil
	}

	var reverseQueryExecution bool
	if req.SearchBefore != nil {
		reverseQueryExecution = true
@@ -460,16 +498,31 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
		coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
	}

	// open a reader for this search
	indexReader, err := i.i.Reader()
	if err != nil {
		return nil, fmt.Errorf("error opening index reader %v", err)
	}
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
	var knnHits []*search.DocumentMatch
	var ok bool
	var skipKnnCollector bool
	if req.PreSearchData != nil {
		for k, v := range req.PreSearchData {
			switch k {
			case search.KnnPreSearchDataKey:
				if v != nil {
					knnHits, ok = v.([]*search.DocumentMatch)
					if !ok {
						return nil, fmt.Errorf("knn preSearchData must be of type []*search.DocumentMatch")
					}
				}
				skipKnnCollector = true
			}
		}
	}()
	}
	if !skipKnnCollector && requestHasKNN(req) {
		knnHits, err = i.runKnnCollector(ctx, req, indexReader, false)
		if err != nil {
			return nil, err
		}
	}

	setKnnHitsInCollector(knnHits, req, coll)

	// This callback and variable handles the tracking of bytes read
	// 1. as part of creation of tfr and its Next() calls which is
@@ -540,14 +593,14 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
		if dateTimeParser == nil {
			return nil, fmt.Errorf("no date time parser named `%s` registered", dateTimeParserName)
		}
		start, end, startLayout, endLayout, err := dr.ParseDates(dateTimeParser)
		start, end, err := dr.ParseDates(dateTimeParser)
		if err != nil {
			return nil, fmt.Errorf("ParseDates err: %v, using date time parser named %s", err, dateTimeParserName)
		}
		if start.IsZero() && end.IsZero() {
			return nil, fmt.Errorf("date range query must specify either start, end or both for date range name '%s'", dr.Name)
		}
		facetBuilder.AddRange(dr.Name, start, end, startLayout, endLayout)
		facetBuilder.AddRange(dr.Name, start, end)
	}
	facetsBuilder.Add(facetName, facetBuilder)
} else {
@@ -605,7 +658,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr

	var storedFieldsCost uint64
	for _, hit := range hits {
		if i.name != "" {
		// KNN documents will already have their Index value set as part of the knn collector output
		// so check if the index is empty and set it to the current index name
		if i.name != "" && hit.Index == "" {
			hit.Index = i.name
		}
		err, storedFieldsBytes := LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
@@ -638,18 +693,23 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
		req.SearchAfter = nil
	}

	return &SearchResult{
	rv := &SearchResult{
		Status: &SearchStatus{
			Total:      1,
			Successful: 1,
		},
		Request:  req,
		Hits:     hits,
		Total:    coll.Total(),
		MaxScore: coll.MaxScore(),
		Took:     searchDuration,
		Facets:   coll.FacetResults(),
	}, nil
	}

	if req.Explain {
		rv.Request = req
	}

	return rv, nil
}

func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
@@ -658,9 +718,9 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
	var totalStoredFieldsBytes uint64
	if len(req.Fields) > 0 || highlighter != nil {
		doc, err := r.Document(hit.ID)
		totalStoredFieldsBytes = doc.StoredFieldsBytes()
		if err == nil && doc != nil {
			if len(req.Fields) > 0 {
			if len(req.Fields) > 0 && hit.Fields == nil {
				totalStoredFieldsBytes = doc.StoredFieldsBytes()
				fieldsToLoad := deDuplicate(req.Fields)
				for _, f := range fieldsToLoad {
					doc.VisitFields(func(docF index.Field) {

+58
-15
@@ -50,7 +50,8 @@ type DocumentMapping struct {
	StructTagKey string `json:"struct_tag_key,omitempty"`
}

func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
func (dm *DocumentMapping) Validate(cache *registry.Cache,
	parentName string, fieldAliasCtx map[string]*FieldMapping) error {
	var err error
	if dm.DefaultAnalyzer != "" {
		_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
@@ -58,8 +59,12 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
			return err
		}
	}
	for _, property := range dm.Properties {
		err = property.Validate(cache)
	for propertyName, property := range dm.Properties {
		newParent := propertyName
		if parentName != "" {
			newParent = fmt.Sprintf("%s.%s", parentName, propertyName)
		}
		err = property.Validate(cache, newParent, fieldAliasCtx)
		if err != nil {
			return err
		}
@@ -77,15 +82,25 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
			return err
		}
	}
		switch field.Type {
		case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP":
		default:
			return fmt.Errorf("unknown field type: '%s'", field.Type)

		err := validateFieldMapping(field, parentName, fieldAliasCtx)
		if err != nil {
			return err
		}
	}
	return nil
}

func validateFieldType(field *FieldMapping) error {
	switch field.Type {
	case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP":
		return nil
	default:
		return fmt.Errorf("field: '%s', unknown field type: '%s'",
			field.Name, field.Type)
	}
}

// analyzerNameForPath attempts to first find the field
// described by this path, then returns the analyzer
// configured for that field
@@ -141,15 +156,20 @@ func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
	return nil
}

// documentMappingForPath returns the EXACT and closest matches for a sub
// documentMappingForPathElements returns the EXACT and closest matches for a sub
// document or for an explicitly mapped field; the closest most specific
// document mapping could be one that matches part of the provided path.
func (dm *DocumentMapping) documentMappingForPath(path string) (
func (dm *DocumentMapping) documentMappingForPathElements(pathElements []string) (
	*DocumentMapping, *DocumentMapping) {
	pathElements := decodePath(path)
	var pathElementsCopy []string
	if len(pathElements) == 0 {
		pathElementsCopy = []string{""}
	} else {
		pathElementsCopy = pathElements
	}
	current := dm
OUTER:
	for i, pathElement := range pathElements {
	for i, pathElement := range pathElementsCopy {
		if subDocMapping, exists := current.Properties[pathElement]; exists {
			current = subDocMapping
			continue OUTER
@@ -157,7 +177,7 @@ OUTER:

		// no subDocMapping matches this pathElement
		// only if this is the last element check for field name
		if i == len(pathElements)-1 {
		if i == len(pathElementsCopy)-1 {
			for _, field := range current.Fields {
				if field.Name == pathElement {
					break
@@ -170,6 +190,15 @@ OUTER:
	return current, current
}

// documentMappingForPath returns the EXACT and closest matches for a sub
// document or for an explicitly mapped field; the closest most specific
// document mapping could be one that matches part of the provided path.
func (dm *DocumentMapping) documentMappingForPath(path string) (
	*DocumentMapping, *DocumentMapping) {
	pathElements := decodePath(path)
	return dm.documentMappingForPathElements(pathElements)
}
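
For orientation (illustrative, not part of the diff): a path such as "author.name" decodes into the elements ["author", "name"], so the lookup descends into the "author" sub-document mapping and then checks "name" as a property or an explicitly mapped field. The public API exercises the same traversal:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/mapping"
)

func main() {
	// build a mapping with a nested "author.name" text field
	author := mapping.NewDocumentMapping()
	author.AddFieldMappingsAt("name", mapping.NewTextFieldMapping())

	im := mapping.NewIndexMapping()
	im.DefaultMapping.AddSubDocumentMapping("author", author)

	// resolves through the same property tree walked above
	fmt.Println(im.AnalyzerNameForPath("author.name"))
}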

// NewDocumentMapping returns a new document mapping
// with all the default values.
func NewDocumentMapping() *DocumentMapping {
@@ -388,9 +417,8 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes
}

func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
	pathString := encodePath(path)
	// look to see if there is a mapping for this field
	subDocMapping, closestDocMapping := dm.documentMappingForPath(pathString)
	subDocMapping, closestDocMapping := dm.documentMappingForPathElements(path)

	// check to see if we even need to do further processing
	if subDocMapping != nil && !subDocMapping.Enabled {
@@ -402,6 +430,8 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
		// cannot do anything with the zero value
		return
	}

	pathString := encodePath(path)
	propertyType := propertyValue.Type()
	switch propertyType.Kind() {
	case reflect.String:
@@ -502,9 +532,20 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
			dm.walkDocument(property, path, indexes, context)
		}
	case reflect.Map, reflect.Slice:
		var isPropertyVector bool
		var isPropertyVectorInitialized bool
		if subDocMapping != nil {
			for _, fieldMapping := range subDocMapping.Fields {
				switch fieldMapping.Type {
				case "vector":
					processed := fieldMapping.processVector(property, pathString, path,
						indexes, context)
					if !isPropertyVectorInitialized {
						isPropertyVector = processed
						isPropertyVectorInitialized = true
					} else {
						isPropertyVector = isPropertyVector && processed
					}
				case "geopoint":
					fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
				case "IP":
@@ -517,7 +558,9 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
				}
			}
		}
		dm.walkDocument(property, path, indexes, context)
		if !isPropertyVector {
			dm.walkDocument(property, path, indexes, context)
		}
	case reflect.Ptr:
		if !propertyValue.IsNil() {
			switch property := property.(type) {

+26
@@ -69,6 +69,17 @@ type FieldMapping struct {
	// the processing of freq/norm details when the default score based relevancy
	// isn't needed.
	SkipFreqNorm bool `json:"skip_freq_norm,omitempty"`

	// Dimensionality of the vector
	Dims int `json:"dims,omitempty"`

	// Similarity is the similarity algorithm used for scoring
	// vector fields.
	// See: index.DefaultSimilarityMetric & index.SupportedSimilarityMetrics
	Similarity string `json:"similarity,omitempty"`

	// Applicable to vector fields only - optimization string
	VectorIndexOptimizedFor string `json:"vector_index_optimized_for,omitempty"`
}

// NewTextFieldMapping returns a default field mapping for text
@@ -448,6 +459,21 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
			if err != nil {
				return err
			}
		case "dims":
			err := json.Unmarshal(v, &fm.Dims)
			if err != nil {
				return err
			}
		case "similarity":
			err := json.Unmarshal(v, &fm.Similarity)
			if err != nil {
				return err
			}
		case "vector_index_optimized_for":
			err := json.Unmarshal(v, &fm.VectorIndexOptimizedFor)
			if err != nil {
				return err
			}
		default:
			invalidKeys = append(invalidKeys, k)
		}

+31
-2
@@ -174,12 +174,14 @@ func (im *IndexMappingImpl) Validate() error {
	if err != nil {
		return err
	}
	err = im.DefaultMapping.Validate(im.cache)

	fieldAliasCtx := make(map[string]*FieldMapping)
	err = im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx)
	if err != nil {
		return err
	}
	for _, docMapping := range im.TypeMapping {
		err = docMapping.Validate(im.cache)
		err = docMapping.Validate(im.cache, "", fieldAliasCtx)
		if err != nil {
			return err
		}
@@ -431,6 +433,33 @@ func (im *IndexMappingImpl) FieldAnalyzer(field string) string {
	return im.AnalyzerNameForPath(field)
}

// FieldMappingForPath returns the mapping for a specific field 'path'.
func (im *IndexMappingImpl) FieldMappingForPath(path string) FieldMapping {
	if im.TypeMapping != nil {
		for _, v := range im.TypeMapping {
			for field, property := range v.Properties {
				for _, v1 := range property.Fields {
					if field == path {
						// Return field mapping if the name matches the path param.
						return *v1
					}
				}
			}
		}
	}

	for field, property := range im.DefaultMapping.Properties {
		for _, v1 := range property.Fields {
			if field == path {
				// Return field mapping if the name matches the path param.
				return *v1
			}
		}
	}

	return FieldMapping{}
}

// wrapper to satisfy new interface

func (im *IndexMappingImpl) DefaultSearchField() string {

+2
@@ -55,4 +55,6 @@ type IndexMapping interface {

	AnalyzerNameForPath(path string) string
	AnalyzerNamed(name string) analysis.Analyzer

	FieldMappingForPath(path string) FieldMapping
}

+35
@@ -0,0 +1,35 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !vectors
// +build !vectors

package mapping

func NewVectorFieldMapping() *FieldMapping {
	return nil
}

func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) bool {
	return false
}

// -----------------------------------------------------------------------------
// document validation functions

func validateFieldMapping(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {
	return validateFieldType(field)
}
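
Worth noting (editor's aside, not part of the diff): vector support is compiled in only under the vectors build tag (go build -tags vectors); without it, the stubs above make the feature a detectable no-op.

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/mapping"
)

func main() {
	// returns nil from the !vectors stub above; a usable mapping with -tags vectors
	if fm := mapping.NewVectorFieldMapping(); fm == nil {
		fmt.Println("built without vector support")
	} else {
		fmt.Println("vector support enabled")
	}
}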
+220
@@ -0,0 +1,220 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package mapping

import (
	"fmt"
	"reflect"

	"github.com/blevesearch/bleve/v2/document"
	"github.com/blevesearch/bleve/v2/util"
	index "github.com/blevesearch/bleve_index_api"
)

// Min and Max allowed dimensions for a vector field
const (
	MinVectorDims = 1
	MaxVectorDims = 2048
)

func NewVectorFieldMapping() *FieldMapping {
	return &FieldMapping{
		Type:         "vector",
		Store:        false,
		Index:        true,
		IncludeInAll: false,
		DocValues:    false,
		SkipFreqNorm: true,
	}
}
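
A hedged usage sketch (field names and values are invented): a vector field built from this constructor is wired into an index mapping with Dims matching the embedding size and Similarity drawn from the supported metrics.

// assumes a build with -tags vectors
vecField := mapping.NewVectorFieldMapping()
vecField.Dims = 384             // must lie within [MinVectorDims, MaxVectorDims]
vecField.Similarity = "l2_norm" // assumption: one of index.SupportedSimilarityMetrics

docMapping := mapping.NewDocumentMapping()
docMapping.AddFieldMappingsAt("embedding", vecField)

indexMapping := mapping.NewIndexMapping()
indexMapping.DefaultMapping = docMapping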

// validate and process a flat vector
func processFlatVector(vecV reflect.Value, dims int) ([]float32, bool) {
	if vecV.Len() != dims {
		return nil, false
	}

	rv := make([]float32, dims)
	for i := 0; i < vecV.Len(); i++ {
		item := vecV.Index(i)
		if !item.CanInterface() {
			return nil, false
		}
		itemI := item.Interface()
		itemFloat, ok := util.ExtractNumericValFloat32(itemI)
		if !ok {
			return nil, false
		}
		rv[i] = itemFloat
	}

	return rv, true
}

// validate and process a vector
// max supported depth of nesting is 2 ([][]float32)
func processVector(vecI interface{}, dims int) ([]float32, bool) {
	vecV := reflect.ValueOf(vecI)
	if !vecV.IsValid() || vecV.Kind() != reflect.Slice || vecV.Len() == 0 {
		return nil, false
	}

	// Let's examine the first element (head) of the vector.
	// If head is a slice, then vector is nested, otherwise flat.
	head := vecV.Index(0)
	if !head.CanInterface() {
		return nil, false
	}
	headI := head.Interface()
	headV := reflect.ValueOf(headI)
	if !headV.IsValid() {
		return nil, false
	}
	if headV.Kind() != reflect.Slice { // vector is flat
		return processFlatVector(vecV, dims)
	}

	// # process nested vector

	// pre-allocate memory for the flattened vector
	// so that we can use copy() later
	rv := make([]float32, dims*vecV.Len())

	for i := 0; i < vecV.Len(); i++ {
		subVec := vecV.Index(i)
		if !subVec.CanInterface() {
			return nil, false
		}
		subVecI := subVec.Interface()
		subVecV := reflect.ValueOf(subVecI)
		if !subVecV.IsValid() {
			return nil, false
		}

		if subVecV.Kind() != reflect.Slice {
			return nil, false
		}

		flatVector, ok := processFlatVector(subVecV, dims)
		if !ok {
			return nil, false
		}

		copy(rv[i*dims:(i+1)*dims], flatVector)
	}

	return rv, true
}
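
To make the nesting rule concrete (values invented): with dims = 3, a flat []float32 of length 3 passes through unchanged, while a [][]float32 of two length-3 rows is flattened row by row into a single []float32 of length 6; ragged, deeper, or non-numeric input is rejected. A standalone sketch of that contract:

dims := 3
nested := [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}
flat := make([]float32, 0, dims*len(nested))
for _, row := range nested {
	if len(row) != dims {
		panic("ragged row: processVector would return (nil, false)")
	}
	flat = append(flat, row...)
}
// flat == []float32{0.1, 0.2, 0.3, 0.4, 0.5, 0.6}, matching the flattening above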

func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) bool {
	vector, ok := processVector(propertyMightBeVector, fm.Dims)
	// Don't add field to document if vector is invalid
	if !ok {
		return false
	}

	fieldName := getFieldName(pathString, path, fm)
	options := fm.Options()
	field := document.NewVectorFieldWithIndexingOptions(fieldName, indexes, vector,
		fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor, options)
	context.doc.AddField(field)

	// "_all" composite field is not applicable for vector field
	context.excludedFromAll = append(context.excludedFromAll, fieldName)
	return true
}

// -----------------------------------------------------------------------------
// document validation functions

func validateFieldMapping(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {
	switch field.Type {
	case "vector":
		return validateVectorFieldAlias(field, parentName, fieldAliasCtx)
	default: // non-vector field
		return validateFieldType(field)
	}
}

func validateVectorFieldAlias(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {

	if field.Name == "" {
		field.Name = parentName
	}

	if field.Similarity == "" {
		field.Similarity = index.DefaultSimilarityMetric
	}

	if field.VectorIndexOptimizedFor == "" {
		field.VectorIndexOptimizedFor = index.DefaultIndexOptimization
	}
	if _, exists := index.SupportedVectorIndexOptimizations[field.VectorIndexOptimizedFor]; !exists {
		// if an unsupported config is provided, override to default
		field.VectorIndexOptimizedFor = index.DefaultIndexOptimization
	}

	// following fields are not applicable for vector
	// thus, we set them to default values
	field.IncludeInAll = false
	field.IncludeTermVectors = false
	field.Store = false
	field.DocValues = false
	field.SkipFreqNorm = true

	// # If alias is present, validate the field options as per the alias
	// note: reading from a nil map is safe
	if fieldAlias, ok := fieldAliasCtx[field.Name]; ok {
		if field.Dims != fieldAlias.Dims {
			return fmt.Errorf("field: '%s', invalid alias "+
				"(different dimensions %d and %d)", fieldAlias.Name, field.Dims,
				fieldAlias.Dims)
		}

		if field.Similarity != fieldAlias.Similarity {
			return fmt.Errorf("field: '%s', invalid alias "+
				"(different similarity values %s and %s)", fieldAlias.Name,
				field.Similarity, fieldAlias.Similarity)
		}

		return nil
	}

	// # Validate field options

	if field.Dims < MinVectorDims || field.Dims > MaxVectorDims {
		return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+
			" value should be in range (%d, %d)", field.Name, field.Dims,
			MinVectorDims, MaxVectorDims)
	}

	if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok {
		return fmt.Errorf("field: '%s', invalid similarity "+
			"metric: '%s', valid metrics are: %+v", field.Name, field.Similarity,
			reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys())
	}

	if fieldAliasCtx != nil { // writing to a nil map is unsafe
		fieldAliasCtx[field.Name] = field
	}

	return nil
}
+24
@@ -0,0 +1,24 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package bleve

import "github.com/blevesearch/bleve/v2/mapping"

func NewVectorFieldMapping() *mapping.FieldMapping {
	return mapping.NewVectorFieldMapping()
}
+59
@@ -0,0 +1,59 @@
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bleve

// A preSearchResultProcessor processes the data in
// the preSearch result from multiple
// indexes in an alias and merges them together to
// create the final preSearch result
type preSearchResultProcessor interface {
	// adds the preSearch result to the processor
	add(*SearchResult, string)
	// updates the final search result with the finalized
	// data from the processor
	finalize(*SearchResult)
}

type knnPreSearchResultProcessor struct {
	addFn      func(sr *SearchResult, indexName string)
	finalizeFn func(sr *SearchResult)
}

func (k *knnPreSearchResultProcessor) add(sr *SearchResult, indexName string) {
	if k.addFn != nil {
		k.addFn(sr, indexName)
	}
}

func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) {
	if k.finalizeFn != nil {
		k.finalizeFn(sr)
	}
}

// -----------------------------------------------------------------------------

func finalizePreSearchResult(req *SearchRequest, preSearchResult *SearchResult) {
	if requestHasKNN(req) {
		preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits)
	}
}

func createPreSearchResultProcessor(req *SearchRequest) preSearchResultProcessor {
	if requestHasKNN(req) {
		return newKnnPreSearchResultProcessor(req)
	}
	return &knnPreSearchResultProcessor{} // equivalent to nil
}
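
A minimal sketch of how this processor appears intended to be driven at the alias layer (childResults and its contents are hypothetical; the functions are unexported, so this lives notionally inside package bleve):

proc := createPreSearchResultProcessor(req)
for name, childRes := range childResults { // map[string]*SearchResult, one per child index
	proc.add(childRes, name)
}
finalRes := &SearchResult{Status: &SearchStatus{}}
proc.finalize(finalRes) // stamps the merged preSearch data onto the final result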
+27
-131
@@ -15,7 +15,6 @@
package bleve

import (
	"encoding/json"
	"fmt"
	"reflect"
	"sort"
@@ -32,19 +31,19 @@ import (
	"github.com/blevesearch/bleve/v2/util"
)

const defaultDateTimeParser = optional.Name
var reflectStaticSizeSearchResult int
var reflectStaticSizeSearchStatus int

func init() {
	var sr SearchResult
	reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
	var ss SearchStatus
	reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
}

var cache = registry.NewCache()

var (
	reflectStaticSizeSearchResult int
	reflectStaticSizeSearchStatus int
)

func init() {
	reflectStaticSizeSearchResult = int(reflect.TypeOf(SearchResult{}).Size())
	reflectStaticSizeSearchStatus = int(reflect.TypeOf(SearchStatus{}).Size())
}
const defaultDateTimeParser = optional.Name

type dateTimeRange struct {
	Name string `json:"name,omitempty"`
@@ -55,28 +54,24 @@ type dateTimeRange struct {
	endString *string
}

func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time, startLayout, endLayout string, err error) {
func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time, err error) {
	start = dr.Start
	startLayout = time.RFC3339Nano
	if dr.Start.IsZero() && dr.startString != nil {
		s, layout, parseError := dateTimeParser.ParseDateTime(*dr.startString)
		s, _, parseError := dateTimeParser.ParseDateTime(*dr.startString)
		if parseError != nil {
			return start, end, startLayout, endLayout, fmt.Errorf("error parsing start date '%s' for date range name '%s': %v", *dr.startString, dr.Name, parseError)
			return start, end, fmt.Errorf("error parsing start date '%s' for date range name '%s': %v", *dr.startString, dr.Name, parseError)
		}
		start = s
		startLayout = layout
	}
	end = dr.End
	endLayout = time.RFC3339Nano
	if dr.End.IsZero() && dr.endString != nil {
		e, layout, parseError := dateTimeParser.ParseDateTime(*dr.endString)
		e, _, parseError := dateTimeParser.ParseDateTime(*dr.endString)
		if parseError != nil {
			return start, end, startLayout, endLayout, fmt.Errorf("error parsing end date '%s' for date range name '%s': %v", *dr.endString, dr.Name, parseError)
			return start, end, fmt.Errorf("error parsing end date '%s' for date range name '%s': %v", *dr.endString, dr.Name, parseError)
		}
		end = e
		endLayout = layout
	}
	return start, end, startLayout, endLayout, err
	return start, end, err
}

func (dr *dateTimeRange) UnmarshalJSON(input []byte) error {
@@ -187,7 +182,7 @@ func (fr *FacetRequest) Validate() error {
		if dr.DateTimeParser == "" {
			// cannot parse the date range dates as the defaultDateTimeParser is overridden
			// so perform this validation at query time
			start, end, _, _, err := dr.ParseDates(dateTimeParser)
			start, end, err := dr.ParseDates(dateTimeParser)
			if err != nil {
				return fmt.Errorf("ParseDates err: %v, using date time parser named %s", err, defaultDateTimeParser)
			}
@@ -285,51 +280,10 @@ func (h *HighlightRequest) AddField(field string) {
	h.Fields = append(h.Fields, field)
}

// A SearchRequest describes all the parameters
// needed to search the index.
// Query is required.
// Size/From describe how much and which part of the
// result set to return.
// Highlight describes optional search result
// highlighting.
// Fields describes a list of field values which
// should be retrieved for result documents, provided they
// were stored while indexing.
// Facets describe the set of facets to be computed.
// Explain triggers inclusion of additional search
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed
// SearchAfter supports deep paging by providing a minimum sort key
// SearchBefore supports deep paging by providing a maximum sort key
// sortFunc specifies the sort implementation to use for sorting results.
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
	ClientContextID  string            `json:"client_context_id,omitempty"`
	Query            query.Query       `json:"query"`
	Size             int               `json:"size"`
	From             int               `json:"from"`
	Highlight        *HighlightRequest `json:"highlight"`
	Fields           []string          `json:"fields"`
	Facets           FacetsRequest     `json:"facets"`
	Explain          bool              `json:"explain"`
	Sort             search.SortOrder  `json:"sort"`
	IncludeLocations bool              `json:"includeLocations"`
	Score            string            `json:"score,omitempty"`
	SearchAfter      []string          `json:"search_after"`
	SearchBefore     []string          `json:"search_before"`

	sortFunc func(sort.Interface)
}

func (r *SearchRequest) SetClientContextID(id string) {
	r.ClientContextID = id
}

func (r *SearchRequest) Validate() error {
	if srq, ok := r.Query.(query.ValidatableQuery); ok {
		if err := srq.Validate(); err != nil {
		err := srq.Validate()
		if err != nil {
			return err
		}
	}
@@ -355,6 +309,10 @@ func (r *SearchRequest) Validate() error {
		}
	}

	err := validateKNN(r)
	if err != nil {
		return err
	}
	return r.Facets.Validate()
}

@@ -393,69 +351,6 @@ func (r *SearchRequest) SetSearchBefore(before []string) {
	r.SearchBefore = before
}

// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
	var (
		temp struct {
			ClientContextID  string            `json:"client_context_id"`
			Q                json.RawMessage   `json:"query"`
			Size             *int              `json:"size"`
			From             int               `json:"from"`
			Highlight        *HighlightRequest `json:"highlight"`
			Fields           []string          `json:"fields"`
			Facets           FacetsRequest     `json:"facets"`
			Explain          bool              `json:"explain"`
			Sort             []json.RawMessage `json:"sort"`
			IncludeLocations bool              `json:"includeLocations"`
			Score            string            `json:"score"`
			SearchAfter      []string          `json:"search_after"`
			SearchBefore     []string          `json:"search_before"`
		}
		err error
	)

	if err = util.UnmarshalJSON(input, &temp); err != nil {
		return err
	}

	if temp.Size == nil {
		r.Size = 10
	} else {
		r.Size = *temp.Size
	}
	if temp.Sort == nil {
		r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
	} else {
		if r.Sort, err = search.ParseSortOrderJSON(temp.Sort); err != nil {
			return err
		}
	}
	r.ClientContextID = temp.ClientContextID
	r.From = temp.From
	r.Explain = temp.Explain
	r.Highlight = temp.Highlight
	r.Fields = temp.Fields
	r.Facets = temp.Facets
	r.IncludeLocations = temp.IncludeLocations
	r.Score = temp.Score
	r.SearchAfter = temp.SearchAfter
	r.SearchBefore = temp.SearchBefore
	if r.Query, err = query.ParseQuery(temp.Q); err != nil {
		return err
	}

	if r.Size < 0 {
		r.Size = 10
	}
	if r.From < 0 {
		r.From = 0
	}

	return nil

}
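
To illustrate the wire format this UnmarshalJSON accepts (values invented), a paged request decodes like so; note the custom defaults above: a missing size becomes 10 and a missing sort becomes descending _score.

package main

import (
	"encoding/json"
	"fmt"
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	payload := []byte(`{
		"query": {"match": "golang"},
		"size": 10,
		"sort": ["-_score", "_id"],
		"search_after": ["0.847512", "doc-123"]
	}`)
	var req bleve.SearchRequest
	if err := json.Unmarshal(payload, &req); err != nil { // invokes the custom UnmarshalJSON
		log.Fatal(err)
	}
	fmt.Println(req.Size, req.SearchAfter)
}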

// NewSearchRequest creates a new SearchRequest
// for the Query, using default values for all
// other search parameters.
@@ -491,7 +386,8 @@ func (iem IndexErrMap) MarshalJSON() ([]byte, error) {

func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
	var tmp map[string]string
	if err := util.UnmarshalJSON(data, &tmp); err != nil {
	err := util.UnmarshalJSON(data, &tmp)
	if err != nil {
		return err
	}
	for k, v := range tmp {
@@ -541,7 +437,7 @@ func (ss *SearchStatus) Merge(other *SearchStatus) {
// Facets - The facet results for the search.
type SearchResult struct {
	Status  *SearchStatus  `json:"status"`
	Request *SearchRequest `json:"request"`
	Request *SearchRequest `json:"request,omitempty"`
	Hits    search.DocumentMatchCollection `json:"hits"`
	Total   uint64 `json:"total_hits"`
	Cost    uint64 `json:"cost"`
@@ -571,7 +467,7 @@ func (sr *SearchResult) Size() int {
func (sr *SearchResult) String() string {
	rv := ""
	if sr.Total > 0 {
		if sr.Request.Size > 0 {
		if sr.Request != nil && sr.Request.Size > 0 {
			rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
			for i, hit := range sr.Hits {
				rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)

+6
@@ -44,9 +44,15 @@ type MakeDocumentMatchHandlerKeyType string
var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
	"MakeDocumentMatchHandlerKey")

var MakeKNNDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
	"MakeKNNDocumentMatchHandlerKey")

// MakeDocumentMatchHandler is an optional DocumentMatchHandler
// builder function which the applications can pass to bleve.
// These builder methods gives a DocumentMatchHandler function
// to bleve, which it will invoke on every document matches.
type MakeDocumentMatchHandler func(ctx *SearchContext) (
	callback DocumentMatchHandler, loadID bool, err error)

type MakeKNNDocumentMatchHandler func(ctx *SearchContext) (
	callback DocumentMatchHandler, err error)

+4
@@ -69,6 +69,10 @@ func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.Documen
	return rv, nil
}

func (c *collectStoreHeap) Internal() search.DocumentMatchCollection {
	return c.heap
}

// heap interface implementation

func (c *collectStoreHeap) Len() int {

+262
@@ -0,0 +1,262 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package collector

import (
	"context"
	"time"

	"github.com/blevesearch/bleve/v2/search"
	index "github.com/blevesearch/bleve_index_api"
)

type collectStoreKNN struct {
	internalHeaps []collectorStore
	kValues       []int64
	allHits       map[*search.DocumentMatch]struct{}
	ejectedDocs   map[*search.DocumentMatch]struct{}
}

func newStoreKNN(internalHeaps []collectorStore, kValues []int64) *collectStoreKNN {
	return &collectStoreKNN{
		internalHeaps: internalHeaps,
		kValues:       kValues,
		ejectedDocs:   make(map[*search.DocumentMatch]struct{}),
		allHits:       make(map[*search.DocumentMatch]struct{}),
	}
}

// Adds a document to the collector store and returns the documents that were ejected
// from the store. The documents that were ejected from the store are the ones that
// were not in the top K documents for any of the heaps.
// These documents are put back into the document match pool in the KNN Collector.
func (c *collectStoreKNN) AddDocument(doc *search.DocumentMatch) []*search.DocumentMatch {
	for heapIdx := 0; heapIdx < len(c.internalHeaps); heapIdx++ {
		if _, ok := doc.ScoreBreakdown[heapIdx]; !ok {
			continue
		}
		ejectedDoc := c.internalHeaps[heapIdx].AddNotExceedingSize(doc, int(c.kValues[heapIdx]))
		if ejectedDoc != nil {
			delete(ejectedDoc.ScoreBreakdown, heapIdx)
			c.ejectedDocs[ejectedDoc] = struct{}{}
		}
	}
	var rv []*search.DocumentMatch
	for doc := range c.ejectedDocs {
		if len(doc.ScoreBreakdown) == 0 {
			rv = append(rv, doc)
		}
		// clear out the ejectedDocs map to reuse it in the next AddDocument call
		delete(c.ejectedDocs, doc)
	}
	return rv
}

func (c *collectStoreKNN) Final(fixup collectorFixup) (search.DocumentMatchCollection, error) {
	for _, heap := range c.internalHeaps {
		for _, doc := range heap.Internal() {
			// duplicates may be present across the internal heaps
			// meaning the same document match may be in the top K
			// for multiple KNN queries.
			c.allHits[doc] = struct{}{}
		}
	}
	size := len(c.allHits)
	if size <= 0 {
		return make(search.DocumentMatchCollection, 0), nil
	}
	rv := make(search.DocumentMatchCollection, size)
	i := 0
	for doc := range c.allHits {
		if fixup != nil {
			err := fixup(doc)
			if err != nil {
				return nil, err
			}
		}
		rv[i] = doc
		i++
	}
	return rv, nil
}

func MakeKNNDocMatchHandler(ctx *search.SearchContext) (search.DocumentMatchHandler, error) {
	var hc *KNNCollector
	var ok bool
	if hc, ok = ctx.Collector.(*KNNCollector); ok {
		return func(d *search.DocumentMatch) error {
			if d == nil {
				return nil
			}
			toRelease := hc.knnStore.AddDocument(d)
			for _, doc := range toRelease {
				ctx.DocumentMatchPool.Put(doc)
			}
			return nil
		}, nil
	}
	return nil, nil
}

func GetNewKNNCollectorStore(kArray []int64) *collectStoreKNN {
	internalHeaps := make([]collectorStore, len(kArray))
	for knnIdx, k := range kArray {
		// TODO - Check if the datatype of k can be made into an int instead of int64
		idx := knnIdx
		internalHeaps[idx] = getOptimalCollectorStore(int(k), 0, func(i, j *search.DocumentMatch) int {
			if i.ScoreBreakdown[idx] < j.ScoreBreakdown[idx] {
				return 1
			}
			return -1
		})
	}
	return newStoreKNN(internalHeaps, kArray)
}

// implements Collector interface
type KNNCollector struct {
	knnStore *collectStoreKNN
	size     int
	total    uint64
	took     time.Duration
	results  search.DocumentMatchCollection
	maxScore float64
}

func NewKNNCollector(kArray []int64, size int64) *KNNCollector {
	return &KNNCollector{
		knnStore: GetNewKNNCollectorStore(kArray),
		size:     int(size),
	}
}

func (hc *KNNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	startTime := time.Now()
	var err error
	var next *search.DocumentMatch

	// pre-allocate enough space in the DocumentMatchPool
	// unless the sum of K is too large, then cap it
	// everything should still work, just allocates DocumentMatches on demand
	backingSize := hc.size
	if backingSize > PreAllocSizeSkipCap {
		backingSize = PreAllocSizeSkipCap + 1
	}
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), 0),
		Collector:         hc,
		IndexReader:       reader,
	}

	dmHandlerMakerKNN := MakeKNNDocMatchHandler
	if cv := ctx.Value(search.MakeKNNDocumentMatchHandlerKey); cv != nil {
		dmHandlerMakerKNN = cv.(search.MakeKNNDocumentMatchHandler)
	}
	// use the application given builder for making the custom document match
	// handler and perform callbacks/invocations on the newly made handler.
	dmHandler, err := dmHandlerMakerKNN(searchContext)
	if err != nil {
		return err
	}
	select {
	case <-ctx.Done():
		search.RecordSearchCost(ctx, search.AbortM, 0)
		return ctx.Err()
	default:
		next, err = searcher.Next(searchContext)
	}
	for err == nil && next != nil {
		if hc.total%CheckDoneEvery == 0 {
			select {
			case <-ctx.Done():
				search.RecordSearchCost(ctx, search.AbortM, 0)
				return ctx.Err()
			default:
			}
		}
		hc.total++

		err = dmHandler(next)
		if err != nil {
			break
		}

		next, err = searcher.Next(searchContext)
	}
	if err != nil {
		return err
	}

	// help finalize/flush the results in case
	// of custom document match handlers.
	err = dmHandler(nil)
	if err != nil {
		return err
	}

	// compute search duration
	hc.took = time.Since(startTime)

	// finalize actual results
	err = hc.finalizeResults(reader)
	if err != nil {
		return err
	}
	return nil
}

func (hc *KNNCollector) finalizeResults(r index.IndexReader) error {
	var err error
	hc.results, err = hc.knnStore.Final(func(doc *search.DocumentMatch) error {
		if doc.ID == "" {
			// look up the id since we need it for lookup
			var err error
			doc.ID, err = r.ExternalID(doc.IndexInternalID)
			if err != nil {
				return err
			}
		}
		return nil
	})
	return err
}

func (hc *KNNCollector) Results() search.DocumentMatchCollection {
	return hc.results
}

func (hc *KNNCollector) Total() uint64 {
	return hc.total
}

func (hc *KNNCollector) MaxScore() float64 {
	return hc.maxScore
}

func (hc *KNNCollector) Took() time.Duration {
	return hc.took
}

func (hc *KNNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	// facet unsupported for vector search
}

func (hc *KNNCollector) FacetResults() search.FacetResults {
	// facet unsupported for vector search
	return nil
}
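
A rough usage sketch (searcher and reader construction omitted and assumed; this collector is only compiled with -tags vectors): a request carrying two KNN queries with k=3 and k=5 would drive it roughly like this.

// ctx, searcher and reader are assumed to come from the surrounding search
coll := collector.NewKNNCollector([]int64{3, 5}, 8) // per-query k values, then total size
if err := coll.Collect(ctx, searcher, reader); err != nil {
	log.Fatal(err)
}
for _, hit := range coll.Results() {
	fmt.Println(hit.ID, hit.ScoreBreakdown) // per-KNN-query score contributions
}
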
+10
@@ -81,6 +81,16 @@ func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.Documen
	return search.DocumentMatchCollection{}, nil
}

func (c *collectStoreList) Internal() search.DocumentMatchCollection {
	rv := make(search.DocumentMatchCollection, c.results.Len())
	i := 0
	for e := c.results.Front(); e != nil; e = e.Next() {
		rv[i] = e.Value.(*search.DocumentMatch)
		i++
	}
	return rv
}

func (c *collectStoreList) len() int {
	return c.results.Len()
}

+4
@@ -72,6 +72,10 @@ func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.Docume
	return search.DocumentMatchCollection{}, nil
}

func (c *collectStoreSlice) Internal() search.DocumentMatchCollection {
	return c.slice
}

func (c *collectStoreSlice) len() int {
	return len(c.slice)
}

+143
-39
@@ -39,6 +39,9 @@ type collectorStore interface {
	AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch

	Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error)

	// Provides access to the internal heap implementation
	Internal() search.DocumentMatchCollection
}

// PreAllocSizeSkipCap will cap preallocation to this amount when
@@ -72,6 +75,9 @@ type TopNCollector struct {
	updateFieldVisitor index.DocValueVisitor
	dvReader           index.DocValueReader
	searchAfter        *search.DocumentMatch

	knnHits             map[string]*search.DocumentMatch
	computeNewScoreExpl search.ScoreExplCorrectionCallbackFunc
}

// CheckDoneEvery controls how frequently we check the context deadline
@@ -89,44 +95,16 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
// ordering hits by the provided sort order
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
	rv := newTopNCollector(size, 0, sort)
	rv.searchAfter = &search.DocumentMatch{
		Sort: after,
	}

	for pos, ss := range sort {
		if ss.RequiresDocID() {
			rv.searchAfter.ID = after[pos]
		}
		if ss.RequiresScoring() {
			if score, err := strconv.ParseFloat(after[pos], 64); err == nil {
				rv.searchAfter.Score = score
			}
		}
	}

	rv.searchAfter = createSearchAfterDocument(sort, after)
	return rv
}

func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
	hc := &TopNCollector{size: size, skip: skip, sort: sort}

	// pre-allocate space on the store to avoid reslicing
	// unless the size + skip is too large, then cap it
	// everything should still work, just reslices as necessary
	backingSize := size + skip + 1
	if size+skip > PreAllocSizeSkipCap {
		backingSize = PreAllocSizeSkipCap + 1
	}

	if size+skip > 10 {
		hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int {
			return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
		})
	} else {
		hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int {
			return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
		})
	}
	hc.store = getOptimalCollectorStore(size, skip, func(i, j *search.DocumentMatch) int {
		return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
	})

	// these lookups traverse an interface, so do once up-front
	if sort.RequiresDocID() {
@@ -139,6 +117,59 @@ func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
	return hc
}

func createSearchAfterDocument(sort search.SortOrder, after []string) *search.DocumentMatch {
	rv := &search.DocumentMatch{
		Sort: after,
	}
	for pos, ss := range sort {
		if ss.RequiresDocID() {
			rv.ID = after[pos]
		}
		if ss.RequiresScoring() {
			if score, err := strconv.ParseFloat(after[pos], 64); err == nil {
				rv.Score = score
			}
		}
	}
	return rv
}

// Filter document matches based on the SearchAfter field in the SearchRequest.
func FilterHitsBySearchAfter(hits []*search.DocumentMatch, sort search.SortOrder, after []string) []*search.DocumentMatch {
	if len(hits) == 0 {
		return hits
	}
	// create a search after document
	searchAfter := createSearchAfterDocument(sort, after)
	// filter the hits
	idx := 0
	cachedScoring := sort.CacheIsScore()
	cachedDesc := sort.CacheDescending()
	for _, hit := range hits {
		if sort.Compare(cachedScoring, cachedDesc, hit, searchAfter) > 0 {
			hits[idx] = hit
			idx++
		}
	}
	return hits[:idx]
}
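
A small worked example of this filter's contract (values invented): with a descending-score sort, only hits that sort strictly after the cursor survive, and the slice is compacted in place.

// hits is a []*search.DocumentMatch from a previous page
sortOrder := search.ParseSortOrderStrings([]string{"-_score", "_id"})
after := []string{"0.75", "doc-42"} // sort key of the last hit on the previous page
nextPage := collector.FilterHitsBySearchAfter(hits, sortOrder, after)
fmt.Println(len(nextPage)) // hits at or before the cursor have been dropped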
|
||||
|
||||
func getOptimalCollectorStore(size, skip int, comparator collectorCompare) collectorStore {
|
||||
// pre-allocate space on the store to avoid reslicing
|
||||
// unless the size + skip is too large, then cap it
|
||||
// everything should still work, just reslices as necessary
|
||||
backingSize := size + skip + 1
|
||||
if size+skip > PreAllocSizeSkipCap {
|
||||
backingSize = PreAllocSizeSkipCap + 1
|
||||
}
|
||||
|
||||
if size+skip > 10 {
|
||||
return newStoreHeap(backingSize, comparator)
|
||||
} else {
|
||||
return newStoreSlice(backingSize, comparator)
|
||||
}
|
||||
}
|
||||
|
||||
func (hc *TopNCollector) Size() int {
|
||||
sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr
|
||||
|
||||
@@ -215,7 +246,12 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
||||
}
|
||||
}
|
||||
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, next)
|
||||
err = hc.adjustDocumentMatch(searchContext, reader, next)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, next, false)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
@@ -227,6 +263,23 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
||||
|
||||
next, err = searcher.Next(searchContext)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hc.knnHits != nil {
|
||||
// we may have some knn hits left that did not match any of the top N tf-idf hits
|
||||
// we need to add them to the collector store to consider them as well.
|
||||
for _, knnDoc := range hc.knnHits {
|
||||
err = hc.prepareDocumentMatch(searchContext, reader, knnDoc, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = dmHandler(knnDoc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
|
||||
if statsCallbackFn != nil {
|
||||
@@ -258,12 +311,40 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
||||
|
||||
var sortByScoreOpt = []string{"_score"}

func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
func (hc *TopNCollector) adjustDocumentMatch(ctx *search.SearchContext,
	reader index.IndexReader, d *search.DocumentMatch) (err error) {
	if hc.knnHits != nil {
		d.ID, err = reader.ExternalID(d.IndexInternalID)
		if err != nil {
			return err
		}
		if knnHit, ok := hc.knnHits[d.ID]; ok {
			d.Score, d.Expl = hc.computeNewScoreExpl(d, knnHit)
			delete(hc.knnHits, d.ID)
		}
	}
	return nil
}

func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
	reader index.IndexReader, d *search.DocumentMatch, isKnnDoc bool) (err error) {

	// visit field terms for features that require it (sort, facets)
	if len(hc.neededFields) > 0 {
		err = hc.visitFieldTerms(reader, d)
	if !isKnnDoc && len(hc.neededFields) > 0 {
		err = hc.visitFieldTerms(reader, d, hc.updateFieldVisitor)
		if err != nil {
			return err
		}
	} else if isKnnDoc && hc.facetsBuilder != nil {
		// we need to visit the field terms for the knn document
		// only for those fields that are required for faceting
		// and not for sorting. This is because the knn document's
		// sort value is already computed in the knn collector.
		err = hc.visitFieldTerms(reader, d, func(field string, term []byte) {
			if hc.facetsBuilder != nil {
				hc.facetsBuilder.UpdateVisitor(field, term)
			}
		})
		if err != nil {
			return err
		}
@@ -277,9 +358,14 @@ func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
	if d.Score > hc.maxScore {
		hc.maxScore = d.Score
	}
	// early exit as the document match had its sort value calculated in the knn
	// collector itself
	if isKnnDoc {
		return nil
	}

	// see if we need to load ID (at this early stage, for example to sort on it)
	if hc.needDocIds {
	if hc.needDocIds && d.ID == "" {
		d.ID, err = reader.ExternalID(d.IndexInternalID)
		if err != nil {
			return err
@@ -314,6 +400,7 @@ func MakeTopNDocumentMatchHandler(
		// but we want to allow for exact match, so we pretend
		hc.searchAfter.HitNumber = d.HitNumber
		if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
			ctx.DocumentMatchPool.Put(d)
			return nil
		}
	}
@@ -353,12 +440,21 @@ func MakeTopNDocumentMatchHandler(

// visitFieldTerms is responsible for visiting the field terms of the
// search hit, and passing visited terms to the sort and facet builder
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch, v index.DocValueVisitor) error {
	if hc.facetsBuilder != nil {
		hc.facetsBuilder.StartDoc()
	}
	if d.ID != "" && d.IndexInternalID == nil {
		// this document may have been sent over as preSearchData and
		// we need to look up the internal id to visit the doc values for it
		var err error
		d.IndexInternalID, err = reader.InternalID(d.ID)
		if err != nil {
			return err
		}
	}

	err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor)
	err := hc.dvReader.VisitDocValues(d.IndexInternalID, v)
	if hc.facetsBuilder != nil {
		hc.facetsBuilder.EndDoc()
	}
@@ -435,3 +531,11 @@ func (hc *TopNCollector) FacetResults() search.FacetResults {
	}
	return nil
}

func (hc *TopNCollector) SetKNNHits(knnHits search.DocumentMatchCollection, newScoreExplComputer search.ScoreExplCorrectionCallbackFunc) {
	hc.knnHits = make(map[string]*search.DocumentMatch, len(knnHits))
	for _, hit := range knnHits {
		hc.knnHits[hit.ID] = hit
	}
	hc.computeNewScoreExpl = newScoreExplComputer
}
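
For orientation (not part of the vendored diff): adjustDocumentMatch consumes the map built by SetKNNHits, invoking the registered callback once per hit that also matched a KNN query. A minimal sketch of such a callback, assuming coll and knnHits are already in scope and using the ScoreExplCorrectionCallbackFunc type defined later in this diff:

	// Illustrative only: sum the regular query score and the KNN score
	// for a document that appears in both result sets.
	combine := func(queryMatch, knnMatch *search.DocumentMatch) (float64, *search.Explanation) {
		return queryMatch.Score + knnMatch.Score, nil // explanation elided
	}
	coll.SetKNNHits(knnHits, search.ScoreExplCorrectionCallbackFunc(combine))
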
+7
-24
@@ -17,7 +17,6 @@ package facet

import (
	"reflect"
	"sort"
	"strconv"
	"time"

	"github.com/blevesearch/bleve/v2/numeric"
@@ -36,10 +35,8 @@ func init() {
}

type dateTimeRange struct {
	start       time.Time
	end         time.Time
	startLayout string
	endLayout   string
	start time.Time
	end   time.Time
}

type DateTimeFacetBuilder struct {
@@ -78,12 +75,10 @@ func (fb *DateTimeFacetBuilder) Size() int {
	return sizeInBytes
}

func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time, startLayout string, endLayout string) {
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
	r := dateTimeRange{
		start:       start,
		end:         end,
		startLayout: startLayout,
		endLayout:   endLayout,
		start: start,
		end:   end,
	}
	fb.ranges[name] = &r
}
@@ -139,23 +134,11 @@ func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
			Count: count,
		}
		if !dateRange.start.IsZero() {
			var start string
			if dateRange.startLayout == "" {
				// layout not set means it is probably a timestamp
				start = strconv.FormatInt(dateRange.start.UnixNano(), 10)
			} else {
				start = dateRange.start.Format(dateRange.startLayout)
			}
			start := dateRange.start.Format(time.RFC3339Nano)
			tf.Start = &start
		}
		if !dateRange.end.IsZero() {
			var end string
			if dateRange.endLayout == "" {
				// layout not set means it is probably a timestamp
				end = strconv.FormatInt(dateRange.end.UnixNano(), 10)
			} else {
				end = dateRange.end.Format(dateRange.endLayout)
			}
			end := dateRange.end.Format(time.RFC3339Nano)
			tf.End = &end
		}
		rv.DateRanges = append(rv.DateRanges, tf)
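
The change above narrows AddRange back to bare time.Time bounds, with results always rendered via time.RFC3339Nano. A minimal sketch of the new call shape, assuming fb is a *DateTimeFacetBuilder constructed elsewhere and the range name is invented:

	start := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC)
	end := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)
	fb.AddRange("year_2023", start, end)
	// Result() now formats the bounds as e.g. "2023-01-01T00:00:00Z"
	// using time.RFC3339Nano, regardless of the layout used at query time.
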
+15
-3
@@ -321,17 +321,29 @@ func (fr *FacetResult) Merge(other *FacetResult) {
	fr.Total += other.Total
	fr.Missing += other.Missing
	fr.Other += other.Other
	if fr.Terms != nil && other.Terms != nil {
	if other.Terms != nil {
		if fr.Terms == nil {
			fr.Terms = other.Terms
			return
		}
		for _, term := range other.Terms.termFacets {
			fr.Terms.Add(term)
		}
	}
	if fr.NumericRanges != nil && other.NumericRanges != nil {
	if other.NumericRanges != nil {
		if fr.NumericRanges == nil {
			fr.NumericRanges = other.NumericRanges
			return
		}
		for _, nr := range other.NumericRanges {
			fr.NumericRanges = fr.NumericRanges.Add(nr)
		}
	}
	if fr.DateRanges != nil && other.DateRanges != nil {
	if other.DateRanges != nil {
		if fr.DateRanges == nil {
			fr.DateRanges = other.DateRanges
			return
		}
		for _, dr := range other.DateRanges {
			fr.DateRanges = fr.DateRanges.Add(dr)
		}
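
Previously a nil facet slice on the receiver meant the other side's facets were silently dropped when combining per-index results; with the change above they are adopted instead. A hedged sketch (counts invented):

	var fr, other search.FacetResult
	other.Total, other.Missing = 10, 2
	// other.DateRanges assumed populated elsewhere
	fr.Merge(&other) // fr.Total == 10, fr.Missing == 2; nil slices now adopt other's facets
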
+18
-9
@@ -27,10 +27,15 @@ import (
)

type DisjunctionQuery struct {
	Disjuncts       []Query `json:"disjuncts"`
	BoostVal        *Boost  `json:"boost,omitempty"`
	Min             float64 `json:"min"`
	queryStringMode bool
	Disjuncts              []Query `json:"disjuncts"`
	BoostVal               *Boost  `json:"boost,omitempty"`
	Min                    float64 `json:"min"`
	retrieveScoreBreakdown bool
	queryStringMode        bool
}

func (q *DisjunctionQuery) RetrieveScoreBreakdown(b bool) {
	q.retrieveScoreBreakdown = b
}

// NewDisjunctionQuery creates a new compound Query.
@@ -73,18 +78,22 @@ func (q *DisjunctionQuery) Searcher(ctx context.Context, i index.IndexReader, m
			}
			return nil, err
		}
		if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode {
			// in query string mode, skip match none
			continue
		if sr != nil {
			if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode {
				// in query string mode, skip match none
				continue
			}
			ss = append(ss, sr)
		}
		ss = append(ss, sr)
	}

	if len(ss) < 1 {
		return searcher.NewMatchNoneSearcher(i)
	}

	return searcher.NewDisjunctionSearcher(ctx, i, ss, q.Min, options)
	nctx := context.WithValue(ctx, search.IncludeScoreBreakdownKey, q.retrieveScoreBreakdown)

	return searcher.NewDisjunctionSearcher(nctx, i, ss, q.Min, options)
}

func (q *DisjunctionQuery) Validate() error {
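
RetrieveScoreBreakdown is what search_knn.go (later in this diff) toggles when it wraps per-field KNN queries in a disjunction. A minimal sketch inside package query, assuming knnQ1 and knnQ2 were built elsewhere:

	q := NewDisjunctionQuery([]Query{knnQ1, knnQ2})
	q.RetrieveScoreBreakdown(true)
	// q.Searcher(...) now stores true under search.IncludeScoreBreakdownKey,
	// which the disjunction searchers read back out of the context.
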
+74
@@ -0,0 +1,74 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package query

import (
	"context"
	"fmt"

	"github.com/blevesearch/bleve/v2/mapping"
	"github.com/blevesearch/bleve/v2/search"
	"github.com/blevesearch/bleve/v2/search/searcher"
	index "github.com/blevesearch/bleve_index_api"
)

type KNNQuery struct {
	VectorField string    `json:"field"`
	Vector      []float32 `json:"vector"`
	K           int64     `json:"k"`
	BoostVal    *Boost    `json:"boost,omitempty"`
}

func NewKNNQuery(vector []float32) *KNNQuery {
	return &KNNQuery{Vector: vector}
}

func (q *KNNQuery) Field() string {
	return q.VectorField
}

func (q *KNNQuery) SetK(k int64) {
	q.K = k
}

func (q *KNNQuery) SetFieldVal(field string) {
	q.VectorField = field
}

func (q *KNNQuery) SetBoost(b float64) {
	boost := Boost(b)
	q.BoostVal = &boost
}

func (q *KNNQuery) Boost() float64 {
	return q.BoostVal.Value()
}

func (q *KNNQuery) Searcher(ctx context.Context, i index.IndexReader,
	m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	fieldMapping := m.FieldMappingForPath(q.VectorField)
	similarityMetric := fieldMapping.Similarity
	if similarityMetric == "" {
		similarityMetric = index.DefaultSimilarityMetric
	}
	if q.K <= 0 || len(q.Vector) == 0 {
		return nil, fmt.Errorf("k must be greater than 0 and vector must be non-empty")
	}
	return searcher.NewKNNSearcher(ctx, i, m, options, q.VectorField,
		q.Vector, q.K, q.BoostVal.Value(), similarityMetric)
}
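
This mirrors how createKNNQuery (later in this diff) assembles one sub-query per KNNRequest. A minimal sketch inside package query, with an invented field name and vector:

	knnQ := NewKNNQuery([]float32{0.12, 0.87, 0.33})
	knnQ.SetFieldVal("embedding")
	knnQ.SetK(5)
	knnQ.SetBoost(1.0)
	// knnQ.Searcher(...) resolves the field's similarity metric from the
	// index mapping and hands off to searcher.NewKNNSearcher.
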
+41
@@ -65,14 +65,55 @@ type ValidatableQuery interface {
	Validate() error
}

// ParsePreSearchData deserializes a JSON representation of
// a PreSearchData object.
func ParsePreSearchData(input []byte) (map[string]interface{}, error) {
	var rv map[string]interface{}

	var tmp map[string]json.RawMessage
	err := util.UnmarshalJSON(input, &tmp)
	if err != nil {
		return nil, err
	}

	for k, v := range tmp {
		switch k {
		case search.KnnPreSearchDataKey:
			var value []*search.DocumentMatch
			if v != nil {
				err := util.UnmarshalJSON(v, &value)
				if err != nil {
					return nil, err
				}
			}
			if rv == nil {
				rv = make(map[string]interface{})
			}
			rv[search.KnnPreSearchDataKey] = value
		}
	}
	return rv, nil
}

// ParseQuery deserializes a JSON representation of
// a Query object.
func ParseQuery(input []byte) (Query, error) {
	if len(input) == 0 {
		// interpret as a match_none query
		return NewMatchNoneQuery(), nil
	}

	var tmp map[string]interface{}
	err := util.UnmarshalJSON(input, &tmp)
	if err != nil {
		return nil, err
	}

	if len(tmp) == 0 {
		// interpret as a match_none query
		return NewMatchNoneQuery(), nil
	}

	_, hasFuzziness := tmp["fuzziness"]
	_, isMatchQuery := tmp["match"]
	_, isMatchPhraseQuery := tmp["match_phrase"]
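
The only key ParsePreSearchData currently recognizes is search.KnnPreSearchDataKey. A minimal round-trip sketch (error handling elided, input invented):

	raw := []byte(`{"_knn_pre_search_data_key": []}`)
	psd, _ := ParsePreSearchData(raw)
	// psd[search.KnnPreSearchDataKey] now holds a []*search.DocumentMatch
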
+36
-31
@@ -37,6 +37,7 @@ type ConstantScorer struct {
	queryNorm              float64
	queryWeight            float64
	queryWeightExplanation *search.Explanation
	includeScore           bool
}

func (s *ConstantScorer) Size() int {
@@ -51,10 +52,11 @@ func (s *ConstantScorer) Size() int {

func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
	rv := ConstantScorer{
		options:     options,
		queryWeight: 1.0,
		constant:    constant,
		boost:       boost,
		options:      options,
		queryWeight:  1.0,
		constant:     constant,
		boost:        boost,
		includeScore: options.Score != "none",
	}

	return &rv
@@ -92,35 +94,38 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternalID) *search.DocumentMatch {
	var scoreExplanation *search.Explanation

	score := s.constant

	if s.options.Explain {
		scoreExplanation = &search.Explanation{
			Value:   score,
			Message: fmt.Sprintf("ConstantScore()"),
		}
	}

	// if the query weight isn't 1, multiply
	if s.queryWeight != 1.0 {
		score = score * s.queryWeight
		if s.options.Explain {
			childExplanations := make([]*search.Explanation, 2)
			childExplanations[0] = s.queryWeightExplanation
			childExplanations[1] = scoreExplanation
			scoreExplanation = &search.Explanation{
				Value:    score,
				Message:  fmt.Sprintf("weight(^%f), product of:", s.boost),
				Children: childExplanations,
			}
		}
	}

	rv := ctx.DocumentMatchPool.Get()
	rv.IndexInternalID = id
	rv.Score = score
	if s.options.Explain {
		rv.Expl = scoreExplanation

	if s.includeScore {
		score := s.constant

		if s.options.Explain {
			scoreExplanation = &search.Explanation{
				Value:   score,
				Message: fmt.Sprintf("ConstantScore()"),
			}
		}

		// if the query weight isn't 1, multiply
		if s.queryWeight != 1.0 {
			score = score * s.queryWeight
			if s.options.Explain {
				childExplanations := make([]*search.Explanation, 2)
				childExplanations[0] = s.queryWeightExplanation
				childExplanations[1] = scoreExplanation
				scoreExplanation = &search.Explanation{
					Value:    score,
					Message:  fmt.Sprintf("weight(^%f), product of:", s.boost),
					Children: childExplanations,
				}
			}
		}

		rv.Score = score
		if s.options.Explain {
			rv.Expl = scoreExplanation
		}
	}

	return rv
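
The new includeScore flag is derived once from the searcher options, so with scoring disabled the entire block above is skipped. A hedged sketch of how the option would be supplied:

	// Score "none" disables score computation for pooled matches.
	opts := search.SearcherOptions{Score: "none"}
	cs := scorer.NewConstantScorer(1.0, 1.0, opts)
	_ = cs // cs.Score(...) now leaves DocumentMatch.Score at its zero value
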
+40
@@ -81,3 +81,43 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [

	return rv
}

// This method is used only when a disjunction searcher is used over multiple
// KNN searchers, where only the score breakdown and the optional explanation breakdown
// are required. The final score and explanation are set when we finalize the KNN hits.
func (s *DisjunctionQueryScorer) ScoreAndExplBreakdown(ctx *search.SearchContext, constituents []*search.DocumentMatch,
	matchingIdxs []int, originalPositions []int, countTotal int) *search.DocumentMatch {

	scoreBreakdown := make(map[int]float64)
	var childrenExplanations []*search.Explanation
	if s.options.Explain {
		// we need to record which expl belongs to which matched searcher within the disjunction searcher
		childrenExplanations = make([]*search.Explanation, countTotal)
	}

	for i, docMatch := range constituents {
		var index int
		if originalPositions != nil {
			// scorer used in disjunction slice searcher
			index = originalPositions[matchingIdxs[i]]
		} else {
			// scorer used in disjunction heap searcher
			index = matchingIdxs[i]
		}
		scoreBreakdown[index] = docMatch.Score
		if s.options.Explain {
			childrenExplanations[index] = docMatch.Expl
		}
	}
	var explBreakdown *search.Explanation
	if s.options.Explain {
		explBreakdown = &search.Explanation{Children: childrenExplanations}
	}

	rv := constituents[0]
	rv.ScoreBreakdown = scoreBreakdown
	rv.Expl = explBreakdown
	rv.FieldTermLocations = search.MergeFieldTermLocations(
		rv.FieldTermLocations, constituents[1:])
	return rv
}
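
For a disjunction over three KNN sub-searchers where only the first and third matched a given document, the breakdown is keyed by sub-query position rather than match order. A hedged sketch of the resulting value (sub-scores invented):

	// rv.ScoreBreakdown after ScoreAndExplBreakdown:
	breakdown := map[int]float64{0: 0.42, 2: 0.17}
	_ = breakdown // key 1 is absent because that sub-searcher did not match
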
+156
@@ -0,0 +1,156 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package scorer

import (
	"fmt"
	"math"
	"reflect"

	"github.com/blevesearch/bleve/v2/search"
	"github.com/blevesearch/bleve/v2/size"
	index "github.com/blevesearch/bleve_index_api"
)

var reflectStaticSizeKNNQueryScorer int

func init() {
	var sqs KNNQueryScorer
	reflectStaticSizeKNNQueryScorer = int(reflect.TypeOf(sqs).Size())
}

type KNNQueryScorer struct {
	queryVector            []float32
	queryField             string
	queryWeight            float64
	queryBoost             float64
	queryNorm              float64
	options                search.SearcherOptions
	similarityMetric       string
	queryWeightExplanation *search.Explanation
}

func (s *KNNQueryScorer) Size() int {
	sizeInBytes := reflectStaticSizeKNNQueryScorer + size.SizeOfPtr +
		(len(s.queryVector) * size.SizeOfFloat32) + len(s.queryField)

	if s.queryWeightExplanation != nil {
		sizeInBytes += s.queryWeightExplanation.Size()
	}

	return sizeInBytes
}

func NewKNNQueryScorer(queryVector []float32, queryField string, queryBoost float64,
	options search.SearcherOptions,
	similarityMetric string) *KNNQueryScorer {
	return &KNNQueryScorer{
		queryVector:      queryVector,
		queryField:       queryField,
		queryBoost:       queryBoost,
		queryWeight:      1.0,
		options:          options,
		similarityMetric: similarityMetric,
	}
}

// Score used when the knnMatch.Score = 0 ->
// the query and indexed vector are exactly the same.
const maxKNNScore = math.MaxFloat32

func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext,
	knnMatch *index.VectorDoc) *search.DocumentMatch {
	rv := ctx.DocumentMatchPool.Get()
	var scoreExplanation *search.Explanation
	score := knnMatch.Score
	if sqs.similarityMetric == index.EuclideanDistance {
		// in case of euclidean distance being the distance metric,
		// an exact vector (perfect match), would return distance = 0
		if score == 0 {
			score = maxKNNScore
		} else {
			// euclidean distances need to be inverted to work with
			// tf-idf scoring
			score = 1.0 / score
		}
	}
	if sqs.options.Explain {
		scoreExplanation = &search.Explanation{
			Value: score,
			Message: fmt.Sprintf("fieldWeight(%s in doc %s), score of:",
				sqs.queryField, knnMatch.ID),
			Children: []*search.Explanation{
				{
					Value: score,
					Message: fmt.Sprintf("vector(field(%s:%s) with similarity_metric(%s)=%e",
						sqs.queryField, knnMatch.ID, sqs.similarityMetric, score),
				},
			},
		}
	}
	// if the query weight isn't 1, multiply
	if sqs.queryWeight != 1.0 && score != maxKNNScore {
		score = score * sqs.queryWeight
		if sqs.options.Explain {
			scoreExplanation = &search.Explanation{
				Value: score,
				// Product of score * weight
				// Avoid adding the query vector to the explanation since vectors
				// can get quite large.
				Message: fmt.Sprintf("weight(%s:query Vector^%f in %s), product of:",
					sqs.queryField, sqs.queryBoost, knnMatch.ID),
				Children: []*search.Explanation{sqs.queryWeightExplanation, scoreExplanation},
			}
		}
	}
	rv.Score = score
	if sqs.options.Explain {
		rv.Expl = scoreExplanation
	}
	rv.IndexInternalID = append(rv.IndexInternalID, knnMatch.ID...)
	return rv
}

func (sqs *KNNQueryScorer) Weight() float64 {
	return sqs.queryBoost * sqs.queryBoost
}

func (sqs *KNNQueryScorer) SetQueryNorm(qnorm float64) {
	sqs.queryNorm = qnorm

	// update the query weight
	sqs.queryWeight = sqs.queryBoost * sqs.queryNorm

	if sqs.options.Explain {
		childrenExplanations := make([]*search.Explanation, 2)
		childrenExplanations[0] = &search.Explanation{
			Value:   sqs.queryBoost,
			Message: "boost",
		}
		childrenExplanations[1] = &search.Explanation{
			Value:   sqs.queryNorm,
			Message: "queryNorm",
		}
		sqs.queryWeightExplanation = &search.Explanation{
			Value: sqs.queryWeight,
			Message: fmt.Sprintf("queryWeight(%s:query Vector^%f), product of:",
				sqs.queryField, sqs.queryBoost),
			Children: childrenExplanations,
		}
	}
}
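
A worked instance of the euclidean-distance inversion above, runnable on its own:

	package main

	import "fmt"

	func main() {
		// A distance of 0 maps to maxKNNScore (identical vectors);
		// otherwise the distance is inverted so nearer vectors score higher.
		dist := 0.25
		score := 1.0 / dist
		fmt.Println(score) // 4; a distance of 0.5 would score only 2
	}
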
+18
-2
@@ -147,7 +147,7 @@ type DocumentMatch struct {
	Index           string                `json:"index,omitempty"`
	ID              string                `json:"id"`
	IndexInternalID index.IndexInternalID `json:"-"`
	Score           float64               `json:"score"`
	Score           float64               `json:"score,omitempty"`
	Expl            *Explanation          `json:"explanation,omitempty"`
	Locations       FieldTermLocationMap  `json:"locations,omitempty"`
	Fragments       FieldFragmentMap      `json:"fragments,omitempty"`
@@ -173,6 +173,22 @@ type DocumentMatch struct {
	// not all sub-queries matched
	// if false, all the sub-queries matched
	PartialMatch bool `json:"partial_match,omitempty"`

	// used to indicate the sub-scores that combined to form the
	// final score for this document match. This is only populated
	// when the search request's query is a DisjunctionQuery
	// or a ConjunctionQuery. The map key is the index of the sub-query
	// in the DisjunctionQuery or ConjunctionQuery. The map value is the
	// sub-score for that sub-query.
	ScoreBreakdown map[int]float64 `json:"score_breakdown,omitempty"`

	// internal variable used in PreSearch phase of search in alias
	// to indicate the name of the index that this match came from.
	// used in knn search.
	// it is a stack of index names, the top of the stack is the name
	// of the index that this match came from
	// of the current alias view, used in alias of aliases scenario
	IndexNames []string `json:"index_names,omitempty"`
}

func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
@@ -334,7 +350,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
}

func (dm *DocumentMatch) String() string {
	return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
	return fmt.Sprintf("[%s-%f]", dm.ID, dm.Score)
}

type DocumentMatchCollection []*DocumentMatch
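
Given the omitempty tags above, a hedged sketch (field values invented) of how a hit with a breakdown serializes; note that encoding/json renders the int map keys as strings:

	// {"id":"doc-7","score":0.59,"score_breakdown":{"0":0.42,"1":0.17}}
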
+53
@@ -0,0 +1,53 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package searcher

import (
	"context"

	"github.com/blevesearch/bleve/v2/search"
	index "github.com/blevesearch/bleve_index_api"
)

func optimizeKNN(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher) error {
	var octx index.VectorOptimizableContext
	var err error

	for _, searcher := range qsearchers {
		// Only applicable to KNN Searchers.
		o, ok := searcher.(index.VectorOptimizable)
		if !ok {
			continue
		}

		octx, err = o.VectorOptimize(ctx, octx)
		if err != nil {
			return err
		}
	}

	// No KNN searchers.
	if octx == nil {
		return nil
	}

	// Postings lists and iterators replaced in the pointer to the
	// vector reader
	return octx.Finish()
}
+31
@@ -0,0 +1,31 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !vectors
// +build !vectors

package searcher

import (
	"context"

	"github.com/blevesearch/bleve/v2/search"
	index "github.com/blevesearch/bleve_index_api"
)

func optimizeKNN(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher) error {
	// No-op
	return nil
}
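
The two files above follow the standard Go paired-build-tag pattern: the vectors build gets the real optimizer, the default build a no-op stub with the identical signature, so call sites compile under either tag. Selecting the real implementation is a build-time flag of the go tool (package paths illustrative):

	go build -tags vectors ./...
	go test -tags vectors ./search/searcher/...
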
+20
@@ -33,3 +33,23 @@ func (otrl OrderedSearcherList) Less(i, j int) bool {
func (otrl OrderedSearcherList) Swap(i, j int) {
	otrl[i], otrl[j] = otrl[j], otrl[i]
}

type OrderedPositionalSearcherList struct {
	searchers []search.Searcher
	index     []int
}

// sort.Interface

func (otrl OrderedPositionalSearcherList) Len() int {
	return len(otrl.searchers)
}

func (otrl OrderedPositionalSearcherList) Less(i, j int) bool {
	return otrl.searchers[i].Count() < otrl.searchers[j].Count()
}

func (otrl OrderedPositionalSearcherList) Swap(i, j int) {
	otrl.searchers[i], otrl.searchers[j] = otrl.searchers[j], otrl.searchers[i]
	otrl.index[i], otrl.index[j] = otrl.index[j], otrl.index[i]
}
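
The Swap above keeps a parallel slice of original positions in sync while sorting, which is what later lets sub-scores be mapped back to sub-query indexes. The same idiom on plain ints, as a runnable sketch:

	package main

	import (
		"fmt"
		"sort"
	)

	// byValWithPos mirrors OrderedPositionalSearcherList.
	type byValWithPos struct {
		vals []int
		pos  []int
	}

	func (s byValWithPos) Len() int           { return len(s.vals) }
	func (s byValWithPos) Less(i, j int) bool { return s.vals[i] < s.vals[j] }
	func (s byValWithPos) Swap(i, j int) {
		s.vals[i], s.vals[j] = s.vals[j], s.vals[i]
		s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
	}

	func main() {
		vals := []int{30, 10, 20}
		pos := []int{0, 1, 2}
		sort.Sort(byValWithPos{vals, pos})
		fmt.Println(vals, pos) // [10 20 30] [1 2 0]: pos[k] is vals[k]'s original index
	}
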
+15
-15
@@ -35,7 +35,7 @@ func init() {

type ConjunctionSearcher struct {
	indexReader index.IndexReader
	searchers   OrderedSearcherList
	searchers   []search.Searcher
	queryNorm   float64
	currs       []*search.DocumentMatch
	maxIDIdx    int
@@ -88,6 +88,20 @@ func NewConjunctionSearcher(ctx context.Context, indexReader index.IndexReader,
	return &rv, nil
}

func (s *ConjunctionSearcher) computeQueryNorm() {
	// first calculate sum of squared weights
	sumOfSquaredWeights := 0.0
	for _, searcher := range s.searchers {
		sumOfSquaredWeights += searcher.Weight()
	}
	// now compute query norm from this
	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
	// finally tell all the downstream searchers the norm
	for _, searcher := range s.searchers {
		searcher.SetQueryNorm(s.queryNorm)
	}
}

func (s *ConjunctionSearcher) Size() int {
	sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
		s.scorer.Size()
@@ -105,20 +119,6 @@ func (s *ConjunctionSearcher) Size() int {
	return sizeInBytes
}

func (s *ConjunctionSearcher) computeQueryNorm() {
	// first calculate sum of squared weights
	sumOfSquaredWeights := 0.0
	for _, searcher := range s.searchers {
		sumOfSquaredWeights += searcher.Weight()
	}
	// now compute query norm from this
	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
	// finally tell all the downstream searchers the norm
	for _, searcher := range s.searchers {
		searcher.SetQueryNorm(s.queryNorm)
	}
}

func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
	var err error
	// get all searchers pointing at their first match
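
computeQueryNorm (only relocated in this hunk) implements the standard Lucene-style normalization: the norm is the reciprocal square root of the summed searcher weights. A worked, runnable instance with invented weights:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		// Two downstream searchers reporting Weight() of 4.0 and 12.0:
		sumOfSquaredWeights := 4.0 + 12.0
		queryNorm := 1.0 / math.Sqrt(sumOfSquaredWeights)
		fmt.Println(queryNorm) // 0.25; each searcher then gets SetQueryNorm(0.25)
	}
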
+25
-9
@@ -46,15 +46,31 @@ func optionsDisjunctionOptimizable(options search.SearcherOptions) bool {
func newDisjunctionSearcher(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (search.Searcher, error) {
	// attempt the "unadorned" disjunction optimization only when we
	// do not need extra information like freq-norm's or term vectors
	// and the requested min is simple
	if len(qsearchers) > 1 && min <= 1 &&
		optionsDisjunctionOptimizable(options) {
		rv, err := optimizeCompositeSearcher(ctx, "disjunction:unadorned",
			indexReader, qsearchers, options)
		if err != nil || rv != nil {
			return rv, err

	var disjOverKNN bool
	if ctx != nil {
		disjOverKNN, _ = ctx.Value(search.IncludeScoreBreakdownKey).(bool)
	}
	if disjOverKNN {
		// The KNN Searcher optimization is a necessary pre-req for the KNN Searchers,
		// not an optional optimization like for, say, term searchers.
		// It's an optimization to repeatedly search an open vector index when applicable,
		// rather than individually opening and searching a vector index.
		err := optimizeKNN(ctx, indexReader, qsearchers)
		if err != nil {
			return nil, err
		}
	} else {
		// attempt the "unadorned" disjunction optimization only when we
		// do not need extra information like freq-norm's or term vectors
		// and the requested min is simple
		if len(qsearchers) > 1 && min <= 1 &&
			optionsDisjunctionOptimizable(options) {
			rv, err := optimizeCompositeSearcher(ctx, "disjunction:unadorned",
				indexReader, qsearchers, options)
			if err != nil || rv != nil {
				return rv, err
			}
		}
	}
+56
-35
@@ -39,22 +39,25 @@ func init() {
}

type SearcherCurr struct {
	searcher search.Searcher
	curr     *search.DocumentMatch
	searcher    search.Searcher
	curr        *search.DocumentMatch
	matchingIdx int
}

type DisjunctionHeapSearcher struct {
	indexReader index.IndexReader

	numSearchers int
	scorer       *scorer.DisjunctionQueryScorer
	min          int
	queryNorm    float64
	initialized  bool
	searchers    []search.Searcher
	heap         []*SearcherCurr
	numSearchers           int
	scorer                 *scorer.DisjunctionQueryScorer
	min                    int
	queryNorm              float64
	retrieveScoreBreakdown bool
	initialized            bool
	searchers              []search.Searcher
	heap                   []*SearcherCurr

	matching      []*search.DocumentMatch
	matchingIdxs  []int
	matchingCurrs []*SearcherCurr

	bytesRead uint64
@@ -67,22 +70,42 @@ func newDisjunctionHeapSearcher(ctx context.Context, indexReader index.IndexRead
	if limit && tooManyClauses(len(searchers)) {
		return nil, tooManyClausesErr("", len(searchers))
	}
	var retrieveScoreBreakdown bool
	if ctx != nil {
		retrieveScoreBreakdown, _ = ctx.Value(search.IncludeScoreBreakdownKey).(bool)
	}

	// build our searcher
	rv := DisjunctionHeapSearcher{
		indexReader:   indexReader,
		searchers:     searchers,
		numSearchers:  len(searchers),
		scorer:        scorer.NewDisjunctionQueryScorer(options),
		min:           int(min),
		matching:      make([]*search.DocumentMatch, len(searchers)),
		matchingCurrs: make([]*SearcherCurr, len(searchers)),
		heap:          make([]*SearcherCurr, 0, len(searchers)),
		indexReader:            indexReader,
		searchers:              searchers,
		numSearchers:           len(searchers),
		scorer:                 scorer.NewDisjunctionQueryScorer(options),
		min:                    int(min),
		matching:               make([]*search.DocumentMatch, len(searchers)),
		matchingCurrs:          make([]*SearcherCurr, len(searchers)),
		matchingIdxs:           make([]int, len(searchers)),
		retrieveScoreBreakdown: retrieveScoreBreakdown,
		heap:                   make([]*SearcherCurr, 0, len(searchers)),
	}
	rv.computeQueryNorm()
	return &rv, nil
}

func (s *DisjunctionHeapSearcher) computeQueryNorm() {
	// first calculate sum of squared weights
	sumOfSquaredWeights := 0.0
	for _, searcher := range s.searchers {
		sumOfSquaredWeights += searcher.Weight()
	}
	// now compute query norm from this
	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
	// finally tell all the downstream searchers the norm
	for _, searcher := range s.searchers {
		searcher.SetQueryNorm(s.queryNorm)
	}
}

func (s *DisjunctionHeapSearcher) Size() int {
	sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr +
		s.scorer.Size()
@@ -101,24 +124,11 @@ func (s *DisjunctionHeapSearcher) Size() int {
	// since searchers and document matches already counted above
	sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr
	sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr
	sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt

	return sizeInBytes
}

func (s *DisjunctionHeapSearcher) computeQueryNorm() {
	// first calculate sum of squared weights
	sumOfSquaredWeights := 0.0
	for _, searcher := range s.searchers {
		sumOfSquaredWeights += searcher.Weight()
	}
	// now compute query norm from this
	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
	// finally tell all the downstream searchers the norm
	for _, searcher := range s.searchers {
		searcher.SetQueryNorm(s.queryNorm)
	}
}

func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error {
	// alloc a single block of SearcherCurrs
	block := make([]SearcherCurr, len(s.searchers))
@@ -132,6 +142,7 @@ func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error
		if curr != nil {
			block[i].searcher = searcher
			block[i].curr = curr
			block[i].matchingIdx = i
			heap.Push(s, &block[i])
		}
	}
@@ -147,6 +158,7 @@ func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error
func (s *DisjunctionHeapSearcher) updateMatches() error {
	matching := s.matching[:0]
	matchingCurrs := s.matchingCurrs[:0]
	matchingIdxs := s.matchingIdxs[:0]

	if len(s.heap) > 0 {

@@ -154,17 +166,20 @@ func (s *DisjunctionHeapSearcher) updateMatches() error {
		next := heap.Pop(s).(*SearcherCurr)
		matching = append(matching, next.curr)
		matchingCurrs = append(matchingCurrs, next)
		matchingIdxs = append(matchingIdxs, next.matchingIdx)

		// now as long as top of heap matches, keep popping
		for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 {
			next = heap.Pop(s).(*SearcherCurr)
			matching = append(matching, next.curr)
			matchingCurrs = append(matchingCurrs, next)
			matchingIdxs = append(matchingIdxs, next.matchingIdx)
		}
	}

	s.matching = matching
	s.matchingCurrs = matchingCurrs
	s.matchingIdxs = matchingIdxs

	return nil
}
@@ -197,10 +212,16 @@ func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) (
	for !found && len(s.matching) > 0 {
		if len(s.matching) >= s.min {
			found = true
			partialMatch := len(s.matching) != len(s.searchers)
			// score this match
			rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
			rv.PartialMatch = partialMatch
			if s.retrieveScoreBreakdown {
				// just return score and expl breakdown here, since it is a disjunction over knn searchers,
				// and the final score and expl is calculated in the knn collector
				rv = s.scorer.ScoreAndExplBreakdown(ctx, s.matching, s.matchingIdxs, nil, s.numSearchers)
			} else {
				// score this match
				partialMatch := len(s.matching) != len(s.searchers)
				rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
				rv.PartialMatch = partialMatch
			}
		}

		// invoke next on all the matching searchers
+75
-42
@@ -34,17 +34,19 @@ func init() {
}

type DisjunctionSliceSearcher struct {
	indexReader  index.IndexReader
	searchers    OrderedSearcherList
	numSearchers int
	queryNorm    float64
	currs        []*search.DocumentMatch
	scorer       *scorer.DisjunctionQueryScorer
	min          int
	matching     []*search.DocumentMatch
	matchingIdxs []int
	initialized  bool
	bytesRead    uint64
	indexReader            index.IndexReader
	searchers              []search.Searcher
	originalPos            []int
	numSearchers           int
	queryNorm              float64
	retrieveScoreBreakdown bool
	currs                  []*search.DocumentMatch
	scorer                 *scorer.DisjunctionQueryScorer
	min                    int
	matching               []*search.DocumentMatch
	matchingIdxs           []int
	initialized            bool
	bytesRead              uint64
}

func newDisjunctionSliceSearcher(ctx context.Context, indexReader index.IndexReader,
@@ -54,21 +56,45 @@ func newDisjunctionSliceSearcher(ctx context.Context, indexReader index.IndexRea
	if limit && tooManyClauses(len(qsearchers)) {
		return nil, tooManyClausesErr("", len(qsearchers))
	}
	// build the downstream searchers
	searchers := make(OrderedSearcherList, len(qsearchers))
	for i, searcher := range qsearchers {
		searchers[i] = searcher

	var searchers OrderedSearcherList
	var originalPos []int
	var retrieveScoreBreakdown bool
	if ctx != nil {
		retrieveScoreBreakdown, _ = ctx.Value(search.IncludeScoreBreakdownKey).(bool)
	}
	// sort the searchers
	sort.Sort(sort.Reverse(searchers))
	// build our searcher

	if retrieveScoreBreakdown {
		// needed only when kNN is in the picture
		sortedSearchers := &OrderedPositionalSearcherList{
			searchers: make([]search.Searcher, len(qsearchers)),
			index:     make([]int, len(qsearchers)),
		}
		for i, searcher := range qsearchers {
			sortedSearchers.searchers[i] = searcher
			sortedSearchers.index[i] = i
		}
		sort.Sort(sortedSearchers)
		searchers = sortedSearchers.searchers
		originalPos = sortedSearchers.index
	} else {
		searchers = make(OrderedSearcherList, len(qsearchers))
		for i, searcher := range qsearchers {
			searchers[i] = searcher
		}
		sort.Sort(searchers)
	}

	rv := DisjunctionSliceSearcher{
		indexReader:  indexReader,
		searchers:    searchers,
		numSearchers: len(searchers),
		currs:        make([]*search.DocumentMatch, len(searchers)),
		scorer:       scorer.NewDisjunctionQueryScorer(options),
		min:          int(min),
		indexReader:            indexReader,
		searchers:              searchers,
		originalPos:            originalPos,
		numSearchers:           len(searchers),
		currs:                  make([]*search.DocumentMatch, len(searchers)),
		scorer:                 scorer.NewDisjunctionQueryScorer(options),
		min:                    int(min),
		retrieveScoreBreakdown: retrieveScoreBreakdown,

		matching:     make([]*search.DocumentMatch, len(searchers)),
		matchingIdxs: make([]int, len(searchers)),
	}
@@ -76,6 +102,20 @@ func newDisjunctionSliceSearcher(ctx context.Context, indexReader index.IndexRea
	return &rv, nil
}

func (s *DisjunctionSliceSearcher) computeQueryNorm() {
	// first calculate sum of squared weights
	sumOfSquaredWeights := 0.0
	for _, searcher := range s.searchers {
		sumOfSquaredWeights += searcher.Weight()
	}
	// now compute query norm from this
	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
	// finally tell all the downstream searchers the norm
	for _, searcher := range s.searchers {
		searcher.SetQueryNorm(s.queryNorm)
	}
}

func (s *DisjunctionSliceSearcher) Size() int {
	sizeInBytes := reflectStaticSizeDisjunctionSliceSearcher + size.SizeOfPtr +
		s.scorer.Size()
@@ -97,24 +137,11 @@ func (s *DisjunctionSliceSearcher) Size() int {
	}

	sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt
	sizeInBytes += len(s.originalPos) * size.SizeOfInt

	return sizeInBytes
}

func (s *DisjunctionSliceSearcher) computeQueryNorm() {
	// first calculate sum of squared weights
	sumOfSquaredWeights := 0.0
	for _, searcher := range s.searchers {
		sumOfSquaredWeights += searcher.Weight()
	}
	// now compute query norm from this
	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
	// finally tell all the downstream searchers the norm
	for _, searcher := range s.searchers {
		searcher.SetQueryNorm(s.queryNorm)
	}
}

func (s *DisjunctionSliceSearcher) initSearchers(ctx *search.SearchContext) error {
	var err error
	// get all searchers pointing at their first match
@@ -197,10 +224,16 @@ func (s *DisjunctionSliceSearcher) Next(ctx *search.SearchContext) (
	for !found && len(s.matching) > 0 {
		if len(s.matching) >= s.min {
			found = true
			partialMatch := len(s.matching) != len(s.searchers)
			// score this match
			rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
			rv.PartialMatch = partialMatch
			if s.retrieveScoreBreakdown {
				// just return score and expl breakdown here, since it is a disjunction over knn searchers,
				// and the final score and expl is calculated in the knn collector
				rv = s.scorer.ScoreAndExplBreakdown(ctx, s.matching, s.matchingIdxs, s.originalPos, s.numSearchers)
			} else {
				// score this match
				partialMatch := len(s.matching) != len(s.searchers)
				rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
				rv.PartialMatch = partialMatch
			}
		}

		// invoke next on all the matching searchers
+142
@@ -0,0 +1,142 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package searcher

import (
	"context"
	"reflect"

	"github.com/blevesearch/bleve/v2/mapping"
	"github.com/blevesearch/bleve/v2/search"
	"github.com/blevesearch/bleve/v2/search/scorer"
	"github.com/blevesearch/bleve/v2/size"
	index "github.com/blevesearch/bleve_index_api"
)

var reflectStaticSizeKNNSearcher int

func init() {
	var ks KNNSearcher
	reflectStaticSizeKNNSearcher = int(reflect.TypeOf(ks).Size())
}

type KNNSearcher struct {
	field        string
	vector       []float32
	k            int64
	indexReader  index.IndexReader
	vectorReader index.VectorReader
	scorer       *scorer.KNNQueryScorer
	count        uint64
	vd           index.VectorDoc
}

func NewKNNSearcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions, field string, vector []float32, k int64,
	boost float64, similarityMetric string) (search.Searcher, error) {
	if vr, ok := i.(index.VectorIndexReader); ok {
		vectorReader, err := vr.VectorReader(ctx, vector, field, k)
		if err != nil {
			return nil, err
		}

		knnScorer := scorer.NewKNNQueryScorer(vector, field, boost,
			options, similarityMetric)
		return &KNNSearcher{
			indexReader:  i,
			vectorReader: vectorReader,
			field:        field,
			vector:       vector,
			k:            k,
			scorer:       knnScorer,
		}, nil
	}
	return nil, nil
}

func (s *KNNSearcher) VectorOptimize(ctx context.Context, octx index.VectorOptimizableContext) (
	index.VectorOptimizableContext, error) {
	o, ok := s.vectorReader.(index.VectorOptimizable)
	if ok {
		return o.VectorOptimize(ctx, octx)
	}

	return nil, nil
}

func (s *KNNSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (
	*search.DocumentMatch, error) {
	knnMatch, err := s.vectorReader.Next(s.vd.Reset())
	if err != nil {
		return nil, err
	}

	if knnMatch == nil {
		return nil, nil
	}

	docMatch := s.scorer.Score(ctx, knnMatch)

	return docMatch, nil
}

func (s *KNNSearcher) Close() error {
	return s.vectorReader.Close()
}

func (s *KNNSearcher) Count() uint64 {
	return s.vectorReader.Count()
}

func (s *KNNSearcher) DocumentMatchPoolSize() int {
	return 1
}

func (s *KNNSearcher) Min() int {
	return 0
}

func (s *KNNSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	knnMatch, err := s.vectorReader.Next(s.vd.Reset())
	if err != nil {
		return nil, err
	}

	if knnMatch == nil {
		return nil, nil
	}

	docMatch := s.scorer.Score(ctx, knnMatch)

	return docMatch, nil
}

func (s *KNNSearcher) SetQueryNorm(qnorm float64) {
	s.scorer.SetQueryNorm(qnorm)
}

func (s *KNNSearcher) Size() int {
	return reflectStaticSizeKNNSearcher + size.SizeOfPtr +
		s.vectorReader.Size() +
		s.vd.Size() +
		s.scorer.Size()
}

func (s *KNNSearcher) Weight() float64 {
	return s.scorer.Weight()
}
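
Like any search.Searcher, the KNN searcher is driven by repeated Next calls until it returns nil. A minimal driver sketch, assuming knnSearcher and sctx (a *search.SearchContext) were set up by a collector:

	for {
		dm, err := knnSearcher.Next(sctx)
		if err != nil {
			return err
		}
		if dm == nil {
			break // the vector reader is exhausted after roughly k hits
		}
		// dm.Score already reflects the similarity-metric handling above
	}
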
+13
@@ -106,6 +106,7 @@ const (
const SearchIncrementalCostKey = "_search_incremental_cost_key"
const QueryTypeKey = "_query_type_key"
const FuzzyMatchPhraseKey = "_fuzzy_match_phrase_key"
const IncludeScoreBreakdownKey = "_include_score_breakdown_key"

func RecordSearchCost(ctx context.Context,
	msg SearchIncrementalCostCallbackMsg, bytes uint64) {
@@ -133,3 +134,15 @@ const MaxGeoBufPoolSize = 24 * 1024
const MinGeoBufPoolSize = 24

type GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool

const KnnPreSearchDataKey = "_knn_pre_search_data_key"

const PreSearchKey = "_presearch_key"

type ScoreExplCorrectionCallbackFunc func(queryMatch *DocumentMatch, knnMatch *DocumentMatch) (float64, *Explanation)

type SearcherStartCallbackFn func(size uint64) error
type SearcherEndCallbackFn func(size uint64) error

const SearcherStartCallbackKey = "_searcher_start_callback_key"
const SearcherEndCallbackKey = "_searcher_end_callback_key"
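
IncludeScoreBreakdownKey is the context key the disjunction query writes and the disjunction searchers read back, as seen in the hunks above. A minimal sketch of that plumbing, assuming ctx is any parent context:

	nctx := context.WithValue(ctx, search.IncludeScoreBreakdownKey, true)
	retrieve, _ := nctx.Value(search.IncludeScoreBreakdownKey).(bool) // true
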
+524
@@ -0,0 +1,524 @@
|
||||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build vectors
|
||||
// +build vectors
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/search"
|
||||
"github.com/blevesearch/bleve/v2/search/collector"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type knnOperator string
|
||||
|
||||
// Must be updated only at init
|
||||
var BleveMaxK = int64(10000)
|
||||
|
||||
type SearchRequest struct {
|
||||
Query query.Query `json:"query"`
|
||||
Size int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort search.SortOrder `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score,omitempty"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
|
||||
KNN []*KNNRequest `json:"knn"`
|
||||
KNNOperator knnOperator `json:"knn_operator"`
|
||||
|
||||
// PreSearchData will be a map that will be used
|
||||
// in the second phase of any 2-phase search, to provide additional
|
||||
// context to the second phase. This is useful in the case of index
|
||||
// aliases where the first phase will gather the PreSearchData from all
|
||||
// the indexes in the alias, and the second phase will use that
|
||||
// PreSearchData to perform the actual search.
|
||||
// The currently accepted map configuration is:
|
||||
//
|
||||
// "_knn_pre_search_data_key": []*search.DocumentMatch
|
||||
|
||||
PreSearchData map[string]interface{} `json:"pre_search_data,omitempty"`
|
||||
|
||||
sortFunc func(sort.Interface)
|
||||
}
|
||||
|
||||
type KNNRequest struct {
|
||||
Field string `json:"field"`
|
||||
Vector []float32 `json:"vector"`
|
||||
K int64 `json:"k"`
|
||||
Boost *query.Boost `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func (r *SearchRequest) AddKNN(field string, vector []float32, k int64, boost float64) {
|
||||
b := query.Boost(boost)
|
||||
r.KNN = append(r.KNN, &KNNRequest{
|
||||
Field: field,
|
||||
Vector: vector,
|
||||
K: k,
|
||||
Boost: &b,
|
||||
})
|
||||
}
|
||||
|
||||
func (r *SearchRequest) AddKNNOperator(operator knnOperator) {
|
||||
r.KNNOperator = operator
|
||||
}
|
||||
|
||||
// UnmarshalJSON deserializes a JSON representation of
|
||||
// a SearchRequest
|
||||
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
||||
var temp struct {
|
||||
Q json.RawMessage `json:"query"`
|
||||
Size *int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Facets FacetsRequest `json:"facets"`
|
||||
Explain bool `json:"explain"`
|
||||
Sort []json.RawMessage `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
KNN []*KNNRequest `json:"knn"`
|
||||
KNNOperator knnOperator `json:"knn_operator"`
|
||||
PreSearchData json.RawMessage `json:"pre_search_data"`
|
||||
}
|
||||
|
||||
err := json.Unmarshal(input, &temp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if temp.Size == nil {
|
||||
r.Size = 10
|
||||
} else {
|
||||
r.Size = *temp.Size
|
||||
}
|
||||
if temp.Sort == nil {
|
||||
r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
|
||||
} else {
|
||||
r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
r.From = temp.From
|
||||
r.Explain = temp.Explain
|
||||
r.Highlight = temp.Highlight
|
||||
r.Fields = temp.Fields
|
||||
r.Facets = temp.Facets
|
||||
r.IncludeLocations = temp.IncludeLocations
|
||||
r.Score = temp.Score
|
||||
r.SearchAfter = temp.SearchAfter
|
||||
r.SearchBefore = temp.SearchBefore
|
||||
r.Query, err = query.ParseQuery(temp.Q)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if r.Size < 0 {
|
||||
r.Size = 10
|
||||
}
|
||||
if r.From < 0 {
|
||||
r.From = 0
|
||||
}
|
||||
|
||||
r.KNN = temp.KNN
|
||||
r.KNNOperator = temp.KNNOperator
|
||||
if r.KNNOperator == "" {
|
||||
r.KNNOperator = knnOperatorOr
|
||||
}
|
||||
|
||||
if temp.PreSearchData != nil {
|
||||
r.PreSearchData, err = query.ParsePreSearchData(temp.PreSearchData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
func copySearchRequest(req *SearchRequest, preSearchData map[string]interface{}) *SearchRequest {
|
||||
rv := SearchRequest{
|
||||
Query: req.Query,
|
||||
Size: req.Size + req.From,
|
||||
From: 0,
|
||||
Highlight: req.Highlight,
|
||||
Fields: req.Fields,
|
||||
Facets: req.Facets,
|
||||
Explain: req.Explain,
|
||||
Sort: req.Sort.Copy(),
|
||||
IncludeLocations: req.IncludeLocations,
|
||||
Score: req.Score,
|
||||
SearchAfter: req.SearchAfter,
|
||||
SearchBefore: req.SearchBefore,
|
||||
KNN: req.KNN,
|
||||
KNNOperator: req.KNNOperator,
|
||||
PreSearchData: preSearchData,
|
||||
}
|
||||
return &rv
|
||||
|
||||
}
|
||||
|
||||
var (
|
||||
knnOperatorAnd = knnOperator("and")
|
||||
knnOperatorOr = knnOperator("or")
|
||||
)
|
||||
|
||||
func createKNNQuery(req *SearchRequest) (query.Query, []int64, int64, error) {
|
||||
if requestHasKNN(req) {
|
||||
// first perform validation
|
||||
err := validateKNN(req)
|
||||
if err != nil {
|
||||
return nil, nil, 0, err
|
||||
}
|
||||
var subQueries []query.Query
|
||||
kArray := make([]int64, 0, len(req.KNN))
|
||||
sumOfK := int64(0)
|
||||
for _, knn := range req.KNN {
|
||||
knnQuery := query.NewKNNQuery(knn.Vector)
|
||||
knnQuery.SetFieldVal(knn.Field)
|
||||
knnQuery.SetK(knn.K)
|
||||
knnQuery.SetBoost(knn.Boost.Value())
|
||||
subQueries = append(subQueries, knnQuery)
|
||||
kArray = append(kArray, knn.K)
|
||||
sumOfK += knn.K
|
||||
}
|
||||
rv := query.NewDisjunctionQuery(subQueries)
|
||||
rv.RetrieveScoreBreakdown(true)
|
||||
return rv, kArray, sumOfK, nil
|
||||
}
|
||||
return nil, nil, 0, nil
|
||||
}
|
||||
|
||||
func validateKNN(req *SearchRequest) error {
|
||||
if req.KNN != nil &&
|
||||
req.KNNOperator != "" &&
|
||||
req.KNNOperator != knnOperatorOr &&
|
||||
req.KNNOperator != knnOperatorAnd {
|
||||
return fmt.Errorf("unknown knn operator: %s", req.KNNOperator)
|
||||
}
|
||||
for _, q := range req.KNN {
|
||||
if q == nil {
|
||||
return fmt.Errorf("knn query cannot be nil")
|
||||
}
|
||||
if q.K <= 0 || len(q.Vector) == 0 {
|
||||
return fmt.Errorf("k must be greater than 0 and vector must be non-empty")
|
||||
}
|
||||
if q.K > BleveMaxK {
|
||||
return fmt.Errorf("k must be less than %d", BleveMaxK)
|
||||
}
|
||||
}
|
||||
switch req.KNNOperator {
|
||||
case knnOperatorAnd, knnOperatorOr, "":
|
||||
// Valid cases, do nothing
|
||||
default:
|
||||
return fmt.Errorf("knn_operator must be either 'and' / 'or'")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func addSortAndFieldsToKNNHits(req *SearchRequest, knnHits []*search.DocumentMatch, reader index.IndexReader, name string) (err error) {
|
||||
requiredSortFields := req.Sort.RequiredFields()
|
||||
var dvReader index.DocValueReader
|
||||
var updateFieldVisitor index.DocValueVisitor
|
||||
if len(requiredSortFields) > 0 {
|
||||
dvReader, err = reader.DocValueReader(requiredSortFields)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
updateFieldVisitor = func(field string, term []byte) {
|
||||
req.Sort.UpdateVisitor(field, term)
|
||||
}
|
||||
}
|
||||
for _, hit := range knnHits {
|
||||
if len(requiredSortFields) > 0 {
|
||||
err = dvReader.VisitDocValues(hit.IndexInternalID, updateFieldVisitor)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
req.Sort.Value(hit)
|
||||
err, _ = LoadAndHighlightFields(hit, req, "", reader, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hit.Index = name
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader, preSearch bool) ([]*search.DocumentMatch, error) {
|
||||
KNNQuery, kArray, sumOfK, err := createKNNQuery(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
knnSearcher, err := KNNQuery.Searcher(ctx, reader, i.m, search.SearcherOptions{
|
||||
Explain: req.Explain,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
knnCollector := collector.NewKNNCollector(kArray, sumOfK)
|
||||
err = knnCollector.Collect(ctx, knnSearcher, reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
knnHits := knnCollector.Results()
|
||||
if !preSearch {
|
||||
knnHits = finalizeKNNResults(req, knnHits)
|
||||
}
|
||||
// at this point, irrespective of whether it is a preSearch or not,
|
||||
// the knn hits are populated with Sort and Fields.
|
||||
// it must be ensured downstream that the Sort and Fields are not
|
||||
// re-evaluated, for these hits.
|
||||
// also add the index names to the hits, so that when early
|
||||
// exit takes place after the first phase, the hits will have
|
||||
// a valid value for Index.
|
||||
err = addSortAndFieldsToKNNHits(req, knnHits, reader, i.name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return knnHits, nil
|
||||
}
|
||||
|
||||
func setKnnHitsInCollector(knnHits []*search.DocumentMatch, req *SearchRequest, coll *collector.TopNCollector) {
	if len(knnHits) > 0 {
		newScoreExplComputer := func(queryMatch *search.DocumentMatch, knnMatch *search.DocumentMatch) (float64, *search.Explanation) {
			totalScore := queryMatch.Score + knnMatch.Score
			if !req.Explain {
				// exit early as we don't need to compute the explanation
				return totalScore, nil
			}
			return totalScore, &search.Explanation{Value: totalScore, Message: "sum of:", Children: []*search.Explanation{queryMatch.Expl, knnMatch.Expl}}
		}
		coll.SetKNNHits(knnHits, search.ScoreExplCorrectionCallbackFunc(newScoreExplComputer))
	}
}

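// finalizeKNNResults applies the knn_operator semantics to the merged
// hits. Illustrative example: with three KNN sub-queries and
// knnOperatorAnd, a hit whose ScoreBreakdown carries only two entries is
// dropped; with knnOperatorOr it is kept and scored by the sum of its
// entries (unless the score is "none").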
func finalizeKNNResults(req *SearchRequest, knnHits []*search.DocumentMatch) []*search.DocumentMatch {
	// if the KNN operator is AND, then we need to filter out the hits that
	// do not match all of the KNN queries.
	if req.KNNOperator == knnOperatorAnd {
		idx := 0
		for _, hit := range knnHits {
			if len(hit.ScoreBreakdown) == len(req.KNN) {
				knnHits[idx] = hit
				idx++
			}
		}
		knnHits = knnHits[:idx]
	}
	// fix the score using the score breakdown now
	// if the score is "none", then the score is left at 0.0
	// if req.Explain is true, then we need to use the expl breakdown to
	// finalize the correct explanation.
	for _, hit := range knnHits {
		hit.Score = 0.0
		if req.Score != "none" {
			for _, score := range hit.ScoreBreakdown {
				hit.Score += score
			}
		}
		if req.Explain {
			childrenExpl := make([]*search.Explanation, 0, len(hit.ScoreBreakdown))
			for i := range hit.ScoreBreakdown {
				childrenExpl = append(childrenExpl, hit.Expl.Children[i])
			}
			hit.Expl = &search.Explanation{Value: hit.Score, Message: "sum of:", Children: childrenExpl}
		}
		// we don't need the score breakdown anymore
		// so we can set it to nil
		hit.ScoreBreakdown = nil
	}
	return knnHits
}

// when we are setting KNN hits in the preSearchData, we need to make sure that
// each KNN hit goes to the right index. This is because the KNN hits are
// collected from all the indexes in the alias, but the preSearchData is
// specific to each index. If alias A1 contains indexes I1 and I2, and the
// KNN hits collected from both I1 and I2 are merged to get the top K
// hits, then the top K hits need to be distributed back to I1 and I2,
// so that the preSearchData for I1 contains the top K hits from I1 and
// the preSearchData for I2 contains the top K hits from I2.
func validateAndDistributeKNNHits(knnHits []*search.DocumentMatch, indexes []Index) (map[string][]*search.DocumentMatch, error) {
	// create a set of all the index names of this alias
	indexNames := make(map[string]struct{}, len(indexes))
	for _, index := range indexes {
		indexNames[index.Name()] = struct{}{}
	}
	segregatedKnnHits := make(map[string][]*search.DocumentMatch)
	for _, hit := range knnHits {
		// for each hit, we need to perform a validation check to ensure that the stack
		// is still valid.
		//
		// if the stack is empty, then we have an inconsistency/abnormality,
		// since any hit with an empty stack is supposed to land on a leaf index
		// and not an alias. This cannot happen in normal circumstances, but we
		// perform this check to be safe, since we extract the stack top in the
		// following steps.
		if len(hit.IndexNames) == 0 {
			return nil, ErrorTwoPhaseSearchInconsistency
		}
		// since the stack is not empty, we need to check if the top of the stack
		// is the name of an index that is part of this alias. If not, then we
		// have an inconsistency that could be caused by a topology change.
		stackTopIdx := len(hit.IndexNames) - 1
		top := hit.IndexNames[stackTopIdx]
		if _, exists := indexNames[top]; !exists {
			return nil, ErrorTwoPhaseSearchInconsistency
		}
		if stackTopIdx == 0 {
			// if the stack consists of only one index, popping the top
			// would leave an empty slice; handle this case by setting
			// IndexNames to nil, so that the final search results will not
			// contain the indexNames field.
			hit.IndexNames = nil
		} else {
			hit.IndexNames = hit.IndexNames[:stackTopIdx]
		}
		segregatedKnnHits[top] = append(segregatedKnnHits[top], hit)
	}
	return segregatedKnnHits, nil
}

func requestHasKNN(req *SearchRequest) bool {
	return len(req.KNN) > 0
}

// returns true if the search request contains a KNN request that can be
// satisfied by just performing a preSearch, completely bypassing the
// actual search.
func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool {
	// if req.Query is not match_none => then we need to go to phase 2
	// to perform the actual query.
	if _, ok := req.Query.(*query.MatchNoneQuery); !ok {
		return false
	}
	// req.Query is a match_none query
	//
	// if the request contains facets, we need to perform phase 2 to
	// calculate the facet result, since documents are removed as part of
	// the merging process after phase 1; facet results calculated during
	// phase 1 would therefore be incorrect.
	if req.Facets != nil {
		return false
	}
	// the request is a match_none query and does not contain any facets,
	// so we can satisfy the request using just the preSearch result.
	return true
}

func constructKnnPreSearchData(mergedOut map[string]map[string]interface{}, preSearchResult *SearchResult,
	indexes []Index) (map[string]map[string]interface{}, error) {

	distributedHits, err := validateAndDistributeKNNHits([]*search.DocumentMatch(preSearchResult.Hits), indexes)
	if err != nil {
		return nil, err
	}
	for _, index := range indexes {
		mergedOut[index.Name()][search.KnnPreSearchDataKey] = distributedHits[index.Name()]
	}
	return mergedOut, nil
}

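// addKnnToDummyRequest copies the KNN portion of the real request onto the
// match_none preSearch request, forcing the "or" operator so that every
// candidate hit survives until the operator is applied after the merge.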
func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) {
	dummyReq.KNN = realReq.KNN
	dummyReq.KNNOperator = knnOperatorOr
	dummyReq.Explain = realReq.Explain
	dummyReq.Fields = realReq.Fields
	dummyReq.Sort = realReq.Sort
}

// the preSearchData for KNN is a list of DocumentMatch objects
// that need to be redistributed to the right index.
// This is used only in the case of an alias tree, where the indexes
// are at the leaves of the tree, and the master alias is at the root.
// At each level of the tree, the preSearchData needs to be redistributed
// to the indexes/aliases at that level, because the preSearchData is
// specific to each final index at the leaf.
func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
	knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch)
	if !ok {
		return nil, fmt.Errorf("request does not have knn preSearchData for redistribution")
	}
	segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes)
	if err != nil {
		return nil, err
	}

	rv := make(map[string]map[string]interface{})
	for _, index := range indexes {
		rv[index.Name()] = make(map[string]interface{})
	}

	for _, index := range indexes {
		for k, v := range req.PreSearchData {
			switch k {
			case search.KnnPreSearchDataKey:
				rv[index.Name()][k] = segregatedKnnHits[index.Name()]
			default:
				rv[index.Name()][k] = v
			}
		}
	}
	return rv, nil
}

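// newKnnPreSearchResultProcessor returns a processor whose addFn streams
// each index's preSearch hits into a shared KNN collector store (tagging
// every hit with its index name), and whose finalizeFn replaces the
// result's hits with the merged top-K per KNN sub-query.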
func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor {
	kArray := make([]int64, len(req.KNN))
	for i, knnReq := range req.KNN {
		kArray[i] = knnReq.K
	}
	knnStore := collector.GetNewKNNCollectorStore(kArray)
	return &knnPreSearchResultProcessor{
		addFn: func(sr *SearchResult, indexName string) {
			for _, hit := range sr.Hits {
				// tag the hit with the index name, so that when the
				// final search result is constructed, the hit will have
				// a valid path to follow along the alias tree to reach
				// the index.
				hit.IndexNames = append(hit.IndexNames, indexName)
				knnStore.AddDocument(hit)
			}
		},
		finalizeFn: func(sr *SearchResult) {
			// passing nil as the document fixup function, because we don't
			// need to fix up the documents, since this was already done in
			// the first phase; hence the error is always nil.
			// the merged knn hits are finalized and set in the search result.
			sr.Hits, _ = knnStore.Final(nil)
		},
	}
}

+207
@@ -0,0 +1,207 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !vectors
// +build !vectors

package bleve

import (
	"context"
	"encoding/json"
	"sort"

	"github.com/blevesearch/bleve/v2/search"
	"github.com/blevesearch/bleve/v2/search/collector"
	"github.com/blevesearch/bleve/v2/search/query"
	index "github.com/blevesearch/bleve_index_api"
)

// A SearchRequest describes all the parameters
// needed to search the index.
// Query is required.
// Size/From describe how much and which part of the
// result set to return.
// Highlight describes optional search result
// highlighting.
// Fields describes a list of field values which
// should be retrieved for result documents, provided they
// were stored while indexing.
// Facets describe the set of facets to be computed.
// Explain triggers inclusion of additional search
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed.
// SearchAfter supports deep paging by providing a minimum sort key.
// SearchBefore supports deep paging by providing a maximum sort key.
// sortFunc specifies the sort implementation to use for sorting results.
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
	ClientContextID  string            `json:"client_context_id,omitempty"`
	Query            query.Query       `json:"query"`
	Size             int               `json:"size"`
	From             int               `json:"from"`
	Highlight        *HighlightRequest `json:"highlight"`
	Fields           []string          `json:"fields"`
	Facets           FacetsRequest     `json:"facets"`
	Explain          bool              `json:"explain"`
	Sort             search.SortOrder  `json:"sort"`
	IncludeLocations bool              `json:"includeLocations"`
	Score            string            `json:"score,omitempty"`
	SearchAfter      []string          `json:"search_after"`
	SearchBefore     []string          `json:"search_before"`

	// PreSearchData will be a map that will be used
	// in the second phase of any 2-phase search, to provide additional
	// context to the second phase. This is useful in the case of index
	// aliases where the first phase will gather the PreSearchData from all
	// the indexes in the alias, and the second phase will use that
	// PreSearchData to perform the actual search.
	// The currently accepted map configuration is:
	//
	//	"_knn_pre_search_data_key": []*search.DocumentMatch
	PreSearchData map[string]interface{} `json:"pre_search_data,omitempty"`

	sortFunc func(sort.Interface)
}

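// Illustrative request (not part of the upstream file) showing the JSON
// shape UnmarshalJSON below accepts; field names follow the struct tags
// above:
//
//	{
//	  "query": {"match": "beer", "field": "name"},
//	  "size": 10,
//	  "from": 0,
//	  "fields": ["*"],
//	  "sort": ["-_score"]
//	}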
// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
	var temp struct {
		Q                json.RawMessage   `json:"query"`
		Size             *int              `json:"size"`
		From             int               `json:"from"`
		Highlight        *HighlightRequest `json:"highlight"`
		Fields           []string          `json:"fields"`
		Facets           FacetsRequest     `json:"facets"`
		Explain          bool              `json:"explain"`
		Sort             []json.RawMessage `json:"sort"`
		IncludeLocations bool              `json:"includeLocations"`
		Score            string            `json:"score"`
		SearchAfter      []string          `json:"search_after"`
		SearchBefore     []string          `json:"search_before"`
		PreSearchData    json.RawMessage   `json:"pre_search_data"`
	}

	err := json.Unmarshal(input, &temp)
	if err != nil {
		return err
	}

	if temp.Size == nil {
		r.Size = 10
	} else {
		r.Size = *temp.Size
	}
	if temp.Sort == nil {
		r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
	} else {
		r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
		if err != nil {
			return err
		}
	}
	r.From = temp.From
	r.Explain = temp.Explain
	r.Highlight = temp.Highlight
	r.Fields = temp.Fields
	r.Facets = temp.Facets
	r.IncludeLocations = temp.IncludeLocations
	r.Score = temp.Score
	r.SearchAfter = temp.SearchAfter
	r.SearchBefore = temp.SearchBefore
	r.Query, err = query.ParseQuery(temp.Q)
	if err != nil {
		return err
	}

	if r.Size < 0 {
		r.Size = 10
	}
	if r.From < 0 {
		r.From = 0
	}
	if temp.PreSearchData != nil {
		r.PreSearchData, err = query.ParsePreSearchData(temp.PreSearchData)
		if err != nil {
			return err
		}
	}

	return nil
}

// -----------------------------------------------------------------------------

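// copySearchRequest clones a request for fan-out across the indexes of an
// alias, folding From into Size (Size+From, From=0) so each child returns
// enough hits for pagination to be re-applied after merging.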
func copySearchRequest(req *SearchRequest, preSearchData map[string]interface{}) *SearchRequest {
	rv := SearchRequest{
		Query:            req.Query,
		Size:             req.Size + req.From,
		From:             0,
		Highlight:        req.Highlight,
		Fields:           req.Fields,
		Facets:           req.Facets,
		Explain:          req.Explain,
		Sort:             req.Sort.Copy(),
		IncludeLocations: req.IncludeLocations,
		Score:            req.Score,
		SearchAfter:      req.SearchAfter,
		SearchBefore:     req.SearchBefore,
		PreSearchData:    preSearchData,
	}
	return &rv
}

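// The stubs below mirror the API of the "vectors" build; compiled without
// vector support they report no KNN content and leave hits untouched.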
func validateKNN(req *SearchRequest) error {
	return nil
}

func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader, preSearch bool) ([]*search.DocumentMatch, error) {
	return nil, nil
}

func setKnnHitsInCollector(knnHits []*search.DocumentMatch, req *SearchRequest, coll *collector.TopNCollector) {
}

func requestHasKNN(req *SearchRequest) bool {
	return false
}

func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) {
}

func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
	return nil, nil
}

func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool {
	return false
}

func constructKnnPreSearchData(mergedOut map[string]map[string]interface{}, preSearchResult *SearchResult,
	indexes []Index) (map[string]map[string]interface{}, error) {
	return mergedOut, nil
}

func finalizeKNNResults(req *SearchRequest, knnHits []*search.DocumentMatch) []*search.DocumentMatch {
	return knnHits
}

func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor {
	return &knnPreSearchResultProcessor{} // equivalent to nil
}

+62
@@ -0,0 +1,62 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package util

import (
	"math"
	"reflect"
)

// extracts the numeric value (if possible) and returns it as a float64
func ExtractNumericValFloat64(v interface{}) (float64, bool) {
	val := reflect.ValueOf(v)
	if !val.IsValid() {
		return 0, false
	}

	switch {
	case val.CanFloat():
		return val.Float(), true
	case val.CanInt():
		return float64(val.Int()), true
	case val.CanUint():
		return float64(val.Uint()), true
	}

	return 0, false
}

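// Illustrative usage (not part of the upstream file):
// ExtractNumericValFloat64(int32(7)) returns (7, true), while
// ExtractNumericValFloat64("7") returns (0, false), since a string is not
// a numeric kind under reflection.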
// extracts the numeric value (if possible) and returns it as a float32,
// reporting failure if a float64 input would overflow float32
func ExtractNumericValFloat32(v interface{}) (float32, bool) {
	val := reflect.ValueOf(v)
	if !val.IsValid() {
		return 0, false
	}

	switch {
	case val.CanFloat():
		floatVal := val.Float()
		if floatVal > math.MaxFloat32 {
			return 0, false
		}
		return float32(floatVal), true
	case val.CanInt():
		return float32(val.Int()), true
	case val.CanUint():
		return float32(val.Uint()), true
	}

	return 0, false
}