build(deps): bump github.com/blevesearch/bleve/v2 from 2.5.4 to 2.5.5

Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.5.4 to 2.5.5.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.5.4...v2.5.5)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-version: 2.5.5
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
dependabot[bot]
2025-11-20 14:18:08 +00:00
committed by Ralf Haferkamp
parent e85d8effc1
commit 4ae0951f5f
16 changed files with 582 additions and 79 deletions

10
go.mod
View File

@@ -11,7 +11,7 @@ require (
github.com/Nerzal/gocloak/v13 v13.9.0
github.com/bbalet/stopwords v1.0.0
github.com/beevik/etree v1.6.0
github.com/blevesearch/bleve/v2 v2.5.4
github.com/blevesearch/bleve/v2 v2.5.5
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/coreos/go-oidc/v3 v3.16.0
github.com/cs3org/go-cs3apis v0.0.0-20250908152307-4ca807afe54e
@@ -140,13 +140,13 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/bitly/go-simplejson v0.5.0 // indirect
github.com/bits-and-blooms/bitset v1.22.0 // indirect
github.com/blevesearch/bleve_index_api v1.2.10 // indirect
github.com/blevesearch/bleve_index_api v1.2.11 // indirect
github.com/blevesearch/geo v0.2.4 // indirect
github.com/blevesearch/go-faiss v1.0.25 // indirect
github.com/blevesearch/go-faiss v1.0.26 // indirect
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/gtreap v0.1.1 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.3.12 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.3.13 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
@@ -156,7 +156,7 @@ require (
github.com/blevesearch/zapx/v13 v13.4.2 // indirect
github.com/blevesearch/zapx/v14 v14.4.2 // indirect
github.com/blevesearch/zapx/v15 v15.4.2 // indirect
github.com/blevesearch/zapx/v16 v16.2.6 // indirect
github.com/blevesearch/zapx/v16 v16.2.7 // indirect
github.com/bluele/gcache v0.0.2 // indirect
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect

20
go.sum
View File

@@ -151,22 +151,22 @@ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blevesearch/bleve/v2 v2.5.4 h1:1iur8e+PHsxtncV2xIVuqlQme/V8guEDO2uV6Wll3lQ=
github.com/blevesearch/bleve/v2 v2.5.4/go.mod h1:yB4PnV4N2q5rTEpB2ndG8N2ISexBQEFIYgwx4ztfvoo=
github.com/blevesearch/bleve_index_api v1.2.10 h1:FMFmZCmTX6PdoLLvwUnKF2RsmILFFwO3h0WPevXY9fE=
github.com/blevesearch/bleve_index_api v1.2.10/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
github.com/blevesearch/bleve/v2 v2.5.5 h1:lzC89QUCco+y1qBnJxGqm4AbtsdsnlUvq0kXok8n3C8=
github.com/blevesearch/bleve/v2 v2.5.5/go.mod h1:t5WoESS5TDteTdnjhhvpA1BpLYErOBX2IQViTMLK7wo=
github.com/blevesearch/bleve_index_api v1.2.11 h1:bXQ54kVuwP8hdrXUSOnvTQfgK0KI1+f9A0ITJT8tX1s=
github.com/blevesearch/bleve_index_api v1.2.11/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
github.com/blevesearch/geo v0.2.4 h1:ECIGQhw+QALCZaDcogRTNSJYQXRtC8/m8IKiA706cqk=
github.com/blevesearch/geo v0.2.4/go.mod h1:K56Q33AzXt2YExVHGObtmRSFYZKYGv0JEN5mdacJJR8=
github.com/blevesearch/go-faiss v1.0.25 h1:lel1rkOUGbT1CJ0YgzKwC7k+XH0XVBHnCVWahdCXk4U=
github.com/blevesearch/go-faiss v1.0.25/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-faiss v1.0.26 h1:4dRLolFgjPyjkaXwff4NfbZFdE/dfywbzDqporeQvXI=
github.com/blevesearch/go-faiss v1.0.26/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.3.12 h1:GGZc2qwbyRBwtckPPkHkLyXw64mmsLJxdturBI1cM+c=
github.com/blevesearch/scorch_segment_api/v2 v2.3.12/go.mod h1:JBRGAneqgLSI2+jCNjtwMqp2B7EBF3/VUzgDPIU33MM=
github.com/blevesearch/scorch_segment_api/v2 v2.3.13 h1:ZPjv/4VwWvHJZKeMSgScCapOy8+DdmsmRyLmSB88UoY=
github.com/blevesearch/scorch_segment_api/v2 v2.3.13/go.mod h1:ENk2LClTehOuMS8XzN3UxBEErYmtwkE7MAArFTXs9Vc=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
@@ -185,8 +185,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT
github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8=
github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k=
github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw=
github.com/blevesearch/zapx/v16 v16.2.6 h1:OHuUl2GhM+FpBq9RwNsJ4k/QodqbMMHoQEgn/IHYpu8=
github.com/blevesearch/zapx/v16 v16.2.6/go.mod h1:cuAPB+YoIyRngNhno1S1GPr9SfMk+x/SgAHBLXSIq3k=
github.com/blevesearch/zapx/v16 v16.2.7 h1:xcgFRa7f/tQXOwApVq7JWgPYSlzyUMmkuYa54tMDuR0=
github.com/blevesearch/zapx/v16 v16.2.7/go.mod h1:murSoCJPCk25MqURrcJaBQ1RekuqSCSfMjXH4rHyA14=
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=

View File

@@ -82,8 +82,8 @@ func scoreSortFunc() func(i, j *search.DocumentMatch) int {
func getFusionExplAt(hit *search.DocumentMatch, i int, value float64, message string) *search.Explanation {
return &search.Explanation{
Value: value,
Message: message,
Value: value,
Message: message,
Children: []*search.Explanation{hit.Expl.Children[i]},
}
}

View File

@@ -388,3 +388,11 @@ type SynonymIndex interface {
// IndexSynonym indexes a synonym definition, with the specified id and belonging to the specified collection.
IndexSynonym(id string, collection string, definition *SynonymDefinition) error
}
// InsightsIndex is an Index that can additionally report analytical insights
// about its contents, such as term frequency distributions and the cluster
// densities of IVF vector indexes.
type InsightsIndex interface {
	Index

	// TermFrequencies returns up to limit tokens for the field, ordered by
	// frequency (descending when descending is true, ascending otherwise).
	TermFrequencies(field string, limit int, descending bool) ([]index.TermFreq, error)

	// CentroidCardinalities returns up to limit centroids (clusters) from IVF
	// indexes for the field, ordered by data density (cardinality).
	CentroidCardinalities(field string, limit int, descending bool) ([]index.CentroidCardinality, error)
}

View File

@@ -23,6 +23,7 @@ import (
"path/filepath"
"reflect"
"sort"
"strings"
"sync"
"sync/atomic"
@@ -1234,3 +1235,61 @@ func (is *IndexSnapshot) MergeUpdateFieldsInfo(updatedFields map[string]*index.U
}
}
}
// TermFrequencies returns the top N terms ordered by the frequencies
// for a given field across all segments in the index snapshot.
func (is *IndexSnapshot) TermFrequencies(field string, limit int, descending bool) (
	termFreqs []index.TermFreq, err error) {
	if len(is.segment) == 0 {
		return nil, nil
	}
	if limit <= 0 {
		return nil, fmt.Errorf("limit must be positive")
	}

	// FieldDict already aggregates term frequencies across all segments.
	fieldDict, err := is.FieldDict(field)
	if err != nil {
		return nil, fmt.Errorf("failed to get field dictionary for field %s: %v", field, err)
	}
	defer fieldDict.Close()

	// Size the result for the number of unique terms in the dictionary.
	termFreqs = make([]index.TermFreq, 0, fieldDict.Cardinality())

	// Drain the dictionary; a nil entry signals the end of the terms.
	for {
		entry, nextErr := fieldDict.Next()
		if nextErr != nil {
			return nil, fmt.Errorf("error iterating field dictionary: %v", nextErr)
		}
		if entry == nil {
			break
		}
		termFreqs = append(termFreqs, index.TermFreq{
			Term:      entry.Term,
			Frequency: entry.Count,
		})
	}

	// Order by frequency in the requested direction; ties are broken
	// lexicographically by term so the ordering is deterministic.
	sort.Slice(termFreqs, func(a, b int) bool {
		if termFreqs[a].Frequency == termFreqs[b].Frequency {
			return termFreqs[a].Term < termFreqs[b].Term
		}
		if descending {
			return termFreqs[a].Frequency > termFreqs[b].Frequency
		}
		return termFreqs[a].Frequency < termFreqs[b].Frequency
	})

	// Cap the result at the requested limit.
	if len(termFreqs) > limit {
		termFreqs = termFreqs[:limit]
	}
	return termFreqs, nil
}

View File

@@ -23,6 +23,7 @@ import (
"encoding/json"
"fmt"
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
@@ -167,3 +168,52 @@ func (i *IndexSnapshotVectorReader) Close() error {
// TODO Consider if any scope of recycling here.
return nil
}
// CentroidCardinalities returns up to limit centroid cardinalities from the
// IVF vector indexes for the given field, aggregated across all segments in
// the snapshot and ordered by cardinality (descending or ascending).
//
// Receiver renamed i -> is for consistency with the other IndexSnapshot
// methods (e.g. TermFrequencies); the old name was also shadowed by the
// sort.Slice closure parameters below.
func (is *IndexSnapshot) CentroidCardinalities(field string, limit int, descending bool) (
	[]index.CentroidCardinality, error) {
	if len(is.segment) == 0 {
		return nil, nil
	}
	if limit <= 0 {
		return nil, fmt.Errorf("limit must be positive")
	}
	// Each segment contributes at most limit entries.
	centroids := make([]index.CentroidCardinality, 0, limit*len(is.segment))
	for _, seg := range is.segment {
		sv, ok := seg.segment.(segment_api.VectorSegment)
		if !ok {
			// Segment holds no vector data; nothing to contribute.
			continue
		}
		vecIndex, err := sv.InterpretVectorIndex(field,
			false /* does not require filtering */, seg.deleted)
		if err != nil {
			return nil, fmt.Errorf("failed to interpret vector index for field %s in segment: %v", field, err)
		}
		// NOTE(review): vecIndex exposes a Close/close hook elsewhere in this
		// package — confirm whether it must be released here.
		centroidCardinalities, err := vecIndex.ObtainKCentroidCardinalitiesFromIVFIndex(limit, descending)
		if err != nil {
			return nil, fmt.Errorf("failed to obtain top k centroid cardinalities for field %s in segment: %v", field, err)
		}
		centroids = append(centroids, centroidCardinalities...)
	}
	if len(centroids) == 0 {
		return nil, nil
	}
	// Merge the per-segment results into one global ordering by cardinality.
	sort.Slice(centroids, func(a, b int) bool {
		if descending {
			return centroids[a].Cardinality > centroids[b].Cardinality
		}
		return centroids[a].Cardinality < centroids[b].Cardinality
	})
	if limit >= len(centroids) {
		return centroids, nil
	}
	return centroids[:limit], nil
}

View File

@@ -17,6 +17,8 @@ package bleve
import (
"context"
"fmt"
"sort"
"strings"
"sync"
"time"
@@ -1136,3 +1138,159 @@ func (f *indexAliasImplFieldDict) Close() error {
func (f *indexAliasImplFieldDict) Cardinality() int {
return f.fieldDict.Cardinality()
}
// -----------------------------------------------------------------------------

// TermFrequencies returns up to limit terms for the field, aggregated across
// all indexes in the alias and ordered by frequency (descending or ascending).
// Child results are over-sampled and their frequencies for identical terms
// are summed before the final sort and truncation.
func (i *indexAliasImpl) TermFrequencies(field string, limit int, descending bool) (
	[]index.TermFreq, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}
	if len(i.indexes) < 1 {
		return nil, ErrorAliasEmpty
	}
	// Reject non-positive limits up front: child indexes already do so, and a
	// negative limit would otherwise panic when slicing the merged result.
	if limit <= 0 {
		return nil, fmt.Errorf("limit must be positive")
	}

	// short circuit the simple case
	if len(i.indexes) == 1 {
		if idx, ok := i.indexes[0].(InsightsIndex); ok {
			return idx.TermFrequencies(field, limit, descending)
		}
		return nil, nil
	}

	// run search on each index in separate go routine
	var waitGroup sync.WaitGroup
	asyncResults := make(chan []index.TermFreq, len(i.indexes))
	searchChildIndex := func(in Index) {
		defer waitGroup.Done()
		var rv []index.TermFreq
		if idx, ok := in.(InsightsIndex); ok {
			// over sample for higher accuracy
			rv, _ = idx.TermFrequencies(field, limit*5, descending)
		}
		asyncResults <- rv
	}
	waitGroup.Add(len(i.indexes))
	for _, in := range i.indexes {
		go searchChildIndex(in)
	}
	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()

	// Merge: sum the frequencies of identical terms across children.
	rvTermFreqsMap := make(map[string]uint64)
	for asr := range asyncResults {
		for _, entry := range asr {
			rvTermFreqsMap[entry.Term] += entry.Frequency
		}
	}
	rvTermFreqs := make([]index.TermFreq, 0, len(rvTermFreqsMap))
	for term, freq := range rvTermFreqsMap {
		rvTermFreqs = append(rvTermFreqs, index.TermFreq{
			Term:      term,
			Frequency: freq,
		})
	}

	// One comparator covers both orders; ties break lexicographically so the
	// result is deterministic despite the random map iteration above.
	sort.Slice(rvTermFreqs, func(a, b int) bool {
		if rvTermFreqs[a].Frequency == rvTermFreqs[b].Frequency {
			return strings.Compare(rvTermFreqs[a].Term, rvTermFreqs[b].Term) < 0
		}
		if descending {
			return rvTermFreqs[a].Frequency > rvTermFreqs[b].Frequency
		}
		return rvTermFreqs[a].Frequency < rvTermFreqs[b].Frequency
	})

	if limit > len(rvTermFreqs) {
		limit = len(rvTermFreqs)
	}
	return rvTermFreqs[:limit], nil
}
// CentroidCardinalities returns up to limit centroid cardinalities for the
// field, merged across all indexes in the alias and ordered by cardinality
// (descending or ascending).
func (i *indexAliasImpl) CentroidCardinalities(field string, limit int, descending bool) (
	[]index.CentroidCardinality, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}
	if len(i.indexes) < 1 {
		return nil, ErrorAliasEmpty
	}
	// Reject non-positive limits: child indexes already do so, and a negative
	// limit would otherwise panic in the make() below.
	if limit <= 0 {
		return nil, fmt.Errorf("limit must be positive")
	}

	// short circuit the simple case
	if len(i.indexes) == 1 {
		if idx, ok := i.indexes[0].(InsightsIndex); ok {
			return idx.CentroidCardinalities(field, limit, descending)
		}
		return nil, nil
	}

	// run search on each index in separate go routine
	var waitGroup sync.WaitGroup
	asyncResults := make(chan []index.CentroidCardinality, len(i.indexes))
	searchChildIndex := func(in Index) {
		defer waitGroup.Done()
		var rv []index.CentroidCardinality
		if idx, ok := in.(InsightsIndex); ok {
			rv, _ = idx.CentroidCardinalities(field, limit, descending)
		}
		asyncResults <- rv
	}
	waitGroup.Add(len(i.indexes))
	for _, in := range i.indexes {
		go searchChildIndex(in)
	}
	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()

	// Merge incrementally: after each child result arrives, keep the best
	// (at most limit) entries seen so far.
	rvCentroidCardinalitiesResult := make([]index.CentroidCardinality, 0, limit)
	for asr := range asyncResults {
		asr = append(asr, rvCentroidCardinalitiesResult...)
		sort.Slice(asr, func(a, b int) bool {
			if descending {
				return asr[a].Cardinality > asr[b].Cardinality
			}
			return asr[a].Cardinality < asr[b].Cardinality
		})
		// Truncate using a local bound instead of mutating limit: the original
		// shrank limit itself when an early child returned few entries, which
		// permanently capped the final result below the caller's limit.
		n := limit
		if n > len(asr) {
			n = len(asr)
		}
		rvCentroidCardinalitiesResult = asr[:n]
	}
	return rvCentroidCardinalitiesResult, nil
}

View File

@@ -57,8 +57,6 @@ type indexImpl struct {
const storePath = "store"
var mappingInternalKey = []byte("_mapping")
const (
SearchQueryStartCallbackKey search.ContextKey = "_search_query_start_callback_key"
SearchQueryEndCallbackKey search.ContextKey = "_search_query_end_callback_key"
@@ -641,8 +639,57 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
// ------------------------------------------------------------------------------------------
// set up additional contexts for any search operation that will proceed from
// here, such as presearch, collectors etc.
// Scoring model callback to be used to get scoring model
scoringModelCallback := func() string {
if isBM25Enabled(i.m) {
return index.BM25Scoring
}
return index.DefaultScoringModel
}
ctx = context.WithValue(ctx, search.GetScoringModelCallbackKey,
search.GetScoringModelCallbackFn(scoringModelCallback))
// This callback and variable handles the tracking of bytes read
// 1. as part of creation of tfr and its Next() calls which is
// accounted by invoking this callback when the TFR is closed.
// 2. the docvalues portion (accounted in collector) and the retrieval
// of stored fields bytes (by LoadAndHighlightFields)
var totalSearchCost uint64
sendBytesRead := func(bytesRead uint64) {
totalSearchCost += bytesRead
}
// Ensure IO cost accounting and result cost assignment happen on all return paths
defer func() {
if sr != nil {
sr.Cost = totalSearchCost
}
if is, ok := indexReader.(*scorch.IndexSnapshot); ok {
is.UpdateIOStats(totalSearchCost)
}
search.RecordSearchCost(ctx, search.DoneM, 0)
}()
ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey, search.SearchIOStatsCallbackFunc(sendBytesRead))
// Geo buffer pool callback to be used for getting geo buffer pool
var bufPool *s2.GeoBufferPool
getBufferPool := func() *s2.GeoBufferPool {
if bufPool == nil {
bufPool = s2.NewGeoBufferPool(search.MaxGeoBufPoolSize, search.MinGeoBufPoolSize)
}
return bufPool
}
ctx = context.WithValue(ctx, search.GeoBufferPoolCallbackKey, search.GeoBufferPoolCallbackFunc(getBufferPool))
// ------------------------------------------------------------------------------------------
if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
preSearchResult, err := i.preSearch(ctx, req, indexReader)
sr, err = i.preSearch(ctx, req, indexReader)
if err != nil {
return nil, err
}
@@ -656,7 +703,8 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
// time stat
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
return preSearchResult, nil
return sr, nil
}
var reverseQueryExecution bool
@@ -726,6 +774,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
// if score fusion, run collect if rescorer is defined
if rescorer != nil && requestHasKNN(req) {
knnHits, err = i.runKnnCollector(ctx, req, indexReader, false)
if err != nil {
return nil, err
}
}
}
@@ -745,7 +796,6 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if !contextScoreFusionKeyExists {
setKnnHitsInCollector(knnHits, req, coll)
}
if fts != nil {
if is, ok := indexReader.(*scorch.IndexSnapshot); ok {
@@ -754,44 +804,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
ctx = context.WithValue(ctx, search.FieldTermSynonymMapKey, fts)
}
scoringModelCallback := func() string {
if isBM25Enabled(i.m) {
return index.BM25Scoring
}
return index.DefaultScoringModel
}
ctx = context.WithValue(ctx, search.GetScoringModelCallbackKey,
search.GetScoringModelCallbackFn(scoringModelCallback))
// set the bm25Stats (stats important for consistent scoring) in
// the context object
if bm25Stats != nil {
ctx = context.WithValue(ctx, search.BM25StatsKey, bm25Stats)
}
// This callback and variable handles the tracking of bytes read
// 1. as part of creation of tfr and its Next() calls which is
// accounted by invoking this callback when the TFR is closed.
// 2. the docvalues portion (accounted in collector) and the retrieval
// of stored fields bytes (by LoadAndHighlightFields)
var totalSearchCost uint64
sendBytesRead := func(bytesRead uint64) {
totalSearchCost += bytesRead
}
ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey, search.SearchIOStatsCallbackFunc(sendBytesRead))
var bufPool *s2.GeoBufferPool
getBufferPool := func() *s2.GeoBufferPool {
if bufPool == nil {
bufPool = s2.NewGeoBufferPool(search.MaxGeoBufPoolSize, search.MinGeoBufPoolSize)
}
return bufPool
}
ctx = context.WithValue(ctx, search.GeoBufferPoolCallbackKey, search.GeoBufferPoolCallbackFunc(getBufferPool))
searcher, err := req.Query.Searcher(ctx, indexReader, i.m, search.SearcherOptions{
Explain: req.Explain,
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
@@ -804,14 +822,6 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if serr := searcher.Close(); err == nil && serr != nil {
err = serr
}
if sr != nil {
sr.Cost = totalSearchCost
}
if sr, ok := indexReader.(*scorch.IndexSnapshot); ok {
sr.UpdateIOStats(totalSearchCost)
}
search.RecordSearchCost(ctx, search.DoneM, 0)
}()
if req.Facets != nil {
@@ -1388,3 +1398,68 @@ func (i *indexImpl) FireIndexEvent() {
internalEventIndex.FireIndexEvent()
}
}
// -----------------------------------------------------------------------------

// TermFrequencies returns up to limit terms for the field ordered by
// frequency, provided the underlying index reader implements
// index.IndexInsightsReader.
//
// Named results are required here: the deferred reader.Close() must be able
// to surface its error to the caller. The previous version assigned the
// close error to a local variable after the return values were already set,
// silently dropping it.
func (i *indexImpl) TermFrequencies(field string, limit int, descending bool) (
	termFreqs []index.TermFreq, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		// Propagate a Close failure unless an earlier error takes precedence.
		if cerr := reader.Close(); err == nil && cerr != nil {
			err = cerr
			termFreqs = nil
		}
	}()

	insightsReader, ok := reader.(index.IndexInsightsReader)
	if !ok {
		return nil, fmt.Errorf("index reader does not support TermFrequencies")
	}
	return insightsReader.TermFrequencies(field, limit, descending)
}
// CentroidCardinalities returns up to limit IVF centroid cardinalities for
// the field, provided the underlying index reader implements
// index.IndexInsightsReader. Each returned entry is stamped with this
// index's name.
//
// Named results are required so the deferred reader.Close() error reaches
// the caller; the previous version assigned it to a dead local variable.
func (i *indexImpl) CentroidCardinalities(field string, limit int, descending bool) (
	cenCards []index.CentroidCardinality, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		// Propagate a Close failure unless an earlier error takes precedence.
		if cerr := reader.Close(); err == nil && cerr != nil {
			err = cerr
			cenCards = nil
		}
	}()

	insightsReader, ok := reader.(index.IndexInsightsReader)
	if !ok {
		return nil, fmt.Errorf("index reader does not support CentroidCardinalities")
	}
	cenCards, err = insightsReader.CentroidCardinalities(field, limit, descending)
	if err != nil {
		return nil, err
	}
	// Tag every centroid with the name of the index it came from.
	for j := range cenCards {
		cenCards[j].Index = i.name
	}
	return cenCards, nil
}

View File

@@ -755,4 +755,3 @@ func ParseParams(r *SearchRequest, input []byte) (*RequestParams, error) {
return params, nil
}

View File

@@ -185,17 +185,36 @@ func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapp
if err != nil {
return nil, err
}
var init bool
var refDoc *search.DocumentMatch
filterFunc = func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
// Attempt to advance the filter searcher to the document identified by
// the base searcher's (unfiltered boolean) current result (d.IndexInternalID).
//
// If the filter searcher successfully finds a document with the same
// internal ID, it means the document satisfies the filter and should be kept.
//
// If the filter searcher returns an error, does not find a matching document,
// or finds a document with a different internal ID, the document should be discarded.
dm, err := filterSearcher.Advance(sctx, d.IndexInternalID)
return err == nil && dm != nil && bytes.Equal(dm.IndexInternalID, d.IndexInternalID)
// Initialize the reference document to point
// to the first document in the filterSearcher
var err error
if !init {
refDoc, err = filterSearcher.Next(sctx)
if err != nil {
return false
}
init = true
}
if refDoc == nil {
// filterSearcher is exhausted, d is not in filter
return false
}
// Compare document IDs
cmp := bytes.Compare(refDoc.IndexInternalID, d.IndexInternalID)
if cmp < 0 {
// filterSearcher is behind the current document, Advance() it
refDoc, err = filterSearcher.Advance(sctx, d.IndexInternalID)
if err != nil || refDoc == nil {
return false
}
// After advance, check if they're now equal
return bytes.Equal(refDoc.IndexInternalID, d.IndexInternalID)
}
// cmp >= 0: either equal (match) or filterSearcher is ahead (no match)
return cmp == 0
}
}

View File

@@ -431,6 +431,10 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
if err != nil {
return nil, err
}
q.Filter, err = expand(q.Filter)
if err != nil {
return nil, err
}
return q, nil
default:
return query, nil
@@ -481,7 +485,7 @@ func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) (FieldSet, erro
fs, err = ExtractFields(expandedQuery, m, fs)
}
case *BooleanQuery:
for _, subq := range []Query{q.Must, q.Should, q.MustNot} {
for _, subq := range []Query{q.Must, q.Should, q.MustNot, q.Filter} {
fs, err = ExtractFields(subq, m, fs)
if err != nil {
break
@@ -553,6 +557,10 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Thes
if err != nil {
return nil, err
}
rv, err = ExtractSynonyms(ctx, m, r, q.Filter, rv)
if err != nil {
return nil, err
}
case *ConjunctionQuery:
for _, child := range q.Conjuncts {
rv, err = ExtractSynonyms(ctx, m, r, child, rv)

View File

@@ -365,3 +365,29 @@ type EligibleDocumentSelector interface {
// This must be called after all eligible documents have been added.
SegmentEligibleDocs(segmentID int) []uint64
}
// -----------------------------------------------------------------------------

// TermFreq pairs an indexed term with its aggregate frequency in a field.
type TermFreq struct {
	Term      string `json:"term"`
	Frequency uint64 `json:"frequency"`
}

// CentroidCardinality describes one IVF cluster: the name of the index it
// belongs to, the centroid vector itself, and the number of vectors assigned
// to the cluster.
type CentroidCardinality struct {
	Index       string    `json:"index"`
	Centroid    []float32 `json:"centroid"`
	Cardinality uint64    `json:"cardinality"`
}

// IndexInsightsReader is an extended index reader that supports APIs which can advertise
// details about content held within the index.
type IndexInsightsReader interface {
	IndexReader

	// TermFrequencies obtains a maximum of limit indexed tokens for the field,
	// sorted based on frequencies.
	TermFrequencies(field string, limit int, descending bool) (termFreqs []TermFreq, err error)

	// CentroidCardinalities obtains a maximum of limit centroid vectors from IVF
	// indexes, sorted based on cluster densities (or cardinalities).
	CentroidCardinalities(field string, limit int, descending bool) (cenCards []CentroidCardinality, err error)
}

View File

@@ -14,6 +14,7 @@ import "C"
import (
"encoding/json"
"fmt"
"sort"
"unsafe"
)
@@ -64,6 +65,10 @@ type Index interface {
ObtainClustersWithDistancesFromIVFIndex(x []float32, centroidIDs []int64) (
[]int64, []float32, error)
// Applicable only to IVF indexes: Returns the top k centroid cardinalities and
// their vectors in chosen order (descending or ascending)
ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) ([]uint64, [][]float32, error)
// Search queries the index with the vectors in x.
// Returns the IDs of the k nearest neighbors for each query vector and the
// corresponding distances.
@@ -214,6 +219,72 @@ func (idx *faissIndex) ObtainClustersWithDistancesFromIVFIndex(x []float32, cent
return centroids, centroidDistances, nil
}
// ObtainKCentroidCardinalitiesFromIVFIndex returns, for an IVF index, the
// cardinalities of up to limit centroids and the corresponding centroid
// vectors, ordered by cardinality (descending or ascending).
// A non-positive limit or an index with zero lists yields (nil, nil, nil).
func (idx *faissIndex) ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) (
	[]uint64, [][]float32, error) {
	if limit <= 0 {
		return nil, nil, nil
	}
	// Number of inverted lists (= centroids) in the IVF index.
	nlist := int(C.faiss_IndexIVF_nlist(idx.idx))
	if nlist == 0 {
		return nil, nil, nil
	}
	centroidCardinalities := make([]C.size_t, nlist)
	// Allocate a flat buffer for all centroids, then slice it per centroid
	d := idx.D()
	flatCentroids := make([]float32, nlist*d)
	// Call the C function to fill centroid vectors and cardinalities
	c := C.faiss_IndexIVF_get_centroids_and_cardinality(
		idx.idx,
		(*C.float)(&flatCentroids[0]),
		(*C.size_t)(&centroidCardinalities[0]),
		nil,
	)
	if c != 0 {
		// Non-zero return code: surface faiss's last error.
		return nil, nil, getLastError()
	}
	// Rank the centroid positions by cardinality and keep the top min(limit, nlist).
	topIndices := getIndicesOfKCentroidCardinalities(
		centroidCardinalities,
		min(limit, nlist),
		descending)
	rvCardinalities := make([]uint64, len(topIndices))
	rvCentroids := make([][]float32, len(topIndices))
	for i, idx := range topIndices {
		rvCardinalities[i] = uint64(centroidCardinalities[idx])
		// NOTE(review): these sub-slices alias flatCentroids; callers must not
		// assume independent backing arrays.
		rvCentroids[i] = flatCentroids[idx*d : (idx+1)*d]
	}
	return rvCardinalities, rvCentroids, nil
}
// getIndicesOfKCentroidCardinalities ranks the positions of cardinalities by
// their values and returns the top k positions in the requested order.
func getIndicesOfKCentroidCardinalities(cardinalities []C.size_t, k int, descending bool) []int {
	// Build the identity permutation, then sort it by cardinality value so the
	// input slice itself is never reordered.
	order := make([]int, len(cardinalities))
	for pos := range order {
		order[pos] = pos
	}
	// Pick the comparator once instead of branching on every comparison.
	var less func(a, b int) bool
	if descending {
		less = func(a, b int) bool {
			return cardinalities[order[a]] > cardinalities[order[b]]
		}
	} else {
		less = func(a, b int) bool {
			return cardinalities[order[a]] < cardinalities[order[b]]
		}
	}
	sort.Slice(order, less)
	// Truncate to the requested count when more positions are available.
	if k < len(order) {
		order = order[:k]
	}
	return order
}
func (idx *faissIndex) SearchClustersFromIVFIndex(selector Selector,
eligibleCentroidIDs []int64, minEligibleCentroids int, k int64, x,
centroidDis []float32, params json.RawMessage) ([]float32, []int64, error) {

View File

@@ -20,6 +20,7 @@ package segment
import (
"encoding/json"
index "github.com/blevesearch/bleve_index_api"
"github.com/RoaringBitmap/roaring/v2"
)
@@ -64,6 +65,8 @@ type VectorIndex interface {
params json.RawMessage) (VecPostingsList, error)
Close()
Size() uint64
ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) ([]index.CentroidCardinality, error)
}
type VectorSegment interface {

View File

@@ -26,6 +26,7 @@ import (
"github.com/RoaringBitmap/roaring/v2"
"github.com/RoaringBitmap/roaring/v2/roaring64"
"github.com/bits-and-blooms/bitset"
index "github.com/blevesearch/bleve_index_api"
faiss "github.com/blevesearch/go-faiss"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
@@ -279,6 +280,9 @@ type vectorIndexWrapper struct {
params json.RawMessage) (segment.VecPostingsList, error)
close func()
size func() uint64
obtainKCentroidCardinalitiesFromIVFIndex func(limit int, descending bool) (
[]index.CentroidCardinality, error)
}
func (i *vectorIndexWrapper) Search(qVector []float32, k int64,
@@ -301,6 +305,11 @@ func (i *vectorIndexWrapper) Size() uint64 {
return i.size()
}
// ObtainKCentroidCardinalitiesFromIVFIndex delegates to the closure installed
// on the wrapper (see InterpretVectorIndex), which knows how to query the
// underlying vector index for its centroid cardinalities.
func (i *vectorIndexWrapper) ObtainKCentroidCardinalitiesFromIVFIndex(limit int, descending bool) (
	[]index.CentroidCardinality, error) {
	return i.obtainKCentroidCardinalitiesFromIVFIndex(limit, descending)
}
// InterpretVectorIndex returns a construct of closures (vectorIndexWrapper)
// that will allow the caller to -
// (1) search within an attached vector index
@@ -520,6 +529,24 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool
size: func() uint64 {
return vecIndexSize
},
obtainKCentroidCardinalitiesFromIVFIndex: func(limit int, descending bool) ([]index.CentroidCardinality, error) {
if vecIndex == nil || !vecIndex.IsIVFIndex() {
return nil, nil
}
cardinalities, centroids, err := vecIndex.ObtainKCentroidCardinalitiesFromIVFIndex(limit, descending)
if err != nil {
return nil, err
}
centroidCardinalities := make([]index.CentroidCardinality, len(cardinalities))
for i, cardinality := range cardinalities {
centroidCardinalities[i] = index.CentroidCardinality{
Centroid: centroids[i],
Cardinality: cardinality,
}
}
return centroidCardinalities, nil
},
}
err error

10
vendor/modules.txt vendored
View File

@@ -117,7 +117,7 @@ github.com/bitly/go-simplejson
# github.com/bits-and-blooms/bitset v1.22.0
## explicit; go 1.16
github.com/bits-and-blooms/bitset
# github.com/blevesearch/bleve/v2 v2.5.4
# github.com/blevesearch/bleve/v2 v2.5.5
## explicit; go 1.23
github.com/blevesearch/bleve/v2
github.com/blevesearch/bleve/v2/analysis
@@ -160,7 +160,7 @@ github.com/blevesearch/bleve/v2/search/scorer
github.com/blevesearch/bleve/v2/search/searcher
github.com/blevesearch/bleve/v2/size
github.com/blevesearch/bleve/v2/util
# github.com/blevesearch/bleve_index_api v1.2.10
# github.com/blevesearch/bleve_index_api v1.2.11
## explicit; go 1.21
github.com/blevesearch/bleve_index_api
# github.com/blevesearch/geo v0.2.4
@@ -171,7 +171,7 @@ github.com/blevesearch/geo/r2
github.com/blevesearch/geo/r3
github.com/blevesearch/geo/s1
github.com/blevesearch/geo/s2
# github.com/blevesearch/go-faiss v1.0.25
# github.com/blevesearch/go-faiss v1.0.26
## explicit; go 1.21
github.com/blevesearch/go-faiss
# github.com/blevesearch/go-porterstemmer v1.0.3
@@ -183,7 +183,7 @@ github.com/blevesearch/gtreap
# github.com/blevesearch/mmap-go v1.0.4
## explicit; go 1.13
github.com/blevesearch/mmap-go
# github.com/blevesearch/scorch_segment_api/v2 v2.3.12
# github.com/blevesearch/scorch_segment_api/v2 v2.3.13
## explicit; go 1.21
github.com/blevesearch/scorch_segment_api/v2
# github.com/blevesearch/segment v0.9.1
@@ -217,7 +217,7 @@ github.com/blevesearch/zapx/v14
# github.com/blevesearch/zapx/v15 v15.4.2
## explicit; go 1.21
github.com/blevesearch/zapx/v15
# github.com/blevesearch/zapx/v16 v16.2.6
# github.com/blevesearch/zapx/v16 v16.2.7
## explicit; go 1.23
github.com/blevesearch/zapx/v16
# github.com/bluele/gcache v0.0.2