Bump github.com/blevesearch/bleve/v2 from 2.3.7 to 2.3.9

Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.3.7 to 2.3.9.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.3.7...v2.3.9)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
dependabot[bot] authored on 2023-08-15 06:44:05 +0000, committed by Ralf Haferkamp
parent 82b600aef5, commit f9b69afa9e
58 changed files with 1138 additions and 330 deletions
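For reference, the same bump can be reproduced manually with the Go toolchain; a sketch, run from the module root:

```
go get github.com/blevesearch/bleve/v2@v2.3.9
go mod tidy
```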
+9 -3
@@ -9,7 +9,7 @@
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
A modern text indexing library in go
## Features
@@ -24,8 +24,8 @@ modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
* [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) Scoring
* Boosting
* Search result match highlighting
* Query time boosting
* Search result match highlighting with document fragments
* Aggregations/faceting support:
* Terms Facet
* Numeric Range Facet
@@ -97,6 +97,12 @@ Flags:
Use "bleve [command] --help" for more information about a command.
```
## Text Analysis
Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages:
Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)
## Text Analysis Wizard
[bleveanalysis.couchbase.com](https://bleveanalysis.couchbase.com)
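To use one of the pre-built language analyzers above, set it as the default analyzer on an index mapping. A minimal sketch: the English analyzer is registered by default, while other languages may need a blank import of their analysis/lang package; the index and document here are illustrative.

```go
package main

import (
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	mapping := bleve.NewIndexMapping()
	mapping.DefaultAnalyzer = "en" // any registered language code from the list above

	idx, err := bleve.NewMemOnly(mapping) // in-memory index, for illustration
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()

	if err := idx.Index("doc1", map[string]string{"body": "modern text indexing"}); err != nil {
		log.Fatal(err)
	}
}
```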
@@ -0,0 +1,174 @@
/*
This code was ported from the OpenSearch project
https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
The algorithm itself was created by Mark Harwood
https://github.com/markharwood
*/
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package en
import (
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const PluralStemmerName = "stemmer_en_plural"
type EnglishPluralStemmerFilter struct {
}
func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter {
return &EnglishPluralStemmerFilter{}
}
func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
token.Term = []byte(stem(string(token.Term)))
}
return input
}
func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewEnglishPluralStemmerFilter(), nil
}
func init() {
registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
}
// ----------------------------------------------------------------------------
// Words ending in oes that retain the e when stemmed
var oesExceptions = []string{"shoes", "canoes", "oboes"}
// Words ending in ches that retain the e when stemmed
var chesExceptions = []string{
"cliches",
"avalanches",
"mustaches",
"moustaches",
"quiches",
"headaches",
"heartaches",
"porsches",
"tranches",
"caches",
}
func stem(word string) string {
runes := []rune(strings.ToLower(word))
if len(runes) < 3 || runes[len(runes)-1] != 's' {
return string(runes)
}
switch runes[len(runes)-2] {
case 'u':
fallthrough
case 's':
return string(runes)
case 'e':
// Modified ies->y logic from original s-stemmer - only work on strings > 4
// so spies -> spy still but pies->pie.
// The original code also special-cased aies and eies for no good reason as far as I can tell.
// ( no words of consequence - eg http://www.thefreedictionary.com/words-that-end-in-aies )
if len(runes) > 4 && runes[len(runes)-3] == 'i' {
runes[len(runes)-3] = 'y'
return string(runes[0 : len(runes)-2])
}
// Suffix rules to remove any dangling "e"
if len(runes) > 3 {
// xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe")
if len(runes) > 4 && runes[len(runes)-3] == 'x' {
return string(runes[0 : len(runes)-2])
}
// oes
if len(runes) > 3 && runes[len(runes)-3] == 'o' {
if isException(runes, oesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
if len(runes) > 4 {
// shes/sses
if runes[len(runes)-4] == 's' && (runes[len(runes)-3] == 'h' || runes[len(runes)-3] == 's') {
return string(runes[0 : len(runes)-2])
}
// ches
if len(runes) > 4 {
if runes[len(runes)-4] == 'c' && runes[len(runes)-3] == 'h' {
if isException(runes, chesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
}
}
}
fallthrough
default:
return string(runes[0 : len(runes)-1])
}
}
func isException(word []rune, exceptions []string) bool {
for _, exception := range exceptions {
exceptionRunes := []rune(exception)
exceptionPos := len(exceptionRunes) - 1
wordPos := len(word) - 1
matched := true
for exceptionPos >= 0 && wordPos >= 0 {
if exceptionRunes[exceptionPos] != word[wordPos] {
matched = false
break
}
exceptionPos--
wordPos--
}
if matched {
return true
}
}
return false
}
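A minimal sketch of the new plural stemmer in isolation; the token terms and expected outputs follow the suffix rules and exception lists above (token positions omitted for brevity):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/lang/en"
)

func main() {
	filter := en.NewEnglishPluralStemmerFilter()
	input := analysis.TokenStream{
		{Term: []byte("boxes")}, // xes rule: drop "es"
		{Term: []byte("spies")}, // ies rule: rewrite to "y"
		{Term: []byte("shoes")}, // oes exception: drop only the "s"
	}
	for _, tok := range filter.Filter(input) {
		fmt.Println(string(tok.Term)) // box, spy, shoe
	}
}
```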
+4
@@ -588,6 +588,10 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
indexSnapshot := s.currentSnapshot()
if indexSnapshot == nil {
return nil
}
defer func() {
_ = indexSnapshot.Close()
}()
+5 -3
@@ -102,10 +102,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
// this is because there are chances of having a series of loadChunk calls,
// and they have to be added together before sending the bytesRead at this point
// upstream.
if delta := i.iterators[i.segmentOffset].BytesRead() - prevBytesRead; delta > 0 {
i.incrementBytesRead(delta)
bytesRead := i.iterators[i.segmentOffset].BytesRead()
if bytesRead > prevBytesRead {
i.incrementBytesRead(bytesRead - prevBytesRead)
}
return rv, nil
}
i.segmentOffset++
@@ -204,6 +204,8 @@ func (i *IndexSnapshotTermFieldReader) Close() error {
// reader's bytesRead value
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(i.bytesRead)
}
search.RecordSearchCost(i.ctx, search.AddM, i.bytesRead)
}
if i.snapshot != nil {
+4 -4
@@ -124,16 +124,16 @@ func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields [
}
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
if tempRow.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf.buf)
if err != nil {
return err
}
value, err := i.kvreader.Get(keyBuf[:keySize])
value, err := i.kvreader.Get(keyBuf.buf[:keySize])
if err != nil {
return err
}
+38 -33
@@ -134,18 +134,23 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) {
return
}
var rowBufferPool sync.Pool
func GetRowBuffer() []byte {
if rb, ok := rowBufferPool.Get().([]byte); ok {
return rb
} else {
return make([]byte, RowBufferSize)
}
type rowBuffer struct {
buf []byte
}
func PutRowBuffer(buf []byte) {
rowBufferPool.Put(buf)
var rowBufferPool sync.Pool
func GetRowBuffer() *rowBuffer {
if rb, ok := rowBufferPool.Get().(*rowBuffer); ok {
return rb
} else {
buf := make([]byte, RowBufferSize)
return &rowBuffer{buf: buf}
}
}
func PutRowBuffer(rb *rowBuffer) {
rowBufferPool.Put(rb)
}
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
@@ -169,14 +174,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
for _, row := range addRows {
tfr, ok := row.(*TermFrequencyRow)
if ok {
if tfr.DictionaryRowKeySize() > len(rowBuf) {
rowBuf = make([]byte, tfr.DictionaryRowKeySize())
if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
}
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
if err != nil {
return err
}
dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] += 1
}
addKeyBytes += row.KeySize()
addValBytes += row.ValueSize()
@@ -197,14 +202,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
tfr, ok := row.(*TermFrequencyRow)
if ok {
// need to decrement counter
if tfr.DictionaryRowKeySize() > len(rowBuf) {
rowBuf = make([]byte, tfr.DictionaryRowKeySize())
if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
}
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
if err != nil {
return err
}
dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] -= 1
}
deleteKeyBytes += row.KeySize()
}
@@ -541,26 +546,26 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []In
switch row := row.(type) {
case *TermFrequencyRow:
if existingTermKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
if row.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
keySize, _ := row.KeyTo(keyBuf.buf)
if _, ok := existingTermKeys[string(keyBuf.buf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingTermKeys, string(keyBuf[:keySize]))
delete(existingTermKeys, string(keyBuf.buf[:keySize]))
continue
}
}
addRows = append(addRows, row)
case *StoredRow:
if existingStoredKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
if row.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
keySize, _ := row.KeyTo(keyBuf.buf)
if _, ok := existingStoredKeys[string(keyBuf.buf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingStoredKeys, string(keyBuf[:keySize]))
delete(existingStoredKeys, string(keyBuf.buf[:keySize]))
continue
}
}
@@ -1047,23 +1052,23 @@ func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*
}
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
if tempRow.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf.buf)
if err != nil {
return nil, err
}
value, err := kvreader.Get(keyBuf[:keySize])
value, err := kvreader.Get(keyBuf.buf[:keySize])
if err != nil {
return nil, err
}
if value == nil {
return nil, nil
}
backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value)
backIndexRow, err := NewBackIndexRowKV(keyBuf.buf[:keySize], value)
if err != nil {
return nil, err
}
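The GetRowBuffer/PutRowBuffer rework above pools a *rowBuffer instead of a bare []byte: putting a pointer avoids boxing the slice header on every Put (staticcheck SA6002), and a buffer grown via rb.buf = make(...) stays with the pooled object for reuse. A standalone sketch of the pattern, with an illustrative size and a New func that bleve itself does not use:

```go
package main

import (
	"fmt"
	"sync"
)

const rowBufferSize = 4 * 1024 // illustrative default

type rowBuffer struct {
	buf []byte
}

var rowBufferPool = sync.Pool{
	New: func() interface{} { return &rowBuffer{buf: make([]byte, rowBufferSize)} },
}

func main() {
	rb := rowBufferPool.Get().(*rowBuffer)
	if need := 8 * 1024; need > len(rb.buf) {
		rb.buf = make([]byte, need) // grow in place; the pooled object keeps the larger buffer
	}
	fmt.Println(len(rb.buf))
	rowBufferPool.Put(rb) // pointer Put: no per-call allocation for the slice header
}
```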
+11 -5
@@ -474,9 +474,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
// accounted by invoking this callback when the TFR is closed.
// 2. the docvalues portion (accounted in collector) and the retrieval
// of stored fields bytes (by LoadAndHighlightFields)
var totalBytesRead uint64
var totalSearchCost uint64
sendBytesRead := func(bytesRead uint64) {
totalBytesRead += bytesRead
totalSearchCost += bytesRead
}
ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey,
@@ -495,11 +495,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
err = serr
}
if sr != nil {
sr.BytesRead = totalBytesRead
sr.Cost = totalSearchCost
}
if sr, ok := indexReader.(*scorch.IndexSnapshot); ok {
sr.UpdateIOStats(totalBytesRead)
sr.UpdateIOStats(totalSearchCost)
}
search.RecordSearchCost(ctx, search.DoneM, 0)
}()
if req.Facets != nil {
@@ -574,6 +576,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
var storedFieldsCost uint64
for _, hit := range hits {
if i.name != "" {
hit.Index = i.name
@@ -582,9 +585,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if err != nil {
return nil, err
}
totalBytesRead += storedFieldsBytes
storedFieldsCost += storedFieldsBytes
}
totalSearchCost += storedFieldsCost
search.RecordSearchCost(ctx, search.AddM, storedFieldsCost)
atomic.AddUint64(&i.stats.searches, 1)
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
+8 -27
@@ -140,11 +140,11 @@ func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
return nil
}
// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
// documentMappingForPath returns the EXACT and closest matches for a sub
// document or for an explicitly mapped field; the closest most specific
// document mapping could be one that matches part of the provided path.
func (dm *DocumentMapping) documentMappingForPath(path string) (
*DocumentMapping, *DocumentMapping) {
pathElements := decodePath(path)
current := dm
OUTER:
@@ -165,27 +165,9 @@ OUTER:
}
}
return nil
return nil, current
}
return current
}
// closestDocMapping finds the most specific document mapping that matches
// part of the provided path
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for _, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
if name == pathElement {
current = subDocMapping
continue OUTER
}
}
break
}
return current
return current, current
}
// NewDocumentMapping returns a new document mapping
@@ -408,8 +390,7 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString)
subDocMapping, closestDocMapping := dm.documentMappingForPath(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
+14 -4
@@ -326,7 +326,7 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
// see if the _all field was disabled
allMapping := docMapping.documentMappingForPath("_all")
allMapping, _ := docMapping.documentMappingForPath("_all")
if allMapping == nil || allMapping.Enabled {
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, index.IndexField|index.IncludeTermVectors)
doc.AddField(field)
@@ -364,8 +364,9 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
return analyzerName
}
}
// now try the default mapping
pathMapping := im.DefaultMapping.documentMappingForPath(path)
pathMapping, _ := im.DefaultMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != "" {
@@ -377,7 +378,16 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
// next we will try default analyzers for the path
pathDecoded := decodePath(path)
for _, docMapping := range im.TypeMapping {
rv := docMapping.defaultAnalyzerName(pathDecoded)
if docMapping.Enabled {
rv := docMapping.defaultAnalyzerName(pathDecoded)
if rv != "" {
return rv
}
}
}
// now the default analyzer for the default mapping
if im.DefaultMapping.Enabled {
rv := im.DefaultMapping.defaultAnalyzerName(pathDecoded)
if rv != "" {
return rv
}
@@ -411,7 +421,7 @@ func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {
// first we look for explicit mapping on the field
for _, docMapping := range im.TypeMapping {
pathMapping := docMapping.documentMappingForPath(path)
pathMapping, _ := docMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != "" {
+25
@@ -225,3 +225,28 @@ func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQu
func NewIPRangeQuery(cidr string) *query.IPRangeQuery {
return query.NewIPRangeQuery(cidr)
}
// NewGeoShapeQuery creates a new Query for matching the given geo shape.
// This method can be used for creating geoshape queries for shape types
// like: point, linestring, polygon, multipoint, multilinestring,
// multipolygon and envelope.
func NewGeoShapeQuery(coordinates [][][][]float64, typ, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeQuery(coordinates, typ, relation)
}
// NewGeoShapeCircleQuery creates a new query for a geoshape that is a
// circle given center point and the radius. Radius formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters" If the unit cannot be determined,
// the entire string is parsed and the unit of meters is assumed.
func NewGeoShapeCircleQuery(coordinates []float64, radius, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeCircleQuery(coordinates, radius, relation)
}
// NewGeometryCollectionQuery creates a new query for the provided
// geometrycollection coordinates and types, which could contain
// multiple geo shapes.
func NewGeometryCollectionQuery(coordinates [][][][][]float64, types []string, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeometryCollectionQuery(coordinates, types, relation)
}
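A minimal sketch of the polygon case for the new constructor; the coordinates are illustrative [lon, lat] pairs, and the ring is closed by repeating the first point:

```go
package main

import (
	"fmt"
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	ring := [][]float64{
		{-122.45, 37.70}, {-122.40, 37.70},
		{-122.40, 37.75}, {-122.45, 37.75},
		{-122.45, 37.70},
	}
	// one polygon with a single outer ring
	q, err := bleve.NewGeoShapeQuery([][][][]float64{{ring}}, "polygon", "intersects")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(q.Geometry.Relation) // "intersects"
}
```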
+21 -9
@@ -485,15 +485,27 @@ func (ss *SearchStatus) Merge(other *SearchStatus) {
// A SearchResult describes the results of executing
// a SearchRequest.
//
// Status - Whether the search was executed on the underlying indexes
// successfully or failed, and the corresponding errors.
// Request - The SearchRequest that was executed.
// Hits - The list of documents that matched the query and their corresponding
// scores, score explanation, location info and so on.
// Total - The total number of documents that matched the query.
// Cost - Indicates how expensive the query was with respect to bytes read
// from the mmapped index files.
// MaxScore - The maximum score across all document hits for this query.
// Took - The time taken to execute the search.
// Facets - The facet results for the search.
type SearchResult struct {
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
BytesRead uint64 `json:"bytesRead"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
Cost uint64 `json:"cost"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
}
func (sr *SearchResult) Size() int {
@@ -566,7 +578,7 @@ func (sr *SearchResult) Merge(other *SearchResult) {
sr.Status.Merge(other.Status)
sr.Hits = append(sr.Hits, other.Hits...)
sr.Total += other.Total
sr.BytesRead += other.BytesRead
sr.Cost += other.Cost
if other.MaxScore > sr.MaxScore {
sr.MaxScore = other.MaxScore
}
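Since BytesRead is renamed to Cost (JSON tag bytesRead becomes cost), callers reading the old field need updating. A minimal sketch of reading the renamed field, using an illustrative in-memory index:

```go
package main

import (
	"fmt"
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	idx, err := bleve.NewMemOnly(bleve.NewIndexMapping())
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()

	if err := idx.Index("doc1", map[string]string{"body": "text indexing in go"}); err != nil {
		log.Fatal(err)
	}

	res, err := idx.Search(bleve.NewSearchRequest(bleve.NewMatchQuery("indexing")))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.Total, res.Cost) // Cost replaces the former BytesRead field
}
```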
+18 -1
@@ -200,6 +200,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
hc.needDocIds = hc.needDocIds || loadID
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
next, err = searcher.Next(searchContext)
@@ -208,6 +209,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
if hc.total%CheckDoneEvery == 0 {
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
}
@@ -232,6 +234,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
// total bytes read as part of docValues being read every hit
// which must be accounted by invoking the callback.
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead)
search.RecordSearchCost(ctx, search.AddM, hc.bytesRead)
}
// help finalize/flush the results in case
@@ -367,7 +371,20 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
// SetFacetsBuilder registers a facet builder for this collector
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
hc.facetsBuilder = facetsBuilder
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
fieldsRequiredForFaceting := facetsBuilder.RequiredFields()
// for each of these fields, append only if not already there in hc.neededFields.
for _, field := range fieldsRequiredForFaceting {
found := false
for _, neededField := range hc.neededFields {
if field == neededField {
found = true
break
}
}
if !found {
hc.neededFields = append(hc.neededFields, field)
}
}
}
// finalizeResults starts with the heap containing the final top size+skip
@@ -63,6 +63,8 @@ func (q *GeoBoundingBoxQuery) Searcher(ctx context.Context, i index.IndexReader,
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
if q.BottomRight[0] < q.TopLeft[0] {
// cross date line, rewrite as two parts
@@ -61,6 +61,8 @@ func (q *GeoBoundingPolygonQuery) Searcher(ctx context.Context, i index.IndexRea
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
return searcher.NewGeoBoundedPolygonSearcher(ctx, i, q.Points, field, q.BoostVal.Value(), options)
}
+2
@@ -64,6 +64,8 @@ func (q *GeoDistanceQuery) Searcher(ctx context.Context, i index.IndexReader, m
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
dist, err := geo.ParseDistance(q.Distance)
if err != nil {
return nil, err
+2
@@ -107,6 +107,8 @@ func (q *GeoShapeQuery) Searcher(ctx context.Context, i index.IndexReader,
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
return searcher.NewGeoShapeSearcher(ctx, i, q.Geometry.Shape, q.Geometry.Relation, field,
q.BoostVal.Value(), options)
}
+1
@@ -77,6 +77,7 @@ func (q *NumericRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Numeric)
return searcher.NewNumericRangeSearcher(ctx, i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}
-4
@@ -27,10 +27,6 @@ var reflectStaticSizeDocumentMatch int
var reflectStaticSizeSearchContext int
var reflectStaticSizeLocation int
const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"
type SearchIOStatsCallbackFunc func(uint64)
func init() {
var dm DocumentMatch
reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
+8 -5
@@ -59,7 +59,8 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcher(ctx, indexReader, candidates, field,
@@ -71,13 +72,15 @@ type fuzzyCandidates struct {
bytesRead uint64
}
func reportIOStats(bytesRead uint64, ctx context.Context) {
func reportIOStats(ctx context.Context, bytesRead uint64) {
// The fuzzy, regexp-like queries essentially load a dictionary,
// which potentially incurs a cost that must be accounted for by
// using the callback to report the value.
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
if statsCallbackFn != nil {
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
if ctx != nil {
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
if statsCallbackFn != nil {
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
}
}
}
@@ -49,7 +49,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
return nil, err
}
return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(dvReader,
return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader,
field, minLon, minLat, maxLon, maxLat)), nil
}
}
@@ -85,7 +85,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
}
// add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher,
buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher)
}
@@ -201,7 +201,7 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel
return isIndexed, closeF, err
}
func buildRectFilter(dvReader index.DocValueReader, field string,
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -222,6 +222,11 @@ func buildRectFilter(dvReader index.DocValueReader, field string,
}
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
for i := range lons {
if geo.BoundingBoxContains(lons[i], lats[i],
minLon, minLat, maxLon, maxLat) {
@@ -66,7 +66,7 @@ func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexRea
// wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(ctx, rectSearcher,
buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
buildDistFilter(ctx, dvReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box
@@ -113,7 +113,7 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader,
return boxSearcher, nil
}
func buildDistFilter(dvReader index.DocValueReader, field string,
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -134,6 +134,11 @@ func buildDistFilter(dvReader index.DocValueReader, field string,
}
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
for i := range lons {
dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat)
if dist <= maxDist/1000 {
@@ -71,7 +71,7 @@ func NewGeoBoundedPolygonSearcher(ctx context.Context, indexReader index.IndexRe
// wrap it in a filtering searcher that checks for the polygon inclusivity
return NewFilteringSearcher(ctx, rectSearcher,
buildPolygonFilter(dvReader, field, coordinates)), nil
buildPolygonFilter(ctx, dvReader, field, coordinates)), nil
}
const float64EqualityThreshold = 1e-6
@@ -83,7 +83,7 @@ func almostEqual(a, b float64) bool {
// buildPolygonFilter returns true if the point lies inside the
// polygon. It is based on the ray-casting technique as described
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(dvReader index.DocValueReader, field string,
func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string,
coordinates []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -107,6 +107,11 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string,
// Note: this approach works for points which are strictly inside
// the polygon. ie it might fail for certain points on the polygon boundaries.
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
nVertices := len(coordinates)
if len(coordinates) < 3 {
return false
+7 -2
@@ -54,7 +54,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
}
return NewFilteringSearcher(ctx, mSearcher,
buildRelationFilterOnShapes(dvReader, field, relation, shape)), nil
buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil
}
@@ -63,7 +63,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
// implementation of doc values.
var termSeparatorSplitSlice = []byte{0xff}
func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string,
relation string, shape index.GeoJSON) FilterFunc {
// this is for accumulating the shape's actual complete value
// spread across multiple docvalue visitor callbacks.
@@ -116,6 +116,11 @@ func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
return found
}
@@ -88,7 +88,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
// reporting back the IO stats with respect to the dictionary
// loaded, using the context
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
// cannot return MatchNoneSearcher because of interaction with
@@ -110,7 +111,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcherBytes(ctx, indexReader, terms, field,
+2 -1
@@ -102,7 +102,8 @@ func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, patte
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost,
+14
@@ -39,6 +39,9 @@ type TermSearcher struct {
}
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
if isTermQuery(ctx) {
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
}
return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
}
@@ -140,3 +143,14 @@ func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
return nil, nil
}
func isTermQuery(ctx context.Context) bool {
if ctx != nil {
// if the ctx already has a value set for the query type,
// it would've been set at a non-term searcher level.
_, ok := ctx.Value(search.QueryTypeKey).(string)
return !ok
}
// if the context is nil, then don't set the query type
return false
}
@@ -49,7 +49,8 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p
}
if ctx != nil {
reportIOStats(fieldDict.BytesRead(), ctx)
reportIOStats(ctx, fieldDict.BytesRead())
search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
}
return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
@@ -84,7 +84,8 @@ func NewTermRangeSearcher(ctx context.Context, indexReader index.IndexReader,
}
if ctx != nil {
reportIOStats(fieldDict.BytesRead(), ctx)
reportIOStats(ctx, fieldDict.BytesRead())
search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
}
return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
+51
@@ -14,6 +14,8 @@
package search
import "context"
func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap {
rv := locations[0]
@@ -67,3 +69,52 @@ func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch)
return dest
}
const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"
type SearchIOStatsCallbackFunc func(uint64)
// Implementations of SearchIncrementalCostCallbackFn should handle the following messages
// - add: increment the cost of a search operation
//   (which can be specific to a query type as well)
// - abort: the query was aborted, e.g. due to a cancellation of the search's context,
//   which can be handled differently as well
// - done: indicates that a search was complete and the tracked cost can be
//   handled safely by the implementation.
type SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg,
SearchQueryType, uint64)
type SearchIncrementalCostCallbackMsg uint
type SearchQueryType uint
const (
Term = SearchQueryType(1 << iota)
Geo
Numeric
GenericCost
)
const (
AddM = SearchIncrementalCostCallbackMsg(1 << iota)
AbortM
DoneM
)
const SearchIncrementalCostKey = "_search_incremental_cost_key"
const QueryTypeKey = "_query_type_key"
func RecordSearchCost(ctx context.Context,
msg SearchIncrementalCostCallbackMsg, bytes uint64) {
if ctx != nil {
queryType, ok := ctx.Value(QueryTypeKey).(SearchQueryType)
if !ok {
// for the cost of the non query type specific factors such as
// doc values and stored fields section.
queryType = GenericCost
}
aggCallbackFn := ctx.Value(SearchIncrementalCostKey)
if aggCallbackFn != nil {
aggCallbackFn.(SearchIncrementalCostCallbackFn)(msg, queryType, bytes)
}
}
}
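A minimal sketch of wiring the new incremental cost callback through a context; the aggregation logic is illustrative, and normally bleve invokes RecordSearchCost internally during SearchInContext rather than via direct calls:

```go
package main

import (
	"context"
	"fmt"

	"github.com/blevesearch/bleve/v2/search"
)

func main() {
	var total uint64
	cb := search.SearchIncrementalCostCallbackFn(func(
		msg search.SearchIncrementalCostCallbackMsg,
		qt search.SearchQueryType, bytes uint64) {
		switch msg {
		case search.AddM:
			total += bytes
		case search.AbortM:
			fmt.Println("aborted, tracked so far:", total)
		case search.DoneM:
			fmt.Println("done, total cost:", total, "query type:", qt)
		}
	})
	ctx := context.WithValue(context.Background(), search.SearchIncrementalCostKey, cb)

	// direct calls here just exercise the callback
	search.RecordSearchCost(ctx, search.AddM, 128)
	search.RecordSearchCost(ctx, search.DoneM, 0)
}
```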
-5
@@ -361,9 +361,6 @@ type builderNode struct {
func (n *builderNode) reset() {
n.final = false
n.finalOutput = 0
for i := range n.trans {
n.trans[i] = emptyTransition
}
n.trans = n.trans[:0]
n.next = nil
}
@@ -393,8 +390,6 @@ func (n *builderNode) equiv(o *builderNode) bool {
return true
}
var emptyTransition = transition{}
type transition struct {
out uint64
addr int
+4
@@ -719,6 +719,10 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
}
err := segment.visitStoredFields(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
fieldID := int(fieldsMap[field]) - 1
if fieldID < 0 {
// no entry for field in fieldsMap
return false
}
vals[fieldID] = append(vals[fieldID], value)
typs[fieldID] = append(typs[fieldID], typ)
+6 -3
@@ -109,7 +109,6 @@ type PostingsList struct {
chunkSize uint64
// atomic access to this variable
bytesRead uint64
}
@@ -303,12 +302,17 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
return fmt.Errorf("error loading roaring bitmap: %v", err)
}
rv.chunkSize, err = getChunkSize(d.sb.chunkMode,
chunkSize, err := getChunkSize(d.sb.chunkMode,
rv.postings.GetCardinality(), d.sb.numDocs)
if err != nil {
return err
} else if chunkSize == 0 {
return fmt.Errorf("chunk size is zero, chunkMode: %v, numDocs: %v",
d.sb.chunkMode, d.sb.numDocs)
}
rv.chunkSize = chunkSize
return nil
}
@@ -344,7 +348,6 @@ type PostingsIterator struct {
includeFreqNorm bool
includeLocs bool
// atomic access to this variable
bytesRead uint64
}
+5 -1
@@ -103,7 +103,7 @@ type SegmentBase struct {
fieldDvNames []string // field names cached in fieldDvReaders
size uint64
// atomic access to this variable
// atomic access to these variables
bytesRead uint64
bytesWritten uint64
@@ -319,6 +319,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
// read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
if vellumLen == 0 {
sb.m.Unlock()
return nil, fmt.Errorf("empty dictionary for field: %v", field)
}
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
rv.incrementBytesRead(uint64(read) + vellumLen)
rv.fst, err = vellum.Load(fstBytes)