Bump github.com/blevesearch/bleve/v2 from 2.3.7 to 2.3.9
Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.3.7 to 2.3.9.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.3.7...v2.3.9)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
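For reference, a patch-level bump like this lands in a consumer's dependency manifest as a one-line change; a hedged, hypothetical go.mod excerpt:

```
require github.com/blevesearch/bleve/v2 v2.3.9
```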
committed by Ralf Haferkamp
parent 82b600aef5
commit f9b69afa9e
+9 -3
@@ -9,7 +9,7 @@
[](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[](https://opensource.org/licenses/Apache-2.0)

modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
A modern text indexing library in go

## Features

@@ -24,8 +24,8 @@ modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
* [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) Scoring
* Boosting
* Search result match highlighting
* Query time boosting
* Search result match highlighting with document fragments
* Aggregations/faceting support:
  * Terms Facet
  * Numeric Range Facet
@@ -97,6 +97,12 @@ Flags:
Use "bleve [command] --help" for more information about a command.
```

## Text Analysis

Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages:

Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)

## Text Analysis Wizard

[bleveanalysis.couchbase.com](https://bleveanalysis.couchbase.com)
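For context on the analyzer list above, here is a minimal hedged sketch (standard bleve v2 API; the index path and field contents are made up) of selecting one of the pre-built language analyzers as the default:

```go
package main

import (
	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/analysis/lang/en"
)

func main() {
	// use the pre-built English analyzer ("en") for all fields by default
	m := bleve.NewIndexMapping()
	m.DefaultAnalyzer = en.AnalyzerName

	idx, err := bleve.New("example.bleve", m)
	if err != nil {
		panic(err)
	}
	defer idx.Close()

	_ = idx.Index("doc1", map[string]interface{}{"body": "modern text indexing in go"})
}
```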
+174
@@ -0,0 +1,174 @@
/*
This code was ported from the Open Search Project
https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
The algorithm itself was created by Mark Harwood
https://github.com/markharwood
*/

/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package en

import (
	"strings"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/registry"
)

const PluralStemmerName = "stemmer_en_plural"

type EnglishPluralStemmerFilter struct {
}

func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter {
	return &EnglishPluralStemmerFilter{}
}

func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		token.Term = []byte(stem(string(token.Term)))
	}

	return input
}

func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return NewEnglishPluralStemmerFilter(), nil
}

func init() {
	registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
}

// ----------------------------------------------------------------------------

// Words ending in oes that retain the e when stemmed
var oesExceptions = []string{"shoes", "canoes", "oboes"}

// Words ending in ches that retain the e when stemmed
var chesExceptions = []string{
	"cliches",
	"avalanches",
	"mustaches",
	"moustaches",
	"quiches",
	"headaches",
	"heartaches",
	"porsches",
	"tranches",
	"caches",
}

func stem(word string) string {
	runes := []rune(strings.ToLower(word))

	if len(runes) < 3 || runes[len(runes)-1] != 's' {
		return string(runes)
	}

	switch runes[len(runes)-2] {
	case 'u':
		fallthrough
	case 's':
		return string(runes)
	case 'e':
		// Modified ies->y logic from original s-stemmer - only work on strings > 4
		// so spies -> spy still but pies->pie.
		// The original code also special-cased aies and eies for no good reason as far as I can tell.
		// ( no words of consequence - eg http://www.thefreedictionary.com/words-that-end-in-aies )
		if len(runes) > 4 && runes[len(runes)-3] == 'i' {
			runes[len(runes)-3] = 'y'
			return string(runes[0 : len(runes)-2])
		}

		// Suffix rules to remove any dangling "e"
		if len(runes) > 3 {
			// xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe")
			if len(runes) > 4 && runes[len(runes)-3] == 'x' {
				return string(runes[0 : len(runes)-2])
			}

			// oes
			if len(runes) > 3 && runes[len(runes)-3] == 'o' {
				if isException(runes, oesExceptions) {
					// Only remove the S
					return string(runes[0 : len(runes)-1])
				}
				// Remove the es
				return string(runes[0 : len(runes)-2])
			}

			if len(runes) > 4 {
				// shes/sses
				if runes[len(runes)-4] == 's' && (runes[len(runes)-3] == 'h' || runes[len(runes)-3] == 's') {
					return string(runes[0 : len(runes)-2])
				}

				// ches
				if len(runes) > 4 {
					if runes[len(runes)-4] == 'c' && runes[len(runes)-3] == 'h' {
						if isException(runes, chesExceptions) {
							// Only remove the S
							return string(runes[0 : len(runes)-1])
						}
						// Remove the es
						return string(runes[0 : len(runes)-2])
					}
				}
			}
		}
		fallthrough
	default:
		return string(runes[0 : len(runes)-1])
	}
}

func isException(word []rune, exceptions []string) bool {
	for _, exception := range exceptions {

		exceptionRunes := []rune(exception)

		exceptionPos := len(exceptionRunes) - 1
		wordPos := len(word) - 1

		matched := true
		for exceptionPos >= 0 && wordPos >= 0 {
			if exceptionRunes[exceptionPos] != word[wordPos] {
				matched = false
				break
			}
			exceptionPos--
			wordPos--
		}
		if matched {
			return true
		}
	}
	return false
}
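The new filter registers itself under stemmer_en_plural, so it can be wired into a custom analyzer by name. A hedged sketch (the analyzer name is made up; the expected stems in the comment follow the rules above):

```go
import (
	"github.com/blevesearch/bleve/v2"
	// the blank import runs the init() above, registering stemmer_en_plural
	_ "github.com/blevesearch/bleve/v2/analysis/lang/en"
	"github.com/blevesearch/bleve/v2/mapping"
)

func pluralMapping() (mapping.IndexMapping, error) {
	m := bleve.NewIndexMapping()
	err := m.AddCustomAnalyzer("en_plural", map[string]interface{}{
		"type":          "custom",
		"tokenizer":     "unicode",
		"token_filters": []string{"to_lower", "stemmer_en_plural"},
	})
	if err != nil {
		return nil, err
	}
	// expected behavior per the code above: dogs->dog, boxes->box,
	// axes->axe, spies->spy, pies->pie, shoes->shoe, caches->cache
	return m, nil
}
```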
+4
@@ -588,6 +588,10 @@ func (s *Scorch) StatsMap() map[string]interface{} {
	m := s.stats.ToMap()

	indexSnapshot := s.currentSnapshot()
	if indexSnapshot == nil {
		return nil
	}

	defer func() {
		_ = indexSnapshot.Close()
	}()
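Because StatsMap can now return nil before a first snapshot exists, callers should guard the result; a minimal hedged sketch (s is a hypothetical *scorch.Scorch):

```go
if stats := s.StatsMap(); stats != nil {
	for k, v := range stats {
		fmt.Println(k, v)
	}
}
```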
+5 -3
@@ -102,10 +102,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
			// this is because there are chances of having a series of loadChunk calls,
			// and they have to be added together before sending the bytesRead at this point
			// upstream.
			if delta := i.iterators[i.segmentOffset].BytesRead() - prevBytesRead; delta > 0 {
				i.incrementBytesRead(delta)
			bytesRead := i.iterators[i.segmentOffset].BytesRead()
			if bytesRead > prevBytesRead {
				i.incrementBytesRead(bytesRead - prevBytesRead)
			}

			return rv, nil
		}
		i.segmentOffset++
@@ -204,6 +204,8 @@ func (i *IndexSnapshotTermFieldReader) Close() error {
			// reader's bytesRead value
			statsCallbackFn.(search.SearchIOStatsCallbackFunc)(i.bytesRead)
		}

		search.RecordSearchCost(i.ctx, search.AddM, i.bytesRead)
	}

	if i.snapshot != nil {
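The first hunk is an unsigned-arithmetic fix: BytesRead() is a cumulative uint64 counter, so computing the delta before comparing (`cur - prev`) wraps around to a huge positive value whenever cur < prev, and the old `delta > 0` guard would not catch that. A hedged sketch of the corrected pattern, with illustrative names:

```go
prev := it.BytesRead()
// ... advance the iterator ...
if cur := it.BytesRead(); cur > prev { // compare first, then subtract
	r.incrementBytesRead(cur - prev)
}
```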
+4 -4
@@ -124,16 +124,16 @@ func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields [
	}

	keyBuf := GetRowBuffer()
	if tempRow.KeySize() > len(keyBuf) {
		keyBuf = make([]byte, 2*tempRow.KeySize())
	if tempRow.KeySize() > len(keyBuf.buf) {
		keyBuf.buf = make([]byte, 2*tempRow.KeySize())
	}
	defer PutRowBuffer(keyBuf)
	keySize, err := tempRow.KeyTo(keyBuf)
	keySize, err := tempRow.KeyTo(keyBuf.buf)
	if err != nil {
		return err
	}

	value, err := i.kvreader.Get(keyBuf[:keySize])
	value, err := i.kvreader.Get(keyBuf.buf[:keySize])
	if err != nil {
		return err
	}
+38 -33
@@ -134,18 +134,23 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) {
	return
}

var rowBufferPool sync.Pool

func GetRowBuffer() []byte {
	if rb, ok := rowBufferPool.Get().([]byte); ok {
		return rb
	} else {
		return make([]byte, RowBufferSize)
	}
type rowBuffer struct {
	buf []byte
}

func PutRowBuffer(buf []byte) {
	rowBufferPool.Put(buf)
var rowBufferPool sync.Pool

func GetRowBuffer() *rowBuffer {
	if rb, ok := rowBufferPool.Get().(*rowBuffer); ok {
		return rb
	} else {
		buf := make([]byte, RowBufferSize)
		return &rowBuffer{buf: buf}
	}
}

func PutRowBuffer(rb *rowBuffer) {
	rowBufferPool.Put(rb)
}

func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
@@ -169,14 +174,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			if tfr.DictionaryRowKeySize() > len(rowBuf) {
				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
			if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
				rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
			}
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
			if err != nil {
				return err
			}
			dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
			dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] += 1
		}
		addKeyBytes += row.KeySize()
		addValBytes += row.ValueSize()
@@ -197,14 +202,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			if tfr.DictionaryRowKeySize() > len(rowBuf) {
				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
			if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
				rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
			}
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
			if err != nil {
				return err
			}
			dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
			dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] -= 1
		}
		deleteKeyBytes += row.KeySize()
	}
@@ -541,26 +546,26 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []In
		switch row := row.(type) {
		case *TermFrequencyRow:
			if existingTermKeys != nil {
				if row.KeySize() > len(keyBuf) {
					keyBuf = make([]byte, row.KeySize())
				if row.KeySize() > len(keyBuf.buf) {
					keyBuf.buf = make([]byte, row.KeySize())
				}
				keySize, _ := row.KeyTo(keyBuf)
				if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
				keySize, _ := row.KeyTo(keyBuf.buf)
				if _, ok := existingTermKeys[string(keyBuf.buf[:keySize])]; ok {
					updateRows = append(updateRows, row)
					delete(existingTermKeys, string(keyBuf[:keySize]))
					delete(existingTermKeys, string(keyBuf.buf[:keySize]))
					continue
				}
			}
			addRows = append(addRows, row)
		case *StoredRow:
			if existingStoredKeys != nil {
				if row.KeySize() > len(keyBuf) {
					keyBuf = make([]byte, row.KeySize())
				if row.KeySize() > len(keyBuf.buf) {
					keyBuf.buf = make([]byte, row.KeySize())
				}
				keySize, _ := row.KeyTo(keyBuf)
				if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
				keySize, _ := row.KeyTo(keyBuf.buf)
				if _, ok := existingStoredKeys[string(keyBuf.buf[:keySize])]; ok {
					updateRows = append(updateRows, row)
					delete(existingStoredKeys, string(keyBuf[:keySize]))
					delete(existingStoredKeys, string(keyBuf.buf[:keySize]))
					continue
				}
			}
@@ -1047,23 +1052,23 @@ func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*
	}

	keyBuf := GetRowBuffer()
	if tempRow.KeySize() > len(keyBuf) {
		keyBuf = make([]byte, 2*tempRow.KeySize())
	if tempRow.KeySize() > len(keyBuf.buf) {
		keyBuf.buf = make([]byte, 2*tempRow.KeySize())
	}
	defer PutRowBuffer(keyBuf)
	keySize, err := tempRow.KeyTo(keyBuf)
	keySize, err := tempRow.KeyTo(keyBuf.buf)
	if err != nil {
		return nil, err
	}

	value, err := kvreader.Get(keyBuf[:keySize])
	value, err := kvreader.Get(keyBuf.buf[:keySize])
	if err != nil {
		return nil, err
	}
	if value == nil {
		return nil, nil
	}
	backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value)
	backIndexRow, err := NewBackIndexRowKV(keyBuf.buf[:keySize], value)
	if err != nil {
		return nil, err
	}
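The rewritten pool wraps the slice in a *rowBuffer instead of storing []byte directly. This is the usual Go remedy for staticcheck SA6002: a slice placed into a sync.Pool is copied into an interface{} and allocates on every Put, while a pointer does not; the wrapper also lets a grown buffer stay in the pool. A hedged usage sketch mirroring the call sites above:

```go
rb := GetRowBuffer()
defer PutRowBuffer(rb) // the (possibly grown) buffer returns to the pool

if need := tempRow.KeySize(); need > len(rb.buf) {
	rb.buf = make([]byte, need) // grow in place; the wrapper keeps the new slice
}
keySize, err := tempRow.KeyTo(rb.buf)
```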
+11 -5
@@ -474,9 +474,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
	// accounted by invoking this callback when the TFR is closed.
	// 2. the docvalues portion (accounted in collector) and the retrieval
	// of stored fields bytes (by LoadAndHighlightFields)
	var totalBytesRead uint64
	var totalSearchCost uint64
	sendBytesRead := func(bytesRead uint64) {
		totalBytesRead += bytesRead
		totalSearchCost += bytesRead
	}

	ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey,
@@ -495,11 +495,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
			err = serr
		}
		if sr != nil {
			sr.BytesRead = totalBytesRead
			sr.Cost = totalSearchCost
		}
		if sr, ok := indexReader.(*scorch.IndexSnapshot); ok {
			sr.UpdateIOStats(totalBytesRead)
			sr.UpdateIOStats(totalSearchCost)
		}

		search.RecordSearchCost(ctx, search.DoneM, 0)
	}()

	if req.Facets != nil {
@@ -574,6 +576,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
		}
	}

	var storedFieldsCost uint64
	for _, hit := range hits {
		if i.name != "" {
			hit.Index = i.name
@@ -582,9 +585,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
		if err != nil {
			return nil, err
		}
		totalBytesRead += storedFieldsBytes
		storedFieldsCost += storedFieldsBytes
	}

	totalSearchCost += storedFieldsCost
	search.RecordSearchCost(ctx, search.AddM, storedFieldsCost)

	atomic.AddUint64(&i.stats.searches, 1)
	searchDuration := time.Since(searchStart)
	atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
+8 -27
@@ -140,11 +140,11 @@ func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
	return nil
}

// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
// documentMappingForPath returns the EXACT and closest matches for a sub
// document or for an explicitly mapped field; the closest most specific
// document mapping could be one that matches part of the provided path.
func (dm *DocumentMapping) documentMappingForPath(path string) (
	*DocumentMapping, *DocumentMapping) {
	pathElements := decodePath(path)
	current := dm
OUTER:
@@ -165,27 +165,9 @@ OUTER:
			}
		}

		return nil
		return nil, current
	}
	return current
}

// closestDocMapping findest the most specific document mapping that matches
// part of the provided path
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
	pathElements := decodePath(path)
	current := dm
OUTER:
	for _, pathElement := range pathElements {
		for name, subDocMapping := range current.Properties {
			if name == pathElement {
				current = subDocMapping
				continue OUTER
			}
		}
		break
	}
	return current
	return current, current
}

// NewDocumentMapping returns a new document mapping
@@ -408,8 +390,7 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
	pathString := encodePath(path)
	// look to see if there is a mapping for this field
	subDocMapping := dm.documentMappingForPath(pathString)
	closestDocMapping := dm.closestDocMapping(pathString)
	subDocMapping, closestDocMapping := dm.documentMappingForPath(pathString)

	// check to see if we even need to do further processing
	if subDocMapping != nil && !subDocMapping.Enabled {
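To make the merged return values concrete, a hedged sketch of the internal behavior (these methods are unexported, so this is illustration rather than public API):

```go
dm := NewDocumentMapping()
dm.AddSubDocumentMapping("author", NewDocumentMapping())

exact, closest := dm.documentMappingForPath("author")
// exact == closest == the "author" mapping

exact, closest = dm.documentMappingForPath("author.name")
// exact == nil (no explicit mapping for "name");
// closest == the "author" mapping, the most specific enclosing one
```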
+14 -4
@@ -326,7 +326,7 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}
	docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)

	// see if the _all field was disabled
	allMapping := docMapping.documentMappingForPath("_all")
	allMapping, _ := docMapping.documentMappingForPath("_all")
	if allMapping == nil || allMapping.Enabled {
		field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, index.IndexField|index.IncludeTermVectors)
		doc.AddField(field)
@@ -364,8 +364,9 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
			return analyzerName
		}
	}

	// now try the default mapping
	pathMapping := im.DefaultMapping.documentMappingForPath(path)
	pathMapping, _ := im.DefaultMapping.documentMappingForPath(path)
	if pathMapping != nil {
		if len(pathMapping.Fields) > 0 {
			if pathMapping.Fields[0].Analyzer != "" {
@@ -377,7 +378,16 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
	// next we will try default analyzers for the path
	pathDecoded := decodePath(path)
	for _, docMapping := range im.TypeMapping {
		rv := docMapping.defaultAnalyzerName(pathDecoded)
		if docMapping.Enabled {
			rv := docMapping.defaultAnalyzerName(pathDecoded)
			if rv != "" {
				return rv
			}
		}
	}
	// now the default analyzer for the default mapping
	if im.DefaultMapping.Enabled {
		rv := im.DefaultMapping.defaultAnalyzerName(pathDecoded)
		if rv != "" {
			return rv
		}
@@ -411,7 +421,7 @@ func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {

	// first we look for explicit mapping on the field
	for _, docMapping := range im.TypeMapping {
		pathMapping := docMapping.documentMappingForPath(path)
		pathMapping, _ := docMapping.documentMappingForPath(path)
		if pathMapping != nil {
			if len(pathMapping.Fields) > 0 {
				if pathMapping.Fields[0].Analyzer != "" {
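One visible effect of the new Enabled checks: a disabled type mapping no longer contributes a default analyzer. A hedged sketch with the public mapping API (the type and field names are made up):

```go
m := bleve.NewIndexMapping()
m.AddDocumentMapping("archived", bleve.NewDocumentDisabledMapping())

// with the change above, the disabled "archived" mapping is skipped
// when resolving the analyzer for a field path
analyzer := m.AnalyzerNameForPath("body")
```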
+25
@@ -225,3 +225,28 @@ func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQu
func NewIPRangeQuery(cidr string) *query.IPRangeQuery {
	return query.NewIPRangeQuery(cidr)
}

// NewGeoShapeQuery creates a new Query for matching the given geo shape.
// This method can be used for creating geoshape queries for shape types
// like: point, linestring, polygon, multipoint, multilinestring,
// multipolygon and envelope.
func NewGeoShapeQuery(coordinates [][][][]float64, typ, relation string) (*query.GeoShapeQuery, error) {
	return query.NewGeoShapeQuery(coordinates, typ, relation)
}

// NewGeoShapeCircleQuery creates a new query for a geoshape that is a
// circle given center point and the radius. Radius formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters" If the unit cannot be determined,
// the entire string is parsed and the unit of meters is assumed.
func NewGeoShapeCircleQuery(coordinates []float64, radius, relation string) (*query.GeoShapeQuery, error) {
	return query.NewGeoShapeCircleQuery(coordinates, radius, relation)
}

// NewGeometryCollectionQuery creates a new query for the provided
// geometrycollection coordinates and types, which could contain
// multiple geo shapes.
func NewGeometryCollectionQuery(coordinates [][][][][]float64, types []string, relation string) (*query.GeoShapeQuery, error) {
	return query.NewGeometryCollectionQuery(coordinates, types, relation)
}
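A hedged usage sketch of the first new constructor (the coordinates are made up; the nesting is GeoJSON-style, with the outer level accommodating multi-shapes):

```go
// a rectangle over part of San Francisco expressed as a polygon:
// one polygon, one closed ring of [lon, lat] points
polygon := [][][][]float64{{{
	{-122.45, 37.76},
	{-122.41, 37.76},
	{-122.41, 37.79},
	{-122.45, 37.79},
	{-122.45, 37.76},
}}}

q, err := bleve.NewGeoShapeQuery(polygon, "polygon", "intersects")
if err != nil {
	// invalid shape type or relation
}
req := bleve.NewSearchRequest(q)
```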
+21 -9
@@ -485,15 +485,27 @@ func (ss *SearchStatus) Merge(other *SearchStatus) {

// A SearchResult describes the results of executing
// a SearchRequest.
//
// Status - Whether the search was executed on the underlying indexes successfully
// or failed, and the corresponding errors.
// Request - The SearchRequest that was executed.
// Hits - The list of documents that matched the query and their corresponding
// scores, score explanation, location info and so on.
// Total - The total number of documents that matched the query.
// Cost - indicates how expensive was the query with respect to bytes read
// from the mmaped index files.
// MaxScore - The maximum score seen across all document hits seen for this query.
// Took - The time taken to execute the search.
// Facets - The facet results for the search.
type SearchResult struct {
	Status    *SearchStatus                  `json:"status"`
	Request   *SearchRequest                 `json:"request"`
	Hits      search.DocumentMatchCollection `json:"hits"`
	Total     uint64                         `json:"total_hits"`
	BytesRead uint64                         `json:"bytesRead"`
	MaxScore  float64                        `json:"max_score"`
	Took      time.Duration                  `json:"took"`
	Facets    search.FacetResults            `json:"facets"`
	Status   *SearchStatus                  `json:"status"`
	Request  *SearchRequest                 `json:"request"`
	Hits     search.DocumentMatchCollection `json:"hits"`
	Total    uint64                         `json:"total_hits"`
	Cost     uint64                         `json:"cost"`
	MaxScore float64                        `json:"max_score"`
	Took     time.Duration                  `json:"took"`
	Facets   search.FacetResults            `json:"facets"`
}

func (sr *SearchResult) Size() int {
@@ -566,7 +578,7 @@ func (sr *SearchResult) Merge(other *SearchResult) {
	sr.Status.Merge(other.Status)
	sr.Hits = append(sr.Hits, other.Hits...)
	sr.Total += other.Total
	sr.BytesRead += other.BytesRead
	sr.Cost += other.Cost
	if other.MaxScore > sr.MaxScore {
		sr.MaxScore = other.MaxScore
	}
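A hedged sketch of reading the renamed field through the public API (idx and the query are hypothetical):

```go
func logQueryCost(idx bleve.Index) error {
	req := bleve.NewSearchRequest(bleve.NewMatchQuery("dogs"))
	res, err := idx.Search(req)
	if err != nil {
		return err
	}
	// Cost replaces the old BytesRead field: bytes read from the
	// mmapped index files while serving this query
	fmt.Printf("hits=%d cost=%d bytes\n", res.Total, res.Cost)
	return nil
}
```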
+18 -1
@@ -200,6 +200,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
	hc.needDocIds = hc.needDocIds || loadID
	select {
	case <-ctx.Done():
		search.RecordSearchCost(ctx, search.AbortM, 0)
		return ctx.Err()
	default:
		next, err = searcher.Next(searchContext)
@@ -208,6 +209,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
		if hc.total%CheckDoneEvery == 0 {
			select {
			case <-ctx.Done():
				search.RecordSearchCost(ctx, search.AbortM, 0)
				return ctx.Err()
			default:
			}
@@ -232,6 +234,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
		// total bytes read as part of docValues being read every hit
		// which must be accounted by invoking the callback.
		statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead)

		search.RecordSearchCost(ctx, search.AddM, hc.bytesRead)
	}

	// help finalize/flush the results in case
@@ -367,7 +371,20 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
// SetFacetsBuilder registers a facet builder for this collector
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	hc.facetsBuilder = facetsBuilder
	hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
	fieldsRequiredForFaceting := facetsBuilder.RequiredFields()
	// for each of these fields, append only if not already there in hc.neededFields.
	for _, field := range fieldsRequiredForFaceting {
		found := false
		for _, neededField := range hc.neededFields {
			if field == neededField {
				found = true
				break
			}
		}
		if !found {
			hc.neededFields = append(hc.neededFields, field)
		}
	}
}

// finalizeResults starts with the heap containing the final top size+skip
+2
@@ -63,6 +63,8 @@ func (q *GeoBoundingBoxQuery) Searcher(ctx context.Context, i index.IndexReader,
		field = m.DefaultSearchField()
	}

	ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)

	if q.BottomRight[0] < q.TopLeft[0] {
		// cross date line, rewrite as two parts
+2
@@ -61,6 +61,8 @@ func (q *GeoBoundingPolygonQuery) Searcher(ctx context.Context, i index.IndexRea
		field = m.DefaultSearchField()
	}

	ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)

	return searcher.NewGeoBoundedPolygonSearcher(ctx, i, q.Points, field, q.BoostVal.Value(), options)
}
+2
@@ -64,6 +64,8 @@ func (q *GeoDistanceQuery) Searcher(ctx context.Context, i index.IndexReader, m
		field = m.DefaultSearchField()
	}

	ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)

	dist, err := geo.ParseDistance(q.Distance)
	if err != nil {
		return nil, err
+2
@@ -107,6 +107,8 @@ func (q *GeoShapeQuery) Searcher(ctx context.Context, i index.IndexReader,
		field = m.DefaultSearchField()
	}

	ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)

	return searcher.NewGeoShapeSearcher(ctx, i, q.Geometry.Shape, q.Geometry.Relation, field,
		q.BoostVal.Value(), options)
}
+1
@@ -77,6 +77,7 @@ func (q *NumericRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m
	if q.FieldVal == "" {
		field = m.DefaultSearchField()
	}
	ctx = context.WithValue(ctx, search.QueryTypeKey, search.Numeric)
	return searcher.NewNumericRangeSearcher(ctx, i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}
-4
@@ -27,10 +27,6 @@ var reflectStaticSizeDocumentMatch int
var reflectStaticSizeSearchContext int
var reflectStaticSizeLocation int

const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"

type SearchIOStatsCallbackFunc func(uint64)

func init() {
	var dm DocumentMatch
	reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
+8 -5
@@ -59,7 +59,8 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s
	}

	if ctx != nil {
		reportIOStats(dictBytesRead, ctx)
		reportIOStats(ctx, dictBytesRead)
		search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
	}

	return NewMultiTermSearcher(ctx, indexReader, candidates, field,
@@ -71,13 +72,15 @@ type fuzzyCandidates struct {
	bytesRead uint64
}

func reportIOStats(bytesRead uint64, ctx context.Context) {
func reportIOStats(ctx context.Context, bytesRead uint64) {
	// The fuzzy, regexp like queries essentially load a dictionary,
	// which potentially incurs a cost that must be accounted by
	// using the callback to report the value.
	statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
	if statsCallbackFn != nil {
		statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
	if ctx != nil {
		statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
		if statsCallbackFn != nil {
			statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
		}
	}
}
+8 -3
@@ -49,7 +49,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
			return nil, err
		}

		return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(dvReader,
		return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader,
			field, minLon, minLat, maxLon, maxLat)), nil
	}
}
@@ -85,7 +85,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
		}
		// add filter to check points near the boundary
		onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher,
			buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
			buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat))
		openedSearchers = append(openedSearchers, onBoundarySearcher)
	}

@@ -201,7 +201,7 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel
	return isIndexed, closeF, err
}

func buildRectFilter(dvReader index.DocValueReader, field string,
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string,
	minLon, minLat, maxLon, maxLat float64) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		// check geo matches against all numeric type terms indexed
@@ -222,6 +222,11 @@ func buildRectFilter(dvReader index.DocValueReader, field string,
			}
		})
		if err == nil && found {
			bytes := dvReader.BytesRead()
			if bytes > 0 {
				reportIOStats(ctx, bytes)
				search.RecordSearchCost(ctx, search.AddM, bytes)
			}
			for i := range lons {
				if geo.BoundingBoxContains(lons[i], lats[i],
					minLon, minLat, maxLon, maxLat) {
+7 -2
@@ -66,7 +66,7 @@ func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexRea

	// wrap it in a filtering searcher which checks the actual distance
	return NewFilteringSearcher(ctx, rectSearcher,
		buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
		buildDistFilter(ctx, dvReader, field, centerLon, centerLat, dist)), nil
}

// boxSearcher builds a searcher for the described bounding box
@@ -113,7 +113,7 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader,
	return boxSearcher, nil
}

func buildDistFilter(dvReader index.DocValueReader, field string,
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string,
	centerLon, centerLat, maxDist float64) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		// check geo matches against all numeric type terms indexed
@@ -134,6 +134,11 @@ func buildDistFilter(dvReader index.DocValueReader, field string,
			}
		})
		if err == nil && found {
			bytes := dvReader.BytesRead()
			if bytes > 0 {
				reportIOStats(ctx, bytes)
				search.RecordSearchCost(ctx, search.AddM, bytes)
			}
			for i := range lons {
				dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat)
				if dist <= maxDist/1000 {
+7 -2
@@ -71,7 +71,7 @@ func NewGeoBoundedPolygonSearcher(ctx context.Context, indexReader index.IndexRe

	// wrap it in a filtering searcher that checks for the polygon inclusivity
	return NewFilteringSearcher(ctx, rectSearcher,
		buildPolygonFilter(dvReader, field, coordinates)), nil
		buildPolygonFilter(ctx, dvReader, field, coordinates)), nil
}

const float64EqualityThreshold = 1e-6
@@ -83,7 +83,7 @@ func almostEqual(a, b float64) bool {
// buildPolygonFilter returns true if the point lies inside the
// polygon. It is based on the ray-casting technique as referred
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(dvReader index.DocValueReader, field string,
func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string,
	coordinates []geo.Point) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		// check geo matches against all numeric type terms indexed
@@ -107,6 +107,11 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string,
		// Note: this approach works for points which are strictly inside
		// the polygon. ie it might fail for certain points on the polygon boundaries.
		if err == nil && found {
			bytes := dvReader.BytesRead()
			if bytes > 0 {
				reportIOStats(ctx, bytes)
				search.RecordSearchCost(ctx, search.AddM, bytes)
			}
			nVertices := len(coordinates)
			if len(coordinates) < 3 {
				return false
+7 -2
@@ -54,7 +54,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
	}

	return NewFilteringSearcher(ctx, mSearcher,
		buildRelationFilterOnShapes(dvReader, field, relation, shape)), nil
		buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil

}

@@ -63,7 +63,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
// implementation of doc values.
var termSeparatorSplitSlice = []byte{0xff}

func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string,
	relation string, shape index.GeoJSON) FilterFunc {
	// this is for accumulating the shape's actual complete value
	// spread across multiple docvalue visitor callbacks.
@@ -116,6 +116,11 @@ func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
		})

		if err == nil && found {
			bytes := dvReader.BytesRead()
			if bytes > 0 {
				reportIOStats(ctx, bytes)
				search.RecordSearchCost(ctx, search.AddM, bytes)
			}
			return found
		}
+4 -2
@@ -88,7 +88,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
	// reporting back the IO stats with respect to the dictionary
	// loaded, using the context
	if ctx != nil {
		reportIOStats(dictBytesRead, ctx)
		reportIOStats(ctx, dictBytesRead)
		search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
	}

	// cannot return MatchNoneSearcher because of interaction with
@@ -110,7 +111,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
	}

	if ctx != nil {
		reportIOStats(dictBytesRead, ctx)
		reportIOStats(ctx, dictBytesRead)
		search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
	}

	return NewMultiTermSearcherBytes(ctx, indexReader, terms, field,
+2 -1
@@ -102,7 +102,8 @@ func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, patte
	}

	if ctx != nil {
		reportIOStats(dictBytesRead, ctx)
		reportIOStats(ctx, dictBytesRead)
		search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
	}

	return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost,
+14
@@ -39,6 +39,9 @@ type TermSearcher struct {
}

func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
	if isTermQuery(ctx) {
		ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
	}
	return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
}

@@ -140,3 +143,14 @@ func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (

	return nil, nil
}

func isTermQuery(ctx context.Context) bool {
	if ctx != nil {
		// if the ctx already has a value set for query type
		// it would've been done at a non term searcher level.
		_, ok := ctx.Value(search.QueryTypeKey).(string)
		return !ok
	}
	// if the context is nil, then don't set the query type
	return false
}
+2 -1
@@ -49,7 +49,8 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p
	}

	if ctx != nil {
		reportIOStats(fieldDict.BytesRead(), ctx)
		reportIOStats(ctx, fieldDict.BytesRead())
		search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
	}

	return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
+2 -1
@@ -84,7 +84,8 @@ func NewTermRangeSearcher(ctx context.Context, indexReader index.IndexReader,
	}

	if ctx != nil {
		reportIOStats(fieldDict.BytesRead(), ctx)
		reportIOStats(ctx, fieldDict.BytesRead())
		search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
	}

	return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
+51
@@ -14,6 +14,8 @@

package search

import "context"

func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap {
	rv := locations[0]

@@ -67,3 +69,52 @@ func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch)

	return dest
}

const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"

type SearchIOStatsCallbackFunc func(uint64)

// Implementation of SearchIncrementalCostCallbackFn should handle the following messages
// - add: increment the cost of a search operation
// (which can be specific to a query type as well)
// - abort: query was aborted due to a cancel of search's context (for eg),
// which can be handled differently as well
// - done: indicates that a search was complete and the tracked cost can be
// handled safely by the implementation.
type SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg,
	SearchQueryType, uint64)
type SearchIncrementalCostCallbackMsg uint
type SearchQueryType uint

const (
	Term = SearchQueryType(1 << iota)
	Geo
	Numeric
	GenericCost
)

const (
	AddM = SearchIncrementalCostCallbackMsg(1 << iota)
	AbortM
	DoneM
)

const SearchIncrementalCostKey = "_search_incremental_cost_key"
const QueryTypeKey = "_query_type_key"

func RecordSearchCost(ctx context.Context,
	msg SearchIncrementalCostCallbackMsg, bytes uint64) {
	if ctx != nil {
		queryType, ok := ctx.Value(QueryTypeKey).(SearchQueryType)
		if !ok {
			// for the cost of the non query type specific factors such as
			// doc values and stored fields section.
			queryType = GenericCost
		}

		aggCallbackFn := ctx.Value(SearchIncrementalCostKey)
		if aggCallbackFn != nil {
			aggCallbackFn.(SearchIncrementalCostCallbackFn)(msg, queryType, bytes)
		}
	}
}
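A hedged sketch of consuming these hooks from an application (the bookkeeping inside the callback is made up; the keys and types come from the code above):

```go
costFn := search.SearchIncrementalCostCallbackFn(func(
	msg search.SearchIncrementalCostCallbackMsg,
	qt search.SearchQueryType, bytes uint64) {
	switch msg {
	case search.AddM:
		// accumulate bytes, optionally bucketed by query type
		// (search.Term, search.Geo, search.Numeric, search.GenericCost)
	case search.AbortM:
		// the search's context was cancelled mid-flight
	case search.DoneM:
		// search finished; the tracked total can be consumed safely
	}
})

ctx := context.WithValue(context.Background(), search.SearchIncrementalCostKey, costFn)
res, err := idx.SearchInContext(ctx, req)
```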
-5
@@ -361,9 +361,6 @@ type builderNode struct {
func (n *builderNode) reset() {
	n.final = false
	n.finalOutput = 0
	for i := range n.trans {
		n.trans[i] = emptyTransition
	}
	n.trans = n.trans[:0]
	n.next = nil
}
@@ -393,8 +390,6 @@ func (n *builderNode) equiv(o *builderNode) bool {
	return true
}

var emptyTransition = transition{}

type transition struct {
	out  uint64
	addr int
+4
@@ -719,6 +719,10 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
			}
			err := segment.visitStoredFields(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
				fieldID := int(fieldsMap[field]) - 1
				if fieldID < 0 {
					// no entry for field in fieldsMap
					return false
				}
				vals[fieldID] = append(vals[fieldID], value)
				typs[fieldID] = append(typs[fieldID], typ)
+6 -3
@@ -109,7 +109,6 @@ type PostingsList struct {

	chunkSize uint64

	// atomic access to this variable
	bytesRead uint64
}

@@ -303,12 +302,17 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
		return fmt.Errorf("error loading roaring bitmap: %v", err)
	}

	rv.chunkSize, err = getChunkSize(d.sb.chunkMode,
	chunkSize, err := getChunkSize(d.sb.chunkMode,
		rv.postings.GetCardinality(), d.sb.numDocs)
	if err != nil {
		return err
	} else if chunkSize == 0 {
		return fmt.Errorf("chunk size is zero, chunkMode: %v, numDocs: %v",
			d.sb.chunkMode, d.sb.numDocs)
	}

	rv.chunkSize = chunkSize

	return nil
}

@@ -344,7 +348,6 @@ type PostingsIterator struct {
	includeFreqNorm bool
	includeLocs     bool

	// atomic access to this variable
	bytesRead uint64
}
+5 -1
@@ -103,7 +103,7 @@ type SegmentBase struct {
	fieldDvNames []string // field names cached in fieldDvReaders
	size         uint64

	// atomic access to this variable
	// atomic access to these variables
	bytesRead    uint64
	bytesWritten uint64

@@ -319,6 +319,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
		if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
			// read the length of the vellum data
			vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
			if vellumLen == 0 {
				sb.m.Unlock()
				return nil, fmt.Errorf("empty dictionary for field: %v", field)
			}
			fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
			rv.incrementBytesRead(uint64(read) + vellumLen)
			rv.fst, err = vellum.Load(fstBytes)