Bump github.com/blevesearch/bleve/v2 from 2.3.7 to 2.3.9

Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.3.7 to 2.3.9.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.3.7...v2.3.9)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
dependabot[bot] authored on 2023-08-15 06:44:05 +0000, committed by Ralf Haferkamp
parent 82b600aef5, commit f9b69afa9e
58 changed files with 1138 additions and 330 deletions
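For reference, the same bump can be reproduced manually with the Go toolchain; a sketch, run from the module root:

```
go get github.com/blevesearch/bleve/v2@v2.3.9
go mod tidy
```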
+9 -3
@@ -9,7 +9,7 @@
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
A modern text indexing library in go
## Features
@@ -24,8 +24,8 @@ modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
* [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) Scoring
* Boosting
* Search result match highlighting
* Query time boosting
* Search result match highlighting with document fragments
* Aggregations/faceting support:
* Terms Facet
* Numeric Range Facet
@@ -97,6 +97,12 @@ Flags:
Use "bleve [command] --help" for more information about a command.
```
## Text Analysis
Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages:
Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)
## Text Analysis Wizard
[bleveanalysis.couchbase.com](https://bleveanalysis.couchbase.com)
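To use one of the pre-built language analyzers above, set it as the default analyzer on an index mapping. A minimal sketch: the English analyzer is registered by default, while other languages may need a blank import of their analysis/lang package; the index and document here are illustrative.

```go
package main

import (
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	mapping := bleve.NewIndexMapping()
	mapping.DefaultAnalyzer = "en" // any registered language code from the list above

	idx, err := bleve.NewMemOnly(mapping) // in-memory index, for illustration
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()

	if err := idx.Index("doc1", map[string]string{"body": "modern text indexing"}); err != nil {
		log.Fatal(err)
	}
}
```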
@@ -0,0 +1,174 @@
/*
This code was ported from the OpenSearch project
https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
The algorithm itself was created by Mark Harwood
https://github.com/markharwood
*/
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package en
import (
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const PluralStemmerName = "stemmer_en_plural"
type EnglishPluralStemmerFilter struct {
}
func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter {
return &EnglishPluralStemmerFilter{}
}
func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
token.Term = []byte(stem(string(token.Term)))
}
return input
}
func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewEnglishPluralStemmerFilter(), nil
}
func init() {
registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
}
// ----------------------------------------------------------------------------
// Words ending in oes that retain the e when stemmed
var oesExceptions = []string{"shoes", "canoes", "oboes"}
// Words ending in ches that retain the e when stemmed
var chesExceptions = []string{
"cliches",
"avalanches",
"mustaches",
"moustaches",
"quiches",
"headaches",
"heartaches",
"porsches",
"tranches",
"caches",
}
func stem(word string) string {
runes := []rune(strings.ToLower(word))
if len(runes) < 3 || runes[len(runes)-1] != 's' {
return string(runes)
}
switch runes[len(runes)-2] {
case 'u':
fallthrough
case 's':
return string(runes)
case 'e':
// Modified ies->y logic from original s-stemmer - only work on strings > 4
// so spies -> spy still but pies->pie.
// The original code also special-cased aies and eies for no good reason as far as I can tell.
// ( no words of consequence - eg http://www.thefreedictionary.com/words-that-end-in-aies )
if len(runes) > 4 && runes[len(runes)-3] == 'i' {
runes[len(runes)-3] = 'y'
return string(runes[0 : len(runes)-2])
}
// Suffix rules to remove any dangling "e"
if len(runes) > 3 {
// xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe")
if len(runes) > 4 && runes[len(runes)-3] == 'x' {
return string(runes[0 : len(runes)-2])
}
// oes
if len(runes) > 3 && runes[len(runes)-3] == 'o' {
if isException(runes, oesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
if len(runes) > 4 {
// shes/sses
if runes[len(runes)-4] == 's' && (runes[len(runes)-3] == 'h' || runes[len(runes)-3] == 's') {
return string(runes[0 : len(runes)-2])
}
// ches
if len(runes) > 4 {
if runes[len(runes)-4] == 'c' && runes[len(runes)-3] == 'h' {
if isException(runes, chesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
}
}
}
fallthrough
default:
return string(runes[0 : len(runes)-1])
}
}
func isException(word []rune, exceptions []string) bool {
for _, exception := range exceptions {
exceptionRunes := []rune(exception)
exceptionPos := len(exceptionRunes) - 1
wordPos := len(word) - 1
matched := true
for exceptionPos >= 0 && wordPos >= 0 {
if exceptionRunes[exceptionPos] != word[wordPos] {
matched = false
break
}
exceptionPos--
wordPos--
}
if matched {
return true
}
}
return false
}
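A minimal sketch of the new plural stemmer in isolation; the token terms and expected outputs follow the suffix rules and exception lists above (token positions omitted for brevity):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/analysis/lang/en"
)

func main() {
	filter := en.NewEnglishPluralStemmerFilter()
	input := analysis.TokenStream{
		{Term: []byte("boxes")}, // xes rule: drop "es"
		{Term: []byte("spies")}, // ies rule: rewrite to "y"
		{Term: []byte("shoes")}, // oes exception: drop only the "s"
	}
	for _, tok := range filter.Filter(input) {
		fmt.Println(string(tok.Term)) // box, spy, shoe
	}
}
```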
+4
@@ -588,6 +588,10 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
indexSnapshot := s.currentSnapshot()
if indexSnapshot == nil {
return nil
}
defer func() {
_ = indexSnapshot.Close()
}()
+5 -3
@@ -102,10 +102,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
// this is because there are chances of having a series of loadChunk calls,
// and they have to be added together before sending the bytesRead at this point
// upstream.
if delta := i.iterators[i.segmentOffset].BytesRead() - prevBytesRead; delta > 0 {
i.incrementBytesRead(delta)
bytesRead := i.iterators[i.segmentOffset].BytesRead()
if bytesRead > prevBytesRead {
i.incrementBytesRead(bytesRead - prevBytesRead)
}
return rv, nil
}
i.segmentOffset++
@@ -204,6 +204,8 @@ func (i *IndexSnapshotTermFieldReader) Close() error {
// reader's bytesRead value
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(i.bytesRead)
}
search.RecordSearchCost(i.ctx, search.AddM, i.bytesRead)
}
if i.snapshot != nil {
+4 -4
@@ -124,16 +124,16 @@ func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields [
}
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
if tempRow.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf.buf)
if err != nil {
return err
}
value, err := i.kvreader.Get(keyBuf[:keySize])
value, err := i.kvreader.Get(keyBuf.buf[:keySize])
if err != nil {
return err
}
+38 -33
@@ -134,18 +134,23 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) {
return
}
var rowBufferPool sync.Pool
func GetRowBuffer() []byte {
if rb, ok := rowBufferPool.Get().([]byte); ok {
return rb
} else {
return make([]byte, RowBufferSize)
}
type rowBuffer struct {
buf []byte
}
func PutRowBuffer(buf []byte) {
rowBufferPool.Put(buf)
var rowBufferPool sync.Pool
func GetRowBuffer() *rowBuffer {
if rb, ok := rowBufferPool.Get().(*rowBuffer); ok {
return rb
} else {
buf := make([]byte, RowBufferSize)
return &rowBuffer{buf: buf}
}
}
func PutRowBuffer(rb *rowBuffer) {
rowBufferPool.Put(rb)
}
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
@@ -169,14 +174,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
for _, row := range addRows {
tfr, ok := row.(*TermFrequencyRow)
if ok {
if tfr.DictionaryRowKeySize() > len(rowBuf) {
rowBuf = make([]byte, tfr.DictionaryRowKeySize())
if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
}
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
if err != nil {
return err
}
dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] += 1
}
addKeyBytes += row.KeySize()
addValBytes += row.ValueSize()
@@ -197,14 +202,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
tfr, ok := row.(*TermFrequencyRow)
if ok {
// need to decrement counter
if tfr.DictionaryRowKeySize() > len(rowBuf) {
rowBuf = make([]byte, tfr.DictionaryRowKeySize())
if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
}
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
if err != nil {
return err
}
dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] -= 1
}
deleteKeyBytes += row.KeySize()
}
@@ -541,26 +546,26 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []In
switch row := row.(type) {
case *TermFrequencyRow:
if existingTermKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
if row.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
keySize, _ := row.KeyTo(keyBuf.buf)
if _, ok := existingTermKeys[string(keyBuf.buf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingTermKeys, string(keyBuf[:keySize]))
delete(existingTermKeys, string(keyBuf.buf[:keySize]))
continue
}
}
addRows = append(addRows, row)
case *StoredRow:
if existingStoredKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
if row.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
keySize, _ := row.KeyTo(keyBuf.buf)
if _, ok := existingStoredKeys[string(keyBuf.buf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingStoredKeys, string(keyBuf[:keySize]))
delete(existingStoredKeys, string(keyBuf.buf[:keySize]))
continue
}
}
@@ -1047,23 +1052,23 @@ func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*
}
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
if tempRow.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf.buf)
if err != nil {
return nil, err
}
value, err := kvreader.Get(keyBuf[:keySize])
value, err := kvreader.Get(keyBuf.buf[:keySize])
if err != nil {
return nil, err
}
if value == nil {
return nil, nil
}
backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value)
backIndexRow, err := NewBackIndexRowKV(keyBuf.buf[:keySize], value)
if err != nil {
return nil, err
}
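The GetRowBuffer/PutRowBuffer rework above pools a *rowBuffer instead of a bare []byte: putting a pointer avoids boxing the slice header on every Put (staticcheck SA6002), and a buffer grown via rb.buf = make(...) stays with the pooled object for reuse. A standalone sketch of the pattern, with an illustrative size and a New func that bleve itself does not use:

```go
package main

import (
	"fmt"
	"sync"
)

const rowBufferSize = 4 * 1024 // illustrative default

type rowBuffer struct {
	buf []byte
}

var rowBufferPool = sync.Pool{
	New: func() interface{} { return &rowBuffer{buf: make([]byte, rowBufferSize)} },
}

func main() {
	rb := rowBufferPool.Get().(*rowBuffer)
	if need := 8 * 1024; need > len(rb.buf) {
		rb.buf = make([]byte, need) // grow in place; the pooled object keeps the larger buffer
	}
	fmt.Println(len(rb.buf))
	rowBufferPool.Put(rb) // pointer Put: no per-call allocation for the slice header
}
```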
+11 -5
@@ -474,9 +474,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
// accounted by invoking this callback when the TFR is closed.
// 2. the docvalues portion (accounted in collector) and the retrieval
// of stored fields bytes (by LoadAndHighlightFields)
var totalBytesRead uint64
var totalSearchCost uint64
sendBytesRead := func(bytesRead uint64) {
totalBytesRead += bytesRead
totalSearchCost += bytesRead
}
ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey,
@@ -495,11 +495,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
err = serr
}
if sr != nil {
sr.BytesRead = totalBytesRead
sr.Cost = totalSearchCost
}
if sr, ok := indexReader.(*scorch.IndexSnapshot); ok {
sr.UpdateIOStats(totalBytesRead)
sr.UpdateIOStats(totalSearchCost)
}
search.RecordSearchCost(ctx, search.DoneM, 0)
}()
if req.Facets != nil {
@@ -574,6 +576,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
var storedFieldsCost uint64
for _, hit := range hits {
if i.name != "" {
hit.Index = i.name
@@ -582,9 +585,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if err != nil {
return nil, err
}
totalBytesRead += storedFieldsBytes
storedFieldsCost += storedFieldsBytes
}
totalSearchCost += storedFieldsCost
search.RecordSearchCost(ctx, search.AddM, storedFieldsCost)
atomic.AddUint64(&i.stats.searches, 1)
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
+8 -27
@@ -140,11 +140,11 @@ func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
return nil
}
// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
// documentMappingForPath returns the EXACT and closest matches for a sub
// document or for an explicitly mapped field; the closest most specific
// document mapping could be one that matches part of the provided path.
func (dm *DocumentMapping) documentMappingForPath(path string) (
*DocumentMapping, *DocumentMapping) {
pathElements := decodePath(path)
current := dm
OUTER:
@@ -165,27 +165,9 @@ OUTER:
}
}
return nil
return nil, current
}
return current
}
// closestDocMapping finds the most specific document mapping that matches
// part of the provided path
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for _, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
if name == pathElement {
current = subDocMapping
continue OUTER
}
}
break
}
return current
return current, current
}
// NewDocumentMapping returns a new document mapping
@@ -408,8 +390,7 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString)
subDocMapping, closestDocMapping := dm.documentMappingForPath(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
+14 -4
@@ -326,7 +326,7 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
// see if the _all field was disabled
allMapping := docMapping.documentMappingForPath("_all")
allMapping, _ := docMapping.documentMappingForPath("_all")
if allMapping == nil || allMapping.Enabled {
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, index.IndexField|index.IncludeTermVectors)
doc.AddField(field)
@@ -364,8 +364,9 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
return analyzerName
}
}
// now try the default mapping
pathMapping := im.DefaultMapping.documentMappingForPath(path)
pathMapping, _ := im.DefaultMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != "" {
@@ -377,7 +378,16 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
// next we will try default analyzers for the path
pathDecoded := decodePath(path)
for _, docMapping := range im.TypeMapping {
rv := docMapping.defaultAnalyzerName(pathDecoded)
if docMapping.Enabled {
rv := docMapping.defaultAnalyzerName(pathDecoded)
if rv != "" {
return rv
}
}
}
// now the default analyzer for the default mapping
if im.DefaultMapping.Enabled {
rv := im.DefaultMapping.defaultAnalyzerName(pathDecoded)
if rv != "" {
return rv
}
@@ -411,7 +421,7 @@ func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {
// first we look for explicit mapping on the field
for _, docMapping := range im.TypeMapping {
pathMapping := docMapping.documentMappingForPath(path)
pathMapping, _ := docMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != "" {
+25
@@ -225,3 +225,28 @@ func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQu
func NewIPRangeQuery(cidr string) *query.IPRangeQuery {
return query.NewIPRangeQuery(cidr)
}
// NewGeoShapeQuery creates a new Query for matching the given geo shape.
// This method can be used for creating geoshape queries for shape types
// like: point, linestring, polygon, multipoint, multilinestring,
// multipolygon and envelope.
func NewGeoShapeQuery(coordinates [][][][]float64, typ, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeQuery(coordinates, typ, relation)
}
// NewGeoShapeCircleQuery creates a new query for a geoshape that is a
// circle given center point and the radius. Radius formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters" If the unit cannot be determined,
// the entire string is parsed and the unit of meters is assumed.
func NewGeoShapeCircleQuery(coordinates []float64, radius, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeCircleQuery(coordinates, radius, relation)
}
// NewGeometryCollectionQuery creates a new query for the provided
// geometrycollection coordinates and types, which could contain
// multiple geo shapes.
func NewGeometryCollectionQuery(coordinates [][][][][]float64, types []string, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeometryCollectionQuery(coordinates, types, relation)
}
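A minimal sketch of the polygon case for the new constructor; the coordinates are illustrative [lon, lat] pairs, and the ring is closed by repeating the first point:

```go
package main

import (
	"fmt"
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	ring := [][]float64{
		{-122.45, 37.70}, {-122.40, 37.70},
		{-122.40, 37.75}, {-122.45, 37.75},
		{-122.45, 37.70},
	}
	// one polygon with a single outer ring
	q, err := bleve.NewGeoShapeQuery([][][][]float64{{ring}}, "polygon", "intersects")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(q.Geometry.Relation) // "intersects"
}
```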
+21 -9
@@ -485,15 +485,27 @@ func (ss *SearchStatus) Merge(other *SearchStatus) {
// A SearchResult describes the results of executing
// a SearchRequest.
//
// Status - Whether the search was executed on the underlying indexes
// successfully or failed, and the corresponding errors.
// Request - The SearchRequest that was executed.
// Hits - The list of documents that matched the query and their corresponding
// scores, score explanation, location info and so on.
// Total - The total number of documents that matched the query.
// Cost - Indicates how expensive the query was with respect to bytes read
// from the mmapped index files.
// MaxScore - The maximum score across all document hits for this query.
// Took - The time taken to execute the search.
// Facets - The facet results for the search.
type SearchResult struct {
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
BytesRead uint64 `json:"bytesRead"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
Cost uint64 `json:"cost"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
}
func (sr *SearchResult) Size() int {
@@ -566,7 +578,7 @@ func (sr *SearchResult) Merge(other *SearchResult) {
sr.Status.Merge(other.Status)
sr.Hits = append(sr.Hits, other.Hits...)
sr.Total += other.Total
sr.BytesRead += other.BytesRead
sr.Cost += other.Cost
if other.MaxScore > sr.MaxScore {
sr.MaxScore = other.MaxScore
}
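Since BytesRead is renamed to Cost (JSON tag bytesRead becomes cost), callers reading the old field need updating. A minimal sketch of reading the renamed field, using an illustrative in-memory index:

```go
package main

import (
	"fmt"
	"log"

	"github.com/blevesearch/bleve/v2"
)

func main() {
	idx, err := bleve.NewMemOnly(bleve.NewIndexMapping())
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()

	if err := idx.Index("doc1", map[string]string{"body": "text indexing in go"}); err != nil {
		log.Fatal(err)
	}

	res, err := idx.Search(bleve.NewSearchRequest(bleve.NewMatchQuery("indexing")))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(res.Total, res.Cost) // Cost replaces the former BytesRead field
}
```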
+18 -1
@@ -200,6 +200,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
hc.needDocIds = hc.needDocIds || loadID
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
next, err = searcher.Next(searchContext)
@@ -208,6 +209,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
if hc.total%CheckDoneEvery == 0 {
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
}
@@ -232,6 +234,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
// total bytes read as part of docValues being read every hit
// which must be accounted by invoking the callback.
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead)
search.RecordSearchCost(ctx, search.AddM, hc.bytesRead)
}
// help finalize/flush the results in case
@@ -367,7 +371,20 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
// SetFacetsBuilder registers a facet builder for this collector
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
hc.facetsBuilder = facetsBuilder
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
fieldsRequiredForFaceting := facetsBuilder.RequiredFields()
// for each of these fields, append only if not already there in hc.neededFields.
for _, field := range fieldsRequiredForFaceting {
found := false
for _, neededField := range hc.neededFields {
if field == neededField {
found = true
break
}
}
if !found {
hc.neededFields = append(hc.neededFields, field)
}
}
}
// finalizeResults starts with the heap containing the final top size+skip
@@ -63,6 +63,8 @@ func (q *GeoBoundingBoxQuery) Searcher(ctx context.Context, i index.IndexReader,
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
if q.BottomRight[0] < q.TopLeft[0] {
// cross date line, rewrite as two parts
@@ -61,6 +61,8 @@ func (q *GeoBoundingPolygonQuery) Searcher(ctx context.Context, i index.IndexRea
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
return searcher.NewGeoBoundedPolygonSearcher(ctx, i, q.Points, field, q.BoostVal.Value(), options)
}
+2
@@ -64,6 +64,8 @@ func (q *GeoDistanceQuery) Searcher(ctx context.Context, i index.IndexReader, m
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
dist, err := geo.ParseDistance(q.Distance)
if err != nil {
return nil, err
+2
@@ -107,6 +107,8 @@ func (q *GeoShapeQuery) Searcher(ctx context.Context, i index.IndexReader,
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
return searcher.NewGeoShapeSearcher(ctx, i, q.Geometry.Shape, q.Geometry.Relation, field,
q.BoostVal.Value(), options)
}
+1
@@ -77,6 +77,7 @@ func (q *NumericRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Numeric)
return searcher.NewNumericRangeSearcher(ctx, i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}
-4
@@ -27,10 +27,6 @@ var reflectStaticSizeDocumentMatch int
var reflectStaticSizeSearchContext int
var reflectStaticSizeLocation int
const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"
type SearchIOStatsCallbackFunc func(uint64)
func init() {
var dm DocumentMatch
reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())
+8 -5
@@ -59,7 +59,8 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcher(ctx, indexReader, candidates, field,
@@ -71,13 +72,15 @@ type fuzzyCandidates struct {
bytesRead uint64
}
func reportIOStats(bytesRead uint64, ctx context.Context) {
func reportIOStats(ctx context.Context, bytesRead uint64) {
// The fuzzy, regexp-like queries essentially load a dictionary,
// which potentially incurs a cost that must be accounted for by
// using the callback to report the value.
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
if statsCallbackFn != nil {
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
if ctx != nil {
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
if statsCallbackFn != nil {
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
}
}
}
@@ -49,7 +49,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
return nil, err
}
return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(dvReader,
return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader,
field, minLon, minLat, maxLon, maxLat)), nil
}
}
@@ -85,7 +85,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
}
// add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher,
buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher)
}
@@ -201,7 +201,7 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel
return isIndexed, closeF, err
}
func buildRectFilter(dvReader index.DocValueReader, field string,
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -222,6 +222,11 @@ func buildRectFilter(dvReader index.DocValueReader, field string,
}
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
for i := range lons {
if geo.BoundingBoxContains(lons[i], lats[i],
minLon, minLat, maxLon, maxLat) {
@@ -66,7 +66,7 @@ func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexRea
// wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(ctx, rectSearcher,
buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
buildDistFilter(ctx, dvReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box
@@ -113,7 +113,7 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader,
return boxSearcher, nil
}
func buildDistFilter(dvReader index.DocValueReader, field string,
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -134,6 +134,11 @@ func buildDistFilter(dvReader index.DocValueReader, field string,
}
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
for i := range lons {
dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat)
if dist <= maxDist/1000 {
@@ -71,7 +71,7 @@ func NewGeoBoundedPolygonSearcher(ctx context.Context, indexReader index.IndexRe
// wrap it in a filtering searcher that checks for the polygon inclusivity
return NewFilteringSearcher(ctx, rectSearcher,
buildPolygonFilter(dvReader, field, coordinates)), nil
buildPolygonFilter(ctx, dvReader, field, coordinates)), nil
}
const float64EqualityThreshold = 1e-6
@@ -83,7 +83,7 @@ func almostEqual(a, b float64) bool {
// buildPolygonFilter returns true if the point lies inside the
// polygon. It is based on the ray-casting technique as described
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(dvReader index.DocValueReader, field string,
func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string,
coordinates []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -107,6 +107,11 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string,
// Note: this approach works for points which are strictly inside
// the polygon. ie it might fail for certain points on the polygon boundaries.
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
nVertices := len(coordinates)
if len(coordinates) < 3 {
return false
+7 -2
@@ -54,7 +54,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
}
return NewFilteringSearcher(ctx, mSearcher,
buildRelationFilterOnShapes(dvReader, field, relation, shape)), nil
buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil
}
@@ -63,7 +63,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
// implementation of doc values.
var termSeparatorSplitSlice = []byte{0xff}
func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string,
relation string, shape index.GeoJSON) FilterFunc {
// this is for accumulating the shape's actual complete value
// spread across multiple docvalue visitor callbacks.
@@ -116,6 +116,11 @@ func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
return found
}
@@ -88,7 +88,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
// reporting back the IO stats with respect to the dictionary
// loaded, using the context
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
// cannot return MatchNoneSearcher because of interaction with
@@ -110,7 +111,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcherBytes(ctx, indexReader, terms, field,
+2 -1
@@ -102,7 +102,8 @@ func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, patte
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost,
+14
@@ -39,6 +39,9 @@ type TermSearcher struct {
}
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
if isTermQuery(ctx) {
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
}
return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
}
@@ -140,3 +143,14 @@ func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
return nil, nil
}
func isTermQuery(ctx context.Context) bool {
if ctx != nil {
// if the ctx already has a value set for the query type,
// it would've been set at a non-term searcher level.
_, ok := ctx.Value(search.QueryTypeKey).(string)
return !ok
}
// if the context is nil, then don't set the query type
return false
}
@@ -49,7 +49,8 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p
}
if ctx != nil {
reportIOStats(fieldDict.BytesRead(), ctx)
reportIOStats(ctx, fieldDict.BytesRead())
search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
}
return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
@@ -84,7 +84,8 @@ func NewTermRangeSearcher(ctx context.Context, indexReader index.IndexReader,
}
if ctx != nil {
reportIOStats(fieldDict.BytesRead(), ctx)
reportIOStats(ctx, fieldDict.BytesRead())
search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
}
return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
+51
@@ -14,6 +14,8 @@
package search
import "context"
func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap {
rv := locations[0]
@@ -67,3 +69,52 @@ func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch)
return dest
}
const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"
type SearchIOStatsCallbackFunc func(uint64)
// Implementations of SearchIncrementalCostCallbackFn should handle the following messages
// - add: increment the cost of a search operation
//   (which can be specific to a query type as well)
// - abort: the query was aborted, e.g. due to a cancellation of the search's context,
//   which can be handled differently as well
// - done: indicates that a search was complete and the tracked cost can be
//   handled safely by the implementation.
type SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg,
SearchQueryType, uint64)
type SearchIncrementalCostCallbackMsg uint
type SearchQueryType uint
const (
Term = SearchQueryType(1 << iota)
Geo
Numeric
GenericCost
)
const (
AddM = SearchIncrementalCostCallbackMsg(1 << iota)
AbortM
DoneM
)
const SearchIncrementalCostKey = "_search_incremental_cost_key"
const QueryTypeKey = "_query_type_key"
func RecordSearchCost(ctx context.Context,
msg SearchIncrementalCostCallbackMsg, bytes uint64) {
if ctx != nil {
queryType, ok := ctx.Value(QueryTypeKey).(SearchQueryType)
if !ok {
// for the cost of the non query type specific factors such as
// doc values and stored fields section.
queryType = GenericCost
}
aggCallbackFn := ctx.Value(SearchIncrementalCostKey)
if aggCallbackFn != nil {
aggCallbackFn.(SearchIncrementalCostCallbackFn)(msg, queryType, bytes)
}
}
}
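A minimal sketch of wiring the new incremental cost callback through a context; the aggregation logic is illustrative, and normally bleve invokes RecordSearchCost internally during SearchInContext rather than via direct calls:

```go
package main

import (
	"context"
	"fmt"

	"github.com/blevesearch/bleve/v2/search"
)

func main() {
	var total uint64
	cb := search.SearchIncrementalCostCallbackFn(func(
		msg search.SearchIncrementalCostCallbackMsg,
		qt search.SearchQueryType, bytes uint64) {
		switch msg {
		case search.AddM:
			total += bytes
		case search.AbortM:
			fmt.Println("aborted, tracked so far:", total)
		case search.DoneM:
			fmt.Println("done, total cost:", total, "query type:", qt)
		}
	})
	ctx := context.WithValue(context.Background(), search.SearchIncrementalCostKey, cb)

	// direct calls here just exercise the callback
	search.RecordSearchCost(ctx, search.AddM, 128)
	search.RecordSearchCost(ctx, search.DoneM, 0)
}
```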
-5
@@ -361,9 +361,6 @@ type builderNode struct {
func (n *builderNode) reset() {
n.final = false
n.finalOutput = 0
for i := range n.trans {
n.trans[i] = emptyTransition
}
n.trans = n.trans[:0]
n.next = nil
}
@@ -393,8 +390,6 @@ func (n *builderNode) equiv(o *builderNode) bool {
return true
}
var emptyTransition = transition{}
type transition struct {
out uint64
addr int
+4
@@ -719,6 +719,10 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
}
err := segment.visitStoredFields(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
fieldID := int(fieldsMap[field]) - 1
if fieldID < 0 {
// no entry for field in fieldsMap
return false
}
vals[fieldID] = append(vals[fieldID], value)
typs[fieldID] = append(typs[fieldID], typ)
+6 -3
@@ -109,7 +109,6 @@ type PostingsList struct {
chunkSize uint64
// atomic access to this variable
bytesRead uint64
}
@@ -303,12 +302,17 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
return fmt.Errorf("error loading roaring bitmap: %v", err)
}
rv.chunkSize, err = getChunkSize(d.sb.chunkMode,
chunkSize, err := getChunkSize(d.sb.chunkMode,
rv.postings.GetCardinality(), d.sb.numDocs)
if err != nil {
return err
} else if chunkSize == 0 {
return fmt.Errorf("chunk size is zero, chunkMode: %v, numDocs: %v",
d.sb.chunkMode, d.sb.numDocs)
}
rv.chunkSize = chunkSize
return nil
}
@@ -344,7 +348,6 @@ type PostingsIterator struct {
includeFreqNorm bool
includeLocs bool
// atomic access to this variable
bytesRead uint64
}
+5 -1
@@ -103,7 +103,7 @@ type SegmentBase struct {
fieldDvNames []string // field names cached in fieldDvReaders
size uint64
// atomic access to this variable
// atomic access to these variables
bytesRead uint64
bytesWritten uint64
@@ -319,6 +319,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
// read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
if vellumLen == 0 {
sb.m.Unlock()
return nil, fmt.Errorf("empty dictionary for field: %v", field)
}
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
rv.incrementBytesRead(uint64(read) + vellumLen)
rv.fst, err = vellum.Load(fstBytes)