From 1e95c9d39a91ce06b20a30bd042fe5881ec5c464 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 15:12:28 +0000 Subject: [PATCH] build(deps): bump github.com/blevesearch/bleve/v2 from 2.5.1 to 2.5.2 Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.5.1 to 2.5.2. - [Release notes](https://github.com/blevesearch/bleve/releases) - [Commits](https://github.com/blevesearch/bleve/compare/v2.5.1...v2.5.2) --- updated-dependencies: - dependency-name: github.com/blevesearch/bleve/v2 dependency-version: 2.5.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 4 +- go.sum | 8 +- .../blevesearch/bleve/v2/CONTRIBUTING.md | 4 +- .../github.com/blevesearch/bleve/v2/README.md | 43 +-- .../blevesearch/bleve/v2/SECURITY.md | 5 +- .../blevesearch/bleve/v2/geo/README.md | 247 ++++++++++-------- .../bleve/v2/index/scorch/README.md | 66 ++--- .../v2/index/scorch/mergeplan/merge_plan.go | 6 +- .../bleve/v2/index/scorch/optimize.go | 2 + .../v2/index/scorch/snapshot_index_tfr.go | 30 ++- .../bleve/v2/index/scorch/unadorned.go | 21 +- .../bleve/v2/search/collector/topn.go | 17 -- .../blevesearch/bleve/v2/search/search.go | 6 + .../blevesearch/bleve/v2/search/sort.go | 42 ++- .../blevesearch/zapx/v16/docvalues.go | 30 +-- .../zapx/v16/faiss_vector_cache.go | 22 +- .../zapx/v16/faiss_vector_posting.go | 92 +++---- .../github.com/blevesearch/zapx/v16/merge.go | 12 +- .../github.com/blevesearch/zapx/v16/read.go | 18 +- .../zapx/v16/section_inverted_text_index.go | 16 +- .../blevesearch/zapx/v16/segment.go | 158 +++++------ vendor/modules.txt | 6 +- 22 files changed, 472 insertions(+), 383 deletions(-) diff --git a/go.mod b/go.mod index 2e51ad3828..d9b859c73a 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/Nerzal/gocloak/v13 v13.9.0 github.com/bbalet/stopwords v1.0.0 github.com/beevik/etree v1.5.1 - github.com/blevesearch/bleve/v2 v2.5.1 + github.com/blevesearch/bleve/v2 v2.5.2 github.com/cenkalti/backoff v2.2.1+incompatible github.com/coreos/go-oidc/v3 v3.14.1 github.com/cs3org/go-cs3apis v0.0.0-20241105092511-3ad35d174fc1 @@ -148,7 +148,7 @@ require ( github.com/blevesearch/zapx/v13 v13.4.2 // indirect github.com/blevesearch/zapx/v14 v14.4.2 // indirect github.com/blevesearch/zapx/v15 v15.4.2 // indirect - github.com/blevesearch/zapx/v16 v16.2.3 // indirect + github.com/blevesearch/zapx/v16 v16.2.4 // indirect github.com/bluele/gcache v0.0.2 // indirect github.com/bombsimon/logrusr/v3 v3.1.0 // indirect github.com/cenkalti/backoff/v5 v5.0.2 // indirect diff --git a/go.sum b/go.sum index f2e6e29982..ed4731da26 100644 --- a/go.sum +++ b/go.sum @@ -148,8 +148,8 @@ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6 github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4= github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= -github.com/blevesearch/bleve/v2 v2.5.1 h1:cc/O++W2Hcjp1SU5ETHeE+QYWv2oV88ldYEPowdmg8M= -github.com/blevesearch/bleve/v2 v2.5.1/go.mod h1:9g/wnbWKm9AgXrU8Ecqi+IDdqjUHWymwkQRDg+5tafU= +github.com/blevesearch/bleve/v2 v2.5.2 h1:Ab0r0MODV2C5A6BEL87GqLBySqp/s9xFgceCju6BQk8= +github.com/blevesearch/bleve/v2 v2.5.2/go.mod h1:5Dj6dUQxZM6aqYT3eutTD/GpWKGFSsV8f7LDidFbwXo= 
github.com/blevesearch/bleve_index_api v1.2.8 h1:Y98Pu5/MdlkRyLM0qDHostYo7i+Vv1cDNhqTeR4Sy6Y= github.com/blevesearch/bleve_index_api v1.2.8/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0= github.com/blevesearch/geo v0.2.3 h1:K9/vbGI9ehlXdxjxDRJtoAMt7zGAsMIzc6n8zWcwnhg= @@ -182,8 +182,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= -github.com/blevesearch/zapx/v16 v16.2.3 h1:7Y0r+a3diEvlazsncexq1qoFOcBd64xwMS7aDm4lo1s= -github.com/blevesearch/zapx/v16 v16.2.3/go.mod h1:wVJ+GtURAaRG9KQAMNYyklq0egV+XJlGcXNCE0OFjjA= +github.com/blevesearch/zapx/v16 v16.2.4 h1:tGgfvleXTAkwsD5mEzgM3zCS/7pgocTCnO1oyAUjlww= +github.com/blevesearch/zapx/v16 v16.2.4/go.mod h1:Rti/REtuuMmzwsI8/C/qIzRaEoSK/wiFYw5e5ctUKKs= github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw= github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= diff --git a/vendor/github.com/blevesearch/bleve/v2/CONTRIBUTING.md b/vendor/github.com/blevesearch/bleve/v2/CONTRIBUTING.md index 5ebf3d65bc..1cc8ecca07 100644 --- a/vendor/github.com/blevesearch/bleve/v2/CONTRIBUTING.md +++ b/vendor/github.com/blevesearch/bleve/v2/CONTRIBUTING.md @@ -2,11 +2,11 @@ We look forward to your contributions, but ask that you first review these guidelines. -### Sign the CLA +## Sign the CLA As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements. 
-### Submitting a Pull Request +## Submitting a Pull Request All types of contributions are welcome, but please keep the following in mind: diff --git a/vendor/github.com/blevesearch/bleve/v2/README.md b/vendor/github.com/blevesearch/bleve/v2/README.md index ef1a6dddaa..e40715f9ff 100644 --- a/vendor/github.com/blevesearch/bleve/v2/README.md +++ b/vendor/github.com/blevesearch/bleve/v2/README.md @@ -16,41 +16,41 @@ A modern indexing + search library in GO * Index any GO data structure or JSON * Intelligent defaults backed up by powerful configuration ([scorch](https://github.com/blevesearch/bleve/blob/master/index/scorch/README.md)) * Supported field types: - * `text`, `number`, `datetime`, `boolean`, `geopoint`, `geoshape`, `IP`, `vector` + * `text`, `number`, `datetime`, `boolean`, `geopoint`, `geoshape`, `IP`, `vector` * Supported query types: - * `term`, `phrase`, `match`, `match_phrase`, `prefix`, `regexp`, `wildcard`, `fuzzy` - * term range, numeric range, date range, boolean field - * compound queries: `conjuncts`, `disjuncts`, boolean (`must`/`should`/`must_not`) - * [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) - * [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md) - * approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md) - * [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md) + * `term`, `phrase`, `match`, `match_phrase`, `prefix`, `regexp`, `wildcard`, `fuzzy` + * term range, numeric range, date range, boolean field + * compound queries: `conjuncts`, `disjuncts`, boolean (`must`/`should`/`must_not`) + * [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) + * [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md) + * approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md) + * [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md) * [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models * Hybrid search: exact + semantic * Query time boosting * Search result match highlighting with document fragments * Aggregations/faceting support: - * terms facet - * numeric range facet - * date range facet + * terms facet + * numeric range facet + * date range facet ## Indexing ```go -message := struct{ - Id string - From string - Body string +message := struct { + Id string + From string + Body string }{ - Id: "example", - From: "xyz@couchbase.com", - Body: "bleve indexing is easy", + Id: "example", + From: "xyz@couchbase.com", + Body: "bleve indexing is easy", } mapping := bleve.NewIndexMapping() index, err := bleve.New("example.bleve", mapping) if err != nil { - panic(err) + panic(err) } index.Index(message.Id, message) ``` @@ -69,10 +69,10 @@ searchResult, _ := index.Search(searchRequest) To install the CLI for the latest release of bleve, run: ```bash -$ go install github.com/blevesearch/bleve/v2/cmd/bleve@latest +go install github.com/blevesearch/bleve/v2/cmd/bleve@latest ``` -``` +```text $ bleve --help Bleve is a command-line tool to interact with a bleve index. 
@@ -113,6 +113,7 @@ Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdis ## Discussion/Issues Discuss usage/development of bleve and/or report issues here: + * [Github issues](https://github.com/blevesearch/bleve/issues) * [Google group](https://groups.google.com/forum/#!forum/bleve) diff --git a/vendor/github.com/blevesearch/bleve/v2/SECURITY.md b/vendor/github.com/blevesearch/bleve/v2/SECURITY.md index 51c6b6bdca..6f194a31aa 100644 --- a/vendor/github.com/blevesearch/bleve/v2/SECURITY.md +++ b/vendor/github.com/blevesearch/bleve/v2/SECURITY.md @@ -2,11 +2,12 @@ ## Supported Versions -We support the latest release (for example, bleve v2.3.x). +We support the latest release (for example, bleve v2.5.x). ## Reporting a Vulnerability -All security issues for this project should be reported by email to security@couchbase.com and fts-team@couchbase.com. +All security issues for this project should be reported via email to [security@couchbase.com](mailto:security@couchbase.com) and [fts-team@couchbase.com](mailto:fts-team@couchbase.com). + This mail will be delivered to the owners of this project. - To ensure your report is NOT marked as spam, please include the word "security/vulnerability" along with the project name (blevesearch/bleve) in the subject of the email. diff --git a/vendor/github.com/blevesearch/bleve/v2/geo/README.md b/vendor/github.com/blevesearch/bleve/v2/geo/README.md index 9cb73df86d..5a5bdc30d7 100644 --- a/vendor/github.com/blevesearch/bleve/v2/geo/README.md +++ b/vendor/github.com/blevesearch/bleve/v2/geo/README.md @@ -1,8 +1,7 @@ # Geo spatial search support in bleve Latest bleve spatial capabilities are powered by spatial hierarchical tokens generated from s2geometry. -You can find more details about the [s2geometry basics here](http://s2geometry.io/), and explore the -extended functionality of our forked golang port of [s2geometry lib here](https://github.com/blevesearch/geo). +You can find more details about the [s2geometry basics here](http://s2geometry.io/), and explore the extended functionality of our forked golang port of [s2geometry lib here](https://github.com/blevesearch/geo). Users can continue to index and query `geopoint` field type and the existing queries like, @@ -14,7 +13,7 @@ as before. ## New Spatial Field Type - geoshape -We have introduced a field type (`geoshape`) for representing the new spatial types. +We have introduced a field type (`geoshape`) for representing the new spatial types. Using the new `geoshape` field type, users can unblock the spatial capabilities for the [geojson](https://datatracker.ietf.org/doc/html/rfc7946) shapes like, @@ -37,7 +36,7 @@ To specify GeoJSON data, use a nested field with: - a field named type that specifies the GeoJSON object type and the type value will be case-insensitive. - a field named coordinates that specifies the object's coordinates. -``` +```text "fieldName": { "type": "GeoJSON Type", "coordinates": @@ -50,69 +49,67 @@ To specify GeoJSON data, use a nested field with: - Shapes would be internally represented as geodesics. - The GeoJSON specification strongly suggests splitting geometries so that neither of their parts crosses the antimeridian. - Examples for the various geojson shapes representations are as below. 
## Point The following specifies a [Point](https://tools.ietf.org/html/rfc7946#section-3.1.2) field in a document: -``` - { - "type": "point", - "coordinates": [75.05687713623047,22.53539059204079] - } +```json +{ + "type": "point", + "coordinates": [75.05687713623047, 22.53539059204079] +} ``` ## Linestring The following specifies a [Linestring](https://tools.ietf.org/html/rfc7946#section-3.1.4) field in a document: - -``` -{ - "type": "linestring", - "coordinates": [ - [ 77.01416015625, 23.0797317624497], - [ 78.134765625, 20.385825381874263] - ] +```json +{ + "type": "linestring", + "coordinates": [ + [77.01416015625, 23.0797317624497], + [78.134765625, 20.385825381874263] + ] } ``` - ## Polygon The following specifies a [Polygon](https://tools.ietf.org/html/rfc7946#section-3.1.6) field in a document: -``` +```json { - "type": "polygon", - "coordinates": [ [ [ 85.605, 57.207], - [ 86.396, 55.998], - [ 87.033, 56.716], - [ 85.605, 57.207] - ] ] + "type": "polygon", + "coordinates": [ + [ + [85.605, 57.207], + [86.396, 55.998], + [87.033, 56.716], + [85.605, 57.207] + ] + ] } ``` - -The first and last coordinates must match in order to close the polygon. +The first and last coordinates must match in order to close the polygon. And the exterior coordinates have to be in Counter Clockwise Order in a polygon. (CCW) - ## MultiPoint The following specifies a [Multipoint](https://tools.ietf.org/html/rfc7946#section-3.1.3) field in a document: -``` +```json { - "type": "multipoint", - "coordinates": [ - [ -115.8343505859375, 38.45789034424927], - [ -115.81237792968749, 38.19502155795575], - [ -120.80017089843749, 36.54053616262899], - [ -120.67932128906249, 36.33725319397006] - ] + "type": "multipoint", + "coordinates": [ + [-115.8343505859375, 38.45789034424927], + [-115.81237792968749, 38.19502155795575], + [-120.80017089843749, 36.54053616262899], + [-120.67932128906249, 36.33725319397006] + ] } ``` @@ -120,14 +117,23 @@ The following specifies a [Multipoint](https://tools.ietf.org/html/rfc7946#secti The following specifies a [MultiLineString](https://tools.ietf.org/html/rfc7946#section-3.1.5) field in a document: -``` +```json { - "type": "multilinestring", - "coordinates": [ - [ [ -118.31726074, 35.250105158],[ -117.509765624, 35.3756141] ], - [ [ -118.6962890, 34.624167789],[ -118.317260742, 35.03899204] ], - [ [ -117.9492187, 35.146862906], [ -117.6745605, 34.41144164] ] -] + "type": "multilinestring", + "coordinates": [ + [ + [-118.31726074, 35.250105158], + [-117.509765624, 35.3756141] + ], + [ + [-118.696289, 34.624167789], + [-118.317260742, 35.03899204] + ], + [ + [-117.9492187, 35.146862906], + [-117.6745605, 34.41144164] + ] + ] } ``` @@ -135,112 +141,138 @@ The following specifies a [MultiLineString](https://tools.ietf.org/html/rfc7946# The following specifies a [MultiPolygon](https://tools.ietf.org/html/rfc7946#section-3.1.7) field in a document: -``` +```json { - "type": "multipolygon", - "coordinates": [ - [ [ [ -73.958, 40.8003 ], [ -73.9498, 40.7968 ], - [ -73.9737, 40.7648 ], [ -73.9814, 40.7681 ], - [ -73.958, 40.8003 ] ] ], - - - [ [ [ -73.958, 40.8003 ], [ -73.9498, 40.7968 ], - [ -73.9737, 40.7648 ], [ -73.958, 40.8003 ] ] ] - ] + "type": "multipolygon", + "coordinates": [ + [ + [ + [-73.958, 40.8003], + [-73.9498, 40.7968], + [-73.9737, 40.7648], + [-73.9814, 40.7681], + [-73.958, 40.8003] + ] + ], + [ + [ + [-73.958, 40.8003], + [-73.9498, 40.7968], + [-73.9737, 40.7648], + [-73.958, 40.8003] + ] + ] + ] } ``` - ## GeometryCollection The following specifies a 
[GeometryCollection](https://tools.ietf.org/html/rfc7946#section-3.1.8) field in a document: -``` +```json { - "type": "geometrycollection", - "geometries": [ + "type": "geometrycollection", + "geometries": [ { "type": "multipoint", "coordinates": [ - [ -73.9580, 40.8003 ], - [ -73.9498, 40.7968 ], - [ -73.9737, 40.7648 ], - [ -73.9814, 40.7681 ] + [-73.958, 40.8003], + [-73.9498, 40.7968], + [-73.9737, 40.7648], + [-73.9814, 40.7681] ] }, { "type": "multilinestring", "coordinates": [ - [ [ -73.96943, 40.78519 ], [ -73.96082, 40.78095 ] ], - [ [ -73.96415, 40.79229 ], [ -73.95544, 40.78854 ] ], - [ [ -73.97162, 40.78205 ], [ -73.96374, 40.77715 ] ], - [ [ -73.97880, 40.77247 ], [ -73.97036, 40.76811 ] ] + [ + [-73.96943, 40.78519], + [-73.96082, 40.78095] + ], + [ + [-73.96415, 40.79229], + [-73.95544, 40.78854] + ], + [ + [-73.97162, 40.78205], + [-73.96374, 40.77715] + ], + [ + [-73.9788, 40.77247], + [-73.97036, 40.76811] + ] ] }, { - "type" : "polygon", - "coordinates" : [ - [ [ 0 , 0 ] , [ 3 , 6 ] , [ 6 , 1 ] , [ 0 , 0 ] ], - [ [ 2 , 2 ] , [ 3 , 3 ] , [ 4 , 2 ] , [ 2 , 2 ] ] - ] - } -] + "type": "polygon", + "coordinates": [ + [ + [0, 0], + [3, 6], + [6, 1], + [0, 0] + ], + [ + [2, 2], + [3, 3], + [4, 2], + [2, 2] + ] + ] + } + ] } ``` - ## Circle -If the user wishes to cover a circular region over the earth’s surface, then they could use this shape. +If the user wishes to cover a circular region over the earth's surface, then they could use this shape. A sample circular shape is as below. -``` -{ - "type": "circle", - "coordinates": [75.05687713623047,22.53539059204079], - "radius": "1000m" +```json +{ + "type": "circle", + "coordinates": [75.05687713623047, 22.53539059204079], + "radius": "1000m" } ``` - Circle is specified over the center point coordinates along with the radius. -Example formats supported for radius are: -"5in" , "5inch" , "7yd" , "7yards", "9ft" , "9feet", "11km", "11kilometers", "3nm" -"3nauticalmiles", "13mm" , "13millimeters", "15cm", "15centimeters", "17mi", "17miles" "19m" or "19meters". +Example formats supported for radius are: +"5in" , "5inch" , "7yd" , "7yards", "9ft" , "9feet", "11km", "11kilometers", "3nm", "3nauticalmiles", "13mm" , "13millimeters", "15cm", "15centimeters", "17mi", "17miles", "19m" or "19meters". If the unit cannot be determined, the entire string is parsed and the unit of meters is assumed. - ## Envelope -Envelope type, which consists of coordinates for upper left and lower right points of the shape -to represent a bounding rectangle in the format [[minLon, maxLat], [maxLon, minLat]]. +Envelope type, which consists of coordinates for upper left and lower right points of the shape to represent a bounding rectangle in the format [[minLon, maxLat], [maxLon, minLat]]. -``` +```json { - "type": "envelope", - "coordinates": [ - [72.83, 18.979], - [78.508,17.4555] - ] + "type": "envelope", + "coordinates": [ + [72.83, 18.979], + [78.508, 17.4555] + ] } ``` - ## GeoShape Query -Geoshape query support three types/filters of spatial querying capability across those -heterogeneous types of documents indexed. +Geoshape query support three types/filters of spatial querying capability across those heterogeneous types of documents indexed. -### Query Structure: +### Query Structure -``` +```json { "query": { "geometry": { "shape": { - "type": "", - "coordinates": [[[ ]]] + "type": "", + "coordinates": [ + [[]] + ] }, "relation": "<>" } @@ -248,7 +280,6 @@ heterogeneous types of documents indexed. 
} ``` - *shapeType* => can be any of the aforementioned types like Point, LineString, Polygon, MultiPoint, Geometrycollection, MultiLineString, MultiPolygon, Circle and Envelope. @@ -256,16 +287,14 @@ Geometrycollection, MultiLineString, MultiPolygon, Circle and Envelope. ### Relation -| FilterName | Description | -| :-----------:| :-----------------------------------------------------------------: | -| `intersects` | Return all documents whose shape field intersects the query geometry. | -| `contains` | Return all documents whose shape field contains the query geometry | -| `within` | Return all documents whose shape field is within the query geometry. | +| FilterName | Description | +| :-----------:| :-----------------------------------------------------------------: | +| `intersects` | Return all documents whose shape field intersects the query geometry. | +| `contains` | Return all documents whose shape field contains the query geometry | +| `within` | Return all documents whose shape field is within the query geometry. | ------------------------------------------------------------------------------------------------------------------------ - - ### Older Implementation First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html). diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md b/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md index fe2abde551..e385676a91 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md @@ -3,13 +3,16 @@ ## Definitions Batch + - A collection of Documents to mutate in the index. Document + - Has a unique identifier (arbitrary bytes). - Is comprised of a list of fields. Field + - Has a name (string). - Has a type (text, number, date, geopoint). - Has a value (depending on type). @@ -41,7 +44,7 @@ NOTE: If a document already contains a field \_id, it will be replaced. If this ### Proposed Structures -``` +```go type Segment interface { Dictionary(field string) TermDictionary @@ -92,9 +95,11 @@ type IndexSnapshot struct { segment []SegmentSnapshot } ``` + **What about errors?** **What about memory mgmnt or context?** **Postings List separate iterator to separate stateful from stateless** + ### Mutating the Index The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal), however for this first implementation, we assume that all of these calls can simply be turned into a Batch of size 1. This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches. @@ -105,9 +110,9 @@ From this point forward, only Batch mutations will be discussed. Sequence of Operations: -1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like: +1. For each document in the batch, search through all existing segments. 
The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like: - ``` + ```go foreach segment { dict := segment.Dictionary("\_id") postings := empty postings list @@ -119,21 +124,21 @@ Sequence of Operations: NOTE: it is illustrated above as nested for loops, but some or all of these could be concurrently. The end result is that for each segment, we have (possibly empty) bitset. -2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool. +2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool. 3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information. -4. We now have everything we need to update the state of the system to include this new snapshot. - - - Acquire a lock - - Create a new IndexSnapshot - - For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot. - - Create a new SegmentSnapshot wrapping our new segment with nil deleted docs. - - Append the new SegmentSnapshot to the IndexSnapshot - - Release the lock +4. We now have everything we need to update the state of the system to include this new snapshot. + - Acquire a lock + - Create a new IndexSnapshot + - For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot. + - Create a new SegmentSnapshot wrapping our new segment with nil deleted docs. + - Append the new SegmentSnapshot to the IndexSnapshot + - Release the lock An ASCII art example: - ``` + +```text 0 - Empty Index No segments @@ -209,7 +214,7 @@ Term search is the only searching primitive exposed in today's bleve.index API. A term search for term T in field F will look something like this: -``` +```go searchResultPostings = empty foreach segment { dict := segment.Dictionary(F) @@ -222,31 +227,31 @@ The searchResultPostings will be a new implementation of the TermFieldReader int As a reminder this interface is: -``` +```go // TermFieldReader is the interface exposing the enumeration of documents // containing a given term in a given field. Documents are returned in byte // lexicographic order over their identifiers. type TermFieldReader interface { - // Next returns the next document containing the term in this field, or nil - // when it reaches the end of the enumeration. The preAlloced TermFieldDoc - // is optional, and when non-nil, will be used instead of allocating memory. - Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error) + // Next returns the next document containing the term in this field, or nil + // when it reaches the end of the enumeration. The preAlloced TermFieldDoc + // is optional, and when non-nil, will be used instead of allocating memory. 
+	Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)

-	// Advance resets the enumeration at specified document or its immediate
-	// follower.
-	Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
+	// Advance resets the enumeration at specified document or its immediate
+	// follower.
+	Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)

-	// Count returns the number of documents contains the term in this field.
-	Count() uint64
-	Close() error
+	// Count returns the number of documents containing the term in this field.
+	Count() uint64
+	Close() error
 }
 ```

At first glance this appears problematic, we have no way to return documents in order of their identifiers. But it turns out the wording of this perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. **ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is, they could be in any order and it would be fine. There is only one search that cares and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces:

-- The weakest interface, only supports Next() no ordering at all.
-- Ordered, supporting Advance()
-- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking)
+- The weakest interface, only supports Next() no ordering at all.
+- Ordered, supporting Advance()
+- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking)

But, the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly.

@@ -254,7 +259,7 @@ NOTE: there is another place where we rely on the ordering of these hits, and th

 An ASCII art example:

-```
+```text
 Let's start with the IndexSnapshot we ended with earlier:

 3 - Index Batch [ C' ]
@@ -320,7 +325,6 @@ In the future, interfaces to detect these non-serially operating TermFieldReader

 Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, dispoting the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage.

-
 ### Memory Tracking

 All segments must be able to produce two statistics, an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of tracking structure itself (say just a few pointers).

@@ -335,14 +339,12 @@ At runtime, the state of an index (it's IndexSnapshot) is not only the contents

 This also relates to the topic rollback, addressed next...
-
 ### Rollback

 One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo.

 Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time, can be the mechanism to achieve the desired end goal.

-
 ### Internal Storage

 The bleve.index API has support for "internal storage". The ability to store information under a separate name space.
diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go
index 8ddde74a5d..adda4e9a1f 100644
--- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go
+++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go
@@ -295,8 +295,10 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
 		if len(bestRoster) == 0 {
 			return rv, nil
 		}
-
-		rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
+		// create tasks with valid merges - i.e. there should be at least 2 non-empty segments
+		if len(bestRoster) > 1 {
+			rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
+		}

 		eligibles = removeSegments(eligibles, bestRoster)
 	}
diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go
index 389d582b79..20a0706ef9 100644
--- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go
+++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize.go
@@ -393,5 +393,7 @@ func (i *IndexSnapshot) unadornedTermFieldReader(
 		includeNorm:        false,
 		includeTermVectors: false,
 		recycle:            false,
+		// signal downstream that this is a special unadorned termFieldReader
+		unadorned: true,
 	}
 }
diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go
index 48ba35682e..315c5686c6 100644
--- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go
+++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index_tfr.go
@@ -50,6 +50,7 @@ type IndexSnapshotTermFieldReader struct {
 	recycle            bool
 	bytesRead          uint64
 	ctx                context.Context
+	unadorned          bool
 }

 func (i *IndexSnapshotTermFieldReader) incrementBytesRead(val uint64) {
@@ -146,14 +147,29 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo
 	// FIXME do something better
 	// for now, if we need to seek backwards, then restart from the beginning
 	if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
-		i2, err := i.snapshot.TermFieldReader(context.TODO(), i.term, i.field,
-			i.includeFreq, i.includeNorm, i.includeTermVectors)
-		if err != nil {
-			return nil, err
+		// Check if the TFR is a special unadorned composite optimization.
+		// Such a TFR will NOT have a valid `term` or `field` set, making it
+		// impossible for the TFR to replace itself with a new one.
+		if !i.unadorned {
+			i2, err := i.snapshot.TermFieldReader(context.TODO(), i.term, i.field,
+				i.includeFreq, i.includeNorm, i.includeTermVectors)
+			if err != nil {
+				return nil, err
+			}
+			// close the current term field reader before replacing it with a new one
+			_ = i.Close()
+			*i = *(i2.(*IndexSnapshotTermFieldReader))
+		} else {
+			// unadorned composite optimization
+			// we need to reset all the iterators
+			// back to the beginning, which effectively
+			// achieves the same thing as the above
+			for _, iter := range i.iterators {
+				if optimizedIterator, ok := iter.(ResetablePostingsIterator); ok {
+					optimizedIterator.ResetIterator()
+				}
+			}
 		}
-		// close the current term field reader before replacing it with a new one
-		_ = i.Close()
-		*i = *(i2.(*IndexSnapshotTermFieldReader))
 	}
 	num, err := docInternalToNumber(ID)
 	if err != nil {
diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go
index 411ef2a358..18ce1c5823 100644
--- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go
+++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/unadorned.go
@@ -96,6 +96,12 @@ func (i *unadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap)
 	i.actual = actual.Iterator()
 }

+// ResetIterator resets the iterator to the beginning of the
+// postings list by resetting the actual iterator.
+func (i *unadornedPostingsIteratorBitmap) ResetIterator() {
+	i.actual = i.actualBM.Iterator()
+}
+
 func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.PostingsIterator {
 	return &unadornedPostingsIteratorBitmap{
 		actualBM: bm,
@@ -106,7 +112,8 @@ func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.Postings
 const docNum1HitFinished = math.MaxUint64

 type unadornedPostingsIterator1Hit struct {
-	docNum uint64
+	docNumOrig uint64 // original 1-hit docNum used to create this iterator
+	docNum     uint64 // current docNum
 }

 func (i *unadornedPostingsIterator1Hit) Next() (segment.Posting, error) {
@@ -153,12 +160,22 @@ func (i *unadornedPostingsIterator1Hit) BytesWritten() uint64 {

 func (i *unadornedPostingsIterator1Hit) ResetBytesRead(uint64) {}

+// ResetIterator resets the iterator to the original state.
+func (i *unadornedPostingsIterator1Hit) ResetIterator() { + i.docNum = i.docNumOrig +} + func newUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) segment.PostingsIterator { return &unadornedPostingsIterator1Hit{ - docNum1Hit, + docNumOrig: docNum1Hit, + docNum: docNum1Hit, } } +type ResetablePostingsIterator interface { + ResetIterator() +} + type UnadornedPosting uint64 func (p UnadornedPosting) Number() uint64 { diff --git a/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go b/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go index e3ea9d7d00..fc338f54ed 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/collector/topn.go @@ -20,7 +20,6 @@ import ( "strconv" "time" - "github.com/blevesearch/bleve/v2/numeric" "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/size" index "github.com/blevesearch/bleve_index_api" @@ -501,23 +500,7 @@ func (hc *TopNCollector) finalizeResults(r index.IndexReader) error { doc.Complete(nil) return nil }) - if err != nil { - return err - } - // Decode geo sort keys back to its distance values - for i, so := range hc.sort { - if _, ok := so.(*search.SortGeoDistance); ok { - for _, dm := range hc.results { - // The string is a int64 bit representation of a float64 distance - distInt, err := numeric.PrefixCoded(dm.Sort[i]).Int64() - if err != nil { - return err - } - dm.Sort[i] = strconv.FormatFloat(numeric.Int64ToFloat64(distInt), 'f', -1, 64) - } - } - } return err } diff --git a/vendor/github.com/blevesearch/bleve/v2/search/search.go b/vendor/github.com/blevesearch/bleve/v2/search/search.go index 5c930bce24..d199b79cb0 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/search.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/search.go @@ -154,6 +154,7 @@ type DocumentMatch struct { Locations FieldTermLocationMap `json:"locations,omitempty"` Fragments FieldFragmentMap `json:"fragments,omitempty"` Sort []string `json:"sort,omitempty"` + DecodedSort []string `json:"decoded_sort,omitempty"` // Fields contains the values for document fields listed in // SearchRequest.Fields. 
Text fields are returned as strings, numeric @@ -224,6 +225,7 @@ func (dm *DocumentMatch) Reset() *DocumentMatch { dm.IndexInternalID = indexInternalID[:0] // reuse the []interface{} already allocated (and reset len to 0) dm.Sort = sort[:0] + dm.DecodedSort = dm.DecodedSort[:0] // reuse the FieldTermLocations already allocated (and reset len to 0) dm.FieldTermLocations = ftls[:0] return dm @@ -263,6 +265,10 @@ func (dm *DocumentMatch) Size() int { sizeInBytes += size.SizeOfString + len(entry) } + for _, entry := range dm.DecodedSort { + sizeInBytes += size.SizeOfString + len(entry) + } + for k := range dm.Fields { sizeInBytes += size.SizeOfString + len(k) + size.SizeOfPtr diff --git a/vendor/github.com/blevesearch/bleve/v2/search/sort.go b/vendor/github.com/blevesearch/bleve/v2/search/sort.go index 67f143ef5a..28a7ac5dbf 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/sort.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/sort.go @@ -20,7 +20,9 @@ import ( "fmt" "math" "sort" + "strconv" "strings" + "time" "unicode/utf8" "github.com/blevesearch/bleve/v2/geo" @@ -36,6 +38,7 @@ var ( type SearchSort interface { UpdateVisitor(field string, term []byte) Value(a *DocumentMatch) string + DecodeValue(value string) string Descending() bool RequiresDocID() bool @@ -212,7 +215,9 @@ type SortOrder []SearchSort func (so SortOrder) Value(doc *DocumentMatch) { for _, soi := range so { - doc.Sort = append(doc.Sort, soi.Value(doc)) + value := soi.Value(doc) + doc.Sort = append(doc.Sort, value) + doc.DecodedSort = append(doc.DecodedSort, soi.DecodeValue(value)) } } @@ -390,6 +395,25 @@ func (s *SortField) Value(i *DocumentMatch) string { return iTerm } +func (s *SortField) DecodeValue(value string) string { + switch s.Type { + case SortFieldAsNumber: + i64, err := numeric.PrefixCoded(value).Int64() + if err != nil { + return value + } + return strconv.FormatFloat(numeric.Int64ToFloat64(i64), 'f', -1, 64) + case SortFieldAsDate: + i64, err := numeric.PrefixCoded(value).Int64() + if err != nil { + return value + } + return time.Unix(0, i64).UTC().String() + default: + return value + } +} + // Descending determines the order of the sort func (s *SortField) Descending() bool { return s.Desc @@ -545,6 +569,10 @@ func (s *SortDocID) Value(i *DocumentMatch) string { return i.ID } +func (s *SortDocID) DecodeValue(value string) string { + return value +} + // Descending determines the order of the sort func (s *SortDocID) Descending() bool { return s.Desc @@ -590,6 +618,10 @@ func (s *SortScore) Value(i *DocumentMatch) string { return "_score" } +func (s *SortScore) DecodeValue(value string) string { + return value +} + // Descending determines the order of the sort func (s *SortScore) Descending() bool { return s.Desc @@ -694,6 +726,14 @@ func (s *SortGeoDistance) Value(i *DocumentMatch) string { return string(numeric.MustNewPrefixCodedInt64(distInt64, 0)) } +func (s *SortGeoDistance) DecodeValue(value string) string { + distInt, err := numeric.PrefixCoded(value).Int64() + if err != nil { + return "" + } + return strconv.FormatFloat(numeric.Int64ToFloat64(distInt), 'f', -1, 64) +} + // Descending determines the order of the sort func (s *SortGeoDistance) Descending() bool { return s.Desc diff --git a/vendor/github.com/blevesearch/zapx/v16/docvalues.go b/vendor/github.com/blevesearch/zapx/v16/docvalues.go index 3d0d269f63..21124c4ee2 100644 --- a/vendor/github.com/blevesearch/zapx/v16/docvalues.go +++ b/vendor/github.com/blevesearch/zapx/v16/docvalues.go @@ -106,7 +106,7 @@ func (di *docValueReader) 
curChunkNumber() uint64 { return di.curChunkNum } -func (s *SegmentBase) loadFieldDocValueReader(field string, +func (sb *SegmentBase) loadFieldDocValueReader(field string, fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { // get the docValue offset for the given fields if fieldDvLocStart == fieldNotUninverted { @@ -118,15 +118,15 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, var numChunks, chunkOffsetsPosition uint64 if fieldDvLocEnd-fieldDvLocStart > 16 { - numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd]) + numChunks = binary.BigEndian.Uint64(sb.mem[fieldDvLocEnd-8 : fieldDvLocEnd]) // read the length of chunk offsets - chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) + chunkOffsetsLen := binary.BigEndian.Uint64(sb.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) // acquire position of chunk offsets chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen // 16 bytes since it corresponds to the length // of chunk offsets and the position of the offsets - s.incrementBytesRead(16) + sb.incrementBytesRead(16) } else { return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart) } @@ -140,14 +140,14 @@ func (s *SegmentBase) loadFieldDocValueReader(field string, // read the chunk offsets var offset uint64 for i := 0; i < int(numChunks); i++ { - loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64]) + loc, read := binary.Uvarint(sb.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64]) if read <= 0 { return nil, fmt.Errorf("corrupted chunk offset during segment load") } fdvIter.chunkOffsets[i] = loc offset += uint64(read) } - s.incrementBytesRead(offset) + sb.incrementBytesRead(offset) // set the data offset fdvIter.dvDataLoc = fieldDvLocStart return fdvIter, nil @@ -286,15 +286,15 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { // VisitDocValues is an implementation of the // DocValueVisitable interface -func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string, +func (sb *SegmentBase) VisitDocValues(localDocNum uint64, fields []string, visitor index.DocValueVisitor, dvsIn segment.DocVisitState) ( segment.DocVisitState, error) { dvs, ok := dvsIn.(*docVisitState) if !ok || dvs == nil { dvs = &docVisitState{} } else { - if dvs.segment != s { - dvs.segment = s + if dvs.segment != sb { + dvs.segment = sb dvs.dvrs = nil dvs.bytesRead = 0 } @@ -304,11 +304,11 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string, if dvs.dvrs == nil { dvs.dvrs = make(map[uint16]*docValueReader, len(fields)) for _, field := range fields { - if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { + if fieldIDPlus1, ok = sb.fieldsMap[field]; !ok { continue } fieldID := fieldIDPlus1 - 1 - if dvIter, exists := s.fieldDvReaders[SectionInvertedTextIndex][fieldID]; exists && + if dvIter, exists := sb.fieldDvReaders[SectionInvertedTextIndex][fieldID]; exists && dvIter != nil { dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID]) } @@ -324,14 +324,14 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string, docInChunk := localDocNum / chunkFactor var dvr *docValueReader for _, field := range fields { - if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { + if fieldIDPlus1, ok = sb.fieldsMap[field]; !ok { continue } fieldID := fieldIDPlus1 - 1 if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { // check if the chunk is 
already loaded if docInChunk != dvr.curChunkNumber() { - err := dvr.loadDvChunk(docInChunk, s) + err := dvr.loadDvChunk(docInChunk, sb) if err != nil { return dvs, err } @@ -349,6 +349,6 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string, // VisitableDocValueFields returns the list of fields with // persisted doc value terms ready to be visitable using the // VisitDocumentFieldTerms method. -func (s *SegmentBase) VisitableDocValueFields() ([]string, error) { - return s.fieldDvNames, nil +func (sb *SegmentBase) VisitableDocValueFields() ([]string, error) { + return sb.fieldDvNames, nil } diff --git a/vendor/github.com/blevesearch/zapx/v16/faiss_vector_cache.go b/vendor/github.com/blevesearch/zapx/v16/faiss_vector_cache.go index f315db2b26..ce8e1bff48 100644 --- a/vendor/github.com/blevesearch/zapx/v16/faiss_vector_cache.go +++ b/vendor/github.com/blevesearch/zapx/v16/faiss_vector_cache.go @@ -52,31 +52,19 @@ func (vc *vectorIndexCache) Clear() { vc.m.Unlock() } -// loadDocVecIDMap indicates if a non-nil docVecIDMap should be returned. -// It is true when a filtered kNN query accesses the cache since it requires the -// map. It's false otherwise. +// loadOrCreate obtains the vector index from the cache or creates it if it's not +// present. It also returns the batch executor for the field if it's present in the +// cache. func (vc *vectorIndexCache) loadOrCreate(fieldID uint16, mem []byte, loadDocVecIDMap bool, except *roaring.Bitmap) ( index *faiss.IndexImpl, vecDocIDMap map[int64]uint32, docVecIDMap map[uint32][]int64, vecIDsToExclude []int64, err error) { - index, vecDocIDMap, docVecIDMap, vecIDsToExclude, err = vc.loadFromCache( - fieldID, loadDocVecIDMap, mem, except) - return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, err -} - -// function to load the vectorDocIDMap and if required, docVecIDMap from cache -// If not, it will create these and add them to the cache. 
-func (vc *vectorIndexCache) loadFromCache(fieldID uint16, loadDocVecIDMap bool, - mem []byte, except *roaring.Bitmap) (index *faiss.IndexImpl, vecDocIDMap map[int64]uint32, - docVecIDMap map[uint32][]int64, vecIDsToExclude []int64, err error) { - vc.m.RLock() - entry, ok := vc.cache[fieldID] if ok { index, vecDocIDMap, docVecIDMap = entry.load() vecIDsToExclude = getVecIDsToExclude(vecDocIDMap, except) - if !loadDocVecIDMap || (loadDocVecIDMap && len(entry.docVecIDMap) > 0) { + if !loadDocVecIDMap || len(entry.docVecIDMap) > 0 { vc.m.RUnlock() return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil } @@ -126,7 +114,7 @@ func (vc *vectorIndexCache) createAndCacheLOCKED(fieldID uint16, mem []byte, if entry != nil { index, vecDocIDMap, docVecIDMap = entry.load() vecIDsToExclude = getVecIDsToExclude(vecDocIDMap, except) - if !loadDocVecIDMap || (loadDocVecIDMap && len(entry.docVecIDMap) > 0) { + if !loadDocVecIDMap || len(entry.docVecIDMap) > 0 { return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil } docVecIDMap = vc.addDocVecIDMapToCacheLOCKED(entry) diff --git a/vendor/github.com/blevesearch/zapx/v16/faiss_vector_posting.go b/vendor/github.com/blevesearch/zapx/v16/faiss_vector_posting.go index 0c823c13ed..2a77199c69 100644 --- a/vendor/github.com/blevesearch/zapx/v16/faiss_vector_posting.go +++ b/vendor/github.com/blevesearch/zapx/v16/faiss_vector_posting.go @@ -104,44 +104,44 @@ func (vpl *VecPostingsList) Iterator(prealloc segment.VecPostingsIterator) segme return vpl.iterator(preallocPI) } -func (p *VecPostingsList) iterator(rv *VecPostingsIterator) *VecPostingsIterator { +func (vpl *VecPostingsList) iterator(rv *VecPostingsIterator) *VecPostingsIterator { if rv == nil { rv = &VecPostingsIterator{} } else { *rv = VecPostingsIterator{} // clear the struct } // think on some of the edge cases over here. - if p.postings == nil { + if vpl.postings == nil { return rv } - rv.postings = p - rv.all = p.postings.Iterator() - if p.except != nil { - rv.ActualBM = roaring64.AndNot(p.postings, p.except) + rv.postings = vpl + rv.all = vpl.postings.Iterator() + if vpl.except != nil { + rv.ActualBM = roaring64.AndNot(vpl.postings, vpl.except) rv.Actual = rv.ActualBM.Iterator() } else { - rv.ActualBM = p.postings + rv.ActualBM = vpl.postings rv.Actual = rv.all // Optimize to use same iterator for all & Actual. 
} return rv } -func (p *VecPostingsList) Size() int { +func (vpl *VecPostingsList) Size() int { sizeInBytes := reflectStaticSizeVecPostingsList + SizeOfPtr - if p.except != nil { - sizeInBytes += int(p.except.GetSizeInBytes()) + if vpl.except != nil { + sizeInBytes += int(vpl.except.GetSizeInBytes()) } return sizeInBytes } -func (p *VecPostingsList) Count() uint64 { - if p.postings != nil { - n := p.postings.GetCardinality() +func (vpl *VecPostingsList) Count() uint64 { + if vpl.postings != nil { + n := vpl.postings.GetCardinality() var e uint64 - if p.except != nil { - e = p.postings.AndCardinality(p.except) + if vpl.except != nil { + e = vpl.postings.AndCardinality(vpl.except) } return n - e } @@ -171,51 +171,51 @@ type VecPostingsIterator struct { next VecPosting // reused across Next() calls } -func (i *VecPostingsIterator) nextCodeAtOrAfterClean(atOrAfter uint64) (uint64, bool, error) { - i.Actual.AdvanceIfNeeded(atOrAfter) +func (vpItr *VecPostingsIterator) nextCodeAtOrAfterClean(atOrAfter uint64) (uint64, bool, error) { + vpItr.Actual.AdvanceIfNeeded(atOrAfter) - if !i.Actual.HasNext() { + if !vpItr.Actual.HasNext() { return 0, false, nil // couldn't find anything } - return i.Actual.Next(), true, nil + return vpItr.Actual.Next(), true, nil } -func (i *VecPostingsIterator) nextCodeAtOrAfter(atOrAfter uint64) (uint64, bool, error) { - if i.Actual == nil || !i.Actual.HasNext() { +func (vpItr *VecPostingsIterator) nextCodeAtOrAfter(atOrAfter uint64) (uint64, bool, error) { + if vpItr.Actual == nil || !vpItr.Actual.HasNext() { return 0, false, nil } - if i.postings == nil || i.postings == emptyVecPostingsList { + if vpItr.postings == nil || vpItr.postings == emptyVecPostingsList { // couldn't find anything return 0, false, nil } - if i.postings.postings == i.ActualBM { - return i.nextCodeAtOrAfterClean(atOrAfter) + if vpItr.postings.postings == vpItr.ActualBM { + return vpItr.nextCodeAtOrAfterClean(atOrAfter) } - i.Actual.AdvanceIfNeeded(atOrAfter) + vpItr.Actual.AdvanceIfNeeded(atOrAfter) - if !i.Actual.HasNext() || !i.all.HasNext() { + if !vpItr.Actual.HasNext() || !vpItr.all.HasNext() { // couldn't find anything return 0, false, nil } - n := i.Actual.Next() - allN := i.all.Next() + n := vpItr.Actual.Next() + allN := vpItr.all.Next() // n is the next actual hit (excluding some postings), and // allN is the next hit in the full postings, and // if they don't match, move 'all' forwards until they do. for allN != n { - if !i.all.HasNext() { + if !vpItr.all.HasNext() { return 0, false, nil } - allN = i.all.Next() + allN = vpItr.all.Next() } - return uint64(n), true, nil + return n, true, nil } // a transformation function which stores both the score and the docNum as a single @@ -225,49 +225,49 @@ func getVectorCode(docNum uint32, score float32) uint64 { } // Next returns the next posting on the vector postings list, or nil at the end -func (i *VecPostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.VecPosting, error) { +func (vpItr *VecPostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.VecPosting, error) { // transform the docNum provided to the vector code format and use that to // get the next entry. 
the comparison still happens docNum wise since after // the transformation, the docNum occupies the upper 32 bits just an entry in // the postings list atOrAfter = getVectorCode(uint32(atOrAfter), 0) - code, exists, err := i.nextCodeAtOrAfter(atOrAfter) + code, exists, err := vpItr.nextCodeAtOrAfter(atOrAfter) if err != nil || !exists { return nil, err } - i.next = VecPosting{} // clear the struct - rv := &i.next + vpItr.next = VecPosting{} // clear the struct + rv := &vpItr.next rv.score = math.Float32frombits(uint32(code)) rv.docNum = code >> 32 return rv, nil } -func (itr *VecPostingsIterator) Next() (segment.VecPosting, error) { - return itr.nextAtOrAfter(0) +func (vpItr *VecPostingsIterator) Next() (segment.VecPosting, error) { + return vpItr.nextAtOrAfter(0) } -func (itr *VecPostingsIterator) Advance(docNum uint64) (segment.VecPosting, error) { - return itr.nextAtOrAfter(docNum) +func (vpItr *VecPostingsIterator) Advance(docNum uint64) (segment.VecPosting, error) { + return vpItr.nextAtOrAfter(docNum) } -func (i *VecPostingsIterator) Size() int { +func (vpItr *VecPostingsIterator) Size() int { sizeInBytes := reflectStaticSizePostingsIterator + SizeOfPtr + - i.next.Size() + vpItr.next.Size() return sizeInBytes } -func (vpl *VecPostingsIterator) ResetBytesRead(val uint64) { +func (vpItr *VecPostingsIterator) ResetBytesRead(val uint64) { } -func (vpl *VecPostingsIterator) BytesRead() uint64 { +func (vpItr *VecPostingsIterator) BytesRead() uint64 { return 0 } -func (vpl *VecPostingsIterator) BytesWritten() uint64 { +func (vpItr *VecPostingsIterator) BytesWritten() uint64 { return 0 } @@ -329,7 +329,7 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool // it isn't added to the final postings list. if docID, ok := vecDocIDMap[vecID]; ok { code := getVectorCode(docID, scores[i]) - pl.postings.Add(uint64(code)) + pl.postings.Add(code) } } } @@ -471,7 +471,7 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool if err != nil { return nil, err } - // If no error occured during the creation of the selector, then + // If no error occurred during the creation of the selector, then // it should be deleted once the search is complete. defer selector.Delete() // Ordering the retrieved centroid IDs by increasing order diff --git a/vendor/github.com/blevesearch/zapx/v16/merge.go b/vendor/github.com/blevesearch/zapx/v16/merge.go index 479f10be88..99a05c2148 100644 --- a/vendor/github.com/blevesearch/zapx/v16/merge.go +++ b/vendor/github.com/blevesearch/zapx/v16/merge.go @@ -537,21 +537,21 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, // copyStoredDocs writes out a segment's stored doc info, optimized by // using a single Write() call for the entire set of bytes. The // newDocNumOffsets is filled with the new offsets for each doc. 
-func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64, +func (sb *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64, w *CountHashWriter) error { - if s.numDocs <= 0 { + if sb.numDocs <= 0 { return nil } indexOffset0, storedOffset0, _, _, _ := - s.getDocStoredOffsets(0) // the segment's first doc + sb.getDocStoredOffsets(0) // the segment's first doc indexOffsetN, storedOffsetN, readN, metaLenN, dataLenN := - s.getDocStoredOffsets(s.numDocs - 1) // the segment's last doc + sb.getDocStoredOffsets(sb.numDocs - 1) // the segment's last doc storedOffset0New := uint64(w.Count()) - storedBytes := s.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN] + storedBytes := sb.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN] _, err := w.Write(storedBytes) if err != nil { return err @@ -560,7 +560,7 @@ func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64 // remap the storedOffset's for the docs into new offsets relative // to storedOffset0New, filling the given docNumOffsetsOut array for indexOffset := indexOffset0; indexOffset <= indexOffsetN; indexOffset += 8 { - storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8]) + storedOffset := binary.BigEndian.Uint64(sb.mem[indexOffset : indexOffset+8]) storedOffsetNew := storedOffset - storedOffset0 + storedOffset0New newDocNumOffsets[newDocNum] = storedOffsetNew newDocNum += 1 diff --git a/vendor/github.com/blevesearch/zapx/v16/read.go b/vendor/github.com/blevesearch/zapx/v16/read.go index e47d4c6abd..d4a10024dd 100644 --- a/vendor/github.com/blevesearch/zapx/v16/read.go +++ b/vendor/github.com/blevesearch/zapx/v16/read.go @@ -16,27 +16,27 @@ package zap import "encoding/binary" -func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { - _, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum) +func (sb *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) { + _, storedOffset, n, metaLen, dataLen := sb.getDocStoredOffsets(docNum) - meta := s.mem[storedOffset+n : storedOffset+n+metaLen] - data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen] + meta := sb.mem[storedOffset+n : storedOffset+n+metaLen] + data := sb.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen] return meta, data } -func (s *SegmentBase) getDocStoredOffsets(docNum uint64) ( +func (sb *SegmentBase) getDocStoredOffsets(docNum uint64) ( uint64, uint64, uint64, uint64, uint64) { - indexOffset := s.storedIndexOffset + (8 * docNum) + indexOffset := sb.storedIndexOffset + (8 * docNum) - storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8]) + storedOffset := binary.BigEndian.Uint64(sb.mem[indexOffset : indexOffset+8]) var n uint64 - metaLen, read := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64]) + metaLen, read := binary.Uvarint(sb.mem[storedOffset : storedOffset+binary.MaxVarintLen64]) n += uint64(read) - dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64]) + dataLen, read := binary.Uvarint(sb.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64]) n += uint64(read) return indexOffset, storedOffset, n, metaLen, dataLen diff --git a/vendor/github.com/blevesearch/zapx/v16/section_inverted_text_index.go b/vendor/github.com/blevesearch/zapx/v16/section_inverted_text_index.go index 400a029684..1736877e50 100644 --- a/vendor/github.com/blevesearch/zapx/v16/section_inverted_text_index.go +++ 
b/vendor/github.com/blevesearch/zapx/v16/section_inverted_text_index.go @@ -612,8 +612,10 @@ func (io *invertedIndexOpaque) writeDicts(w *CountHashWriter) (dictOffsets []uin if io.IncludeDocValues[fieldID] { for docNum, docTerms := range docTermMap { if fieldTermMap, ok := io.extraDocValues[docNum]; ok { - if sTerm, ok := fieldTermMap[uint16(fieldID)]; ok { - docTerms = append(append(docTerms, sTerm...), termSeparator) + if sTerms, ok := fieldTermMap[uint16(fieldID)]; ok { + for _, sTerm := range sTerms { + docTerms = append(append(docTerms, sTerm...), termSeparator) + } } } if len(docTerms) > 0 { @@ -797,9 +799,9 @@ func (i *invertedIndexOpaque) realloc() { if f, ok := field.(index.GeoShapeField); ok { if _, exists := i.extraDocValues[docNum]; !exists { - i.extraDocValues[docNum] = make(map[uint16][]byte) + i.extraDocValues[docNum] = make(map[uint16][][]byte) } - i.extraDocValues[docNum][fieldID] = f.EncodedShape() + i.extraDocValues[docNum][fieldID] = append(i.extraDocValues[docNum][fieldID], f.EncodedShape()) } } @@ -810,7 +812,7 @@ func (i *invertedIndexOpaque) realloc() { } if i.extraDocValues == nil { - i.extraDocValues = map[int]map[uint16][]byte{} + i.extraDocValues = map[int]map[uint16][][]byte{} } for docNum, result := range i.results { @@ -978,8 +980,8 @@ type invertedIndexOpaque struct { // store terms that are unnecessary for the term dictionaries but needed in doc values // eg - encoded geoshapes - // docNum -> fieldID -> term - extraDocValues map[int]map[uint16][]byte + // docNum -> fieldID -> terms + extraDocValues map[int]map[uint16][][]byte builder *vellum.Builder builderBuf bytes.Buffer diff --git a/vendor/github.com/blevesearch/zapx/v16/segment.go b/vendor/github.com/blevesearch/zapx/v16/segment.go index 19aebe3e98..f2545e05c4 100644 --- a/vendor/github.com/blevesearch/zapx/v16/segment.go +++ b/vendor/github.com/blevesearch/zapx/v16/segment.go @@ -269,81 +269,81 @@ func (s *Segment) incrementBytesRead(val uint64) { atomic.AddUint64(&s.bytesRead, val) } -func (s *SegmentBase) BytesWritten() uint64 { - return atomic.LoadUint64(&s.bytesWritten) +func (sb *SegmentBase) BytesWritten() uint64 { + return atomic.LoadUint64(&sb.bytesWritten) } -func (s *SegmentBase) setBytesWritten(val uint64) { - atomic.AddUint64(&s.bytesWritten, val) +func (sb *SegmentBase) setBytesWritten(val uint64) { + atomic.AddUint64(&sb.bytesWritten, val) } -func (s *SegmentBase) BytesRead() uint64 { +func (sb *SegmentBase) BytesRead() uint64 { return 0 } -func (s *SegmentBase) ResetBytesRead(val uint64) {} +func (sb *SegmentBase) ResetBytesRead(val uint64) {} -func (s *SegmentBase) incrementBytesRead(val uint64) { - atomic.AddUint64(&s.bytesRead, val) +func (sb *SegmentBase) incrementBytesRead(val uint64) { + atomic.AddUint64(&sb.bytesRead, val) } -func (s *SegmentBase) loadFields() error { +func (sb *SegmentBase) loadFields() error { // NOTE for now we assume the fields index immediately precedes // the footer, and if this changes, need to adjust accordingly (or // store explicit length), where s.mem was sliced from s.mm in Open(). 
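The `extraDocValues` hunks above widen the per-field value from a single `[]byte` to a `[][]byte`, so a document carrying several geo shapes in one field keeps every encoded shape instead of only the last one written; `writeDicts` then flattens each term into the doc-values blob followed by a separator. A toy sketch of that accumulate-then-flatten pattern follows (the `termSeparator` value here is illustrative); the `loadFields` body under the NOTE above continues after the sketch.

```go
package main

import "fmt"

// termSeparator stands in for zapx's reserved separator byte between
// doc-values terms (the concrete value here is illustrative).
const termSeparator = byte(0xff)

func main() {
	// docNum -> fieldID -> terms, the new extraDocValues shape
	extra := map[int]map[uint16][][]byte{}

	add := func(docNum int, fieldID uint16, term []byte) {
		if _, ok := extra[docNum]; !ok {
			extra[docNum] = make(map[uint16][][]byte)
		}
		// append rather than overwrite: the fix keeps every encoded
		// shape for this doc/field instead of only the last one
		extra[docNum][fieldID] = append(extra[docNum][fieldID], term)
	}

	add(0, 1, []byte("shapeA"))
	add(0, 1, []byte("shapeB"))

	// flatten for doc values, mirroring the new writeDicts loop
	var docTerms []byte
	for _, sTerm := range extra[0][1] {
		docTerms = append(append(docTerms, sTerm...), termSeparator)
	}
	fmt.Printf("%q\n", docTerms) // "shapeA\xffshapeB\xff"
}
```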
- fieldsIndexEnd := uint64(len(s.mem)) + fieldsIndexEnd := uint64(len(sb.mem)) // iterate through fields index var fieldID uint64 - for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { - addr := binary.BigEndian.Uint64(s.mem[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8]) + for sb.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd { + addr := binary.BigEndian.Uint64(sb.mem[sb.fieldsIndexOffset+(8*fieldID) : sb.fieldsIndexOffset+(8*fieldID)+8]) // accounting the address of the dictLoc being read from file - s.incrementBytesRead(8) + sb.incrementBytesRead(8) - dictLoc, read := binary.Uvarint(s.mem[addr:fieldsIndexEnd]) + dictLoc, read := binary.Uvarint(sb.mem[addr:fieldsIndexEnd]) n := uint64(read) - s.dictLocs = append(s.dictLocs, dictLoc) + sb.dictLocs = append(sb.dictLocs, dictLoc) var nameLen uint64 - nameLen, read = binary.Uvarint(s.mem[addr+n : fieldsIndexEnd]) + nameLen, read = binary.Uvarint(sb.mem[addr+n : fieldsIndexEnd]) n += uint64(read) - name := string(s.mem[addr+n : addr+n+nameLen]) + name := string(sb.mem[addr+n : addr+n+nameLen]) - s.incrementBytesRead(n + nameLen) - s.fieldsInv = append(s.fieldsInv, name) - s.fieldsMap[name] = uint16(fieldID + 1) + sb.incrementBytesRead(n + nameLen) + sb.fieldsInv = append(sb.fieldsInv, name) + sb.fieldsMap[name] = uint16(fieldID + 1) fieldID++ } return nil } -func (s *SegmentBase) loadFieldsNew() error { - pos := s.sectionsIndexOffset +func (sb *SegmentBase) loadFieldsNew() error { + pos := sb.sectionsIndexOffset if pos == 0 { // this is the case only for older file formats - return s.loadFields() + return sb.loadFields() } seek := pos + binary.MaxVarintLen64 - if seek > uint64(len(s.mem)) { + if seek > uint64(len(sb.mem)) { // handling a buffer overflow case. // a rare case where the backing buffer is not large enough to be read directly via // a pos+binary.MaxVarintLen64 seek. For example, this can happen when there is only // one field to be indexed in the entire batch of data and while writing out // these fields' metadata, you write 1 + 8 bytes whereas the MaxVarintLen64 = 10. - seek = uint64(len(s.mem)) + seek = uint64(len(sb.mem)) } // read the number of fields - numFields, sz := binary.Uvarint(s.mem[pos:seek]) + numFields, sz := binary.Uvarint(sb.mem[pos:seek]) // here, the pos is incremented by the number of valid bytes read from the buffer // so in the edge case pointed out above the numFields = 1, the sz = 1 as well. pos += uint64(sz) - s.incrementBytesRead(uint64(sz)) + sb.incrementBytesRead(uint64(sz)) // the following loop will be executed only once in the edge case pointed out above // since there is only one field's offset stored, which occupies 8 bytes. @@ -352,17 +352,17 @@ func (s *SegmentBase) loadFieldsNew() error { // the specific section's parsing logic.
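The clamped `seek` in `loadFieldsNew` above exists because `binary.Uvarint` is handed a fixed `pos+binary.MaxVarintLen64` window, and on a very small segment (e.g. a single field) that window would run past the end of the backing buffer. A sketch of the guard in isolation, with a made-up nine-byte buffer, follows; the `loadFieldsNew` loop continues after it.

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// readUvarintAt mirrors loadFieldsNew's guard: clamp the read window to
// the end of the backing buffer before slicing, since pos+MaxVarintLen64
// may exceed len(mem) when the section is tiny (e.g. one field).
func readUvarintAt(mem []byte, pos uint64) (val uint64, n int) {
	seek := pos + binary.MaxVarintLen64
	if seek > uint64(len(mem)) {
		seek = uint64(len(mem))
	}
	return binary.Uvarint(mem[pos:seek])
}

func main() {
	// a made-up 9-byte buffer: 1 varint byte (numFields=1) plus one
	// 8-byte offset, i.e. less than MaxVarintLen64 bytes of headroom
	mem := []byte{0x01, 0, 0, 0, 0, 0, 0, 0, 0x2a}
	numFields, sz := readUvarintAt(mem, 0)
	fmt.Println(numFields, sz) // 1 1
}
```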
var fieldID uint64 for fieldID < numFields { - addr := binary.BigEndian.Uint64(s.mem[pos : pos+8]) - s.incrementBytesRead(8) + addr := binary.BigEndian.Uint64(sb.mem[pos : pos+8]) + sb.incrementBytesRead(8) fieldSectionMap := make(map[uint16]uint64) - err := s.loadFieldNew(uint16(fieldID), addr, fieldSectionMap) + err := sb.loadFieldNew(uint16(fieldID), addr, fieldSectionMap) if err != nil { return err } - s.fieldsSectionsMap = append(s.fieldsSectionsMap, fieldSectionMap) + sb.fieldsSectionsMap = append(sb.fieldsSectionsMap, fieldSectionMap) fieldID++ pos += 8 @@ -371,7 +371,7 @@ func (s *SegmentBase) loadFieldsNew() error { return nil } -func (s *SegmentBase) loadFieldNew(fieldID uint16, pos uint64, +func (sb *SegmentBase) loadFieldNew(fieldID uint16, pos uint64, fieldSectionMap map[uint16]uint64) error { if pos == 0 { // there is no indexing structure present for this field/section @@ -379,23 +379,23 @@ func (s *SegmentBase) loadFieldNew(fieldID uint16, pos uint64, } fieldStartPos := pos // to track the number of bytes read - fieldNameLen, sz := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64]) + fieldNameLen, sz := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64]) pos += uint64(sz) - fieldName := string(s.mem[pos : pos+fieldNameLen]) + fieldName := string(sb.mem[pos : pos+fieldNameLen]) pos += fieldNameLen - s.fieldsInv = append(s.fieldsInv, fieldName) - s.fieldsMap[fieldName] = uint16(fieldID + 1) + sb.fieldsInv = append(sb.fieldsInv, fieldName) + sb.fieldsMap[fieldName] = uint16(fieldID + 1) - fieldNumSections, sz := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64]) + fieldNumSections, sz := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64]) pos += uint64(sz) for sectionIdx := uint64(0); sectionIdx < fieldNumSections; sectionIdx++ { // read section id - fieldSectionType := binary.BigEndian.Uint16(s.mem[pos : pos+2]) + fieldSectionType := binary.BigEndian.Uint16(sb.mem[pos : pos+2]) pos += 2 - fieldSectionAddr := binary.BigEndian.Uint64(s.mem[pos : pos+8]) + fieldSectionAddr := binary.BigEndian.Uint64(sb.mem[pos : pos+8]) pos += 8 fieldSectionMap[fieldSectionType] = fieldSectionAddr if fieldSectionType == SectionInvertedTextIndex { @@ -403,33 +403,33 @@ func (s *SegmentBase) loadFieldNew(fieldID uint16, pos uint64, // 0 and during query time, because there is no valid dictionary we // will just have to follow a no-op path. if fieldSectionAddr == 0 { - s.dictLocs = append(s.dictLocs, 0) + sb.dictLocs = append(sb.dictLocs, 0) continue } read := 0 // skip the doc values - _, n := binary.Uvarint(s.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64]) + _, n := binary.Uvarint(sb.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64]) fieldSectionAddr += uint64(n) read += n - _, n = binary.Uvarint(s.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64]) + _, n = binary.Uvarint(sb.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64]) fieldSectionAddr += uint64(n) read += n - dictLoc, n := binary.Uvarint(s.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64]) + dictLoc, n := binary.Uvarint(sb.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64]) // account the bytes read while parsing the field's inverted index section - s.incrementBytesRead(uint64(read + n)) - s.dictLocs = append(s.dictLocs, dictLoc) + sb.incrementBytesRead(uint64(read + n)) + sb.dictLocs = append(sb.dictLocs, dictLoc) } } // account the bytes read while parsing the sections field index.
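For reference, the per-field block that `loadFieldNew` above walks is laid out as [uvarint nameLen][name][uvarint numSections], followed by numSections records of [2-byte big-endian section type][8-byte big-endian section address]. The sketch below builds one such block and re-parses it in the same order; the builder side is illustrative, only the parse order is taken from the code above. The bytes-read accounting for this block continues below.

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// buildFieldBlock serializes one field entry in the layout that
// loadFieldNew reads back: name length, name, section count, then
// (sectionType uint16, sectionAddr uint64) pairs, both big-endian.
// (Illustrative builder, not zapx's writer.)
func buildFieldBlock(name string, sections map[uint16]uint64) []byte {
	var buf []byte
	buf = binary.AppendUvarint(buf, uint64(len(name)))
	buf = append(buf, name...)
	buf = binary.AppendUvarint(buf, uint64(len(sections)))
	for typ, addr := range sections {
		buf = binary.BigEndian.AppendUint16(buf, typ)
		buf = binary.BigEndian.AppendUint64(buf, addr)
	}
	return buf
}

func main() {
	mem := buildFieldBlock("body", map[uint16]uint64{1: 4096})

	// re-parse, following loadFieldNew step by step
	nameLen, n := binary.Uvarint(mem)
	pos := uint64(n)
	name := string(mem[pos : pos+nameLen])
	pos += nameLen
	numSections, n2 := binary.Uvarint(mem[pos:])
	pos += uint64(n2)
	typ := binary.BigEndian.Uint16(mem[pos : pos+2])
	addr := binary.BigEndian.Uint64(mem[pos+2 : pos+10])
	fmt.Println(name, numSections, typ, addr) // body 1 1 4096
}
```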
- s.incrementBytesRead((pos - uint64(fieldStartPos)) + fieldNameLen) + sb.incrementBytesRead((pos - uint64(fieldStartPos)) + fieldNameLen) return nil } // Dictionary returns the term dictionary for the specified field -func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) { - dict, err := s.dictionary(field) +func (sb *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) { + dict, err := sb.dictionary(field) if err == nil && dict == nil { return emptyDictionary, nil } @@ -479,8 +479,8 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { } // Thesaurus returns the thesaurus with the specified name, or an empty thesaurus if not found. -func (s *SegmentBase) Thesaurus(name string) (segment.Thesaurus, error) { - thesaurus, err := s.thesaurus(name) +func (sb *SegmentBase) Thesaurus(name string) (segment.Thesaurus, error) { + thesaurus, err := sb.thesaurus(name) if err == nil && thesaurus == nil { return emptyThesaurus, nil } @@ -537,17 +537,17 @@ var visitDocumentCtxPool = sync.Pool{ // VisitStoredFields invokes the StoredFieldValueVisitor for each stored field // for the specified doc number -func (s *SegmentBase) VisitStoredFields(num uint64, visitor segment.StoredFieldValueVisitor) error { +func (sb *SegmentBase) VisitStoredFields(num uint64, visitor segment.StoredFieldValueVisitor) error { vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) defer visitDocumentCtxPool.Put(vdc) - return s.visitStoredFields(vdc, num, visitor) + return sb.visitStoredFields(vdc, num, visitor) } -func (s *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64, +func (sb *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64, visitor segment.StoredFieldValueVisitor) error { // first make sure this is a valid number in this segment - if num < s.numDocs { - meta, compressed := s.getDocStoredMetaAndCompressed(num) + if num < sb.numDocs { + meta, compressed := sb.getDocStoredMetaAndCompressed(num) vdc.reader.Reset(meta) @@ -611,7 +611,7 @@ func (s *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64, } } value := uncompressed[offset : offset+l] - keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) + keepGoing = visitor(sb.fieldsInv[field], byte(typ), value, arrayPos) } vdc.buf = uncompressed @@ -620,14 +620,14 @@ func (s *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64, } // DocID returns the value of the _id field for the given docNum -func (s *SegmentBase) DocID(num uint64) ([]byte, error) { - if num >= s.numDocs { +func (sb *SegmentBase) DocID(num uint64) ([]byte, error) { + if num >= sb.numDocs { return nil, nil } vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) - meta, compressed := s.getDocStoredMetaAndCompressed(num) + meta, compressed := sb.getDocStoredMetaAndCompressed(num) vdc.reader.Reset(meta) @@ -644,17 +644,17 @@ func (s *SegmentBase) DocID(num uint64) ([]byte, error) { } // Count returns the number of documents in this segment. 
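`VisitStoredFields` above checks its decode context out of `visitDocumentCtxPool` and returns it when done, so per-document scratch state is recycled rather than reallocated under load. A generic sketch of that `sync.Pool` pattern follows (the `decodeCtx` type is an illustrative stand-in for zapx's `visitDocumentCtx`); the `Count` body under the doc comment above continues after the sketch.

```go
package main

import (
	"fmt"
	"sync"
)

// decodeCtx is an illustrative stand-in for zapx's visitDocumentCtx:
// reusable scratch state that is wasteful to allocate per document.
type decodeCtx struct {
	buf []byte
}

var decodeCtxPool = sync.Pool{
	New: func() interface{} { return &decodeCtx{} },
}

// visitDoc checks a context out of the pool for the duration of one
// document visit, the same Get/defer Put shape used above.
func visitDoc(num uint64) {
	ctx := decodeCtxPool.Get().(*decodeCtx)
	defer decodeCtxPool.Put(ctx)

	ctx.buf = ctx.buf[:0] // reuse capacity left over from earlier visits
	ctx.buf = append(ctx.buf, byte(num))
	fmt.Printf("doc %d: len=%d cap=%d\n", num, len(ctx.buf), cap(ctx.buf))
}

func main() {
	for i := uint64(0); i < 3; i++ {
		visitDoc(i)
	}
}
```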
-func (s *SegmentBase) Count() uint64 { - return s.numDocs +func (sb *SegmentBase) Count() uint64 { + return sb.numDocs } // DocNumbers returns a bitset corresponding to the doc numbers of all the // provided _id strings -func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { +func (sb *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { rv := roaring.New() - if len(s.fieldsMap) > 0 { - idDict, err := s.dictionary("_id") + if len(sb.fieldsMap) > 0 { + idDict, err := sb.dictionary("_id") if err != nil { return nil, err } @@ -681,8 +681,8 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } // Fields returns the field names used in this segment -func (s *SegmentBase) Fields() []string { - return s.fieldsInv +func (sb *SegmentBase) Fields() []string { + return sb.fieldsInv } // Path returns the path of this segment on disk @@ -907,44 +907,44 @@ func (s *Segment) loadDvReaders() error { // since segmentBase is an in-memory segment, it can be called only // for v16 file formats as part of InitSegmentBase() while introducing // a segment into the system. -func (s *SegmentBase) loadDvReaders() error { +func (sb *SegmentBase) loadDvReaders() error { // evaluate -> s.docValueOffset == fieldNotUninverted - if s.numDocs == 0 { + if sb.numDocs == 0 { return nil } - for fieldID, sections := range s.fieldsSectionsMap { + for fieldID, sections := range sb.fieldsSectionsMap { for secID, secOffset := range sections { if secOffset > 0 { // fixed encoding as of now, need to uvarint this pos := secOffset var read uint64 - fieldLocStart, n := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64]) + fieldLocStart, n := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64]) if n <= 0 { - return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %v", s.fieldsInv[fieldID]) + return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %v", sb.fieldsInv[fieldID]) } pos += uint64(n) read += uint64(n) - fieldLocEnd, n := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64]) + fieldLocEnd, n := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64]) if read <= 0 { - return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %v", s.fieldsInv[fieldID]) + return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %v", sb.fieldsInv[fieldID]) } pos += uint64(n) read += uint64(n) - s.incrementBytesRead(read) + sb.incrementBytesRead(read) - fieldDvReader, err := s.loadFieldDocValueReader(s.fieldsInv[fieldID], fieldLocStart, fieldLocEnd) + fieldDvReader, err := sb.loadFieldDocValueReader(sb.fieldsInv[fieldID], fieldLocStart, fieldLocEnd) if err != nil { return err } if fieldDvReader != nil { - if s.fieldDvReaders[secID] == nil { - s.fieldDvReaders[secID] = make(map[uint16]*docValueReader) + if sb.fieldDvReaders[secID] == nil { + sb.fieldDvReaders[secID] = make(map[uint16]*docValueReader) } - s.fieldDvReaders[secID][uint16(fieldID)] = fieldDvReader - s.fieldDvNames = append(s.fieldDvNames, s.fieldsInv[fieldID]) + sb.fieldDvReaders[secID][uint16(fieldID)] = fieldDvReader + sb.fieldDvNames = append(sb.fieldDvNames, sb.fieldsInv[fieldID]) } } } diff --git a/vendor/modules.txt b/vendor/modules.txt index 1a86a52633..a57a7dad01 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -161,7 +161,7 @@ github.com/bitly/go-simplejson # github.com/bits-and-blooms/bitset v1.22.0 ## explicit; go 1.16 github.com/bits-and-blooms/bitset -# github.com/blevesearch/bleve/v2 v2.5.1 +# 
github.com/blevesearch/bleve/v2 v2.5.2 ## explicit; go 1.23 github.com/blevesearch/bleve/v2 github.com/blevesearch/bleve/v2/analysis @@ -260,8 +260,8 @@ github.com/blevesearch/zapx/v14 # github.com/blevesearch/zapx/v15 v15.4.2 ## explicit; go 1.21 github.com/blevesearch/zapx/v15 -# github.com/blevesearch/zapx/v16 v16.2.3 -## explicit; go 1.21 +# github.com/blevesearch/zapx/v16 v16.2.4 +## explicit; go 1.23 github.com/blevesearch/zapx/v16 # github.com/bluele/gcache v0.0.2 ## explicit; go 1.15
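For anyone reproducing this bump by hand rather than merging the Dependabot branch: the go.mod, go.sum, vendor tree, and modules.txt changes above are what `go get github.com/blevesearch/bleve/v2@v2.5.2` followed by `go mod tidy` and `go mod vendor`, run from the repo root, regenerate. The zapx/v16 move to v16.2.4 is marked `// indirect` in go.mod, so it falls out of bleve's own requirements rather than needing a separate `go get`.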