Merge pull request #999 from opencloud-eu/dependabot/go_modules/github.com/blevesearch/bleve/v2-2.5.2

build(deps): bump github.com/blevesearch/bleve/v2 from 2.5.1 to 2.5.2
This commit is contained in:
Ralf Haferkamp
2025-06-05 12:14:58 +02:00
committed by GitHub
22 changed files with 472 additions and 383 deletions

4
go.mod
View File

@@ -11,7 +11,7 @@ require (
github.com/Nerzal/gocloak/v13 v13.9.0
github.com/bbalet/stopwords v1.0.0
github.com/beevik/etree v1.5.1
github.com/blevesearch/bleve/v2 v2.5.1
github.com/blevesearch/bleve/v2 v2.5.2
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/coreos/go-oidc/v3 v3.14.1
github.com/cs3org/go-cs3apis v0.0.0-20241105092511-3ad35d174fc1
@@ -148,7 +148,7 @@ require (
github.com/blevesearch/zapx/v13 v13.4.2 // indirect
github.com/blevesearch/zapx/v14 v14.4.2 // indirect
github.com/blevesearch/zapx/v15 v15.4.2 // indirect
github.com/blevesearch/zapx/v16 v16.2.3 // indirect
github.com/blevesearch/zapx/v16 v16.2.4 // indirect
github.com/bluele/gcache v0.0.2 // indirect
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect
github.com/cenkalti/backoff/v5 v5.0.2 // indirect

8
go.sum
View File

@@ -148,8 +148,8 @@ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blevesearch/bleve/v2 v2.5.1 h1:cc/O++W2Hcjp1SU5ETHeE+QYWv2oV88ldYEPowdmg8M=
github.com/blevesearch/bleve/v2 v2.5.1/go.mod h1:9g/wnbWKm9AgXrU8Ecqi+IDdqjUHWymwkQRDg+5tafU=
github.com/blevesearch/bleve/v2 v2.5.2 h1:Ab0r0MODV2C5A6BEL87GqLBySqp/s9xFgceCju6BQk8=
github.com/blevesearch/bleve/v2 v2.5.2/go.mod h1:5Dj6dUQxZM6aqYT3eutTD/GpWKGFSsV8f7LDidFbwXo=
github.com/blevesearch/bleve_index_api v1.2.8 h1:Y98Pu5/MdlkRyLM0qDHostYo7i+Vv1cDNhqTeR4Sy6Y=
github.com/blevesearch/bleve_index_api v1.2.8/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
github.com/blevesearch/geo v0.2.3 h1:K9/vbGI9ehlXdxjxDRJtoAMt7zGAsMIzc6n8zWcwnhg=
@@ -182,8 +182,8 @@ github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT
github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8=
github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k=
github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw=
github.com/blevesearch/zapx/v16 v16.2.3 h1:7Y0r+a3diEvlazsncexq1qoFOcBd64xwMS7aDm4lo1s=
github.com/blevesearch/zapx/v16 v16.2.3/go.mod h1:wVJ+GtURAaRG9KQAMNYyklq0egV+XJlGcXNCE0OFjjA=
github.com/blevesearch/zapx/v16 v16.2.4 h1:tGgfvleXTAkwsD5mEzgM3zCS/7pgocTCnO1oyAUjlww=
github.com/blevesearch/zapx/v16 v16.2.4/go.mod h1:Rti/REtuuMmzwsI8/C/qIzRaEoSK/wiFYw5e5ctUKKs=
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=

View File

@@ -2,11 +2,11 @@
We look forward to your contributions, but ask that you first review these guidelines.
### Sign the CLA
## Sign the CLA
As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements.
### Submitting a Pull Request
## Submitting a Pull Request
All types of contributions are welcome, but please keep the following in mind:

View File

@@ -16,41 +16,41 @@ A modern indexing + search library in GO
* Index any GO data structure or JSON
* Intelligent defaults backed up by powerful configuration ([scorch](https://github.com/blevesearch/bleve/blob/master/index/scorch/README.md))
* Supported field types:
* `text`, `number`, `datetime`, `boolean`, `geopoint`, `geoshape`, `IP`, `vector`
* `text`, `number`, `datetime`, `boolean`, `geopoint`, `geoshape`, `IP`, `vector`
* Supported query types:
* `term`, `phrase`, `match`, `match_phrase`, `prefix`, `regexp`, `wildcard`, `fuzzy`
* term range, numeric range, date range, boolean field
* compound queries: `conjuncts`, `disjuncts`, boolean (`must`/`should`/`must_not`)
* [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
* [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
* `term`, `phrase`, `match`, `match_phrase`, `prefix`, `regexp`, `wildcard`, `fuzzy`
* term range, numeric range, date range, boolean field
* compound queries: `conjuncts`, `disjuncts`, boolean (`must`/`should`/`must_not`)
* [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
* [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
* [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models
* Hybrid search: exact + semantic
* Query time boosting
* Search result match highlighting with document fragments
* Aggregations/faceting support:
* terms facet
* numeric range facet
* date range facet
* terms facet
* numeric range facet
* date range facet
## Indexing
```go
message := struct{
Id string
From string
Body string
message := struct {
Id string
From string
Body string
}{
Id: "example",
From: "xyz@couchbase.com",
Body: "bleve indexing is easy",
Id: "example",
From: "xyz@couchbase.com",
Body: "bleve indexing is easy",
}
mapping := bleve.NewIndexMapping()
index, err := bleve.New("example.bleve", mapping)
if err != nil {
panic(err)
panic(err)
}
index.Index(message.Id, message)
```
@@ -69,10 +69,10 @@ searchResult, _ := index.Search(searchRequest)
To install the CLI for the latest release of bleve, run:
```bash
$ go install github.com/blevesearch/bleve/v2/cmd/bleve@latest
go install github.com/blevesearch/bleve/v2/cmd/bleve@latest
```
```
```text
$ bleve --help
Bleve is a command-line tool to interact with a bleve index.
@@ -113,6 +113,7 @@ Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdis
## Discussion/Issues
Discuss usage/development of bleve and/or report issues here:
* [Github issues](https://github.com/blevesearch/bleve/issues)
* [Google group](https://groups.google.com/forum/#!forum/bleve)

View File

@@ -2,11 +2,12 @@
## Supported Versions
We support the latest release (for example, bleve v2.3.x).
We support the latest release (for example, bleve v2.5.x).
## Reporting a Vulnerability
All security issues for this project should be reported by email to security@couchbase.com and fts-team@couchbase.com.
All security issues for this project should be reported via email to [security@couchbase.com](mailto:security@couchbase.com) and [fts-team@couchbase.com](mailto:fts-team@couchbase.com).
This mail will be delivered to the owners of this project.
- To ensure your report is NOT marked as spam, please include the word "security/vulnerability" along with the project name (blevesearch/bleve) in the subject of the email.

View File

@@ -1,8 +1,7 @@
# Geo spatial search support in bleve
Latest bleve spatial capabilities are powered by spatial hierarchical tokens generated from s2geometry.
You can find more details about the [s2geometry basics here](http://s2geometry.io/), and explore the
extended functionality of our forked golang port of [s2geometry lib here](https://github.com/blevesearch/geo).
You can find more details about the [s2geometry basics here](http://s2geometry.io/), and explore the extended functionality of our forked golang port of [s2geometry lib here](https://github.com/blevesearch/geo).
Users can continue to index and query `geopoint` field type and the existing queries like,
@@ -14,7 +13,7 @@ as before.
## New Spatial Field Type - geoshape
We have introduced a field type (`geoshape`) for representing the new spatial types.
We have introduced a field type (`geoshape`) for representing the new spatial types.
Using the new `geoshape` field type, users can unblock the spatial capabilities
for the [geojson](https://datatracker.ietf.org/doc/html/rfc7946) shapes like,
@@ -37,7 +36,7 @@ To specify GeoJSON data, use a nested field with:
- a field named type that specifies the GeoJSON object type and the type value will be case-insensitive.
- a field named coordinates that specifies the object's coordinates.
```
```text
"fieldName": {
"type": "GeoJSON Type",
"coordinates": <coordinates>
@@ -50,69 +49,67 @@ To specify GeoJSON data, use a nested field with:
- Shapes would be internally represented as geodesics.
- The GeoJSON specification strongly suggests splitting geometries so that neither of their parts crosses the antimeridian.
Examples for the various geojson shapes representations are as below.
## Point
The following specifies a [Point](https://tools.ietf.org/html/rfc7946#section-3.1.2) field in a document:
```
{
"type": "point",
"coordinates": [75.05687713623047,22.53539059204079]
}
```json
{
"type": "point",
"coordinates": [75.05687713623047, 22.53539059204079]
}
```
## Linestring
The following specifies a [Linestring](https://tools.ietf.org/html/rfc7946#section-3.1.4) field in a document:
```
{
"type": "linestring",
"coordinates": [
[ 77.01416015625, 23.0797317624497],
[ 78.134765625, 20.385825381874263]
]
```json
{
"type": "linestring",
"coordinates": [
[77.01416015625, 23.0797317624497],
[78.134765625, 20.385825381874263]
]
}
```
## Polygon
The following specifies a [Polygon](https://tools.ietf.org/html/rfc7946#section-3.1.6) field in a document:
```
```json
{
"type": "polygon",
"coordinates": [ [ [ 85.605, 57.207],
[ 86.396, 55.998],
[ 87.033, 56.716],
[ 85.605, 57.207]
] ]
"type": "polygon",
"coordinates": [
[
[85.605, 57.207],
[86.396, 55.998],
[87.033, 56.716],
[85.605, 57.207]
]
]
}
```
The first and last coordinates must match in order to close the polygon.
The first and last coordinates must match in order to close the polygon.
And the exterior coordinates have to be in Counter Clockwise Order in a polygon. (CCW)
## MultiPoint
The following specifies a [Multipoint](https://tools.ietf.org/html/rfc7946#section-3.1.3) field in a document:
```
```json
{
"type": "multipoint",
"coordinates": [
[ -115.8343505859375, 38.45789034424927],
[ -115.81237792968749, 38.19502155795575],
[ -120.80017089843749, 36.54053616262899],
[ -120.67932128906249, 36.33725319397006]
]
"type": "multipoint",
"coordinates": [
[-115.8343505859375, 38.45789034424927],
[-115.81237792968749, 38.19502155795575],
[-120.80017089843749, 36.54053616262899],
[-120.67932128906249, 36.33725319397006]
]
}
```
@@ -120,14 +117,23 @@ The following specifies a [Multipoint](https://tools.ietf.org/html/rfc7946#secti
The following specifies a [MultiLineString](https://tools.ietf.org/html/rfc7946#section-3.1.5) field in a document:
```
```json
{
"type": "multilinestring",
"coordinates": [
[ [ -118.31726074, 35.250105158],[ -117.509765624, 35.3756141] ],
[ [ -118.6962890, 34.624167789],[ -118.317260742, 35.03899204] ],
[ [ -117.9492187, 35.146862906], [ -117.6745605, 34.41144164] ]
]
"type": "multilinestring",
"coordinates": [
[
[-118.31726074, 35.250105158],
[-117.509765624, 35.3756141]
],
[
[-118.696289, 34.624167789],
[-118.317260742, 35.03899204]
],
[
[-117.9492187, 35.146862906],
[-117.6745605, 34.41144164]
]
]
}
```
@@ -135,112 +141,138 @@ The following specifies a [MultiLineString](https://tools.ietf.org/html/rfc7946#
The following specifies a [MultiPolygon](https://tools.ietf.org/html/rfc7946#section-3.1.7) field in a document:
```
```json
{
"type": "multipolygon",
"coordinates": [
[ [ [ -73.958, 40.8003 ], [ -73.9498, 40.7968 ],
[ -73.9737, 40.7648 ], [ -73.9814, 40.7681 ],
[ -73.958, 40.8003 ] ] ],
[ [ [ -73.958, 40.8003 ], [ -73.9498, 40.7968 ],
[ -73.9737, 40.7648 ], [ -73.958, 40.8003 ] ] ]
]
"type": "multipolygon",
"coordinates": [
[
[
[-73.958, 40.8003],
[-73.9498, 40.7968],
[-73.9737, 40.7648],
[-73.9814, 40.7681],
[-73.958, 40.8003]
]
],
[
[
[-73.958, 40.8003],
[-73.9498, 40.7968],
[-73.9737, 40.7648],
[-73.958, 40.8003]
]
]
]
}
```
## GeometryCollection
The following specifies a [GeometryCollection](https://tools.ietf.org/html/rfc7946#section-3.1.8) field in a document:
```
```json
{
"type": "geometrycollection",
"geometries": [
"type": "geometrycollection",
"geometries": [
{
"type": "multipoint",
"coordinates": [
[ -73.9580, 40.8003 ],
[ -73.9498, 40.7968 ],
[ -73.9737, 40.7648 ],
[ -73.9814, 40.7681 ]
[-73.958, 40.8003],
[-73.9498, 40.7968],
[-73.9737, 40.7648],
[-73.9814, 40.7681]
]
},
{
"type": "multilinestring",
"coordinates": [
[ [ -73.96943, 40.78519 ], [ -73.96082, 40.78095 ] ],
[ [ -73.96415, 40.79229 ], [ -73.95544, 40.78854 ] ],
[ [ -73.97162, 40.78205 ], [ -73.96374, 40.77715 ] ],
[ [ -73.97880, 40.77247 ], [ -73.97036, 40.76811 ] ]
[
[-73.96943, 40.78519],
[-73.96082, 40.78095]
],
[
[-73.96415, 40.79229],
[-73.95544, 40.78854]
],
[
[-73.97162, 40.78205],
[-73.96374, 40.77715]
],
[
[-73.9788, 40.77247],
[-73.97036, 40.76811]
]
]
},
{
"type" : "polygon",
"coordinates" : [
[ [ 0 , 0 ] , [ 3 , 6 ] , [ 6 , 1 ] , [ 0 , 0 ] ],
[ [ 2 , 2 ] , [ 3 , 3 ] , [ 4 , 2 ] , [ 2 , 2 ] ]
]
}
]
"type": "polygon",
"coordinates": [
[
[0, 0],
[3, 6],
[6, 1],
[0, 0]
],
[
[2, 2],
[3, 3],
[4, 2],
[2, 2]
]
]
}
]
}
```
## Circle
If the user wishes to cover a circular region over the earths surface, then they could use this shape.
If the user wishes to cover a circular region over the earth's surface, then they could use this shape.
A sample circular shape is as below.
```
{
"type": "circle",
"coordinates": [75.05687713623047,22.53539059204079],
"radius": "1000m"
```json
{
"type": "circle",
"coordinates": [75.05687713623047, 22.53539059204079],
"radius": "1000m"
}
```
Circle is specified over the center point coordinates along with the radius.
Example formats supported for radius are:
"5in" , "5inch" , "7yd" , "7yards", "9ft" , "9feet", "11km", "11kilometers", "3nm"
"3nauticalmiles", "13mm" , "13millimeters", "15cm", "15centimeters", "17mi", "17miles" "19m" or "19meters".
Example formats supported for radius are:
"5in" , "5inch" , "7yd" , "7yards", "9ft" , "9feet", "11km", "11kilometers", "3nm", "3nauticalmiles", "13mm" , "13millimeters", "15cm", "15centimeters", "17mi", "17miles", "19m" or "19meters".
If the unit cannot be determined, the entire string is parsed and the unit of meters is assumed.
## Envelope
Envelope type, which consists of coordinates for upper left and lower right points of the shape
to represent a bounding rectangle in the format [[minLon, maxLat], [maxLon, minLat]].
Envelope type, which consists of coordinates for upper left and lower right points of the shape to represent a bounding rectangle in the format [[minLon, maxLat], [maxLon, minLat]].
```
```json
{
"type": "envelope",
"coordinates": [
[72.83, 18.979],
[78.508,17.4555]
]
"type": "envelope",
"coordinates": [
[72.83, 18.979],
[78.508, 17.4555]
]
}
```
## GeoShape Query
Geoshape query support three types/filters of spatial querying capability across those
heterogeneous types of documents indexed.
Geoshape query supports three types/filters of spatial querying capability across those heterogeneous types of documents indexed.
### Query Structure:
### Query Structure
```
```json
{
"query": {
"geometry": {
"shape": {
"type": "<shapeType>",
"coordinates": [[[ ]]]
"type": "<shapeType>",
"coordinates": [
[[]]
]
},
"relation": "<<filterName>>"
}
@@ -248,7 +280,6 @@ heterogeneous types of documents indexed.
}
```
*shapeType* => can be any of the aforementioned types like Point, LineString, Polygon, MultiPoint,
Geometrycollection, MultiLineString, MultiPolygon, Circle and Envelope.
@@ -256,16 +287,14 @@ Geometrycollection, MultiLineString, MultiPolygon, Circle and Envelope.
### Relation
| FilterName | Description |
| :-----------:| :-----------------------------------------------------------------: |
| `intersects` | Return all documents whose shape field intersects the query geometry. |
| `contains` | Return all documents whose shape field contains the query geometry |
| `within` | Return all documents whose shape field is within the query geometry. |
| FilterName | Description |
| :-----------:| :-----------------------------------------------------------------: |
| `intersects` | Return all documents whose shape field intersects the query geometry. |
| `contains` | Return all documents whose shape field contains the query geometry |
| `within` | Return all documents whose shape field is within the query geometry. |
------------------------------------------------------------------------------------------------------------------------
### Older Implementation
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html).

View File

@@ -3,13 +3,16 @@
## Definitions
Batch
- A collection of Documents to mutate in the index.
Document
- Has a unique identifier (arbitrary bytes).
- Is comprised of a list of fields.
Field
- Has a name (string).
- Has a type (text, number, date, geopoint).
- Has a value (depending on type).
@@ -41,7 +44,7 @@ NOTE: If a document already contains a field \_id, it will be replaced. If this
### Proposed Structures
```
```go
type Segment interface {
Dictionary(field string) TermDictionary
@@ -92,9 +95,11 @@ type IndexSnapshot struct {
segment []SegmentSnapshot
}
```
**What about errors?**
**What about memory mgmnt or context?**
**Postings List separate iterator to separate stateful from stateless**
### Mutating the Index
The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal), however for this first implementation, we assume that all of these calls can simply be turned into a Batch of size 1. This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches.
@@ -105,9 +110,9 @@ From this point forward, only Batch mutations will be discussed.
Sequence of Operations:
1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like:
1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like:
```
```go
foreach segment {
dict := segment.Dictionary("\_id")
postings := empty postings list
@@ -119,21 +124,21 @@ Sequence of Operations:
NOTE: it is illustrated above as nested for loops, but some or all of these could be concurrently. The end result is that for each segment, we have (possibly empty) bitset.
2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool.
2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool.
3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information.
4. We now have everything we need to update the state of the system to include this new snapshot.
- Acquire a lock
- Create a new IndexSnapshot
- For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot.
- Create a new SegmentSnapshot wrapping our new segment with nil deleted docs.
- Append the new SegmentSnapshot to the IndexSnapshot
- Release the lock
4. We now have everything we need to update the state of the system to include this new snapshot.
- Acquire a lock
- Create a new IndexSnapshot
- For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot.
- Create a new SegmentSnapshot wrapping our new segment with nil deleted docs.
- Append the new SegmentSnapshot to the IndexSnapshot
- Release the lock
An ASCII art example:
```
```text
0 - Empty Index
No segments
@@ -209,7 +214,7 @@ Term search is the only searching primitive exposed in today's bleve.index API.
A term search for term T in field F will look something like this:
```
```go
searchResultPostings = empty
foreach segment {
dict := segment.Dictionary(F)
@@ -222,31 +227,31 @@ The searchResultPostings will be a new implementation of the TermFieldReader int
As a reminder this interface is:
```
```go
// TermFieldReader is the interface exposing the enumeration of documents
// containing a given term in a given field. Documents are returned in byte
// lexicographic order over their identifiers.
type TermFieldReader interface {
// Next returns the next document containing the term in this field, or nil
// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
// is optional, and when non-nil, will be used instead of allocating memory.
Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)
// Next returns the next document containing the term in this field, or nil
// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
// is optional, and when non-nil, will be used instead of allocating memory.
Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)
// Advance resets the enumeration at specified document or its immediate
// follower.
Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
// Advance resets the enumeration at specified document or its immediate
// follower.
Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
// Count returns the number of documents contains the term in this field.
Count() uint64
Close() error
// Count returns the number of documents contains the term in this field.
Count() uint64
Close() error
}
```
At first glance this appears problematic, we have no way to return documents in order of their identifiers. But it turns out the wording of this is perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. **ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is, they could be in any order and it would be fine. There is only one search that cares and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces:
- The weakest interface, only supports Next() no ordering at all.
- Ordered, supporting Advance()
- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking)
- The weakest interface, only supports Next() no ordering at all.
- Ordered, supporting Advance()
- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking)
But, the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly.
@@ -254,7 +259,7 @@ NOTE: there is another place where we rely on the ordering of these hits, and th
An ASCII art example:
```
```text
Let's start with the IndexSnapshot we ended with earlier:
3 - Index Batch [ C' ]
@@ -320,7 +325,6 @@ In the future, interfaces to detect these non-serially operating TermFieldReader
Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, disposing the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage.
### Memory Tracking
All segments must be able to produce two statistics, an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of tracking structure itself (say just a few pointers).
@@ -335,14 +339,12 @@ At runtime, the state of an index (it's IndexSnapshot) is not only the contents
This also relates to the topic rollback, addressed next...
### Rollback
One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo.
Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time, can be the mechanism to achieve the desired end goal.
### Internal Storage
The bleve.index API has support for "internal storage". The ability to store information under a separate name space.

View File

@@ -295,8 +295,10 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
if len(bestRoster) == 0 {
return rv, nil
}
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
// create tasks with valid merges - i.e. there should be at least 2 non-empty segments
if len(bestRoster) > 1 {
rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
}
eligibles = removeSegments(eligibles, bestRoster)
}

View File

@@ -393,5 +393,7 @@ func (i *IndexSnapshot) unadornedTermFieldReader(
includeNorm: false,
includeTermVectors: false,
recycle: false,
// signal downstream that this is a special unadorned termFieldReader
unadorned: true,
}
}

View File

@@ -50,6 +50,7 @@ type IndexSnapshotTermFieldReader struct {
recycle bool
bytesRead uint64
ctx context.Context
unadorned bool
}
func (i *IndexSnapshotTermFieldReader) incrementBytesRead(val uint64) {
@@ -146,14 +147,29 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo
// FIXME do something better
// for now, if we need to seek backwards, then restart from the beginning
if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 {
i2, err := i.snapshot.TermFieldReader(context.TODO(), i.term, i.field,
i.includeFreq, i.includeNorm, i.includeTermVectors)
if err != nil {
return nil, err
// Check if the TFR is a special unadorned composite optimization.
// Such a TFR will NOT have a valid `term` or `field` set, making it
// impossible for the TFR to replace itself with a new one.
if !i.unadorned {
i2, err := i.snapshot.TermFieldReader(context.TODO(), i.term, i.field,
i.includeFreq, i.includeNorm, i.includeTermVectors)
if err != nil {
return nil, err
}
// close the current term field reader before replacing it with a new one
_ = i.Close()
*i = *(i2.(*IndexSnapshotTermFieldReader))
} else {
// unadorned composite optimization
// we need to reset all the iterators
// back to the beginning, which effectively
achieves the same thing as the above
for _, iter := range i.iterators {
if optimizedIterator, ok := iter.(ResetablePostingsIterator); ok {
optimizedIterator.ResetIterator()
}
}
}
// close the current term field reader before replacing it with a new one
_ = i.Close()
*i = *(i2.(*IndexSnapshotTermFieldReader))
}
num, err := docInternalToNumber(ID)
if err != nil {

View File

@@ -96,6 +96,12 @@ func (i *unadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap)
i.actual = actual.Iterator()
}
// Resets the iterator to the beginning of the postings list.
// by resetting the actual iterator.
func (i *unadornedPostingsIteratorBitmap) ResetIterator() {
i.actual = i.actualBM.Iterator()
}
func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.PostingsIterator {
return &unadornedPostingsIteratorBitmap{
actualBM: bm,
@@ -106,7 +112,8 @@ func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.Postings
const docNum1HitFinished = math.MaxUint64
type unadornedPostingsIterator1Hit struct {
docNum uint64
docNumOrig uint64 // original 1-hit docNum used to create this iterator
docNum uint64 // current docNum
}
func (i *unadornedPostingsIterator1Hit) Next() (segment.Posting, error) {
@@ -153,12 +160,22 @@ func (i *unadornedPostingsIterator1Hit) BytesWritten() uint64 {
func (i *unadornedPostingsIterator1Hit) ResetBytesRead(uint64) {}
// ResetIterator resets the iterator to the original state.
func (i *unadornedPostingsIterator1Hit) ResetIterator() {
i.docNum = i.docNumOrig
}
func newUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) segment.PostingsIterator {
return &unadornedPostingsIterator1Hit{
docNum1Hit,
docNumOrig: docNum1Hit,
docNum: docNum1Hit,
}
}
type ResetablePostingsIterator interface {
ResetIterator()
}
type UnadornedPosting uint64
func (p UnadornedPosting) Number() uint64 {

View File

@@ -20,7 +20,6 @@ import (
"strconv"
"time"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
@@ -501,23 +500,7 @@ func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
doc.Complete(nil)
return nil
})
if err != nil {
return err
}
// Decode geo sort keys back to its distance values
for i, so := range hc.sort {
if _, ok := so.(*search.SortGeoDistance); ok {
for _, dm := range hc.results {
// The string is a int64 bit representation of a float64 distance
distInt, err := numeric.PrefixCoded(dm.Sort[i]).Int64()
if err != nil {
return err
}
dm.Sort[i] = strconv.FormatFloat(numeric.Int64ToFloat64(distInt), 'f', -1, 64)
}
}
}
return err
}

View File

@@ -154,6 +154,7 @@ type DocumentMatch struct {
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
DecodedSort []string `json:"decoded_sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric
@@ -224,6 +225,7 @@ func (dm *DocumentMatch) Reset() *DocumentMatch {
dm.IndexInternalID = indexInternalID[:0]
// reuse the []interface{} already allocated (and reset len to 0)
dm.Sort = sort[:0]
dm.DecodedSort = dm.DecodedSort[:0]
// reuse the FieldTermLocations already allocated (and reset len to 0)
dm.FieldTermLocations = ftls[:0]
return dm
@@ -263,6 +265,10 @@ func (dm *DocumentMatch) Size() int {
sizeInBytes += size.SizeOfString + len(entry)
}
for _, entry := range dm.DecodedSort {
sizeInBytes += size.SizeOfString + len(entry)
}
for k := range dm.Fields {
sizeInBytes += size.SizeOfString + len(k) +
size.SizeOfPtr

View File

@@ -20,7 +20,9 @@ import (
"fmt"
"math"
"sort"
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/geo"
@@ -36,6 +38,7 @@ var (
type SearchSort interface {
UpdateVisitor(field string, term []byte)
Value(a *DocumentMatch) string
DecodeValue(value string) string
Descending() bool
RequiresDocID() bool
@@ -212,7 +215,9 @@ type SortOrder []SearchSort
func (so SortOrder) Value(doc *DocumentMatch) {
for _, soi := range so {
doc.Sort = append(doc.Sort, soi.Value(doc))
value := soi.Value(doc)
doc.Sort = append(doc.Sort, value)
doc.DecodedSort = append(doc.DecodedSort, soi.DecodeValue(value))
}
}
@@ -390,6 +395,25 @@ func (s *SortField) Value(i *DocumentMatch) string {
return iTerm
}
func (s *SortField) DecodeValue(value string) string {
switch s.Type {
case SortFieldAsNumber:
i64, err := numeric.PrefixCoded(value).Int64()
if err != nil {
return value
}
return strconv.FormatFloat(numeric.Int64ToFloat64(i64), 'f', -1, 64)
case SortFieldAsDate:
i64, err := numeric.PrefixCoded(value).Int64()
if err != nil {
return value
}
return time.Unix(0, i64).UTC().String()
default:
return value
}
}
// Descending determines the order of the sort
func (s *SortField) Descending() bool {
return s.Desc
@@ -545,6 +569,10 @@ func (s *SortDocID) Value(i *DocumentMatch) string {
return i.ID
}
func (s *SortDocID) DecodeValue(value string) string {
return value
}
// Descending determines the order of the sort
func (s *SortDocID) Descending() bool {
return s.Desc
@@ -590,6 +618,10 @@ func (s *SortScore) Value(i *DocumentMatch) string {
return "_score"
}
func (s *SortScore) DecodeValue(value string) string {
return value
}
// Descending determines the order of the sort
func (s *SortScore) Descending() bool {
return s.Desc
@@ -694,6 +726,14 @@ func (s *SortGeoDistance) Value(i *DocumentMatch) string {
return string(numeric.MustNewPrefixCodedInt64(distInt64, 0))
}
func (s *SortGeoDistance) DecodeValue(value string) string {
distInt, err := numeric.PrefixCoded(value).Int64()
if err != nil {
return ""
}
return strconv.FormatFloat(numeric.Int64ToFloat64(distInt), 'f', -1, 64)
}
// Descending determines the order of the sort
func (s *SortGeoDistance) Descending() bool {
return s.Desc

View File

@@ -106,7 +106,7 @@ func (di *docValueReader) curChunkNumber() uint64 {
return di.curChunkNum
}
func (s *SegmentBase) loadFieldDocValueReader(field string,
func (sb *SegmentBase) loadFieldDocValueReader(field string,
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
// get the docValue offset for the given fields
if fieldDvLocStart == fieldNotUninverted {
@@ -118,15 +118,15 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
var numChunks, chunkOffsetsPosition uint64
if fieldDvLocEnd-fieldDvLocStart > 16 {
numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd])
numChunks = binary.BigEndian.Uint64(sb.mem[fieldDvLocEnd-8 : fieldDvLocEnd])
// read the length of chunk offsets
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
chunkOffsetsLen := binary.BigEndian.Uint64(sb.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
// acquire position of chunk offsets
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
// 16 bytes since it corresponds to the length
// of chunk offsets and the position of the offsets
s.incrementBytesRead(16)
sb.incrementBytesRead(16)
} else {
return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
}
@@ -140,14 +140,14 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
// read the chunk offsets
var offset uint64
for i := 0; i < int(numChunks); i++ {
loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64])
loc, read := binary.Uvarint(sb.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64])
if read <= 0 {
return nil, fmt.Errorf("corrupted chunk offset during segment load")
}
fdvIter.chunkOffsets[i] = loc
offset += uint64(read)
}
s.incrementBytesRead(offset)
sb.incrementBytesRead(offset)
// set the data offset
fdvIter.dvDataLoc = fieldDvLocStart
return fdvIter, nil
@@ -286,15 +286,15 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
// VisitDocValues is an implementation of the
// DocValueVisitable interface
func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
func (sb *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
visitor index.DocValueVisitor, dvsIn segment.DocVisitState) (
segment.DocVisitState, error) {
dvs, ok := dvsIn.(*docVisitState)
if !ok || dvs == nil {
dvs = &docVisitState{}
} else {
if dvs.segment != s {
dvs.segment = s
if dvs.segment != sb {
dvs.segment = sb
dvs.dvrs = nil
dvs.bytesRead = 0
}
@@ -304,11 +304,11 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
if dvs.dvrs == nil {
dvs.dvrs = make(map[uint16]*docValueReader, len(fields))
for _, field := range fields {
if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
if fieldIDPlus1, ok = sb.fieldsMap[field]; !ok {
continue
}
fieldID := fieldIDPlus1 - 1
if dvIter, exists := s.fieldDvReaders[SectionInvertedTextIndex][fieldID]; exists &&
if dvIter, exists := sb.fieldDvReaders[SectionInvertedTextIndex][fieldID]; exists &&
dvIter != nil {
dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID])
}
@@ -324,14 +324,14 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
docInChunk := localDocNum / chunkFactor
var dvr *docValueReader
for _, field := range fields {
if fieldIDPlus1, ok = s.fieldsMap[field]; !ok {
if fieldIDPlus1, ok = sb.fieldsMap[field]; !ok {
continue
}
fieldID := fieldIDPlus1 - 1
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
// check if the chunk is already loaded
if docInChunk != dvr.curChunkNumber() {
err := dvr.loadDvChunk(docInChunk, s)
err := dvr.loadDvChunk(docInChunk, sb)
if err != nil {
return dvs, err
}
@@ -349,6 +349,6 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
// VisitableDocValueFields returns the list of fields with
// persisted doc value terms ready to be visitable using the
// VisitDocumentFieldTerms method.
func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
return s.fieldDvNames, nil
func (sb *SegmentBase) VisitableDocValueFields() ([]string, error) {
return sb.fieldDvNames, nil
}

View File

@@ -52,31 +52,19 @@ func (vc *vectorIndexCache) Clear() {
vc.m.Unlock()
}
// loadDocVecIDMap indicates if a non-nil docVecIDMap should be returned.
// It is true when a filtered kNN query accesses the cache since it requires the
// map. It's false otherwise.
// loadOrCreate obtains the vector index from the cache or creates it if it's not
// present. It also returns the batch executor for the field if it's present in the
// cache.
func (vc *vectorIndexCache) loadOrCreate(fieldID uint16, mem []byte,
loadDocVecIDMap bool, except *roaring.Bitmap) (
index *faiss.IndexImpl, vecDocIDMap map[int64]uint32, docVecIDMap map[uint32][]int64,
vecIDsToExclude []int64, err error) {
index, vecDocIDMap, docVecIDMap, vecIDsToExclude, err = vc.loadFromCache(
fieldID, loadDocVecIDMap, mem, except)
return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, err
}
// function to load the vectorDocIDMap and if required, docVecIDMap from cache
// If not, it will create these and add them to the cache.
func (vc *vectorIndexCache) loadFromCache(fieldID uint16, loadDocVecIDMap bool,
mem []byte, except *roaring.Bitmap) (index *faiss.IndexImpl, vecDocIDMap map[int64]uint32,
docVecIDMap map[uint32][]int64, vecIDsToExclude []int64, err error) {
vc.m.RLock()
entry, ok := vc.cache[fieldID]
if ok {
index, vecDocIDMap, docVecIDMap = entry.load()
vecIDsToExclude = getVecIDsToExclude(vecDocIDMap, except)
if !loadDocVecIDMap || (loadDocVecIDMap && len(entry.docVecIDMap) > 0) {
if !loadDocVecIDMap || len(entry.docVecIDMap) > 0 {
vc.m.RUnlock()
return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil
}
@@ -126,7 +114,7 @@ func (vc *vectorIndexCache) createAndCacheLOCKED(fieldID uint16, mem []byte,
if entry != nil {
index, vecDocIDMap, docVecIDMap = entry.load()
vecIDsToExclude = getVecIDsToExclude(vecDocIDMap, except)
if !loadDocVecIDMap || (loadDocVecIDMap && len(entry.docVecIDMap) > 0) {
if !loadDocVecIDMap || len(entry.docVecIDMap) > 0 {
return index, vecDocIDMap, docVecIDMap, vecIDsToExclude, nil
}
docVecIDMap = vc.addDocVecIDMapToCacheLOCKED(entry)

View File

@@ -104,44 +104,44 @@ func (vpl *VecPostingsList) Iterator(prealloc segment.VecPostingsIterator) segme
return vpl.iterator(preallocPI)
}
func (p *VecPostingsList) iterator(rv *VecPostingsIterator) *VecPostingsIterator {
func (vpl *VecPostingsList) iterator(rv *VecPostingsIterator) *VecPostingsIterator {
if rv == nil {
rv = &VecPostingsIterator{}
} else {
*rv = VecPostingsIterator{} // clear the struct
}
// think on some of the edge cases over here.
if p.postings == nil {
if vpl.postings == nil {
return rv
}
rv.postings = p
rv.all = p.postings.Iterator()
if p.except != nil {
rv.ActualBM = roaring64.AndNot(p.postings, p.except)
rv.postings = vpl
rv.all = vpl.postings.Iterator()
if vpl.except != nil {
rv.ActualBM = roaring64.AndNot(vpl.postings, vpl.except)
rv.Actual = rv.ActualBM.Iterator()
} else {
rv.ActualBM = p.postings
rv.ActualBM = vpl.postings
rv.Actual = rv.all // Optimize to use same iterator for all & Actual.
}
return rv
}
func (p *VecPostingsList) Size() int {
func (vpl *VecPostingsList) Size() int {
sizeInBytes := reflectStaticSizeVecPostingsList + SizeOfPtr
if p.except != nil {
sizeInBytes += int(p.except.GetSizeInBytes())
if vpl.except != nil {
sizeInBytes += int(vpl.except.GetSizeInBytes())
}
return sizeInBytes
}
func (p *VecPostingsList) Count() uint64 {
if p.postings != nil {
n := p.postings.GetCardinality()
func (vpl *VecPostingsList) Count() uint64 {
if vpl.postings != nil {
n := vpl.postings.GetCardinality()
var e uint64
if p.except != nil {
e = p.postings.AndCardinality(p.except)
if vpl.except != nil {
e = vpl.postings.AndCardinality(vpl.except)
}
return n - e
}
@@ -171,51 +171,51 @@ type VecPostingsIterator struct {
next VecPosting // reused across Next() calls
}
func (i *VecPostingsIterator) nextCodeAtOrAfterClean(atOrAfter uint64) (uint64, bool, error) {
i.Actual.AdvanceIfNeeded(atOrAfter)
func (vpItr *VecPostingsIterator) nextCodeAtOrAfterClean(atOrAfter uint64) (uint64, bool, error) {
vpItr.Actual.AdvanceIfNeeded(atOrAfter)
if !i.Actual.HasNext() {
if !vpItr.Actual.HasNext() {
return 0, false, nil // couldn't find anything
}
return i.Actual.Next(), true, nil
return vpItr.Actual.Next(), true, nil
}
func (i *VecPostingsIterator) nextCodeAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
if i.Actual == nil || !i.Actual.HasNext() {
func (vpItr *VecPostingsIterator) nextCodeAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
if vpItr.Actual == nil || !vpItr.Actual.HasNext() {
return 0, false, nil
}
if i.postings == nil || i.postings == emptyVecPostingsList {
if vpItr.postings == nil || vpItr.postings == emptyVecPostingsList {
// couldn't find anything
return 0, false, nil
}
if i.postings.postings == i.ActualBM {
return i.nextCodeAtOrAfterClean(atOrAfter)
if vpItr.postings.postings == vpItr.ActualBM {
return vpItr.nextCodeAtOrAfterClean(atOrAfter)
}
i.Actual.AdvanceIfNeeded(atOrAfter)
vpItr.Actual.AdvanceIfNeeded(atOrAfter)
if !i.Actual.HasNext() || !i.all.HasNext() {
if !vpItr.Actual.HasNext() || !vpItr.all.HasNext() {
// couldn't find anything
return 0, false, nil
}
n := i.Actual.Next()
allN := i.all.Next()
n := vpItr.Actual.Next()
allN := vpItr.all.Next()
// n is the next actual hit (excluding some postings), and
// allN is the next hit in the full postings, and
// if they don't match, move 'all' forwards until they do.
for allN != n {
if !i.all.HasNext() {
if !vpItr.all.HasNext() {
return 0, false, nil
}
allN = i.all.Next()
allN = vpItr.all.Next()
}
return uint64(n), true, nil
return n, true, nil
}
// a transformation function which stores both the score and the docNum as a single
@@ -225,49 +225,49 @@ func getVectorCode(docNum uint32, score float32) uint64 {
}
// Next returns the next posting on the vector postings list, or nil at the end
func (i *VecPostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.VecPosting, error) {
func (vpItr *VecPostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.VecPosting, error) {
// transform the docNum provided to the vector code format and use that to
// get the next entry. the comparison still happens docNum wise since after
// the transformation, the docNum occupies the upper 32 bits just an entry in
// the postings list
atOrAfter = getVectorCode(uint32(atOrAfter), 0)
code, exists, err := i.nextCodeAtOrAfter(atOrAfter)
code, exists, err := vpItr.nextCodeAtOrAfter(atOrAfter)
if err != nil || !exists {
return nil, err
}
i.next = VecPosting{} // clear the struct
rv := &i.next
vpItr.next = VecPosting{} // clear the struct
rv := &vpItr.next
rv.score = math.Float32frombits(uint32(code))
rv.docNum = code >> 32
return rv, nil
}
func (itr *VecPostingsIterator) Next() (segment.VecPosting, error) {
return itr.nextAtOrAfter(0)
func (vpItr *VecPostingsIterator) Next() (segment.VecPosting, error) {
return vpItr.nextAtOrAfter(0)
}
func (itr *VecPostingsIterator) Advance(docNum uint64) (segment.VecPosting, error) {
return itr.nextAtOrAfter(docNum)
func (vpItr *VecPostingsIterator) Advance(docNum uint64) (segment.VecPosting, error) {
return vpItr.nextAtOrAfter(docNum)
}
func (i *VecPostingsIterator) Size() int {
func (vpItr *VecPostingsIterator) Size() int {
sizeInBytes := reflectStaticSizePostingsIterator + SizeOfPtr +
i.next.Size()
vpItr.next.Size()
return sizeInBytes
}
func (vpl *VecPostingsIterator) ResetBytesRead(val uint64) {
func (vpItr *VecPostingsIterator) ResetBytesRead(val uint64) {
}
func (vpl *VecPostingsIterator) BytesRead() uint64 {
func (vpItr *VecPostingsIterator) BytesRead() uint64 {
return 0
}
func (vpl *VecPostingsIterator) BytesWritten() uint64 {
func (vpItr *VecPostingsIterator) BytesWritten() uint64 {
return 0
}
@@ -329,7 +329,7 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool
// it isn't added to the final postings list.
if docID, ok := vecDocIDMap[vecID]; ok {
code := getVectorCode(docID, scores[i])
pl.postings.Add(uint64(code))
pl.postings.Add(code)
}
}
}
@@ -471,7 +471,7 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool
if err != nil {
return nil, err
}
// If no error occured during the creation of the selector, then
// If no error occurred during the creation of the selector, then
// it should be deleted once the search is complete.
defer selector.Delete()
// Ordering the retrieved centroid IDs by increasing order

View File

@@ -537,21 +537,21 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
// copyStoredDocs writes out a segment's stored doc info, optimized by
// using a single Write() call for the entire set of bytes. The
// newDocNumOffsets is filled with the new offsets for each doc.
func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64,
func (sb *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64,
w *CountHashWriter) error {
if s.numDocs <= 0 {
if sb.numDocs <= 0 {
return nil
}
indexOffset0, storedOffset0, _, _, _ :=
s.getDocStoredOffsets(0) // the segment's first doc
sb.getDocStoredOffsets(0) // the segment's first doc
indexOffsetN, storedOffsetN, readN, metaLenN, dataLenN :=
s.getDocStoredOffsets(s.numDocs - 1) // the segment's last doc
sb.getDocStoredOffsets(sb.numDocs - 1) // the segment's last doc
storedOffset0New := uint64(w.Count())
storedBytes := s.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN]
storedBytes := sb.mem[storedOffset0 : storedOffsetN+readN+metaLenN+dataLenN]
_, err := w.Write(storedBytes)
if err != nil {
return err
@@ -560,7 +560,7 @@ func (s *SegmentBase) copyStoredDocs(newDocNum uint64, newDocNumOffsets []uint64
// remap the storedOffset's for the docs into new offsets relative
// to storedOffset0New, filling the given docNumOffsetsOut array
for indexOffset := indexOffset0; indexOffset <= indexOffsetN; indexOffset += 8 {
storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
storedOffset := binary.BigEndian.Uint64(sb.mem[indexOffset : indexOffset+8])
storedOffsetNew := storedOffset - storedOffset0 + storedOffset0New
newDocNumOffsets[newDocNum] = storedOffsetNew
newDocNum += 1

View File

@@ -16,27 +16,27 @@ package zap
import "encoding/binary"
func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) {
_, storedOffset, n, metaLen, dataLen := s.getDocStoredOffsets(docNum)
func (sb *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []byte) {
_, storedOffset, n, metaLen, dataLen := sb.getDocStoredOffsets(docNum)
meta := s.mem[storedOffset+n : storedOffset+n+metaLen]
data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen]
meta := sb.mem[storedOffset+n : storedOffset+n+metaLen]
data := sb.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen]
return meta, data
}
func (s *SegmentBase) getDocStoredOffsets(docNum uint64) (
func (sb *SegmentBase) getDocStoredOffsets(docNum uint64) (
uint64, uint64, uint64, uint64, uint64) {
indexOffset := s.storedIndexOffset + (8 * docNum)
indexOffset := sb.storedIndexOffset + (8 * docNum)
storedOffset := binary.BigEndian.Uint64(s.mem[indexOffset : indexOffset+8])
storedOffset := binary.BigEndian.Uint64(sb.mem[indexOffset : indexOffset+8])
var n uint64
metaLen, read := binary.Uvarint(s.mem[storedOffset : storedOffset+binary.MaxVarintLen64])
metaLen, read := binary.Uvarint(sb.mem[storedOffset : storedOffset+binary.MaxVarintLen64])
n += uint64(read)
dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64])
dataLen, read := binary.Uvarint(sb.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64])
n += uint64(read)
return indexOffset, storedOffset, n, metaLen, dataLen

View File

@@ -612,8 +612,10 @@ func (io *invertedIndexOpaque) writeDicts(w *CountHashWriter) (dictOffsets []uin
if io.IncludeDocValues[fieldID] {
for docNum, docTerms := range docTermMap {
if fieldTermMap, ok := io.extraDocValues[docNum]; ok {
if sTerm, ok := fieldTermMap[uint16(fieldID)]; ok {
docTerms = append(append(docTerms, sTerm...), termSeparator)
if sTerms, ok := fieldTermMap[uint16(fieldID)]; ok {
for _, sTerm := range sTerms {
docTerms = append(append(docTerms, sTerm...), termSeparator)
}
}
}
if len(docTerms) > 0 {
@@ -797,9 +799,9 @@ func (i *invertedIndexOpaque) realloc() {
if f, ok := field.(index.GeoShapeField); ok {
if _, exists := i.extraDocValues[docNum]; !exists {
i.extraDocValues[docNum] = make(map[uint16][]byte)
i.extraDocValues[docNum] = make(map[uint16][][]byte)
}
i.extraDocValues[docNum][fieldID] = f.EncodedShape()
i.extraDocValues[docNum][fieldID] = append(i.extraDocValues[docNum][fieldID], f.EncodedShape())
}
}
@@ -810,7 +812,7 @@ func (i *invertedIndexOpaque) realloc() {
}
if i.extraDocValues == nil {
i.extraDocValues = map[int]map[uint16][]byte{}
i.extraDocValues = map[int]map[uint16][][]byte{}
}
for docNum, result := range i.results {
@@ -978,8 +980,8 @@ type invertedIndexOpaque struct {
// store terms that are unnecessary for the term dictionaries but needed in doc values
// eg - encoded geoshapes
// docNum -> fieldID -> term
extraDocValues map[int]map[uint16][]byte
// docNum -> fieldID -> terms
extraDocValues map[int]map[uint16][][]byte
builder *vellum.Builder
builderBuf bytes.Buffer

View File

@@ -269,81 +269,81 @@ func (s *Segment) incrementBytesRead(val uint64) {
atomic.AddUint64(&s.bytesRead, val)
}
func (s *SegmentBase) BytesWritten() uint64 {
return atomic.LoadUint64(&s.bytesWritten)
func (sb *SegmentBase) BytesWritten() uint64 {
return atomic.LoadUint64(&sb.bytesWritten)
}
func (s *SegmentBase) setBytesWritten(val uint64) {
atomic.AddUint64(&s.bytesWritten, val)
func (sb *SegmentBase) setBytesWritten(val uint64) {
atomic.AddUint64(&sb.bytesWritten, val)
}
func (s *SegmentBase) BytesRead() uint64 {
func (sb *SegmentBase) BytesRead() uint64 {
return 0
}
func (s *SegmentBase) ResetBytesRead(val uint64) {}
func (sb *SegmentBase) ResetBytesRead(val uint64) {}
func (s *SegmentBase) incrementBytesRead(val uint64) {
atomic.AddUint64(&s.bytesRead, val)
func (sb *SegmentBase) incrementBytesRead(val uint64) {
atomic.AddUint64(&sb.bytesRead, val)
}
func (s *SegmentBase) loadFields() error {
func (sb *SegmentBase) loadFields() error {
// NOTE for now we assume the fields index immediately precedes
// the footer, and if this changes, need to adjust accordingly (or
// store explicit length), where s.mem was sliced from s.mm in Open().
fieldsIndexEnd := uint64(len(s.mem))
fieldsIndexEnd := uint64(len(sb.mem))
// iterate through fields index
var fieldID uint64
for s.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd {
addr := binary.BigEndian.Uint64(s.mem[s.fieldsIndexOffset+(8*fieldID) : s.fieldsIndexOffset+(8*fieldID)+8])
for sb.fieldsIndexOffset+(8*fieldID) < fieldsIndexEnd {
addr := binary.BigEndian.Uint64(sb.mem[sb.fieldsIndexOffset+(8*fieldID) : sb.fieldsIndexOffset+(8*fieldID)+8])
// accounting the address of the dictLoc being read from file
s.incrementBytesRead(8)
sb.incrementBytesRead(8)
dictLoc, read := binary.Uvarint(s.mem[addr:fieldsIndexEnd])
dictLoc, read := binary.Uvarint(sb.mem[addr:fieldsIndexEnd])
n := uint64(read)
s.dictLocs = append(s.dictLocs, dictLoc)
sb.dictLocs = append(sb.dictLocs, dictLoc)
var nameLen uint64
nameLen, read = binary.Uvarint(s.mem[addr+n : fieldsIndexEnd])
nameLen, read = binary.Uvarint(sb.mem[addr+n : fieldsIndexEnd])
n += uint64(read)
name := string(s.mem[addr+n : addr+n+nameLen])
name := string(sb.mem[addr+n : addr+n+nameLen])
s.incrementBytesRead(n + nameLen)
s.fieldsInv = append(s.fieldsInv, name)
s.fieldsMap[name] = uint16(fieldID + 1)
sb.incrementBytesRead(n + nameLen)
sb.fieldsInv = append(sb.fieldsInv, name)
sb.fieldsMap[name] = uint16(fieldID + 1)
fieldID++
}
return nil
}
func (s *SegmentBase) loadFieldsNew() error {
pos := s.sectionsIndexOffset
func (sb *SegmentBase) loadFieldsNew() error {
pos := sb.sectionsIndexOffset
if pos == 0 {
// this is the case only for older file formats
return s.loadFields()
return sb.loadFields()
}
seek := pos + binary.MaxVarintLen64
if seek > uint64(len(s.mem)) {
if seek > uint64(len(sb.mem)) {
// handling a buffer overflow case.
// a rare case where the backing buffer is not large enough to be read directly via
// a pos+binary.MaxVarintLen64 seek. For eg, this can happen when there is only
// one field to be indexed in the entire batch of data and while writing out
// these fields metadata, you write 1 + 8 bytes whereas the MaxVarintLen64 = 10.
seek = uint64(len(s.mem))
seek = uint64(len(sb.mem))
}
// read the number of fields
numFields, sz := binary.Uvarint(s.mem[pos:seek])
numFields, sz := binary.Uvarint(sb.mem[pos:seek])
// here, the pos is incremented by the valid number bytes read from the buffer
// so in the edge case pointed out above the numFields = 1, the sz = 1 as well.
pos += uint64(sz)
s.incrementBytesRead(uint64(sz))
sb.incrementBytesRead(uint64(sz))
// the following loop will be executed only once in the edge case pointed out above
// since there is only field's offset store which occupies 8 bytes.
@@ -352,17 +352,17 @@ func (s *SegmentBase) loadFieldsNew() error {
// the specific section's parsing logic.
var fieldID uint64
for fieldID < numFields {
addr := binary.BigEndian.Uint64(s.mem[pos : pos+8])
s.incrementBytesRead(8)
addr := binary.BigEndian.Uint64(sb.mem[pos : pos+8])
sb.incrementBytesRead(8)
fieldSectionMap := make(map[uint16]uint64)
err := s.loadFieldNew(uint16(fieldID), addr, fieldSectionMap)
err := sb.loadFieldNew(uint16(fieldID), addr, fieldSectionMap)
if err != nil {
return err
}
s.fieldsSectionsMap = append(s.fieldsSectionsMap, fieldSectionMap)
sb.fieldsSectionsMap = append(sb.fieldsSectionsMap, fieldSectionMap)
fieldID++
pos += 8
@@ -371,7 +371,7 @@ func (s *SegmentBase) loadFieldsNew() error {
return nil
}
func (s *SegmentBase) loadFieldNew(fieldID uint16, pos uint64,
func (sb *SegmentBase) loadFieldNew(fieldID uint16, pos uint64,
fieldSectionMap map[uint16]uint64) error {
if pos == 0 {
// there is no indexing structure present for this field/section
@@ -379,23 +379,23 @@ func (s *SegmentBase) loadFieldNew(fieldID uint16, pos uint64,
}
fieldStartPos := pos // to track the number of bytes read
fieldNameLen, sz := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64])
fieldNameLen, sz := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64])
pos += uint64(sz)
fieldName := string(s.mem[pos : pos+fieldNameLen])
fieldName := string(sb.mem[pos : pos+fieldNameLen])
pos += fieldNameLen
s.fieldsInv = append(s.fieldsInv, fieldName)
s.fieldsMap[fieldName] = uint16(fieldID + 1)
sb.fieldsInv = append(sb.fieldsInv, fieldName)
sb.fieldsMap[fieldName] = uint16(fieldID + 1)
fieldNumSections, sz := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64])
fieldNumSections, sz := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64])
pos += uint64(sz)
for sectionIdx := uint64(0); sectionIdx < fieldNumSections; sectionIdx++ {
// read section id
fieldSectionType := binary.BigEndian.Uint16(s.mem[pos : pos+2])
fieldSectionType := binary.BigEndian.Uint16(sb.mem[pos : pos+2])
pos += 2
fieldSectionAddr := binary.BigEndian.Uint64(s.mem[pos : pos+8])
fieldSectionAddr := binary.BigEndian.Uint64(sb.mem[pos : pos+8])
pos += 8
fieldSectionMap[fieldSectionType] = fieldSectionAddr
if fieldSectionType == SectionInvertedTextIndex {
@@ -403,33 +403,33 @@ func (s *SegmentBase) loadFieldNew(fieldID uint16, pos uint64,
// 0 and during query time, because there is no valid dictionary we
// will just have follow a no-op path.
if fieldSectionAddr == 0 {
s.dictLocs = append(s.dictLocs, 0)
sb.dictLocs = append(sb.dictLocs, 0)
continue
}
read := 0
// skip the doc values
_, n := binary.Uvarint(s.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64])
_, n := binary.Uvarint(sb.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64])
fieldSectionAddr += uint64(n)
read += n
_, n = binary.Uvarint(s.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64])
_, n = binary.Uvarint(sb.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64])
fieldSectionAddr += uint64(n)
read += n
dictLoc, n := binary.Uvarint(s.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64])
dictLoc, n := binary.Uvarint(sb.mem[fieldSectionAddr : fieldSectionAddr+binary.MaxVarintLen64])
// account the bytes read while parsing the field's inverted index section
s.incrementBytesRead(uint64(read + n))
s.dictLocs = append(s.dictLocs, dictLoc)
sb.incrementBytesRead(uint64(read + n))
sb.dictLocs = append(sb.dictLocs, dictLoc)
}
}
// account the bytes read while parsing the sections field index.
s.incrementBytesRead((pos - uint64(fieldStartPos)) + fieldNameLen)
sb.incrementBytesRead((pos - uint64(fieldStartPos)) + fieldNameLen)
return nil
}
// Dictionary returns the term dictionary for the specified field
func (s *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) {
dict, err := s.dictionary(field)
func (sb *SegmentBase) Dictionary(field string) (segment.TermDictionary, error) {
dict, err := sb.dictionary(field)
if err == nil && dict == nil {
return emptyDictionary, nil
}
@@ -479,8 +479,8 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
}
// Thesaurus returns the thesaurus with the specified name, or an empty thesaurus if not found.
func (s *SegmentBase) Thesaurus(name string) (segment.Thesaurus, error) {
thesaurus, err := s.thesaurus(name)
func (sb *SegmentBase) Thesaurus(name string) (segment.Thesaurus, error) {
thesaurus, err := sb.thesaurus(name)
if err == nil && thesaurus == nil {
return emptyThesaurus, nil
}
@@ -537,17 +537,17 @@ var visitDocumentCtxPool = sync.Pool{
// VisitStoredFields invokes the StoredFieldValueVisitor for each stored field
// for the specified doc number
func (s *SegmentBase) VisitStoredFields(num uint64, visitor segment.StoredFieldValueVisitor) error {
func (sb *SegmentBase) VisitStoredFields(num uint64, visitor segment.StoredFieldValueVisitor) error {
vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
defer visitDocumentCtxPool.Put(vdc)
return s.visitStoredFields(vdc, num, visitor)
return sb.visitStoredFields(vdc, num, visitor)
}
func (s *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64,
func (sb *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64,
visitor segment.StoredFieldValueVisitor) error {
// first make sure this is a valid number in this segment
if num < s.numDocs {
meta, compressed := s.getDocStoredMetaAndCompressed(num)
if num < sb.numDocs {
meta, compressed := sb.getDocStoredMetaAndCompressed(num)
vdc.reader.Reset(meta)
@@ -611,7 +611,7 @@ func (s *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64,
}
}
value := uncompressed[offset : offset+l]
keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos)
keepGoing = visitor(sb.fieldsInv[field], byte(typ), value, arrayPos)
}
vdc.buf = uncompressed
@@ -620,14 +620,14 @@ func (s *SegmentBase) visitStoredFields(vdc *visitDocumentCtx, num uint64,
}
// DocID returns the value of the _id field for the given docNum
func (s *SegmentBase) DocID(num uint64) ([]byte, error) {
if num >= s.numDocs {
func (sb *SegmentBase) DocID(num uint64) ([]byte, error) {
if num >= sb.numDocs {
return nil, nil
}
vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx)
meta, compressed := s.getDocStoredMetaAndCompressed(num)
meta, compressed := sb.getDocStoredMetaAndCompressed(num)
vdc.reader.Reset(meta)
@@ -644,17 +644,17 @@ func (s *SegmentBase) DocID(num uint64) ([]byte, error) {
}
// Count returns the number of documents in this segment.
func (s *SegmentBase) Count() uint64 {
return s.numDocs
func (sb *SegmentBase) Count() uint64 {
return sb.numDocs
}
// DocNumbers returns a bitset corresponding to the doc numbers of all the
// provided _id strings
func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
func (sb *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
rv := roaring.New()
if len(s.fieldsMap) > 0 {
idDict, err := s.dictionary("_id")
if len(sb.fieldsMap) > 0 {
idDict, err := sb.dictionary("_id")
if err != nil {
return nil, err
}
@@ -681,8 +681,8 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
}
// Fields returns the field names used in this segment
func (s *SegmentBase) Fields() []string {
return s.fieldsInv
func (sb *SegmentBase) Fields() []string {
return sb.fieldsInv
}
// Path returns the path of this segment on disk
@@ -907,44 +907,44 @@ func (s *Segment) loadDvReaders() error {
// since segmentBase is an in-memory segment, it can be called only
// for v16 file formats as part of InitSegmentBase() while introducing
// a segment into the system.
func (s *SegmentBase) loadDvReaders() error {
func (sb *SegmentBase) loadDvReaders() error {
// evaluate -> s.docValueOffset == fieldNotUninverted
if s.numDocs == 0 {
if sb.numDocs == 0 {
return nil
}
for fieldID, sections := range s.fieldsSectionsMap {
for fieldID, sections := range sb.fieldsSectionsMap {
for secID, secOffset := range sections {
if secOffset > 0 {
// fixed encoding as of now, need to uvarint this
pos := secOffset
var read uint64
fieldLocStart, n := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64])
fieldLocStart, n := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64])
if n <= 0 {
return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %v", s.fieldsInv[fieldID])
return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %v", sb.fieldsInv[fieldID])
}
pos += uint64(n)
read += uint64(n)
fieldLocEnd, n := binary.Uvarint(s.mem[pos : pos+binary.MaxVarintLen64])
fieldLocEnd, n := binary.Uvarint(sb.mem[pos : pos+binary.MaxVarintLen64])
if read <= 0 {
return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %v", s.fieldsInv[fieldID])
return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %v", sb.fieldsInv[fieldID])
}
pos += uint64(n)
read += uint64(n)
s.incrementBytesRead(read)
sb.incrementBytesRead(read)
fieldDvReader, err := s.loadFieldDocValueReader(s.fieldsInv[fieldID], fieldLocStart, fieldLocEnd)
fieldDvReader, err := sb.loadFieldDocValueReader(sb.fieldsInv[fieldID], fieldLocStart, fieldLocEnd)
if err != nil {
return err
}
if fieldDvReader != nil {
if s.fieldDvReaders[secID] == nil {
s.fieldDvReaders[secID] = make(map[uint16]*docValueReader)
if sb.fieldDvReaders[secID] == nil {
sb.fieldDvReaders[secID] = make(map[uint16]*docValueReader)
}
s.fieldDvReaders[secID][uint16(fieldID)] = fieldDvReader
s.fieldDvNames = append(s.fieldDvNames, s.fieldsInv[fieldID])
sb.fieldDvReaders[secID][uint16(fieldID)] = fieldDvReader
sb.fieldDvNames = append(sb.fieldDvNames, sb.fieldsInv[fieldID])
}
}
}

6
vendor/modules.txt vendored
View File

@@ -161,7 +161,7 @@ github.com/bitly/go-simplejson
# github.com/bits-and-blooms/bitset v1.22.0
## explicit; go 1.16
github.com/bits-and-blooms/bitset
# github.com/blevesearch/bleve/v2 v2.5.1
# github.com/blevesearch/bleve/v2 v2.5.2
## explicit; go 1.23
github.com/blevesearch/bleve/v2
github.com/blevesearch/bleve/v2/analysis
@@ -260,8 +260,8 @@ github.com/blevesearch/zapx/v14
# github.com/blevesearch/zapx/v15 v15.4.2
## explicit; go 1.21
github.com/blevesearch/zapx/v15
# github.com/blevesearch/zapx/v16 v16.2.3
## explicit; go 1.21
# github.com/blevesearch/zapx/v16 v16.2.4
## explicit; go 1.23
github.com/blevesearch/zapx/v16
# github.com/bluele/gcache v0.0.2
## explicit; go 1.15