Bump github.com/blevesearch/bleve/v2 from 2.3.7 to 2.3.9

Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.3.7 to 2.3.9.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.3.7...v2.3.9)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Authored by: dependabot[bot]
Date: 2023-08-15 06:44:05 +00:00
Committed by: Ralf Haferkamp
Parent: 82b600aef5
Commit: f9b69afa9e
58 changed files with 1138 additions and 330 deletions

go.mod (18 lines changed)

@@ -9,7 +9,7 @@ require (
github.com/MicahParks/keyfunc v1.5.1
github.com/Nerzal/gocloak/v13 v13.1.0
github.com/bbalet/stopwords v1.0.0
github.com/blevesearch/bleve/v2 v2.3.7
github.com/blevesearch/bleve/v2 v2.3.9
github.com/coreos/go-oidc v2.2.1+incompatible
github.com/coreos/go-oidc/v3 v3.6.0
github.com/cs3org/go-cs3apis v0.0.0-20230516150832-730ac860c71d
@@ -115,7 +115,7 @@ require (
github.com/Microsoft/go-winio v0.6.0 // indirect
github.com/OneOfOne/xxhash v1.2.8 // indirect
github.com/ProtonMail/go-crypto v0.0.0-20220930113650-c6815a8c17ad // indirect
github.com/RoaringBitmap/roaring v0.9.4 // indirect
github.com/RoaringBitmap/roaring v1.2.3 // indirect
github.com/acomagu/bufpipe v1.0.3 // indirect
github.com/agnivade/levenshtein v1.1.1 // indirect
github.com/ajg/form v1.5.1 // indirect
@@ -134,16 +134,16 @@ require (
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/gtreap v0.1.1 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.1.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.1.5 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
github.com/blevesearch/vellum v1.0.9 // indirect
github.com/blevesearch/zapx/v11 v11.3.7 // indirect
github.com/blevesearch/zapx/v12 v12.3.7 // indirect
github.com/blevesearch/zapx/v13 v13.3.7 // indirect
github.com/blevesearch/zapx/v14 v14.3.7 // indirect
github.com/blevesearch/zapx/v15 v15.3.9 // indirect
github.com/blevesearch/vellum v1.0.10 // indirect
github.com/blevesearch/zapx/v11 v11.3.9 // indirect
github.com/blevesearch/zapx/v12 v12.3.9 // indirect
github.com/blevesearch/zapx/v13 v13.3.9 // indirect
github.com/blevesearch/zapx/v14 v14.3.9 // indirect
github.com/blevesearch/zapx/v15 v15.3.12 // indirect
github.com/bluele/gcache v0.0.2 // indirect
github.com/bmizerany/pat v0.0.0-20210406213842-e4b6760bdd6f // indirect
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect

go.sum (36 lines changed)

@@ -669,8 +669,8 @@ github.com/ProtonMail/go-crypto v0.0.0-20220930113650-c6815a8c17ad h1:QeeqI2zxxg
github.com/ProtonMail/go-crypto v0.0.0-20220930113650-c6815a8c17ad/go.mod h1:UBYPn8k0D56RtnR8RFQMjmh4KrZzWJ5o7Z9SYjossQ8=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo=
github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA=
github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY=
github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/acomagu/bufpipe v1.0.3 h1:fxAGrHZTgQ9w5QqVItgzwj235/uYZYgbXitB+dLupOk=
@@ -746,8 +746,8 @@ github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edY
github.com/bits-and-blooms/bitset v1.2.1 h1:M+/hrU9xlMp7t4TyTDQW97d3tRPVuKFC6zBEK16QnXY=
github.com/bits-and-blooms/bitset v1.2.1/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blevesearch/bleve/v2 v2.3.7 h1:nIfIrhv28tvgBpbVF8Dq7/U1zW/YiwSqg/PBgE3x8bo=
github.com/blevesearch/bleve/v2 v2.3.7/go.mod h1:2tToYD6mDeseIA13jcZiEEqYrVLg6xdk0v6+F7dWquU=
github.com/blevesearch/bleve/v2 v2.3.9 h1:pUMvK0mxAexqasZcVj8lazmWnEW5XiV0tASIqANiNTQ=
github.com/blevesearch/bleve/v2 v2.3.9/go.mod h1:1PibElcjlQMQHF9uS9mRv58ODQgj4pCWHA1Wfd+qagU=
github.com/blevesearch/bleve_index_api v1.0.5 h1:Lc986kpC4Z0/n1g3gg8ul7H+lxgOQPcXb9SxvQGu+tw=
github.com/blevesearch/bleve_index_api v1.0.5/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms=
github.com/blevesearch/geo v0.1.17 h1:AguzI6/5mHXapzB0gE9IKWo+wWPHZmXZoscHcjFgAFA=
@@ -758,26 +758,26 @@ github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZG
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.1.4 h1:LmGmo5twU3gV+natJbKmOktS9eMhokPGKWuR+jX84vk=
github.com/blevesearch/scorch_segment_api/v2 v2.1.4/go.mod h1:PgVnbbg/t1UkgezPDu8EHLi1BHQ17xUwsFdU6NnOYS0=
github.com/blevesearch/scorch_segment_api/v2 v2.1.5 h1:1g713kpCQZ8u4a3stRGBfrwVOuGRnmxOVU5MQkUPrHU=
github.com/blevesearch/scorch_segment_api/v2 v2.1.5/go.mod h1:f2nOkKS1HcjgIWZgDAErgBdxmr2eyt0Kn7IY+FU1Xe4=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
github.com/blevesearch/vellum v1.0.9 h1:PL+NWVk3dDGPCV0hoDu9XLLJgqU4E5s/dOeEJByQ2uQ=
github.com/blevesearch/vellum v1.0.9/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k=
github.com/blevesearch/zapx/v11 v11.3.7 h1:Y6yIAF/DVPiqZUA/jNgSLXmqewfzwHzuwfKyfdG+Xaw=
github.com/blevesearch/zapx/v11 v11.3.7/go.mod h1:Xk9Z69AoAWIOvWudNDMlxJDqSYGf90LS0EfnaAIvXCA=
github.com/blevesearch/zapx/v12 v12.3.7 h1:DfQ6rsmZfEK4PzzJJRXjiM6AObG02+HWvprlXQ1Y7eI=
github.com/blevesearch/zapx/v12 v12.3.7/go.mod h1:SgEtYIBGvM0mgIBn2/tQE/5SdrPXaJUaT/kVqpAPxm0=
github.com/blevesearch/zapx/v13 v13.3.7 h1:igIQg5eKmjw168I7av0Vtwedf7kHnQro/M+ubM4d2l8=
github.com/blevesearch/zapx/v13 v13.3.7/go.mod h1:yyrB4kJ0OT75UPZwT/zS+Ru0/jYKorCOOSY5dBzAy+s=
github.com/blevesearch/zapx/v14 v14.3.7 h1:gfe+fbWslDWP/evHLtp/GOvmNM3sw1BbqD7LhycBX20=
github.com/blevesearch/zapx/v14 v14.3.7/go.mod h1:9J/RbOkqZ1KSjmkOes03AkETX7hrXT0sFMpWH4ewC4w=
github.com/blevesearch/zapx/v15 v15.3.9 h1:/s9zqKxFaZKQTTcMO2b/Tup0ch5MSztlvw+frVDfIBk=
github.com/blevesearch/zapx/v15 v15.3.9/go.mod h1:m7Y6m8soYUvS7MjN9eKlz1xrLCcmqfFadmu7GhWIrLY=
github.com/blevesearch/vellum v1.0.10 h1:HGPJDT2bTva12hrHepVT3rOyIKFFF4t7Gf6yMxyMIPI=
github.com/blevesearch/vellum v1.0.10/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k=
github.com/blevesearch/zapx/v11 v11.3.9 h1:y3ijS4h4MJdmQ07MHASxat4owAixreK2xdo76w9ncrw=
github.com/blevesearch/zapx/v11 v11.3.9/go.mod h1:jcAYnQwlr+LqD2vLjDWjWiZDXDXGFqPbpPDRTd3XmS4=
github.com/blevesearch/zapx/v12 v12.3.9 h1:MXGLlZ03oxXH3DMJTZaBaRj2xb6t4wQVZeZK/wu1M6w=
github.com/blevesearch/zapx/v12 v12.3.9/go.mod h1:QXCMwmOkdLnMDgTN1P4CcuX5F851iUOtOwXbw0HMBYs=
github.com/blevesearch/zapx/v13 v13.3.9 h1:+VAz9V0VmllHXlZV4DCvfYj0nqaZHgF3MeEHwOyRBwQ=
github.com/blevesearch/zapx/v13 v13.3.9/go.mod h1:s+WjNp4WSDtrBVBpa37DUOd7S/Gr/jTZ7ST/MbCVj/0=
github.com/blevesearch/zapx/v14 v14.3.9 h1:wuqxATgsTCNHM9xsOFOeFp8H2heZ/gMX/tsl9lRK8U4=
github.com/blevesearch/zapx/v14 v14.3.9/go.mod h1:MWZ4v8AzFBRurhDzkLvokFW8ljcq9Evm27mkWe8OGbM=
github.com/blevesearch/zapx/v15 v15.3.12 h1:w/kU9aHyfMDEdwHGZzCiakC3HZ9z5gYlXaALDC4Dct8=
github.com/blevesearch/zapx/v15 v15.3.12/go.mod h1:tx53gDJS/7Oa3Je820cmVurqCuJ4dqdAy1kiDMV/IUo=
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=


@@ -1,32 +0,0 @@
language: go
sudo: false
install:
- go get -t github.com/RoaringBitmap/roaring
- go get -t golang.org/x/tools/cmd/cover
- go get -t github.com/mattn/goveralls
- go get -t github.com/mschoch/smat
notifications:
email: false
go:
- "1.13.x"
- "1.14.x"
- tip
# whitelist
branches:
only:
- master
script:
- goveralls -v -service travis-ci -ignore rle16_gen.go,rle_gen.go,rle.go || go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=arm64 go build
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build
matrix:
allow_failures:
- go: tip


@@ -1,4 +1,4 @@
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
roaring [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring/roaring64) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
![Go-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-CI/badge.svg)
![Go-ARM-CI](https://github.com/RoaringBitmap/roaring/workflows/Go-ARM-CI/badge.svg)
@@ -7,10 +7,8 @@ roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https
This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
[lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/
@@ -18,7 +16,7 @@ Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene
[druid]: https://druid.apache.org/
[spark]: https://spark.apache.org/
[opensearchserver]: http://www.opensearchserver.com
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
[anacrolix/torrent]: https://github.com/anacrolix/torrent
[whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home
[pilosa]: https://www.pilosa.com/
[kylin]: http://kylin.apache.org/
@@ -32,7 +30,7 @@ Roaring bitmaps are found to work well in many important applications:
The ``roaring`` Go library is used by
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent)
* [anacrolix/torrent]
* [runv](https://github.com/hyperhq/runv)
* [InfluxDB](https://www.influxdata.com)
* [Pilosa](https://www.pilosa.com/)
@@ -42,6 +40,7 @@ The ``roaring`` Go library is used by
* [SourceGraph](https://github.com/sourcegraph/sourcegraph)
* [M3](https://github.com/m3db/m3)
* [trident](https://github.com/NetApp/trident)
* [Husky](https://www.datadoghq.com/blog/engineering/introducing-husky/)
This library is used in production in several systems and is part of the [Awesome Go collection](https://awesome-go.com).
@@ -148,10 +147,8 @@ formats like WAH, EWAH, Concise... Maybe surprisingly, Roaring also generally of
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
Better bitmap performance with Roaring bitmaps,
Software: Practice and Experience 46 (5), 2016.
http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549
Software: Practice and Experience 46 (5), 2016.[arXiv:1402.6407](http://arxiv.org/abs/1402.6407) This paper used data from http://lemire.me/data/realroaring2014.html
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. [arXiv:1603.06549](http://arxiv.org/abs/1603.06549)
### Dependencies
@@ -170,6 +167,15 @@ Note that the smat library requires Go 1.6 or better.
- go get -t github.com/RoaringBitmap/roaring
### Instructions for contributors
Using bash or other common shells:
```
$ git clone git@github.com:RoaringBitmap/roaring.git
$ export GO111MODULE=on
$ go mod tidy
$ go test -v
```
### Example
@@ -325,7 +331,7 @@ Only the 32-bit roaring format is standard and cross-operable between Java, C++,
### Documentation
Current documentation is available at http://godoc.org/github.com/RoaringBitmap/roaring and http://godoc.org/github.com/RoaringBitmap/roaring64
Current documentation is available at https://pkg.go.dev/github.com/RoaringBitmap/roaring and https://pkg.go.dev/github.com/RoaringBitmap/roaring/roaring64
### Goroutine safety


@@ -1007,16 +1007,42 @@ func (ac *arrayContainer) containerType() contype {
return arrayContype
}
func (ac *arrayContainer) addOffset(x uint16) []container {
low := &arrayContainer{}
high := &arrayContainer{}
func (ac *arrayContainer) addOffset(x uint16) (container, container) {
var low, high *arrayContainer
if len(ac.content) == 0 {
return nil, nil
}
if y := uint32(ac.content[0]) + uint32(x); highbits(y) == 0 {
// Some elements will fall into low part, allocate a container.
// Checking the first one is enough because they are ordered.
low = &arrayContainer{}
}
if y := uint32(ac.content[len(ac.content)-1]) + uint32(x); highbits(y) > 0 {
// Some elements will fall into high part, allocate a container.
// Checking the last one is enough because they are ordered.
high = &arrayContainer{}
}
for _, val := range ac.content {
y := uint32(val) + uint32(x)
if highbits(y) > 0 {
// OK, if high == nil then highbits(y) == 0 for all y.
high.content = append(high.content, lowbits(y))
} else {
// OK, if low == nil then highbits(y) > 0 for all y.
low.content = append(low.content, lowbits(y))
}
}
return []container{low, high}
// Ensure proper nil interface.
if low == nil {
return nil, high
}
if high == nil {
return low, nil
}
return low, high
}
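Why checking only the first and last elements suffices: the content is sorted and every value is shifted by the same x, so the carry into the high 16 bits is monotone. A hedged standalone illustration (not library code):

```go
x := uint16(0xFFF0)
for _, v := range []uint16{0x0005, 0x0020} {
	y := uint32(v) + uint32(x)
	fmt.Printf("%04x -> high=%d, low=%04x\n", v, y>>16, uint16(y))
}
// 0005 -> high=0, low=fff5  (lands in the low container)
// 0020 -> high=1, low=0010  (carries into the high container)
```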


@@ -350,7 +350,6 @@ func (bc *bitmapContainer) getCardinality() int {
return bc.cardinality
}
func (bc *bitmapContainer) isEmpty() bool {
return bc.cardinality == 0
}
@@ -1125,15 +1124,20 @@ func (bc *bitmapContainer) containerType() contype {
return bitmapContype
}
func (bc *bitmapContainer) addOffset(x uint16) []container {
low := newBitmapContainer()
high := newBitmapContainer()
func (bc *bitmapContainer) addOffset(x uint16) (container, container) {
var low, high *bitmapContainer
if bc.cardinality == 0 {
return nil, nil
}
b := uint32(x) >> 6
i := uint32(x) % 64
end := uint32(1024) - b
low = newBitmapContainer()
if i == 0 {
copy(low.bitmap[b:], bc.bitmap[:end])
copy(high.bitmap[:b], bc.bitmap[end:])
} else {
low.bitmap[b] = bc.bitmap[0] << i
for k := uint32(1); k < end; k++ {
@@ -1141,6 +1145,26 @@ func (bc *bitmapContainer) addOffset(x uint16) []container {
newval |= bc.bitmap[k-1] >> (64 - i)
low.bitmap[b+k] = newval
}
}
low.computeCardinality()
if low.cardinality == bc.cardinality {
// All elements from bc ended up in low, meaning high will be empty.
return low, nil
}
if low.cardinality == 0 {
// low is empty, let's reuse the container for high.
high = low
low = nil
} else {
// None of the containers will be empty, so allocate both.
high = newBitmapContainer()
}
if i == 0 {
copy(high.bitmap[:b], bc.bitmap[end:])
} else {
for k := end; k < 1024; k++ {
newval := bc.bitmap[k] << i
newval |= bc.bitmap[k-1] >> (64 - i)
@@ -1148,7 +1172,12 @@ func (bc *bitmapContainer) addOffset(x uint16) []container {
}
high.bitmap[b] = bc.bitmap[1023] >> (64 - i)
}
low.computeCardinality()
high.computeCardinality()
return []container{low, high}
// Ensure proper nil interface.
if low == nil {
return nil, high
}
return low, high
}


@@ -1,4 +1,6 @@
//go:build go1.9
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints


@@ -1,3 +1,4 @@
//go:build !go1.9
// +build !go1.9
package roaring


@@ -1,4 +1,6 @@
//go:build go1.9
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints


@@ -1,3 +1,4 @@
//go:build !go1.9
// +build !go1.9
package roaring


@@ -121,6 +121,10 @@ func (x1 *Bitmap) repairAfterLazy() {
// FastAnd computes the intersection between many bitmaps quickly
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
// of manually calling "And" many times.
//
// Performance hint: if you have a mix of very large and tiny bitmaps,
// it may be beneficial performance-wise to put a tiny bitmap
// in first position.
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
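A hedged usage sketch of that hint:

```go
tiny := roaring.BitmapOf(7)
huge := roaring.New()
huge.AddRange(0, 1_000_000)
// Lead with the tiny bitmap so the intersection narrows down early.
result := roaring.FastAnd(tiny, huge)
fmt.Println(result.ToArray()) // [7]
```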


@@ -1,4 +1,6 @@
//go:build go1.9
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints


@@ -1,3 +1,4 @@
//go:build amd64 && !appengine && !go1.9
// +build amd64,!appengine,!go1.9
package roaring


@@ -1,3 +1,4 @@
//go:build !go1.9
// +build !go1.9
package roaring


@@ -1,3 +1,4 @@
//go:build !amd64 || appengine || go1.9
// +build !amd64 appengine go1.9
package roaring


@@ -53,6 +53,59 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
return rb.highlowcontainer.toBytes()
}
// Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for
// using bitmaps as elements in hash sets or as keys in hash maps, as well as
// for generally quicker comparisons.
// The implementation is biased towards efficiency on little-endian machines, so
// expect some extra CPU cycles and memory to be used if your machine is big-endian.
// Likewise, don't use this to verify integrity unless you're certain you'll load
// the bitmap on a machine with the same endianness used to create it.
func (rb *Bitmap) Checksum() uint64 {
const (
offset = 14695981039346656037
prime = 1099511628211
)
var bytes []byte
hash := uint64(offset)
bytes = uint16SliceAsByteSlice(rb.highlowcontainer.keys)
for _, b := range bytes {
hash ^= uint64(b)
hash *= prime
}
for _, c := range rb.highlowcontainer.containers {
// 0 separator
hash ^= 0
hash *= prime
switch c := c.(type) {
case *bitmapContainer:
bytes = uint64SliceAsByteSlice(c.bitmap)
case *arrayContainer:
bytes = uint16SliceAsByteSlice(c.content)
case *runContainer16:
bytes = interval16SliceAsByteSlice(c.iv)
default:
panic("invalid container type")
}
if len(bytes) == 0 {
panic("empty containers are not supported")
}
for _, b := range bytes {
hash ^= uint64(b)
hash *= prime
}
}
return hash
}
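A hedged sketch of the advertised use, bitmaps as hash-map keys for deduplication; since any hash can collide, an Equals check still backs it (rb is any *roaring.Bitmap):

```go
seen := make(map[uint64][]*roaring.Bitmap)
key := rb.Checksum()
dup := false
for _, prev := range seen[key] {
	if prev.Equals(rb) { // same checksum does not guarantee equality
		dup = true
		break
	}
}
if !dup {
	seen[key] = append(seen[key], rb)
}
```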
// ReadFrom reads a serialized version of this bitmap from stream.
// The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here:
@@ -218,6 +271,14 @@ type intIterator struct {
hs uint32
iter shortPeekable
highlowcontainer *roaringArray
// These embedded iterators, one per container type, help reduce load on the GC.
// This way, instead of making up to 64k allocations per full iteration,
// we get a single allocation and simply reinitialize the appropriate
// iterator, pointing the generic `iter` member at it at each key boundary.
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerShortIterator
}
// HasNext returns true if there are more integers to iterate over
@@ -227,8 +288,19 @@ func (ii *intIterator) HasNext() bool {
func (ii *intIterator) init() {
if ii.highlowcontainer.size() > ii.pos {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getShortIterator()
ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
c := ii.highlowcontainer.getContainerAtIndex(ii.pos)
switch t := c.(type) {
case *arrayContainer:
ii.shortIter = shortIterator{t.content, 0}
ii.iter = &ii.shortIter
case *runContainer16:
ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0}
ii.iter = &ii.runIter
case *bitmapContainer:
ii.bitmapIter = bitmapContainerShortIterator{t, t.NextSetBit(0)}
ii.iter = &ii.bitmapIter
}
}
}
@@ -249,14 +321,14 @@ func (ii *intIterator) PeekNext() uint32 {
// AdvanceIfNeeded advances as long as the next value is smaller than minval
func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
to := minval >> 16
to := minval & 0xffff0000
for ii.HasNext() && (ii.hs>>16) < to {
for ii.HasNext() && ii.hs < to {
ii.pos++
ii.init()
}
if ii.HasNext() && (ii.hs>>16) == to {
if ii.HasNext() && ii.hs == to {
ii.iter.advanceIfNeeded(lowbits(minval))
if !ii.iter.hasNext() {
@@ -266,12 +338,17 @@ func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
}
}
func newIntIterator(a *Bitmap) *intIterator {
p := new(intIterator)
// IntIterator is meant to allow you to iterate through the values of a bitmap; see Initialize(a *Bitmap)
type IntIterator = intIterator
// Initialize configures the existing iterator so that it can iterate through the values of
// the provided bitmap.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
func (p *intIterator) Initialize(a *Bitmap) {
p.pos = 0
p.highlowcontainer = &a.highlowcontainer
p.init()
return p
}
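The exported alias and Initialize make it possible to reuse one iterator value across bitmaps instead of allocating a fresh one per Iterator() call; a hedged sketch (`bitmaps` and `process` are placeholders):

```go
var it roaring.IntIterator
for _, bm := range bitmaps {
	it.Initialize(bm) // reuses the embedded per-container iterators
	for it.HasNext() {
		process(it.Next())
	}
}
```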
type intReverseIterator struct {
@@ -279,6 +356,10 @@ type intReverseIterator struct {
hs uint32
iter shortIterable
highlowcontainer *roaringArray
shortIter reverseIterator
runIter runReverseIterator16
bitmapIter reverseBitmapContainerShortIterator
}
// HasNext returns true if there are more integers to iterate over
@@ -288,8 +369,30 @@ func (ii *intReverseIterator) HasNext() bool {
func (ii *intReverseIterator) init() {
if ii.pos >= 0 {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator()
ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
c := ii.highlowcontainer.getContainerAtIndex(ii.pos)
switch t := c.(type) {
case *arrayContainer:
ii.shortIter = reverseIterator{t.content, len(t.content) - 1}
ii.iter = &ii.shortIter
case *runContainer16:
index := int(len(t.iv)) - 1
pos := uint16(0)
if index >= 0 {
pos = t.iv[index].length
}
ii.runIter = runReverseIterator16{rc: t, curIndex: index, curPosInIndex: pos}
ii.iter = &ii.runIter
case *bitmapContainer:
pos := -1
if t.cardinality > 0 {
pos = int(t.maximum())
}
ii.bitmapIter = reverseBitmapContainerShortIterator{t, pos}
ii.iter = &ii.bitmapIter
}
} else {
ii.iter = nil
}
@@ -305,12 +408,16 @@ func (ii *intReverseIterator) Next() uint32 {
return x
}
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
p := new(intReverseIterator)
// IntReverseIterator is meant to allow you to iterate through the values of a bitmap; see Initialize(a *Bitmap)
type IntReverseIterator = intReverseIterator
// Initialize configures the existing iterator so that it can iterate through the values of
// the provided bitmap.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
func (p *intReverseIterator) Initialize(a *Bitmap) {
p.highlowcontainer = &a.highlowcontainer
p.pos = a.highlowcontainer.size() - 1
p.init()
return p
}
// ManyIntIterable allows you to iterate over the values in a Bitmap
@@ -326,12 +433,27 @@ type manyIntIterator struct {
hs uint32
iter manyIterable
highlowcontainer *roaringArray
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerManyIterator
}
func (ii *manyIntIterator) init() {
if ii.highlowcontainer.size() > ii.pos {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getManyIterator()
ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
c := ii.highlowcontainer.getContainerAtIndex(ii.pos)
switch t := c.(type) {
case *arrayContainer:
ii.shortIter = shortIterator{t.content, 0}
ii.iter = &ii.shortIter
case *runContainer16:
ii.runIter = runIterator16{rc: t, curIndex: 0, curPosInIndex: 0}
ii.iter = &ii.runIter
case *bitmapContainer:
ii.bitmapIter = bitmapContainerManyIterator{t, -1, 0}
ii.iter = &ii.bitmapIter
}
} else {
ii.iter = nil
}
@@ -373,12 +495,17 @@ func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int {
return n
}
func newManyIntIterator(a *Bitmap) *manyIntIterator {
p := new(manyIntIterator)
// ManyIntIterator is meant to allow you to iterate through the values of a bitmap; see Initialize(a *Bitmap)
type ManyIntIterator = manyIntIterator
// Initialize configures the existing iterator so that it can iterate through the values of
// the provided bitmap.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
func (p *manyIntIterator) Initialize(a *Bitmap) {
p.pos = 0
p.highlowcontainer = &a.highlowcontainer
p.init()
return p
}
// String creates a string representation of the Bitmap
@@ -410,7 +537,7 @@ func (rb *Bitmap) String() string {
// Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns
// false, the iteration is halted.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
// There is no guarantee as to what order the values will be iterated
// There is no guarantee as to what order the values will be iterated.
func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
for i := 0; i < rb.highlowcontainer.size(); i++ {
hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
@@ -442,19 +569,25 @@ func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntPeekable {
return newIntIterator(rb)
p := new(intIterator)
p.Initialize(rb)
return p
}
// ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ReverseIterator() IntIterable {
return newIntReverseIterator(rb)
p := new(intReverseIterator)
p.Initialize(rb)
return p
}
// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ManyIterator() ManyIntIterable {
return newManyIntIterator(rb)
p := new(manyIntIterator)
p.Initialize(rb)
return p
}
// Clone creates a copy of the Bitmap
@@ -466,11 +599,17 @@ func (rb *Bitmap) Clone() *Bitmap {
// Minimum gets the smallest value stored in this roaring bitmap; it assumes that the bitmap is not empty
func (rb *Bitmap) Minimum() uint32 {
if len(rb.highlowcontainer.containers) == 0 {
panic("Empty bitmap")
}
return uint32(rb.highlowcontainer.containers[0].minimum()) | (uint32(rb.highlowcontainer.keys[0]) << 16)
}
// Maximum gets the largest value stored in this roaring bitmap; it assumes that the bitmap is not empty
func (rb *Bitmap) Maximum() uint32 {
if len(rb.highlowcontainer.containers) == 0 {
panic("Empty bitmap")
}
lastindex := len(rb.highlowcontainer.containers) - 1
return uint32(rb.highlowcontainer.containers[lastindex].maximum()) | (uint32(rb.highlowcontainer.keys[lastindex]) << 16)
}
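Both methods still require a non-empty bitmap, now failing loudly instead of indexing out of range; a hedged guard sketch:

```go
bm := roaring.BitmapOf(5, 9, 70000)
if !bm.IsEmpty() { // Minimum/Maximum panic on an empty bitmap
	fmt.Println(bm.Minimum(), bm.Maximum()) // 5 70000
}
```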
@@ -514,34 +653,38 @@ func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) {
containerOffset64 = offset >> 16
}
if containerOffset64 >= (1<<16) || containerOffset64 <= -(1<<16) {
return New()
answer = New()
if containerOffset64 >= (1<<16) || containerOffset64 < -(1<<16) {
return answer
}
containerOffset := int32(containerOffset64)
inOffset := (uint16)(offset - containerOffset64*(1<<16))
if inOffset == 0 {
answer = x.Clone()
for pos := 0; pos < answer.highlowcontainer.size(); pos++ {
key := int32(answer.highlowcontainer.getKeyAtIndex(pos))
key += containerOffset
if key >= 0 && key <= MaxUint16 {
answer.highlowcontainer.keys[pos] = uint16(key)
}
}
} else {
answer = New()
for pos := 0; pos < x.highlowcontainer.size(); pos++ {
key := int32(x.highlowcontainer.getKeyAtIndex(pos))
key += containerOffset
c := x.highlowcontainer.getContainerAtIndex(pos)
offsetted := c.addOffset(inOffset)
if key >= 0 && key <= MaxUint16 {
c := x.highlowcontainer.getContainerAtIndex(pos).clone()
answer.highlowcontainer.appendContainer(uint16(key), c, false)
}
}
} else {
for pos := 0; pos < x.highlowcontainer.size(); pos++ {
key := int32(x.highlowcontainer.getKeyAtIndex(pos))
key += containerOffset
if !offsetted[0].isEmpty() && (key >= 0 && key <= MaxUint16) {
if key+1 < 0 || key > MaxUint16 {
continue
}
c := x.highlowcontainer.getContainerAtIndex(pos)
lo, hi := c.addOffset(inOffset)
if lo != nil && key >= 0 {
curSize := answer.highlowcontainer.size()
lastkey := int32(0)
@@ -551,15 +694,15 @@ func AddOffset64(x *Bitmap, offset int64) (answer *Bitmap) {
if curSize > 0 && lastkey == key {
prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
orrseult := prev.ior(offsetted[0])
answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult)
orresult := prev.ior(lo)
answer.highlowcontainer.setContainerAtIndex(curSize-1, orresult)
} else {
answer.highlowcontainer.appendContainer(uint16(key), offsetted[0], false)
answer.highlowcontainer.appendContainer(uint16(key), lo, false)
}
}
if !offsetted[1].isEmpty() && ((key+1) >= 0 && (key+1) <= MaxUint16) {
answer.highlowcontainer.appendContainer(uint16(key+1), offsetted[1], false)
if hi != nil && key+1 <= MaxUint16 {
answer.highlowcontainer.appendContainer(uint16(key+1), hi, false)
}
}
}
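A hedged sketch of the observable behavior: each value is shifted by the offset, and anything landing outside [0, 2^32) is dropped:

```go
bm := roaring.BitmapOf(1, 2, 70000)
shifted := roaring.AddOffset64(bm, -3)
fmt.Println(shifted.ToArray()) // [69997]; 1 and 2 underflowed and were dropped
```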
@@ -693,10 +836,6 @@ func (rb *Bitmap) Rank(x uint32) uint64 {
// the smallest element. Note that this function differs in convention from
// the Rank function which returns 1 on the smallest value.
func (rb *Bitmap) Select(x uint32) (uint32, error) {
if rb.GetCardinality() <= uint64(x) {
return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality())
}
remaining := x
for i := 0; i < rb.highlowcontainer.size(); i++ {
c := rb.highlowcontainer.getContainerAtIndex(i)
@@ -860,6 +999,28 @@ main:
return answer
}
// IntersectsWithInterval checks whether a bitmap 'rb' and a half-open interval '[x,y)' intersect.
func (rb *Bitmap) IntersectsWithInterval(x, y uint64) bool {
if x >= y {
return false
}
if x > MaxUint32 {
return false
}
it := intIterator{}
it.Initialize(rb)
it.AdvanceIfNeeded(uint32(x))
if !it.HasNext() {
return false
}
if uint64(it.Next()) >= y {
return false
}
return true
}
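A hedged usage sketch; note the interval excludes y:

```go
bm := roaring.BitmapOf(10, 20, 30)
fmt.Println(bm.IntersectsWithInterval(15, 25)) // true: 20 lies in [15, 25)
fmt.Println(bm.IntersectsWithInterval(31, 40)) // false: no value at 31 or above
```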
// Intersects checks whether two bitmaps intersect; neither bitmap is modified
func (rb *Bitmap) Intersects(x2 *Bitmap) bool {
pos1 := 0
@@ -1552,27 +1713,3 @@ func (rb *Bitmap) Stats() Statistics {
}
return stats
}
func (rb *Bitmap) checkValidity() bool {
for _, c := range rb.highlowcontainer.containers {
switch c.(type) {
case *arrayContainer:
if c.getCardinality() > arrayDefaultMaxSize {
fmt.Println("Array containers are limited to size ", arrayDefaultMaxSize)
return false
}
case *bitmapContainer:
if c.getCardinality() <= arrayDefaultMaxSize {
fmt.Println("Bitmaps would be more concise as an array!")
return false
}
case *runContainer16:
if c.getSizeInBytes() > minOfInt(bitmapContainerSizeInBytes(), arrayContainerSizeInBytes(c.getCardinality())) {
fmt.Println("Inefficient run container!")
return false
}
}
}
return true
}


@@ -4,12 +4,15 @@ import (
"bytes"
"encoding/binary"
"fmt"
"io"
"github.com/RoaringBitmap/roaring/internal"
"io"
)
type container interface {
addOffset(uint16) []container
// addOffset returns the (low, high) parts of the shifted container.
// Whenever one of them would be empty, nil will be returned instead to
// avoid unnecessary allocations.
addOffset(uint16) (container, container)
clone() container
and(container) container
@@ -551,9 +554,9 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
}
func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) {
var cookie uint32
var cookie uint32
var err error
if len(cookieHeader) > 0 && len(cookieHeader) != 4 {
if len(cookieHeader) > 0 && len(cookieHeader) != 4 {
return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header")
}
if len(cookieHeader) == 4 {
@@ -645,7 +648,7 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
}
nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf),
iv: byteSliceAsInterval16Slice(buf),
}
ra.containers[i] = &nb


@@ -2281,7 +2281,7 @@ func runArrayUnionToRuns(rc *runContainer16, ac *arrayContainer) ([]interval16,
pos2++
}
}
cardMinusOne += previousInterval.length + 1
cardMinusOne += previousInterval.length
target = append(target, previousInterval)
return target, cardMinusOne
@@ -2582,9 +2582,27 @@ func (rc *runContainer16) serializedSizeInBytes() int {
return 2 + len(rc.iv)*4
}
func (rc *runContainer16) addOffset(x uint16) []container {
low := newRunContainer16()
high := newRunContainer16()
func (rc *runContainer16) addOffset(x uint16) (container, container) {
var low, high *runContainer16
if len(rc.iv) == 0 {
return nil, nil
}
first := uint32(rc.iv[0].start) + uint32(x)
if highbits(first) == 0 {
// Some elements will fall into low part, allocate a container.
// Checking the first one is enough because they are ordered.
low = newRunContainer16()
}
last := uint32(rc.iv[len(rc.iv)-1].start)
last += uint32(rc.iv[len(rc.iv)-1].length)
last += uint32(x)
if highbits(last) > 0 {
// Some elements will fall into high part, allocate a container.
// Checking the last one is enough because they are ordered.
high = newRunContainer16()
}
for _, iv := range rc.iv {
val := int(iv.start) + int(x)
@@ -2600,5 +2618,14 @@ func (rc *runContainer16) addOffset(x uint16) []container {
high.iv = append(high.iv, interval16{uint16(val & 0xffff), iv.length})
}
}
return []container{low, high}
// Ensure proper nil interface.
if low == nil {
return nil, high
}
if high == nil {
return low, nil
}
return low, high
}


@@ -1,3 +1,4 @@
//go:build (!amd64 && !386 && !arm && !arm64 && !ppc64le && !mipsle && !mips64le && !mips64p32le && !wasm) || appengine
// +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine
package roaring
@@ -84,6 +85,17 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
return by
}
func interval16SliceAsByteSlice(slice []interval16) []byte {
by := make([]byte, len(slice)*4)
for i, v := range slice {
// Each interval16 occupies 4 bytes: 2 for start, 2 for length.
binary.LittleEndian.PutUint16(by[i*4:], v.start)
binary.LittleEndian.PutUint16(by[i*4+2:], v.length)
}
return by
}
func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")


@@ -1,3 +1,4 @@
//go:build (386 && !appengine) || (amd64 && !appengine) || (arm && !appengine) || (arm64 && !appengine) || (ppc64le && !appengine) || (mipsle && !appengine) || (mips64le && !appengine) || (mips64p32le && !appengine) || (wasm && !appengine)
// +build 386,!appengine amd64,!appengine arm,!appengine arm64,!appengine ppc64le,!appengine mipsle,!appengine mips64le,!appengine mips64p32le,!appengine wasm,!appengine
package roaring
@@ -56,6 +57,22 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
return result
}
func interval16SliceAsByteSlice(slice []interval16) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
// update its capacity and length
header.Len *= 4
header.Cap *= 4
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return result
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
return uint64SliceAsByteSlice(bc.bitmap)
}
@@ -134,7 +151,124 @@ func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
return
}
// FromBuffer creates a bitmap from its serialized version stored in buffer.
func byteSliceAsContainerSlice(slice []byte) (result []container) {
var c container
containerSize := int(unsafe.Sizeof(c))
if len(slice)%containerSize != 0 {
panic("Slice size should be divisible by unsafe.Sizeof(container)")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / containerSize
rHeader.Cap = bHeader.Cap / containerSize
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // still crucial: otherwise the GC could free the backing data
// return result
return
}
func byteSliceAsBitsetSlice(slice []byte) (result []bitmapContainer) {
bitsetSize := int(unsafe.Sizeof(bitmapContainer{}))
if len(slice)%bitsetSize != 0 {
panic("Slice size should be divisible by unsafe.Sizeof(bitmapContainer)")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / bitsetSize
rHeader.Cap = bHeader.Cap / bitsetSize
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // still crucial: otherwise the GC could free the backing data
// return result
return
}
func byteSliceAsArraySlice(slice []byte) (result []arrayContainer) {
arraySize := int(unsafe.Sizeof(arrayContainer{}))
if len(slice)%arraySize != 0 {
panic("Slice size should be divisible by unsafe.Sizeof(arrayContainer)")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / arraySize
rHeader.Cap = bHeader.Cap / arraySize
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // still crucial: otherwise the GC could free the backing data
// return result
return
}
func byteSliceAsRun16Slice(slice []byte) (result []runContainer16) {
run16Size := int(unsafe.Sizeof(runContainer16{}))
if len(slice)%run16Size != 0 {
panic("Slice size should be divisible by unsafe.Sizeof(runContainer16)")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / run16Size
rHeader.Cap = bHeader.Cap / run16Size
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // still crucial: otherwise the GC could free the backing data
// return result
return
}
func byteSliceAsBoolSlice(slice []byte) (result []bool) {
boolSize := int(unsafe.Sizeof(true))
if len(slice)%boolSize != 0 {
panic("Slice size should be divisible by unsafe.Sizeof(bool)")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / boolSize
rHeader.Cap = bHeader.Cap / boolSize
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // still crucial: otherwise the GC could free the backing data
// return result
return
}
// FrozenView creates a static view of a serialized bitmap stored in buf.
// It uses CRoaring's frozen bitmap format.
//
// The format specification is available here:
@@ -198,13 +332,13 @@ func (rb *Bitmap) FrozenView(buf []byte) error {
const FROZEN_COOKIE = 13766
var (
FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE")
FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported")
FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap")
FrozenBitmapOverpopulated = errors.New("too many containers")
FrozenBitmapUnexpectedData = errors.New("spurious data in input")
FrozenBitmapInvalidCookie = errors.New("header does not contain the FROZEN_COOKIE")
FrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported")
FrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap")
FrozenBitmapOverpopulated = errors.New("too many containers")
FrozenBitmapUnexpectedData = errors.New("spurious data in input")
FrozenBitmapInvalidTypecode = errors.New("unrecognized typecode")
FrozenBitmapBufferTooSmall = errors.New("buffer too small")
FrozenBitmapBufferTooSmall = errors.New("buffer too small")
)
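A hedged round-trip sketch of the frozen format; it assumes a GetFrozenSizeInBytes sizing helper is available, and the backing buffer must stay alive and unmodified for as long as the view is used:

```go
src := roaring.BitmapOf(1, 2, 100000)
buf := make([]byte, src.GetFrozenSizeInBytes())
if _, err := src.FreezeTo(buf); err != nil {
	log.Fatal(err)
}
view := roaring.New()
if err := view.FrozenView(buf); err != nil { // zero-copy view over buf
	log.Fatal(err)
}
fmt.Println(view.Contains(100000)) // true
```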
func (ra *roaringArray) frozenView(buf []byte) error {
@@ -213,14 +347,14 @@ func (ra *roaringArray) frozenView(buf []byte) error {
}
headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:])
if headerBE & 0x7fff == FROZEN_COOKIE {
if headerBE&0x7fff == FROZEN_COOKIE {
return FrozenBitmapBigEndian
}
header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
buf = buf[:len(buf)-4]
if header & 0x7fff != FROZEN_COOKIE {
if header&0x7fff != FROZEN_COOKIE {
return FrozenBitmapInvalidCookie
}
@@ -243,29 +377,29 @@ func (ra *roaringArray) frozenView(buf []byte) error {
keys := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
buf = buf[:len(buf)-2*nCont]
nBitmap, nArray, nRun := uint64(0), uint64(0), uint64(0)
nArrayEl, nRunEl := uint64(0), uint64(0)
nBitmap, nArray, nRun := 0, 0, 0
nArrayEl, nRunEl := 0, 0
for i, t := range types {
switch (t) {
switch t {
case 1:
nBitmap++
case 2:
nArray++
nArrayEl += uint64(counts[i])+1
nArrayEl += int(counts[i]) + 1
case 3:
nRun++
nRunEl += uint64(counts[i])
nRunEl += int(counts[i])
default:
return FrozenBitmapInvalidTypecode
}
}
if uint64(len(buf)) < (1 << 13)*nBitmap + 4*nRunEl + 2*nArrayEl {
if len(buf) < (1<<13)*nBitmap+4*nRunEl+2*nArrayEl {
return FrozenBitmapIncomplete
}
bitsetsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBitmap])
buf = buf[(1 << 13)*nBitmap:]
bitsetsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBitmap])
buf = buf[(1<<13)*nBitmap:]
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
buf = buf[4*nRunEl:]
@@ -277,27 +411,44 @@ func (ra *roaringArray) frozenView(buf []byte) error {
return FrozenBitmapUnexpectedData
}
// TODO: maybe arena_alloc all this.
containers := make([]container, nCont)
bitsets := make([]bitmapContainer, nBitmap)
arrays := make([]arrayContainer, nArray)
runs := make([]runContainer16, nRun)
needCOW := make([]bool, nCont)
var c container
containersSz := int(unsafe.Sizeof(c))*nCont
bitsetsSz := int(unsafe.Sizeof(bitmapContainer{}))*nBitmap
arraysSz := int(unsafe.Sizeof(arrayContainer{}))*nArray
runsSz := int(unsafe.Sizeof(runContainer16{}))*nRun
needCOWSz := int(unsafe.Sizeof(true))*nCont
iBitset, iArray, iRun := uint64(0), uint64(0), uint64(0)
bitmapArenaSz := containersSz + bitsetsSz + arraysSz + runsSz + needCOWSz
bitmapArena := make([]byte, bitmapArenaSz)
containers := byteSliceAsContainerSlice(bitmapArena[:containersSz])
bitmapArena = bitmapArena[containersSz:]
bitsets := byteSliceAsBitsetSlice(bitmapArena[:bitsetsSz])
bitmapArena = bitmapArena[bitsetsSz:]
arrays := byteSliceAsArraySlice(bitmapArena[:arraysSz])
bitmapArena = bitmapArena[arraysSz:]
runs := byteSliceAsRun16Slice(bitmapArena[:runsSz])
bitmapArena = bitmapArena[runsSz:]
needCOW := byteSliceAsBoolSlice(bitmapArena)
iBitset, iArray, iRun := 0, 0, 0
for i, t := range types {
needCOW[i] = true
switch (t) {
switch t {
case 1:
containers[i] = &bitsets[iBitset]
bitsets[iBitset].cardinality = int(counts[i])+1
bitsets[iBitset].cardinality = int(counts[i]) + 1
bitsets[iBitset].bitmap = bitsetsArena[:1024]
bitsetsArena = bitsetsArena[1024:]
iBitset++
case 2:
containers[i] = &arrays[iArray]
sz := int(counts[i])+1
sz := int(counts[i]) + 1
arrays[iArray].content = arraysArena[:sz]
arraysArena = arraysArena[sz:]
iArray++
@@ -363,13 +514,13 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) {
}
}
serialSize := 4 + 5*nCont + (1 << 13)*nBits + 4*nRunEl + 2*nArrayEl
serialSize := 4 + 5*nCont + (1<<13)*nBits + 4*nRunEl + 2*nArrayEl
if len(buf) < serialSize {
return 0, FrozenBitmapBufferTooSmall
}
bitsArena := byteSliceAsUint64Slice(buf[:(1 << 13)*nBits])
buf = buf[(1 << 13)*nBits:]
bitsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBits])
buf = buf[(1<<13)*nBits:]
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
buf = buf[4*nRunEl:]
@@ -386,7 +537,7 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) {
types := buf[:nCont]
buf = buf[nCont:]
header := uint32(FROZEN_COOKIE|(nCont << 15))
header := uint32(FROZEN_COOKIE | (nCont << 15))
binary.LittleEndian.PutUint32(buf[:4], header)
copy(keys, bm.highlowcontainer.keys[:])
@@ -396,13 +547,13 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) {
case *bitmapContainer:
copy(bitsArena, v.bitmap)
bitsArena = bitsArena[1024:]
counts[i] = uint16(v.cardinality-1)
counts[i] = uint16(v.cardinality - 1)
types[i] = 1
case *arrayContainer:
copy(arraysArena, v.content)
arraysArena = arraysArena[len(v.content):]
elems := len(v.content)
counts[i] = uint16(elems-1)
counts[i] = uint16(elems - 1)
types[i] = 2
case *runContainer16:
copy(runsArena, v.iv)
@@ -415,3 +566,87 @@ func (bm *Bitmap) FreezeTo(buf []byte) (int, error) {
return serialSize, nil
}
func (bm *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) {
// FIXME: this is a naive version that iterates 4 times through the
// containers and allocates 3*len(containers) bytes; it's quite likely
// it can be done more efficiently.
containers := bm.highlowcontainer.containers
written := 0
for _, c := range containers {
c, ok := c.(*bitmapContainer)
if !ok {
continue
}
n, err := wr.Write(uint64SliceAsByteSlice(c.bitmap))
written += n
if err != nil {
return written, err
}
}
for _, c := range containers {
c, ok := c.(*runContainer16)
if !ok {
continue
}
n, err := wr.Write(interval16SliceAsByteSlice(c.iv))
written += n
if err != nil {
return written, err
}
}
for _, c := range containers {
c, ok := c.(*arrayContainer)
if !ok {
continue
}
n, err := wr.Write(uint16SliceAsByteSlice(c.content))
written += n
if err != nil {
return written, err
}
}
n, err := wr.Write(uint16SliceAsByteSlice(bm.highlowcontainer.keys))
written += n
if err != nil {
return written, err
}
countTypeBuf := make([]byte, 3*len(containers))
counts := byteSliceAsUint16Slice(countTypeBuf[:2*len(containers)])
types := countTypeBuf[2*len(containers):]
for i, c := range containers {
switch c := c.(type) {
case *bitmapContainer:
counts[i] = uint16(c.cardinality - 1)
types[i] = 1
case *arrayContainer:
elems := len(c.content)
counts[i] = uint16(elems - 1)
types[i] = 2
case *runContainer16:
runs := len(c.iv)
counts[i] = uint16(runs)
types[i] = 3
}
}
n, err = wr.Write(countTypeBuf)
written += n
if err != nil {
return written, err
}
header := uint32(FROZEN_COOKIE | (len(containers) << 15))
if err := binary.Write(wr, binary.LittleEndian, header); err != nil {
return written, err
}
written += 4
return written, nil
}
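A hedged usage sketch for the streaming variant added above:

```go
var out bytes.Buffer
if _, err := bm.WriteFrozenTo(&out); err != nil {
	log.Fatal(err)
}
// out.Bytes() now holds the frozen representation, loadable via FrozenView.
```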


@@ -1,3 +1,4 @@
//go:build gofuzz
// +build gofuzz
package roaring


@@ -1,3 +1,4 @@
//go:build arm64 && !gccgo && !appengine
// +build arm64,!gccgo,!appengine
package roaring


@@ -1,3 +1,4 @@
//go:build !arm64 || gccgo || appengine
// +build !arm64 gccgo appengine
package roaring


@@ -1,3 +1,4 @@
//go:build gofuzz
// +build gofuzz
/*
@@ -62,8 +63,8 @@ import (
"fmt"
"sort"
"github.com/mschoch/smat"
"github.com/bits-and-blooms/bitset"
"github.com/mschoch/smat"
)
// fuzz test using state machine driven by byte stream.


@@ -9,7 +9,7 @@
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
A modern text indexing library in go
## Features
@@ -24,8 +24,8 @@ modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
* [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) Scoring
* Boosting
* Search result match highlighting
* Query time boosting
* Search result match highlighting with document fragments
* Aggregations/faceting support:
* Terms Facet
* Numeric Range Facet
@@ -97,6 +97,12 @@ Flags:
Use "bleve [command] --help" for more information about a command.
```
## Text Analysis
Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages:
Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)
## Text Analysis Wizard
[bleveanalysis.couchbase.com](https://bleveanalysis.couchbase.com)


@@ -0,0 +1,174 @@
/*
This code was ported from the Open Search Project
https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
The algorithm itself was created by Mark Harwood
https://github.com/markharwood
*/
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package en
import (
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const PluralStemmerName = "stemmer_en_plural"
type EnglishPluralStemmerFilter struct {
}
func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter {
return &EnglishPluralStemmerFilter{}
}
func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
for _, token := range input {
token.Term = []byte(stem(string(token.Term)))
}
return input
}
func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewEnglishPluralStemmerFilter(), nil
}
func init() {
registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
}
// ----------------------------------------------------------------------------
// Words ending in oes that retain the e when stemmed
var oesExceptions = []string{"shoes", "canoes", "oboes"}
// Words ending in ches that retain the e when stemmed
var chesExceptions = []string{
"cliches",
"avalanches",
"mustaches",
"moustaches",
"quiches",
"headaches",
"heartaches",
"porsches",
"tranches",
"caches",
}
func stem(word string) string {
runes := []rune(strings.ToLower(word))
if len(runes) < 3 || runes[len(runes)-1] != 's' {
return string(runes)
}
switch runes[len(runes)-2] {
case 'u':
fallthrough
case 's':
return string(runes)
case 'e':
// Modified ies->y logic from the original s-stemmer - only works on strings > 4,
// so spies -> spy still, but pies -> pie.
// The original code also special-cased aies and eies for no good reason as far as I can tell
// (no words of consequence - e.g. http://www.thefreedictionary.com/words-that-end-in-aies).
if len(runes) > 4 && runes[len(runes)-3] == 'i' {
runes[len(runes)-3] = 'y'
return string(runes[0 : len(runes)-2])
}
// Suffix rules to remove any dangling "e"
if len(runes) > 3 {
// xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe")
if len(runes) > 4 && runes[len(runes)-3] == 'x' {
return string(runes[0 : len(runes)-2])
}
// oes
if len(runes) > 3 && runes[len(runes)-3] == 'o' {
if isException(runes, oesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
if len(runes) > 4 {
// shes/sses
if runes[len(runes)-4] == 's' && (runes[len(runes)-3] == 'h' || runes[len(runes)-3] == 's') {
return string(runes[0 : len(runes)-2])
}
// ches
if len(runes) > 4 {
if runes[len(runes)-4] == 'c' && runes[len(runes)-3] == 'h' {
if isException(runes, chesExceptions) {
// Only remove the S
return string(runes[0 : len(runes)-1])
}
// Remove the es
return string(runes[0 : len(runes)-2])
}
}
}
}
fallthrough
default:
return string(runes[0 : len(runes)-1])
}
}
func isException(word []rune, exceptions []string) bool {
for _, exception := range exceptions {
exceptionRunes := []rune(exception)
exceptionPos := len(exceptionRunes) - 1
wordPos := len(word) - 1
matched := true
for exceptionPos >= 0 && wordPos >= 0 {
if exceptionRunes[exceptionPos] != word[wordPos] {
matched = false
break
}
exceptionPos--
wordPos--
}
if matched {
return true
}
}
return false
}
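A hedged sketch of the filter in action; the import paths are assumed to be bleve's usual analysis packages, and the expected outputs follow the rules above:

```go
// import "github.com/blevesearch/bleve/v2/analysis"
// import en "github.com/blevesearch/bleve/v2/analysis/lang/en" (assumed path)
ts := analysis.TokenStream{
	&analysis.Token{Term: []byte("boxes")},
	&analysis.Token{Term: []byte("axes")},
	&analysis.Token{Term: []byte("shoes")},
	&analysis.Token{Term: []byte("spies")},
}
for _, tok := range en.NewEnglishPluralStemmerFilter().Filter(ts) {
	fmt.Println(string(tok.Term)) // box, axe, shoe, spy
}
```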


@@ -588,6 +588,10 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()
indexSnapshot := s.currentSnapshot()
if indexSnapshot == nil {
return nil
}
defer func() {
_ = indexSnapshot.Close()
}()


@@ -102,10 +102,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
// this is because there are chances of having a series of loadChunk calls,
// and they have to be added together before sending the bytesRead at this point
// upstream.
if delta := i.iterators[i.segmentOffset].BytesRead() - prevBytesRead; delta > 0 {
i.incrementBytesRead(delta)
bytesRead := i.iterators[i.segmentOffset].BytesRead()
if bytesRead > prevBytesRead {
i.incrementBytesRead(bytesRead - prevBytesRead)
}
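// Why compare before subtracting: the BytesRead counters are unsigned, so
// the old `if delta := cur - prev; delta > 0` form also passed when the
// counter had not advanced, because the subtraction wraps around.
// A hedged illustration, assuming uint64 counters:
//	var prev, cur uint64 = 10, 8
//	fmt.Println(cur - prev) // 18446744073709551614, not -2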
return rv, nil
}
i.segmentOffset++
@@ -204,6 +204,8 @@ func (i *IndexSnapshotTermFieldReader) Close() error {
// reader's bytesRead value
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(i.bytesRead)
}
search.RecordSearchCost(i.ctx, search.AddM, i.bytesRead)
}
if i.snapshot != nil {


@@ -124,16 +124,16 @@ func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields [
}
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
if tempRow.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf.buf)
if err != nil {
return err
}
value, err := i.kvreader.Get(keyBuf[:keySize])
value, err := i.kvreader.Get(keyBuf.buf[:keySize])
if err != nil {
return err
}


@@ -134,18 +134,23 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) {
return
}
var rowBufferPool sync.Pool
func GetRowBuffer() []byte {
if rb, ok := rowBufferPool.Get().([]byte); ok {
return rb
} else {
return make([]byte, RowBufferSize)
}
type rowBuffer struct {
buf []byte
}
func PutRowBuffer(buf []byte) {
rowBufferPool.Put(buf)
var rowBufferPool sync.Pool
func GetRowBuffer() *rowBuffer {
if rb, ok := rowBufferPool.Get().(*rowBuffer); ok {
return rb
} else {
buf := make([]byte, RowBufferSize)
return &rowBuffer{buf: buf}
}
}
func PutRowBuffer(rb *rowBuffer) {
rowBufferPool.Put(rb)
}
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
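
The wrapper type above is presumably there so the pool stores a pointer: putting a bare []byte into a sync.Pool copies the slice header into an interface{} and allocates on every Put (the pattern staticcheck flags as SA6002). A minimal, self-contained sketch of the same pattern, with illustrative names rather than bleve's:

package main

import (
	"fmt"
	"sync"
)

const defaultSize = 1024

type rowBuf struct{ b []byte } // pointer-sized pool entry wrapping the slice

var bufPool = sync.Pool{
	New: func() interface{} { return &rowBuf{b: make([]byte, defaultSize)} },
}

func main() {
	rb := bufPool.Get().(*rowBuf)
	if need := 4096; need > len(rb.b) {
		rb.b = make([]byte, need) // grow in place; the larger buffer stays pooled
	}
	n := copy(rb.b, "some row key")
	fmt.Println(string(rb.b[:n]))
	bufPool.Put(rb) // pooling *rowBuf avoids the per-Put interface allocation
}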
@@ -169,14 +174,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
for _, row := range addRows {
tfr, ok := row.(*TermFrequencyRow)
if ok {
if tfr.DictionaryRowKeySize() > len(rowBuf) {
rowBuf = make([]byte, tfr.DictionaryRowKeySize())
if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
}
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
if err != nil {
return err
}
dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] += 1
}
addKeyBytes += row.KeySize()
addValBytes += row.ValueSize()
@@ -197,14 +202,14 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi
tfr, ok := row.(*TermFrequencyRow)
if ok {
// need to decrement counter
if tfr.DictionaryRowKeySize() > len(rowBuf) {
rowBuf = make([]byte, tfr.DictionaryRowKeySize())
if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
}
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
if err != nil {
return err
}
dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] -= 1
}
deleteKeyBytes += row.KeySize()
}
@@ -541,26 +546,26 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []In
switch row := row.(type) {
case *TermFrequencyRow:
if existingTermKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
if row.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
keySize, _ := row.KeyTo(keyBuf.buf)
if _, ok := existingTermKeys[string(keyBuf.buf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingTermKeys, string(keyBuf[:keySize]))
delete(existingTermKeys, string(keyBuf.buf[:keySize]))
continue
}
}
addRows = append(addRows, row)
case *StoredRow:
if existingStoredKeys != nil {
if row.KeySize() > len(keyBuf) {
keyBuf = make([]byte, row.KeySize())
if row.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, row.KeySize())
}
keySize, _ := row.KeyTo(keyBuf)
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
keySize, _ := row.KeyTo(keyBuf.buf)
if _, ok := existingStoredKeys[string(keyBuf.buf[:keySize])]; ok {
updateRows = append(updateRows, row)
delete(existingStoredKeys, string(keyBuf[:keySize]))
delete(existingStoredKeys, string(keyBuf.buf[:keySize]))
continue
}
}
@@ -1047,23 +1052,23 @@ func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*
}
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
if tempRow.KeySize() > len(keyBuf.buf) {
keyBuf.buf = make([]byte, 2*tempRow.KeySize())
}
defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf.buf)
if err != nil {
return nil, err
}
value, err := kvreader.Get(keyBuf[:keySize])
value, err := kvreader.Get(keyBuf.buf[:keySize])
if err != nil {
return nil, err
}
if value == nil {
return nil, nil
}
backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value)
backIndexRow, err := NewBackIndexRowKV(keyBuf.buf[:keySize], value)
if err != nil {
return nil, err
}


@@ -474,9 +474,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
// accounted by invoking this callback when the TFR is closed.
// 2. the docvalues portion (accounted in collector) and the retrieval
// of stored fields bytes (by LoadAndHighlightFields)
var totalBytesRead uint64
var totalSearchCost uint64
sendBytesRead := func(bytesRead uint64) {
totalBytesRead += bytesRead
totalSearchCost += bytesRead
}
ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey,
@@ -495,11 +495,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
err = serr
}
if sr != nil {
sr.BytesRead = totalBytesRead
sr.Cost = totalSearchCost
}
if sr, ok := indexReader.(*scorch.IndexSnapshot); ok {
sr.UpdateIOStats(totalBytesRead)
sr.UpdateIOStats(totalSearchCost)
}
search.RecordSearchCost(ctx, search.DoneM, 0)
}()
if req.Facets != nil {
@@ -574,6 +576,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
var storedFieldsCost uint64
for _, hit := range hits {
if i.name != "" {
hit.Index = i.name
@@ -582,9 +585,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if err != nil {
return nil, err
}
totalBytesRead += storedFieldsBytes
storedFieldsCost += storedFieldsBytes
}
totalSearchCost += storedFieldsCost
search.RecordSearchCost(ctx, search.AddM, storedFieldsCost)
atomic.AddUint64(&i.stats.searches, 1)
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))


@@ -140,11 +140,11 @@ func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
return nil
}
// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
// documentMappingForPath returns both the EXACT match and the closest match
// for a sub document or an explicitly mapped field; the closest match is the
// most specific document mapping matching a leading part of the provided path.
func (dm *DocumentMapping) documentMappingForPath(path string) (
*DocumentMapping, *DocumentMapping) {
pathElements := decodePath(path)
current := dm
OUTER:
@@ -165,27 +165,9 @@ OUTER:
}
}
return nil
return nil, current
}
return current
}
// closestDocMapping finds the most specific document mapping that matches
// part of the provided path
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping {
pathElements := decodePath(path)
current := dm
OUTER:
for _, pathElement := range pathElements {
for name, subDocMapping := range current.Properties {
if name == pathElement {
current = subDocMapping
continue OUTER
}
}
break
}
return current
return current, current
}
// NewDocumentMapping returns a new document mapping
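
A hedged migration sketch for callers of the merged API; the path string is illustrative, and the code assumes it lives in the mapping package, since documentMappingForPath is unexported:

// before this bump, two traversals:
//   sub := dm.documentMappingForPath("author.name")
//   closest := dm.closestDocMapping("author.name")
// after, a single traversal returns both results:
func resolveMapping(dm *DocumentMapping) {
	sub, closest := dm.documentMappingForPath("author.name")
	if sub == nil {
		// no exact mapping; closest is the most specific ancestor mapping
		_ = closest
	}
}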
@@ -408,8 +390,7 @@ func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString)
subDocMapping, closestDocMapping := dm.documentMappingForPath(pathString)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {


@@ -326,7 +326,7 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext)
// see if the _all field was disabled
allMapping := docMapping.documentMappingForPath("_all")
allMapping, _ := docMapping.documentMappingForPath("_all")
if allMapping == nil || allMapping.Enabled {
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, index.IndexField|index.IncludeTermVectors)
doc.AddField(field)
@@ -364,8 +364,9 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
return analyzerName
}
}
// now try the default mapping
pathMapping := im.DefaultMapping.documentMappingForPath(path)
pathMapping, _ := im.DefaultMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != "" {
@@ -377,7 +378,16 @@ func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
// next we will try default analyzers for the path
pathDecoded := decodePath(path)
for _, docMapping := range im.TypeMapping {
rv := docMapping.defaultAnalyzerName(pathDecoded)
if docMapping.Enabled {
rv := docMapping.defaultAnalyzerName(pathDecoded)
if rv != "" {
return rv
}
}
}
// now the default analyzer for the default mapping
if im.DefaultMapping.Enabled {
rv := im.DefaultMapping.defaultAnalyzerName(pathDecoded)
if rv != "" {
return rv
}
@@ -411,7 +421,7 @@ func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {
// first we look for explicit mapping on the field
for _, docMapping := range im.TypeMapping {
pathMapping := docMapping.documentMappingForPath(path)
pathMapping, _ := docMapping.documentMappingForPath(path)
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != "" {


@@ -225,3 +225,28 @@ func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQu
func NewIPRangeQuery(cidr string) *query.IPRangeQuery {
return query.NewIPRangeQuery(cidr)
}
// NewGeoShapeQuery creates a new Query for matching the given geo shape.
// This method can be used for creating geoshape queries for shape types
// like: point, linestring, polygon, multipoint, multilinestring,
// multipolygon and envelope.
func NewGeoShapeQuery(coordinates [][][][]float64, typ, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeQuery(coordinates, typ, relation)
}
// NewGeoShapeCircleQuery creates a new query for a geoshape that is a
// circle given center point and the radius. Radius formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters" If the unit cannot be determined,
// the entire string is parsed and the unit of meters is assumed.
func NewGeoShapeCircleQuery(coordinates []float64, radius, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeCircleQuery(coordinates, radius, relation)
}
// NewGeometryCollectionQuery creates a new query for the provided
// geometrycollection coordinates and types, which could contain
// multiple geo shapes.
func NewGeometryCollectionQuery(coordinates [][][][][]float64, types []string, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeometryCollectionQuery(coordinates, types, relation)
}
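
A hedged usage sketch for the new constructors; the coordinates and the "intersects" relation are illustrative values, not taken from the diff:

package main

import (
	bleve "github.com/blevesearch/bleve/v2"
)

func buildGeoShapeQueries() error {
	// One polygon, nested per GeoJSON: polygons -> rings -> [lon, lat] points.
	polygon := [][][][]float64{{{
		{-122.45, 37.76},
		{-122.40, 37.76},
		{-122.40, 37.80},
		{-122.45, 37.80},
		{-122.45, 37.76}, // rings are closed
	}}}
	if _, err := bleve.NewGeoShapeQuery(polygon, "polygon", "intersects"); err != nil {
		return err
	}
	// A circle around a center point with an 11km radius
	// ("11km" is one of the unit strings documented above).
	if _, err := bleve.NewGeoShapeCircleQuery([]float64{-122.42, 37.78}, "11km", "intersects"); err != nil {
		return err
	}
	return nil
}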


@@ -485,15 +485,27 @@ func (ss *SearchStatus) Merge(other *SearchStatus) {
// A SearchResult describes the results of executing
// a SearchRequest.
//
// Status - Whether the search was executed on the underlying indexes successfully
// or failed, and the corresponding errors.
// Request - The SearchRequest that was executed.
// Hits - The list of documents that matched the query and their corresponding
// scores, score explanation, location info and so on.
// Total - The total number of documents that matched the query.
// Cost - indicates how expensive the query was with respect to bytes read
// from the mmapped index files.
// MaxScore - The maximum score seen across all document hits seen for this query.
// Took - The time taken to execute the search.
// Facets - The facet results for the search.
type SearchResult struct {
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
BytesRead uint64 `json:"bytesRead"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
Cost uint64 `json:"cost"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
}
func (sr *SearchResult) Size() int {
@@ -566,7 +578,7 @@ func (sr *SearchResult) Merge(other *SearchResult) {
sr.Status.Merge(other.Status)
sr.Hits = append(sr.Hits, other.Hits...)
sr.Total += other.Total
sr.BytesRead += other.BytesRead
sr.Cost += other.Cost
if other.MaxScore > sr.MaxScore {
sr.MaxScore = other.MaxScore
}
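
For downstream code the rename is a one-line migration; a hedged sketch, with an illustrative index handle and query:

package main

import (
	"fmt"

	bleve "github.com/blevesearch/bleve/v2"
)

func logSearchCost(idx bleve.Index) error {
	res, err := idx.Search(bleve.NewSearchRequest(bleve.NewMatchQuery("foo")))
	if err != nil {
		return err
	}
	// was res.BytesRead before this bump; the JSON key moved from
	// "bytesRead" to "cost" as well.
	fmt.Printf("search cost: %d bytes read\n", res.Cost)
	return nil
}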


@@ -200,6 +200,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
hc.needDocIds = hc.needDocIds || loadID
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
next, err = searcher.Next(searchContext)
@@ -208,6 +209,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
if hc.total%CheckDoneEvery == 0 {
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
}
@@ -232,6 +234,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
// total bytes read as part of docValues being read for every hit,
// which must be accounted for by invoking the callback.
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead)
search.RecordSearchCost(ctx, search.AddM, hc.bytesRead)
}
// help finalize/flush the results in case
@@ -367,7 +371,20 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
// SetFacetsBuilder registers a facet builder for this collector
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
hc.facetsBuilder = facetsBuilder
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
fieldsRequiredForFaceting := facetsBuilder.RequiredFields()
// for each of these fields, append only if not already present in hc.neededFields.
for _, field := range fieldsRequiredForFaceting {
found := false
for _, neededField := range hc.neededFields {
if field == neededField {
found = true
break
}
}
if !found {
hc.neededFields = append(hc.neededFields, field)
}
}
}
// finalizeResults starts with the heap containing the final top size+skip


@@ -63,6 +63,8 @@ func (q *GeoBoundingBoxQuery) Searcher(ctx context.Context, i index.IndexReader,
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
if q.BottomRight[0] < q.TopLeft[0] {
// cross date line, rewrite as two parts


@@ -61,6 +61,8 @@ func (q *GeoBoundingPolygonQuery) Searcher(ctx context.Context, i index.IndexRea
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
return searcher.NewGeoBoundedPolygonSearcher(ctx, i, q.Points, field, q.BoostVal.Value(), options)
}


@@ -64,6 +64,8 @@ func (q *GeoDistanceQuery) Searcher(ctx context.Context, i index.IndexReader, m
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
dist, err := geo.ParseDistance(q.Distance)
if err != nil {
return nil, err


@@ -107,6 +107,8 @@ func (q *GeoShapeQuery) Searcher(ctx context.Context, i index.IndexReader,
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)
return searcher.NewGeoShapeSearcher(ctx, i, q.Geometry.Shape, q.Geometry.Relation, field,
q.BoostVal.Value(), options)
}


@@ -77,6 +77,7 @@ func (q *NumericRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Numeric)
return searcher.NewNumericRangeSearcher(ctx, i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}


@@ -27,10 +27,6 @@ var reflectStaticSizeDocumentMatch int
var reflectStaticSizeSearchContext int
var reflectStaticSizeLocation int
const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"
type SearchIOStatsCallbackFunc func(uint64)
func init() {
var dm DocumentMatch
reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size())


@@ -59,7 +59,8 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcher(ctx, indexReader, candidates, field,
@@ -71,13 +72,15 @@ type fuzzyCandidates struct {
bytesRead uint64
}
func reportIOStats(bytesRead uint64, ctx context.Context) {
func reportIOStats(ctx context.Context, bytesRead uint64) {
// The fuzzy, regexp-like queries essentially load a dictionary,
// which potentially incurs a cost that must be accounted for by
// using the callback to report the value.
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
if statsCallbackFn != nil {
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
if ctx != nil {
statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
if statsCallbackFn != nil {
statsCallbackFn.(search.SearchIOStatsCallbackFunc)(bytesRead)
}
}
}


@@ -49,7 +49,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
return nil, err
}
return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(dvReader,
return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader,
field, minLon, minLat, maxLon, maxLat)), nil
}
}
@@ -85,7 +85,7 @@ func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReade
}
// add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher,
buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
buildRectFilter(ctx, dvReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher)
}
@@ -201,7 +201,7 @@ func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, fiel
return isIndexed, closeF, err
}
func buildRectFilter(dvReader index.DocValueReader, field string,
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -222,6 +222,11 @@ func buildRectFilter(dvReader index.DocValueReader, field string,
}
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
for i := range lons {
if geo.BoundingBoxContains(lons[i], lats[i],
minLon, minLat, maxLon, maxLat) {


@@ -66,7 +66,7 @@ func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexRea
// wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(ctx, rectSearcher,
buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
buildDistFilter(ctx, dvReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box
@@ -113,7 +113,7 @@ func boxSearcher(ctx context.Context, indexReader index.IndexReader,
return boxSearcher, nil
}
func buildDistFilter(dvReader index.DocValueReader, field string,
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -134,6 +134,11 @@ func buildDistFilter(dvReader index.DocValueReader, field string,
}
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
for i := range lons {
dist := geo.Haversin(lons[i], lats[i], centerLon, centerLat)
if dist <= maxDist/1000 {


@@ -71,7 +71,7 @@ func NewGeoBoundedPolygonSearcher(ctx context.Context, indexReader index.IndexRe
// wrap it in a filtering searcher that checks for the polygon inclusivity
return NewFilteringSearcher(ctx, rectSearcher,
buildPolygonFilter(dvReader, field, coordinates)), nil
buildPolygonFilter(ctx, dvReader, field, coordinates)), nil
}
const float64EqualityThreshold = 1e-6
@@ -83,7 +83,7 @@ func almostEqual(a, b float64) bool {
// buildPolygonFilter returns true if the point lies inside the
// polygon. It is based on the ray-casting technique described
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(dvReader index.DocValueReader, field string,
func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string,
coordinates []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
// check geo matches against all numeric type terms indexed
@@ -107,6 +107,11 @@ func buildPolygonFilter(dvReader index.DocValueReader, field string,
// Note: this approach works for points which are strictly inside
// the polygon, i.e. it might fail for certain points on the polygon boundaries.
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
nVertices := len(coordinates)
if len(coordinates) < 3 {
return false


@@ -54,7 +54,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
}
return NewFilteringSearcher(ctx, mSearcher,
buildRelationFilterOnShapes(dvReader, field, relation, shape)), nil
buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil
}
@@ -63,7 +63,7 @@ func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, sha
// implementation of doc values.
var termSeparatorSplitSlice = []byte{0xff}
func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string,
relation string, shape index.GeoJSON) FilterFunc {
// this is for accumulating the shape's actual complete value
// spread across multiple docvalue visitor callbacks.
@@ -116,6 +116,11 @@ func buildRelationFilterOnShapes(dvReader index.DocValueReader, field string,
})
if err == nil && found {
bytes := dvReader.BytesRead()
if bytes > 0 {
reportIOStats(ctx, bytes)
search.RecordSearchCost(ctx, search.AddM, bytes)
}
return found
}


@@ -88,7 +88,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
// reporting back the IO stats with respect to the dictionary
// loaded, using the context
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
// cannot return MatchNoneSearcher because of interaction with
@@ -110,7 +111,8 @@ func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcherBytes(ctx, indexReader, terms, field,


@@ -102,7 +102,8 @@ func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, patte
}
if ctx != nil {
reportIOStats(dictBytesRead, ctx)
reportIOStats(ctx, dictBytesRead)
search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
}
return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost,


@@ -39,6 +39,9 @@ type TermSearcher struct {
}
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
if isTermQuery(ctx) {
ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term)
}
return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options)
}
@@ -140,3 +143,14 @@ func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
return nil, nil
}
func isTermQuery(ctx context.Context) bool {
if ctx != nil {
// if the ctx already has a value set for the query type,
// it would've been set at a non-term-searcher level.
_, ok := ctx.Value(search.QueryTypeKey).(string)
return !ok
}
// if the context is nil, then don't set the query type
return false
}


@@ -49,7 +49,8 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p
}
if ctx != nil {
reportIOStats(fieldDict.BytesRead(), ctx)
reportIOStats(ctx, fieldDict.BytesRead())
search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
}
return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)


@@ -84,7 +84,8 @@ func NewTermRangeSearcher(ctx context.Context, indexReader index.IndexReader,
}
if ctx != nil {
reportIOStats(fieldDict.BytesRead(), ctx)
reportIOStats(ctx, fieldDict.BytesRead())
search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
}
return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)


@@ -14,6 +14,8 @@
package search
import "context"
func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap {
rv := locations[0]
@@ -67,3 +69,52 @@ func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch)
return dest
}
const SearchIOStatsCallbackKey = "_search_io_stats_callback_key"
type SearchIOStatsCallbackFunc func(uint64)
// Implementations of SearchIncrementalCostCallbackFn should handle the following messages:
// - add: increment the cost of a search operation
// (which can be specific to a query type as well)
// - abort: the query was aborted, e.g. due to a cancellation of the search's
// context, which can be handled differently as well
// - done: indicates that a search is complete and the tracked cost can be
// handled safely by the implementation.
type SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg,
SearchQueryType, uint64)
type SearchIncrementalCostCallbackMsg uint
type SearchQueryType uint
const (
Term = SearchQueryType(1 << iota)
Geo
Numeric
GenericCost
)
const (
AddM = SearchIncrementalCostCallbackMsg(1 << iota)
AbortM
DoneM
)
const SearchIncrementalCostKey = "_search_incremental_cost_key"
const QueryTypeKey = "_query_type_key"
func RecordSearchCost(ctx context.Context,
msg SearchIncrementalCostCallbackMsg, bytes uint64) {
if ctx != nil {
queryType, ok := ctx.Value(QueryTypeKey).(SearchQueryType)
if !ok {
// covers the cost of factors not specific to a query type, such as
// the doc values and stored fields sections.
queryType = GenericCost
}
aggCallbackFn := ctx.Value(SearchIncrementalCostKey)
if aggCallbackFn != nil {
aggCallbackFn.(SearchIncrementalCostCallbackFn)(msg, queryType, bytes)
}
}
}
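
On the consuming side, an application can place a SearchIncrementalCostCallbackFn into the search context under SearchIncrementalCostKey; RecordSearchCost then feeds it add/abort/done messages tagged with the query type. A hedged sketch — the aggregation policy is illustrative, only the keys and types come from the code above:

package main

import (
	"context"
	"fmt"

	bleve "github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/search"
)

func searchWithCostTracking(idx bleve.Index, req *bleve.SearchRequest) error {
	var total uint64
	costFn := search.SearchIncrementalCostCallbackFn(func(
		msg search.SearchIncrementalCostCallbackMsg,
		typ search.SearchQueryType, bytes uint64) {
		switch msg {
		case search.AddM:
			total += bytes // typ distinguishes Term/Geo/Numeric/GenericCost
		case search.AbortM:
			total = 0 // the search's context was canceled; discard partial cost
		case search.DoneM:
			fmt.Printf("search done, total cost: %d bytes\n", total)
		}
	})
	ctx := context.WithValue(context.Background(),
		search.SearchIncrementalCostKey, costFn)
	_, err := idx.SearchInContext(ctx, req)
	return err
}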


@@ -361,9 +361,6 @@ type builderNode struct {
func (n *builderNode) reset() {
n.final = false
n.finalOutput = 0
for i := range n.trans {
n.trans[i] = emptyTransition
}
n.trans = n.trans[:0]
n.next = nil
}
@@ -393,8 +390,6 @@ func (n *builderNode) equiv(o *builderNode) bool {
return true
}
var emptyTransition = transition{}
type transition struct {
out uint64
addr int


@@ -719,6 +719,10 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
}
err := segment.visitStoredFields(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
fieldID := int(fieldsMap[field]) - 1
if fieldID < 0 {
// no entry for field in fieldsMap
return false
}
vals[fieldID] = append(vals[fieldID], value)
typs[fieldID] = append(typs[fieldID], typ)


@@ -109,7 +109,6 @@ type PostingsList struct {
chunkSize uint64
// atomic access to this variable
bytesRead uint64
}
@@ -303,12 +302,17 @@ func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
return fmt.Errorf("error loading roaring bitmap: %v", err)
}
rv.chunkSize, err = getChunkSize(d.sb.chunkMode,
chunkSize, err := getChunkSize(d.sb.chunkMode,
rv.postings.GetCardinality(), d.sb.numDocs)
if err != nil {
return err
} else if chunkSize == 0 {
return fmt.Errorf("chunk size is zero, chunkMode: %v, numDocs: %v",
d.sb.chunkMode, d.sb.numDocs)
}
rv.chunkSize = chunkSize
return nil
}
@@ -344,7 +348,6 @@ type PostingsIterator struct {
includeFreqNorm bool
includeLocs bool
// atomic access to this variable
bytesRead uint64
}


@@ -103,7 +103,7 @@ type SegmentBase struct {
fieldDvNames []string // field names cached in fieldDvReaders
size uint64
// atomic access to this variable
// atomic access to these variables
bytesRead uint64
bytesWritten uint64
@@ -319,6 +319,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
// read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
if vellumLen == 0 {
sb.m.Unlock()
return nil, fmt.Errorf("empty dictionary for field: %v", field)
}
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
rv.incrementBytesRead(uint64(read) + vellumLen)
rv.fst, err = vellum.Load(fstBytes)

vendor/modules.txt

@@ -59,7 +59,7 @@ github.com/ProtonMail/go-crypto/openpgp/internal/ecc
github.com/ProtonMail/go-crypto/openpgp/internal/encoding
github.com/ProtonMail/go-crypto/openpgp/packet
github.com/ProtonMail/go-crypto/openpgp/s2k
# github.com/RoaringBitmap/roaring v0.9.4
# github.com/RoaringBitmap/roaring v1.2.3
## explicit; go 1.14
github.com/RoaringBitmap/roaring
github.com/RoaringBitmap/roaring/internal
@@ -155,7 +155,7 @@ github.com/bitly/go-simplejson
# github.com/bits-and-blooms/bitset v1.2.1
## explicit; go 1.14
github.com/bits-and-blooms/bitset
# github.com/blevesearch/bleve/v2 v2.3.7
# github.com/blevesearch/bleve/v2 v2.3.9
## explicit; go 1.19
github.com/blevesearch/bleve/v2
github.com/blevesearch/bleve/v2/analysis
@@ -208,8 +208,8 @@ github.com/blevesearch/gtreap
# github.com/blevesearch/mmap-go v1.0.4
## explicit; go 1.13
github.com/blevesearch/mmap-go
# github.com/blevesearch/scorch_segment_api/v2 v2.1.4
## explicit; go 1.18
# github.com/blevesearch/scorch_segment_api/v2 v2.1.5
## explicit; go 1.19
github.com/blevesearch/scorch_segment_api/v2
# github.com/blevesearch/segment v0.9.1
## explicit; go 1.18
@@ -221,26 +221,26 @@ github.com/blevesearch/snowballstem/english
# github.com/blevesearch/upsidedown_store_api v1.0.2
## explicit; go 1.18
github.com/blevesearch/upsidedown_store_api
# github.com/blevesearch/vellum v1.0.9
# github.com/blevesearch/vellum v1.0.10
## explicit; go 1.18
github.com/blevesearch/vellum
github.com/blevesearch/vellum/levenshtein
github.com/blevesearch/vellum/regexp
github.com/blevesearch/vellum/utf8
# github.com/blevesearch/zapx/v11 v11.3.7
## explicit; go 1.18
# github.com/blevesearch/zapx/v11 v11.3.9
## explicit; go 1.19
github.com/blevesearch/zapx/v11
# github.com/blevesearch/zapx/v12 v12.3.7
## explicit; go 1.18
# github.com/blevesearch/zapx/v12 v12.3.9
## explicit; go 1.19
github.com/blevesearch/zapx/v12
# github.com/blevesearch/zapx/v13 v13.3.7
## explicit; go 1.18
# github.com/blevesearch/zapx/v13 v13.3.9
## explicit; go 1.19
github.com/blevesearch/zapx/v13
# github.com/blevesearch/zapx/v14 v14.3.7
## explicit; go 1.18
# github.com/blevesearch/zapx/v14 v14.3.9
## explicit; go 1.19
github.com/blevesearch/zapx/v14
# github.com/blevesearch/zapx/v15 v15.3.9
## explicit; go 1.18
# github.com/blevesearch/zapx/v15 v15.3.12
## explicit; go 1.19
github.com/blevesearch/zapx/v15
# github.com/bluele/gcache v0.0.2
## explicit; go 1.15