build(deps): bump github.com/blevesearch/bleve/v2 from 2.4.4 to 2.5.0

Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.4.4 to 2.5.0.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.4.4...v2.5.0)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-version: 2.5.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
dependabot[bot]
2025-04-29 08:41:09 +00:00
committed by GitHub
parent 3d9de26f1c
commit 0c84ba3ad2
254 changed files with 17180 additions and 2192 deletions

26
go.mod
View File

@@ -11,7 +11,7 @@ require (
github.com/Nerzal/gocloak/v13 v13.9.0
github.com/bbalet/stopwords v1.0.0
github.com/beevik/etree v1.5.0
github.com/blevesearch/bleve/v2 v2.4.4
github.com/blevesearch/bleve/v2 v2.5.0
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/coreos/go-oidc/v3 v3.14.1
github.com/cs3org/go-cs3apis v0.0.0-20241105092511-3ad35d174fc1
@@ -121,7 +121,7 @@ require (
github.com/Masterminds/sprig v2.22.0+incompatible // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/ProtonMail/go-crypto v1.1.5 // indirect
github.com/RoaringBitmap/roaring v1.9.3 // indirect
github.com/RoaringBitmap/roaring/v2 v2.4.5 // indirect
github.com/agnivade/levenshtein v1.2.1 // indirect
github.com/ajg/form v1.5.1 // indirect
github.com/alexedwards/argon2id v1.0.0 // indirect
@@ -131,24 +131,24 @@ require (
github.com/aws/aws-sdk-go v1.55.6 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bitly/go-simplejson v0.5.0 // indirect
github.com/bits-and-blooms/bitset v1.12.0 // indirect
github.com/blevesearch/bleve_index_api v1.1.12 // indirect
github.com/bits-and-blooms/bitset v1.22.0 // indirect
github.com/blevesearch/bleve_index_api v1.2.7 // indirect
github.com/blevesearch/geo v0.1.20 // indirect
github.com/blevesearch/go-faiss v1.0.24 // indirect
github.com/blevesearch/go-faiss v1.0.25 // indirect
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/gtreap v0.1.1 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.2.16 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.3.9 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
github.com/blevesearch/vellum v1.0.10 // indirect
github.com/blevesearch/zapx/v11 v11.3.10 // indirect
github.com/blevesearch/zapx/v12 v12.3.10 // indirect
github.com/blevesearch/zapx/v13 v13.3.10 // indirect
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
github.com/blevesearch/zapx/v15 v15.3.16 // indirect
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
github.com/blevesearch/vellum v1.1.0 // indirect
github.com/blevesearch/zapx/v11 v11.4.1 // indirect
github.com/blevesearch/zapx/v12 v12.4.1 // indirect
github.com/blevesearch/zapx/v13 v13.4.1 // indirect
github.com/blevesearch/zapx/v14 v14.4.1 // indirect
github.com/blevesearch/zapx/v15 v15.4.1 // indirect
github.com/blevesearch/zapx/v16 v16.2.2 // indirect
github.com/bluele/gcache v0.0.2 // indirect
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect

52
go.sum
View File

@@ -87,8 +87,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE
github.com/OpenDNS/vegadns2client v0.0.0-20180418235048-a3fa4a771d87/go.mod h1:iGLljf5n9GjT6kc0HBvyI1nOKnGQbNB66VzSNbK5iks=
github.com/ProtonMail/go-crypto v1.1.5 h1:eoAQfK2dwL+tFSFpr7TbOaPNUbPiJj4fLYwwGE1FQO4=
github.com/ProtonMail/go-crypto v1.1.5/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE=
github.com/RoaringBitmap/roaring v1.9.3 h1:t4EbC5qQwnisr5PrP9nt0IRhRTb9gMUgQF4t4S2OByM=
github.com/RoaringBitmap/roaring v1.9.3/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
github.com/RoaringBitmap/roaring/v2 v2.4.5 h1:uGrrMreGjvAtTBobc0g5IrW1D5ldxDQYe2JW2gggRdg=
github.com/RoaringBitmap/roaring/v2 v2.4.5/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/hVXDS2dXi7/eUFE0=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
@@ -142,45 +142,46 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y=
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blevesearch/bleve/v2 v2.4.4 h1:RwwLGjUm54SwyyykbrZs4vc1qjzYic4ZnAnY9TwNl60=
github.com/blevesearch/bleve/v2 v2.4.4/go.mod h1:fa2Eo6DP7JR+dMFpQe+WiZXINKSunh7WBtlDGbolKXk=
github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+5kexNy1RXfegY=
github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/bleve/v2 v2.5.0 h1:HzYqBy/5/M9Ul9ESEmXzN/3Jl7YpmWBdHM/+zzv/3k4=
github.com/blevesearch/bleve/v2 v2.5.0/go.mod h1:PcJzTPnEynO15dCf9isxOga7YFRa/cMSsbnRwnszXUk=
github.com/blevesearch/bleve_index_api v1.2.7 h1:c8r9vmbaYQroAMSGag7zq5gEVPiuXrUQDqfnj7uYZSY=
github.com/blevesearch/bleve_index_api v1.2.7/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.24 h1:K79IvKjoKHdi7FdiXEsAhxpMuns0x4fM0BO93bW5jLI=
github.com/blevesearch/go-faiss v1.0.24/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-faiss v1.0.25 h1:lel1rkOUGbT1CJ0YgzKwC7k+XH0XVBHnCVWahdCXk4U=
github.com/blevesearch/go-faiss v1.0.25/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.2.16 h1:uGvKVvG7zvSxCwcm4/ehBa9cCEuZVE+/zvrSl57QUVY=
github.com/blevesearch/scorch_segment_api/v2 v2.2.16/go.mod h1:VF5oHVbIFTu+znY1v30GjSpT5+9YFs9dV2hjvuh34F0=
github.com/blevesearch/scorch_segment_api/v2 v2.3.9 h1:X6nJXnNHl7nasXW+U6y2Ns2Aw8F9STszkYkyBfQ+p0o=
github.com/blevesearch/scorch_segment_api/v2 v2.3.9/go.mod h1:IrzspZlVjhf4X29oJiEhBxEteTqOY9RlYlk1lCmYHr4=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
github.com/blevesearch/vellum v1.0.10 h1:HGPJDT2bTva12hrHepVT3rOyIKFFF4t7Gf6yMxyMIPI=
github.com/blevesearch/vellum v1.0.10/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k=
github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk=
github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ=
github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s=
github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs=
github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8=
github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk=
github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU=
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.16 h1:Ct3rv7FUJPfPk99TI/OofdC+Kpb4IdyfdMH48sb+FmE=
github.com/blevesearch/zapx/v15 v15.3.16/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5YgrzCeK3M1QwvZIpxYhChkXp7/L0RhDYsxXoE=
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI=
github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w=
github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
github.com/blevesearch/zapx/v11 v11.4.1 h1:qFCPlFbsEdwbbckJkysptSQOsHn4s6ZOHL5GMAIAVHA=
github.com/blevesearch/zapx/v11 v11.4.1/go.mod h1:qNOGxIqdPC1MXauJCD9HBG487PxviTUUbmChFOAosGs=
github.com/blevesearch/zapx/v12 v12.4.1 h1:K77bhypII60a4v8mwvav7r4IxWA8qxhNjgF9xGdb9eQ=
github.com/blevesearch/zapx/v12 v12.4.1/go.mod h1:QRPrlPOzAxBNMI0MkgdD+xsTqx65zbuPr3Ko4Re49II=
github.com/blevesearch/zapx/v13 v13.4.1 h1:EnkEMZFUK0lsW/jOJJF2xOcp+W8TjEsyeN5BeAZEYYE=
github.com/blevesearch/zapx/v13 v13.4.1/go.mod h1:e6duBMlCvgbH9rkzNMnUa9hRI9F7ri2BRcHfphcmGn8=
github.com/blevesearch/zapx/v14 v14.4.1 h1:G47kGCshknBZzZAtjcnIAMn3oNx8XBLxp8DMq18ogyE=
github.com/blevesearch/zapx/v14 v14.4.1/go.mod h1:O7sDxiaL2r2PnCXbhh1Bvm7b4sP+jp4unE9DDPWGoms=
github.com/blevesearch/zapx/v15 v15.4.1 h1:B5IoTMUCEzFdc9FSQbhVOxAY+BO17c05866fNruiI7g=
github.com/blevesearch/zapx/v15 v15.4.1/go.mod h1:b/MreHjYeQoLjyY2+UaM0hGZZUajEbE0xhnr1A2/Q6Y=
github.com/blevesearch/zapx/v16 v16.2.2 h1:MifKJVRTEhMTgSlle2bDRTb39BGc9jXFRLPZc6r0Rzk=
github.com/blevesearch/zapx/v16 v16.2.2/go.mod h1:B9Pk4G1CqtErgQV9DyCSA9Lb7WZe4olYfGw7fVDZ4sk=
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
@@ -1663,6 +1664,7 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=

View File

@@ -1,13 +0,0 @@
//go:build go1.9
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countLeadingZeros(x uint64) int {
return bits.LeadingZeros64(x)
}

View File

@@ -1,13 +0,0 @@
//go:build go1.9
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countTrailingZeros(x uint64) int {
return bits.TrailingZeros64(x)
}

View File

@@ -10,7 +10,7 @@
This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Redpanda](https://github.com/redpanda-data/redpanda), [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
[lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/
@@ -163,7 +163,7 @@ they include
- github.com/philhofer/fwd
- github.com/jtolds/gls
Note that the smat library requires Go 1.6 or better.
Note that the smat library requires Go 1.15 or better.
#### Installation
@@ -188,7 +188,7 @@ package main
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/RoaringBitmap/roaring/v2"
"bytes"
)
@@ -249,15 +249,20 @@ consider the following sample of code:
buf := new(bytes.Buffer)
size,err:=rb.WriteTo(buf)
if err != nil {
t.Errorf("Failed writing")
fmt.Println("Failed writing") // return or panic
}
newrb:= New()
size,err=newrb.ReadFrom(buf)
if err != nil {
t.Errorf("Failed reading")
fmt.Println("Failed reading") // return or panic
}
// if buf is an untrusted source, you should validate the result
// (this adds a bit of complexity but it is necessary for security)
if newrb.Validate() != nil {
fmt.Println("Failed validation") // return or panic
}
if ! rb.Equals(newrb) {
t.Errorf("Cannot retrieve serialized version")
fmt.Println("Cannot retrieve serialized version")
}
```
@@ -280,7 +285,7 @@ package main
import (
"fmt"
"github.com/RoaringBitmap/roaring/roaring64"
"github.com/RoaringBitmap/roaring/v2/roaring64"
"bytes"
)
@@ -356,7 +361,7 @@ https://coveralls.io/github/RoaringBitmap/roaring?branch=master
Type
go test -bench Benchmark -run -
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
run the following:
@@ -369,9 +374,8 @@ BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
You can use roaring with gore:
- go get -u github.com/motemen/gore
- go install github.com/x-motemen/gore/cmd/gore@latest
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
- go get github.com/RoaringBitmap/roaring
```go
$ gore

View File

@@ -1,6 +1,7 @@
package roaring
import (
"errors"
"fmt"
)
@@ -8,6 +9,11 @@ type arrayContainer struct {
content []uint16
}
var (
ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
ErrArrayInvalidSize = errors.New("invalid array size")
)
func (ac *arrayContainer) String() string {
s := "{"
for it := ac.getShortIterator(); it.hasNext(); {
@@ -26,8 +32,7 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
_ = x[len(ac.content)-1+i]
_ = ac.content[len(ac.content)-1]
for k := 0; k < len(ac.content); k++ {
x[k+i] =
uint32(ac.content[k]) | mask
x[k+i] = uint32(ac.content[k]) | mask
}
return i + len(ac.content)
}
@@ -60,10 +65,26 @@ func (ac *arrayContainer) minimum() uint16 {
return ac.content[0] // assume not empty
}
func (ac *arrayContainer) safeMinimum() (uint16, error) {
if len(ac.content) == 0 {
return 0, errors.New("empty array")
}
return ac.minimum(), nil
}
func (ac *arrayContainer) maximum() uint16 {
return ac.content[len(ac.content)-1] // assume not empty
}
func (ac *arrayContainer) safeMaximum() (uint16, error) {
if len(ac.content) == 0 {
return 0, errors.New("empty array")
}
return ac.maximum(), nil
}
func (ac *arrayContainer) getSizeInBytes() int {
return ac.getCardinality() * 2
}
@@ -168,7 +189,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure
answer.content = make([]uint16, newCardinality, newCardinality) // a hack for sure
copy(answer.content, ac.content[:startIndex])
outPos := startIndex
@@ -194,11 +215,9 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
}
answer.content = answer.content[:newCardinality]
return answer
}
func (ac *arrayContainer) equals(o container) bool {
srb, ok := o.(*arrayContainer)
if ok {
// Check if the containers are the same object.
@@ -239,8 +258,8 @@ func (ac *arrayContainer) toBitmapContainer() *bitmapContainer {
bc := newBitmapContainer()
bc.loadData(ac)
return bc
}
func (ac *arrayContainer) iadd(x uint16) (wasNew bool) {
// Special case adding to the end of the container.
l := len(ac.content)
@@ -352,7 +371,6 @@ func (ac *arrayContainer) ior(a container) container {
return ac.iorArray(x)
case *bitmapContainer:
return a.(*bitmapContainer).orArray(ac)
//return ac.iorBitmap(x) // note: this does not make sense
case *runContainer16:
if x.isFull() {
return x.clone()
@@ -589,7 +607,6 @@ func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container {
}
ac.content = ac.content[:pos]
return ac
}
func (ac *arrayContainer) xor(a container) container {
@@ -630,7 +647,6 @@ func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
length := exclusiveUnion2by2(value1.content, value2.content, answer.content)
answer.content = answer.content[:length]
return answer
}
func (ac *arrayContainer) andNot(a container) container {
@@ -822,7 +838,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
} else { // no expansion needed
ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
if cardinalityChange < 0 {
for i := startIndex + newValuesInRange; i < newCardinality; i++ {
ac.content[i] = ac.content[i-cardinalityChange]
}
@@ -915,7 +930,6 @@ func (ac *arrayContainer) rank(x uint16) int {
return answer + 1
}
return -answer - 1
}
func (ac *arrayContainer) selectInt(x uint16) int {
@@ -971,6 +985,179 @@ func (ac *arrayContainer) realloc(size int) {
}
}
// previousValue returns either the target if found or the previous smaller present value.
// If the target is out of bounds a -1 is returned.
// Ex: target=4 ac=[2,3,4,6,7] returns 4
// Ex: target=5 ac=[2,3,4,6,7] returns 4
// Ex: target=6 ac=[2,3,4,6,7] returns 6
// Ex: target=8 ac=[2,3,4,6,7] returns 7
// Ex: target=1 ac=[2,3,4,6,7] returns -1
// Ex: target=0 ac=[2,3,4,6,7] returns -1
func (ac *arrayContainer) previousValue(target uint16) int {
result := binarySearchUntil(ac.content, target)
if result.index == len(ac.content) {
return int(ac.maximum())
}
if result.outOfBounds() {
return -1
}
return int(result.value)
}
// previousAbsentValue returns either the target if not found or the next larger missing value.
// If the target is out of bounds a -1 is returned
// Ex: target=4 ac=[1,2,3,4,6,7] returns 0
// Ex: target=5 ac=[1,2,3,4,6,7] returns 5
// Ex: target=6 ac=[1,2,3,4,6,7] returns 5
// Ex: target=8 ac=[1,2,3,4,6,7] returns 8
func (ac *arrayContainer) previousAbsentValue(target uint16) int {
cardinality := len(ac.content)
if cardinality == 0 {
return int(target)
}
if target > ac.maximum() {
return int(target)
}
result := binarySearchPast(ac.content, target)
if result.notFound() {
return int(target)
}
// If the target was found at index 1, then the next value down must be result.value-1
if result.index == 1 {
if ac.minimum() != result.value-1 {
return int(result.value - 1)
}
}
low := -1
high := result.index
// This uses the pigeon-hole principle.
// the if statement compares the difference in indices vs
// the difference in values. Suppose mid = 10 and result.index = 5
// with ac.content[mid] = 100 and target = 10
// then we have 5 slots for values but we need to fit in 90 values
// so some of the values must be missing
for low+1 < high {
midIndex := (high + low) >> 1
indexDifference := result.index - midIndex
valueDifference := target - ac.content[midIndex]
if indexDifference < int(valueDifference) {
low = midIndex
} else {
high = midIndex
}
}
if high == 0 {
return int(ac.minimum()) - 1
}
return int(ac.content[high] - 1)
}
// nextAbsentValue returns either the target if not found or the next larger missing value.
// If the target is out of bounds a -1 is returned
// Ex: target=4 ac=[1,2,3,4,6,7] returns 5
// Ex: target=5 ac=[1,2,3,4,6,7] returns 5
// Ex: target=0 ac=[1,2,3,4,6,7] returns 0
// Ex: target=8 ac=[1,2,3,4,6,7] returns 8
func (ac *arrayContainer) nextAbsentValue(target uint16) int {
cardinality := len(ac.content)
if cardinality == 0 {
return int(target)
}
if target < ac.minimum() {
return int(target)
}
result := binarySearchPast(ac.content, target)
if result.notFound() {
return int(target)
}
if result.index == cardinality-2 {
if ac.maximum() != result.value+1 {
return int(result.value + 1)
}
}
low := result.index
high := len(ac.content)
// This uses the pigeon-hole principle.
// the if statement compares the difference in indices vs
// the difference in values. Suppose mid = 10 and result.index = 5
// with ac.content[mid] = 100 and target = 10
// then we have 5 slots for values but we need to fit in 90 values
// so some of the values must be missing
for low+1 < high {
midIndex := (high + low) >> 1
indexDifference := midIndex - result.index
valueDifference := ac.content[midIndex] - target
if indexDifference < int(valueDifference) {
high = midIndex
} else {
low = midIndex
}
}
if low == cardinality-1 {
return int(ac.content[cardinality-1] + 1)
}
return int(ac.content[low] + 1)
}
// nextValue returns either the target if found or the next larger value.
// if the target is out of bounds a -1 is returned
//
// Ex: target=4 ac=[1,2,3,4,6,7] returns 4
// Ex: target=5 ac=[1,2,3,4,6,7] returns 6
// Ex: target=6 ac=[1,2,3,4,6,7] returns 6
// Ex: target=0 ac=[1,2,3,4,6,7] returns 1
// Ex: target=100 ac=[1,2,3,4,6,7] returns -1
func (ac *arrayContainer) nextValue(target uint16) int {
cardinality := len(ac.content)
if cardinality == 0 {
return -1
}
//if target < ac.minimum() {
// return -1
//}
//if target > ac.maximum() {
// return -1
// }
result := binarySearchUntil(ac.content, target)
if result.exactMatch {
return int(result.value)
}
if !result.exactMatch && result.index == -1 {
return int(ac.content[0])
}
if result.outOfBounds() {
return -1
}
if result.index < len(ac.content)-1 {
return int(ac.content[result.index+1])
}
return -1
}
func newArrayContainer() *arrayContainer {
p := new(arrayContainer)
return p
@@ -1039,15 +1226,12 @@ func (ac *arrayContainer) numberOfRuns() (nr int) {
// convert to run or array *if needed*
func (ac *arrayContainer) toEfficientContainer() container {
numRuns := ac.numberOfRuns()
sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
card := ac.getCardinality()
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return newRunContainer16FromArray(ac)
}
if card <= arrayDefaultMaxSize {
@@ -1099,3 +1283,28 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {
return low, high
}
// validate checks cardinality and sort order of the array container
func (ac *arrayContainer) validate() error {
cardinality := ac.getCardinality()
if cardinality <= 0 {
return ErrArrayInvalidSize
}
if cardinality > arrayDefaultMaxSize {
return ErrArrayInvalidSize
}
previous := ac.content[0]
for i := 1; i < len(ac.content); i++ {
next := ac.content[i]
if previous > next {
return ErrArrayIncorrectSort
}
previous = next
}
return nil
}

View File

@@ -1,7 +1,9 @@
package roaring
import (
"errors"
"fmt"
"math/bits"
"unsafe"
)
@@ -56,6 +58,17 @@ func (bc *bitmapContainer) minimum() uint16 {
return MaxUint16
}
func (bc *bitmapContainer) safeMinimum() (uint16, error) {
if len(bc.bitmap) == 0 {
return 0, errors.New("Empty bitmap")
}
val := bc.minimum()
if val == MaxUint16 {
return 0, errors.New("Empty bitmap")
}
return val, nil
}
// i should be non-zero
func clz(i uint64) int {
n := 1
@@ -94,6 +107,17 @@ func (bc *bitmapContainer) maximum() uint16 {
return uint16(0)
}
func (bc *bitmapContainer) safeMaximum() (uint16, error) {
if len(bc.bitmap) == 0 {
return 0, errors.New("Empty bitmap")
}
val := bc.maximum()
if val == uint16(0) {
return 0, errors.New("Empty bitmap")
}
return val, nil
}
func (bc *bitmapContainer) iterate(cb func(x uint16) bool) bool {
iterator := bitmapContainerShortIterator{bc, bc.NextSetBit(0)}
@@ -116,6 +140,7 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 {
bcsi.i = bcsi.ptr.NextSetBit(uint(bcsi.i) + 1)
return uint16(j)
}
func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
@@ -201,6 +226,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
return n
}
// nextMany64 returns the number of values added to the buffer
func (bcmi *bitmapContainerManyIterator) nextMany64(hs uint64, buf []uint64) int {
n := 0
base := bcmi.base
@@ -237,11 +263,10 @@ func (bc *bitmapContainer) getManyIterator() manyIterable {
}
func (bc *bitmapContainer) getSizeInBytes() int {
return len(bc.bitmap) * 8 // + bcBaseBytes
return len(bc.bitmap) * 8
}
func (bc *bitmapContainer) serializedSizeInBytes() int {
//return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
return len(bc.bitmap) * 8
}
@@ -313,6 +338,7 @@ func (bc *bitmapContainer) iaddReturnMinimized(i uint16) container {
return bc
}
// iadd adds the arg i, returning true if not already present
func (bc *bitmapContainer) iadd(i uint16) bool {
x := int(i)
previous := bc.bitmap[x/64]
@@ -441,7 +467,7 @@ func (bc *bitmapContainer) ior(a container) container {
if bc.isFull() {
return newRunContainer16Range(0, MaxUint16)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}
panic(fmt.Errorf("unsupported container type %T", a))
@@ -516,7 +542,7 @@ func (bc *bitmapContainer) orArray(value2 *arrayContainer) container {
}
func (bc *bitmapContainer) orArrayCardinality(value2 *arrayContainer) int {
answer := 0
answer := bc.getCardinality()
c := value2.getCardinality()
for k := 0; k < c; k++ {
// branchless:
@@ -819,9 +845,8 @@ func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container {
}
ac := newArrayContainerSize(newcardinality)
fillArrayAND(ac.content, bc.bitmap, value2.bitmap)
ac.content = ac.content[:newcardinality] //not sure why i need this
ac.content = ac.content[:newcardinality]
return ac
}
func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool {
@@ -842,7 +867,6 @@ func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool {
}
}
return false
}
func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
@@ -995,7 +1019,7 @@ func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) containe
return bc
}
func (bc *bitmapContainer) contains(i uint16) bool { //testbit
func (bc *bitmapContainer) contains(i uint16) bool { // testbit
x := uint(i)
w := bc.bitmap[x>>6]
mask := uint64(1) << (x & 63)
@@ -1051,7 +1075,7 @@ func (bc *bitmapContainer) toArrayContainer() *arrayContainer {
}
func (bc *bitmapContainer) fillArray(container []uint16) {
//TODO: rewrite in assembly
// TODO: rewrite in assembly
pos := 0
base := 0
for k := 0; k < len(bc.bitmap); k++ {
@@ -1066,6 +1090,7 @@ func (bc *bitmapContainer) fillArray(container []uint16) {
}
}
// NextSetBit returns the next set bit e.g the next int packed into the bitmaparray
func (bc *bitmapContainer) NextSetBit(i uint) int {
var (
x = i / 64
@@ -1088,12 +1113,22 @@ func (bc *bitmapContainer) NextSetBit(i uint) int {
return -1
}
// PrevSetBit returns the previous set bit e.g the previous int packed into the bitmaparray
func (bc *bitmapContainer) PrevSetBit(i int) int {
if i < 0 {
return -1
}
x := i / 64
if x >= len(bc.bitmap) {
return bc.uPrevSetBit(uint(i))
}
func (bc *bitmapContainer) uPrevSetBit(i uint) int {
var (
x = i >> 6
length = uint(len(bc.bitmap))
)
if x >= length {
return -1
}
@@ -1103,12 +1138,16 @@ func (bc *bitmapContainer) PrevSetBit(i int) int {
w = w << uint(63-b)
if w != 0 {
return i - countLeadingZeros(w)
return int(i) - countLeadingZeros(w)
}
orig := x
x--
for ; x >= 0; x-- {
if x > orig {
return -1
}
for ; x < orig; x-- {
if bc.bitmap[x] != 0 {
return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x])
return int((x*64)+63) - countLeadingZeros(bc.bitmap[x])
}
}
return -1
@@ -1141,7 +1180,6 @@ func (bc *bitmapContainer) numberOfRuns() int {
// convert to run or array *if needed*
func (bc *bitmapContainer) toEfficientContainer() container {
numRuns := bc.numberOfRuns()
sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
@@ -1149,7 +1187,7 @@ func (bc *bitmapContainer) toEfficientContainer() container {
card := bc.getCardinality()
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return newRunContainer16FromBitmapContainer(bc)
}
if card <= arrayDefaultMaxSize {
@@ -1159,7 +1197,6 @@ func (bc *bitmapContainer) toEfficientContainer() container {
}
func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
if len(rc.iv) == 1 {
return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last()))
}
@@ -1169,7 +1206,7 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1)
bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}
@@ -1234,3 +1271,171 @@ func (bc *bitmapContainer) addOffset(x uint16) (container, container) {
return low, high
}
// nextValue returns either the `target` if found or the next largest value.
// if the target is out of bounds a -1 is returned
//
// Example :
// Suppose the bitmap container represents the following slice
// [1,2,10,11,100]
// target=0 returns 1
// target=1 returns 1
// target=10 returns 10
// target=90 returns 100
func (bc *bitmapContainer) nextValue(target uint16) int {
if bc.cardinality == 0 {
return -1
}
return bc.NextSetBit(uint(target))
}
// nextAbsentValue returns the next absent value.
// if the target is out of bounds a -1 is returned
func (bc *bitmapContainer) nextAbsentValue(target uint16) int {
if bc.cardinality == 0 {
return -1
}
var (
x = target >> 6
length = uint(len(bc.bitmap))
)
if uint(x) >= length {
return -1
}
w := bc.bitmap[x]
w = w >> uint(target%64)
if w == 0 {
return int(target)
}
// Check if all 1's
// if statement - we skip the if we have all ones [1,1,1,1...1]
if ^w != 0 {
if countTrailingZeros(w) > 0 {
// we have something like [X,Y,Z, 0,0,0]. This means the target bit is zero
return int(target)
}
// other wise something like [X,Y,0,1,1,1..1], where x and y can be either 1 or 0.
trailing := countTrailingOnes(w)
return int(target) + trailing
}
x++
for ; uint(x) < length; x++ {
if bc.bitmap[x] == 0 {
return int(x * 64)
}
if ^bc.bitmap[x] != 0 {
trailing := countTrailingOnes(bc.bitmap[x])
return int(x*64) + trailing
}
}
return -1
}
// previousValue returns either the `target` if found or the previous largest value.
// if the target is out of bounds a -1 is returned
// Example :
// Suppose the bitmap container represents the following slice
// [1,2,10,11,100]
// target=0 returns -1
// target=1 returns -1
// target=2 returns -1
// target=10 returns 9
// target=50 returns 10
// target=100 returns 99
func (bc *bitmapContainer) previousValue(target uint16) int {
if bc.cardinality == 0 {
return -1
}
return bc.uPrevSetBit(uint(target))
}
// previousAbsentValue returns the next absent value.
func (bc *bitmapContainer) previousAbsentValue(target uint16) int {
if bc.cardinality == 0 {
return -1
}
var (
x = target >> 6
length = uint(len(bc.bitmap))
)
if uint(x) >= length {
return -1
}
w := bc.bitmap[x]
shifted := w >> uint(target%64)
if shifted == 0 {
return int(target)
}
// Check if all 1's
// if statement - we skip if we have all ones [1,1,1,1...1] as no value is absent
if ^shifted != 0 {
if countTrailingZeros(shifted) > 0 {
// we have something like shifted=[X,Y,Z,..., 0,0,0]. This means the target bit is zero
return int(target)
}
// The rotate will rotate the target bit into the leading position.
// We know the target bit is not zero because of the countTrailingZero check above
// We then shift the target bit out of the way.
// Assume a structure like an original structure like [X,Y,Z,..., Target, A, B,C...]
// shifted will be [X,Y,Z...Target]
// shiftedRotated will be [A,B,C....]
// If countLeadingZeros > 0 then A is zero, if not at least A is 1 return
// Else count the number of ones's until a 0
shiftedRotated := bits.RotateLeft64(w, int(64-uint(target%64))-1) << 1
leadingZeros := countLeadingZeros(shiftedRotated)
if leadingZeros > 0 {
return int(target) - 1
}
leadingOnes := countLeadingOnes(shiftedRotated)
if leadingOnes > 0 {
return int(target) - leadingOnes - 1
}
}
x++
for ; uint(x) < length; x++ {
if bc.bitmap[x] == 0 {
return int(x * 64)
}
if ^bc.bitmap[x] != 0 {
trailing := countTrailingOnes(bc.bitmap[x])
return int(x*64) + trailing
}
}
return -1
}
// validate checks that the container size is non-negative
func (bc *bitmapContainer) validate() error {
if bc.cardinality < arrayDefaultMaxSize {
return fmt.Errorf("bitmap container size was less than: %d", arrayDefaultMaxSize)
}
if maxCapacity < len(bc.bitmap)*64 {
return fmt.Errorf("bitmap slize size %d exceeded max capacity %d", maxCapacity, len(bc.bitmap)*64)
}
if bc.cardinality > maxCapacity {
return fmt.Errorf("bitmap container size was greater than: %d", maxCapacity)
}
if bc.cardinality != int(popcntSlice(bc.bitmap)) {
return fmt.Errorf("bitmap container size %d did not match underlying slice length: %d", bc.cardinality, int(popcntSlice(bc.bitmap)))
}
return nil
}

19
vendor/github.com/RoaringBitmap/roaring/v2/clz.go generated vendored Normal file
View File

@@ -0,0 +1,19 @@
//go:build go1.9
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
// countLeadingOnes returns the number of leading zeros bits in x; the result is 64 for x == 0.
func countLeadingZeros(x uint64) int {
return bits.LeadingZeros64(x)
}
// countLeadingOnes returns the number of leading ones bits in x; the result is 0 for x == 0.
func countLeadingOnes(x uint64) int {
return bits.LeadingZeros64(^x)
}

21
vendor/github.com/RoaringBitmap/roaring/v2/ctz.go generated vendored Normal file
View File

@@ -0,0 +1,21 @@
//go:build go1.9
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
// countTrailingZeros returns the number of trailing zero bits in x; the result is 64 for x == 0.
func countTrailingZeros(x uint64) int {
return bits.TrailingZeros64(x)
}
// countTrailingOnes returns the number of trailing one bits in x
// The result is 64 for x == 9,223,372,036,854,775,807.
// The result is 0 for x == 0.
func countTrailingOnes(x uint64) int {
return bits.TrailingZeros64(^x)
}

View File

@@ -12,7 +12,7 @@ import (
"io"
"strconv"
"github.com/RoaringBitmap/roaring/internal"
"github.com/RoaringBitmap/roaring/v2/internal"
"github.com/bits-and-blooms/bitset"
)
@@ -26,7 +26,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
buf := new(bytes.Buffer)
_, err := rb.WriteTo(buf)
return base64.StdEncoding.EncodeToString(buf.Bytes()), err
}
// FromBase64 deserializes a bitmap from Base64
@@ -54,10 +53,12 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
return rb.highlowcontainer.toBytes()
}
const wordSize = uint64(64)
const log2WordSize = uint64(6)
const capacity = ^uint64(0)
const bitmapContainerSize = (1 << 16) / 64 // bitmap size in words
const (
wordSize = uint64(64)
log2WordSize = uint64(6)
capacity = ^uint64(0)
bitmapContainerSize = (1 << 16) / 64 // bitmap size in words
)
// DenseSize returns the size of the bitmap when stored as a dense bitmap.
func (rb *Bitmap) DenseSize() uint64 {
@@ -276,14 +277,19 @@ func (rb *Bitmap) Checksum() uint64 {
return hash
}
// FromUnsafeBytes reads a serialized version of this bitmap from the byte buffer without copy.
// FromUnsafeBytes reads a serialized version of this bitmap from the byte buffer without copy
// (for advanced users only, you must be an expert Go programmer!).
// E.g., you can use this method to read a serialized bitmap from a memory-mapped file written out
// with the WriteTo method.
// The format specification is
// https://github.com/RoaringBitmap/RoaringFormatSpec
// It is the caller's responsibility to ensure that the input data is not modified and remains valid for the entire lifetime of this bitmap.
// This method avoids small allocations but holds references to the input data buffer. It is GC-friendly, but it may consume more memory eventually.
// The containers in the resulting bitmap are immutable containers tied to the provided byte array and they rely on
// copy-on-write which means that modifying them creates copies. Thus FromUnsafeBytes is more likely to be appropriate for read-only use cases,
// when the resulting bitmap can be considered immutable.
//
// See also the FromBuffer function.
// See also the FromBuffer function. We recommend benchmarking both functions to determine which one is more suitable for your use case.
// See https://github.com/RoaringBitmap/roaring/pull/395 for more details.
func (rb *Bitmap) FromUnsafeBytes(data []byte, cookieHeader ...byte) (p int64, err error) {
stream := internal.NewByteBuffer(data)
@@ -291,11 +297,13 @@ func (rb *Bitmap) FromUnsafeBytes(data []byte, cookieHeader ...byte) (p int64, e
}
// ReadFrom reads a serialized version of this bitmap from stream.
// E.g., you can use this method to read a serialized bitmap from a file written
// with the WriteTo method.
// The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
// Since io.Reader is regarded as a stream and cannot be read twice.
// So add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom.
// Since io.Reader is regarded as a stream and cannot be read twice,
// we add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom.
// It is not necessary to pass cookieHeader when call roaring.ReadFrom to read the roaring32 data directly.
func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
stream, ok := reader.(internal.ByteInput)
@@ -313,7 +321,18 @@ func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err
return
}
// FromBuffer creates a bitmap from its serialized version stored in buffer
// MustReadFrom calls ReadFrom internally.
// After deserialization Validate will be called.
// If the Bitmap fails to validate, a panic with the validation error will be thrown
func (rb *Bitmap) MustReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
rb.ReadFrom(reader, cookieHeader...)
if err := rb.Validate(); err != nil {
panic(err)
}
return
}
// FromBuffer creates a bitmap from its serialized version stored in buffer (E.g., as written by WriteTo).
//
// The format specification is available here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
@@ -960,7 +979,6 @@ func (rb *Bitmap) CheckedAdd(x uint32) bool {
newac := newArrayContainer()
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
return true
}
// AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add)
@@ -998,7 +1016,6 @@ func (rb *Bitmap) CheckedRemove(x uint32) bool {
return C.getCardinality() < oldcard
}
return false
}
// IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0))
@@ -1088,7 +1105,7 @@ main:
break main
}
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
} else { //s1 > s2
} else { // s1 > s2
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
if pos2 == length2 {
break main
@@ -1187,7 +1204,7 @@ main:
break main
}
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
} else { //s1 > s2
} else { // s1 > s2
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
if pos2 == length2 {
break main
@@ -1256,7 +1273,7 @@ main:
break main
}
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
} else { //s1 > s2
} else { // s1 > s2
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
if pos2 == length2 {
break main
@@ -1396,7 +1413,7 @@ main:
break main
}
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
} else { //s1 > s2
} else { // s1 > s2
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
if pos2 == length2 {
break main
@@ -1584,7 +1601,7 @@ main:
}
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
} else { //s1 > s2
} else { // s1 > s2
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
if pos2 == length2 {
break main
@@ -1632,7 +1649,6 @@ func BitmapOf(dat ...uint32) *Bitmap {
// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
// while uint64(0x100000000) cannot be represented as a 32-bit value.
func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
if rangeEnd > MaxUint32+1 {
panic("rangeEnd > MaxUint32+1")
}
@@ -1869,6 +1885,206 @@ func (rb *Bitmap) CloneCopyOnWriteContainers() {
rb.highlowcontainer.cloneCopyOnWriteContainers()
}
// NextValue returns the next largest value in the bitmap, or -1
// if none is present. This function should not be used inside
// a performance-sensitive loop: prefer iterators if
// performance is a concern.
func (rb *Bitmap) NextValue(target uint32) int64 {
originalKey := highbits(target)
query := lowbits(target)
var nextValue int64
nextValue = -1
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
for containerIndex < rb.highlowcontainer.size() && nextValue == -1 {
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
container := rb.highlowcontainer.getContainer(containerKey)
// if containerKey > orginalKey then we are past the container which mapped to the orignal key
// in that case we can just return the minimum from that container
var responseBit int64
if containerKey > originalKey {
bit, err := container.safeMinimum()
if err == nil {
responseBit = -1
}
responseBit = int64(bit)
} else {
responseBit = int64(container.nextValue(query))
}
if responseBit == -1 {
nextValue = -1
} else {
nextValue = int64(combineLoHi32(uint32(responseBit), uint32(containerKey)))
}
containerIndex++
}
return nextValue
}
// PreviousValue returns the previous largest value in the bitmap, or -1
// if none is present. This function should not be used inside
// a performance-sensitive loop: prefer iterators if
// performance is a concern.
func (rb *Bitmap) PreviousValue(target uint32) int64 {
if rb.IsEmpty() {
return -1
}
originalKey := highbits(uint32(target))
query := lowbits(uint32(target))
var prevValue int64
prevValue = -1
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
if containerIndex == rb.highlowcontainer.size() {
return int64(rb.Maximum())
}
if rb.highlowcontainer.getKeyAtIndex(containerIndex) > originalKey {
// target absent, key of first container after target too high
containerIndex--
}
for containerIndex != -1 && prevValue == -1 {
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
container := rb.highlowcontainer.getContainer(containerKey)
// if containerKey > originalKey then we are past the container which mapped to the original key
// in that case we can just return the minimum from that container
var responseBit int
if containerKey < originalKey {
bit, err := container.safeMaximum()
if err == nil {
responseBit = -1
}
responseBit = int(bit)
} else {
responseBit = container.previousValue(query)
}
if responseBit == -1 {
prevValue = -1
} else {
prevValue = int64(combineLoHi32(uint32(responseBit), uint32(containerKey)))
}
containerIndex--
}
return prevValue
}
// NextAbsentValue returns the next largest missing value in the bitmap, or -1
// if none is present. This function should not be used inside
// a performance-sensitive loop: prefer iterators if
// performance is a concern.
func (rb *Bitmap) NextAbsentValue(target uint32) int64 {
originalKey := highbits(target)
query := lowbits(target)
var nextValue int64
nextValue = -1
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
if containerIndex == rb.highlowcontainer.size() {
// if we are here it means no container found, just return the target
return int64(target)
}
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
keyspace := uint32(containerKey) << 16
if target < keyspace {
// target is less than the start of the keyspace start
// that means target cannot be in the keyspace
return int64(target)
}
container := rb.highlowcontainer.getContainer(containerKey)
nextValue = int64(container.nextAbsentValue(query))
for {
if nextValue != (1 << 16) {
return int64(combineLoHi32(uint32(nextValue), keyspace))
}
if containerIndex == rb.highlowcontainer.size()-1 {
val, err := container.safeMaximum()
if err == nil {
return -1
}
return int64(val) + 1
}
containerIndex++
nextContainerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
if containerKey < nextContainerKey {
// There is a gap between keys
// Just increment the current key and shift to get HoB
return int64(containerKey+1) << 16
}
containerKey = nextContainerKey
container = rb.highlowcontainer.getContainer(containerKey)
nextValue = int64(container.nextAbsentValue(0))
}
}
// PreviousAbsentValue returns the previous largest missing value in the bitmap, or -1
// if none is present. This function should not be used inside
// a performance-sensitive loop: prefer iterators if
// performance is a concern.
func (rb *Bitmap) PreviousAbsentValue(target uint32) int64 {
originalKey := highbits(target)
query := lowbits(target)
var prevValue int64
prevValue = -1
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
if containerIndex == rb.highlowcontainer.size() {
// if we are here it means no container found, just return the target
return int64(target)
}
if containerIndex == -1 {
// if we are here it means no container found, just return the target
return int64(target)
}
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
keyspace := uint32(containerKey) << 16
if target < keyspace {
// target is less than the start of the keyspace start
// that means target cannot be in the keyspace
return int64(target)
}
container := rb.highlowcontainer.getContainer(containerKey)
prevValue = int64(container.previousAbsentValue(query))
for {
if prevValue != -1 {
return int64(combineLoHi32(uint32(prevValue), keyspace))
}
if containerIndex == 0 {
val, err := container.safeMinimum()
if err == nil {
// OR panic, Java panics
return -1
}
return int64(val) - 1
}
containerIndex--
nextContainerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
if nextContainerKey < containerKey-1 {
// There is a gap between keys, eg missing container
// Just decrement the current key and shift to get HoB of the missing container
return (int64(containerKey) << 16) - 1
}
containerKey = nextContainerKey
container = rb.highlowcontainer.getContainer(containerKey)
highestPossible16 := (1 << 16) - 1
prevValue = int64(container.previousAbsentValue(uint16(highestPossible16)))
}
}
// FlipInt calls Flip after casting the parameters (convenience method)
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
return Flip(bm, uint64(rangeStart), uint64(rangeEnd))
@@ -1916,3 +2132,10 @@ func (rb *Bitmap) Stats() Statistics {
}
return stats
}
// Validate checks if the bitmap is internally consistent.
// You may call it after deserialization to check that the bitmap is valid.
// This function returns an error if the bitmap is invalid, nil otherwise.
func (rb *Bitmap) Validate() error {
return rb.highlowcontainer.validate()
}

View File

@@ -3,17 +3,9 @@ package roaring64
import (
"fmt"
"io"
"math/bits"
"math/big"
"runtime"
"sync"
"sync/atomic"
)
const (
// Min64BitSigned - Minimum 64 bit value
Min64BitSigned = -9223372036854775808
// Max64BitSigned - Maximum 64 bit value
Max64BitSigned = 9223372036854775807
)
// BSI is at its simplest is an array of bitmaps that represent an encoded
@@ -32,13 +24,16 @@ type BSI struct {
runOptimized bool
}
// NewBSI constructs a new BSI. Min/Max values are optional. If set to 0
// then the underlying BSI will be automatically sized.
// NewBSI constructs a new BSI. Note that it is your responsibility to ensure that
// the min/max values are set correctly. Queries CompareValue, MinMax, etc. will not
// work correctly if the min/max values are not set correctly.
func NewBSI(maxValue int64, minValue int64) *BSI {
bitsz := bits.Len64(uint64(minValue))
if bits.Len64(uint64(maxValue)) > bitsz {
bitsz = bits.Len64(uint64(maxValue))
bitszmin := big.NewInt(minValue).BitLen() + 1
bitszmax := big.NewInt(maxValue).BitLen() + 1
bitsz := bitszmin
if bitszmax > bitsz {
bitsz = bitszmax
}
ba := make([]Bitmap, bitsz)
return &BSI{bA: ba, MaxValue: maxValue, MinValue: minValue}
@@ -81,41 +76,97 @@ func (b *BSI) GetCardinality() uint64 {
// BitCount returns the number of bits needed to represent values.
func (b *BSI) BitCount() int {
return len(b.bA)
return len(b.bA) - 1 // Exclude sign bit
}
// SetValue sets a value for a given columnID.
func (b *BSI) SetValue(columnID uint64, value int64) {
// IsBigUInt returns the number of bits needed to represent values.
func (b *BSI) isBig() bool {
return len(b.bA) > 64
}
// IsNegative returns true for negative values
func (b *BSI) IsNegative(columnID uint64) bool {
if len(b.bA) == 0 {
return false
}
return b.bA[b.BitCount()].Contains(columnID)
}
// SetBigValue sets a value that exceeds 64 bits
func (b *BSI) SetBigValue(columnID uint64, value *big.Int) {
// If max/min values are set to zero then automatically determine bit array size
if b.MaxValue == 0 && b.MinValue == 0 {
minBits := bits.Len64(uint64(value))
minBits := value.BitLen() + 1
if minBits == 1 {
minBits = 2
}
for len(b.bA) < minBits {
b.bA = append(b.bA, Bitmap{})
}
}
for i := 0; i < b.BitCount(); i++ {
if uint64(value)&(1<<uint64(i)) > 0 {
b.bA[i].Add(columnID)
} else {
for i := b.BitCount(); i >= 0; i-- {
if value.Bit(i) == 0 {
b.bA[i].Remove(columnID)
} else {
b.bA[i].Add(columnID)
}
}
b.eBM.Add(columnID)
}
// SetValue sets a value for a given columnID.
func (b *BSI) SetValue(columnID uint64, value int64) {
b.SetBigValue(columnID, big.NewInt(value))
}
// GetValue gets the value at the column ID. Second param will be false for non-existent values.
func (b *BSI) GetValue(columnID uint64) (value int64, exists bool) {
bv, exists := b.GetBigValue(columnID)
if !exists {
return
}
if !bv.IsInt64() {
if bv.Sign() == -1 {
msg := fmt.Errorf("can't represent a negative %d bit value as an int64", b.BitCount())
panic(msg)
}
if bv.Sign() == 1 {
msg := fmt.Errorf("can't represent a positive %d bit value as an int64", b.BitCount())
panic(msg)
}
}
return bv.Int64(), exists
}
// GetBigValue gets the value at the column ID. Second param will be false for non-existent values.
func (b *BSI) GetBigValue(columnID uint64) (value *big.Int, exists bool) {
exists = b.eBM.Contains(columnID)
if !exists {
return
}
for i := 0; i < b.BitCount(); i++ {
val := big.NewInt(0)
for i := b.BitCount(); i >= 0; i-- {
if b.bA[i].Contains(columnID) {
value |= 1 << i
bigBit := big.NewInt(1)
bigBit.Lsh(bigBit, uint(i))
val.Or(val, bigBit)
}
}
return
if b.IsNegative(columnID) {
val = negativeTwosComplementToInt(val)
}
return val, exists
}
func negativeTwosComplementToInt(val *big.Int) *big.Int {
inverted := new(big.Int).Not(val)
mask := new(big.Int).Lsh(big.NewInt(1), uint(val.BitLen()))
inverted.And(inverted, mask.Sub(mask, big.NewInt(1)))
inverted.Add(inverted, big.NewInt(1))
val.Neg(inverted)
return val
}
type action func(t *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup)
@@ -235,13 +286,15 @@ const (
type task struct {
bsi *BSI
op Operation
valueOrStart int64
end int64
values map[int64]struct{}
valueOrStart *big.Int
end *big.Int
values map[string]struct{}
bits *Bitmap
}
// CompareValue compares value.
// Values should be in the range of the BSI (max, min). If the value is outside the range, the result
// might erroneous. The operation parameter indicates the type of comparison to be made.
// For all operations with the exception of RANGE, the value to be compared is specified by valueOrStart.
// For the RANGE parameter the comparison criteria is >= valueOrStart and <= end.
// The parallelism parameter indicates the number of CPU threads to be applied for processing. A value
@@ -249,6 +302,26 @@ type task struct {
func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int64,
foundSet *Bitmap) *Bitmap {
return b.CompareBigValue(parallelism, op, big.NewInt(valueOrStart), big.NewInt(end), foundSet)
}
// CompareBigValue compares value.
// Values should be in the range of the BSI (max, min). If the value is outside the range, the result
// might erroneous. The operation parameter indicates the type of comparison to be made.
// For all operations with the exception of RANGE, the value to be compared is specified by valueOrStart.
// For the RANGE parameter the comparison criteria is >= valueOrStart and <= end.
// The parallelism parameter indicates the number of CPU threads to be applied for processing. A value
// of zero indicates that all available CPU resources will be potentially utilized.
func (b *BSI) CompareBigValue(parallelism int, op Operation, valueOrStart, end *big.Int,
foundSet *Bitmap) *Bitmap {
if valueOrStart == nil {
valueOrStart = b.MinMaxBig(parallelism, MIN, &b.eBM)
}
if end == nil && op == RANGE {
end = b.MinMaxBig(parallelism, MAX, &b.eBM)
}
comp := &task{bsi: b, op: op, valueOrStart: valueOrStart, end: end}
if foundSet == nil {
return parallelExecutor(parallelism, comp, compareValue, &b.eBM)
@@ -256,6 +329,53 @@ func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int6
return parallelExecutor(parallelism, comp, compareValue, foundSet)
}
// Returns a twos complement value given a value, the return will be bit extended to 'bits' length
// if the value is negative
func twosComplement(num *big.Int, bitCount int) *big.Int {
// Check if the number is negative
isNegative := num.Sign() < 0
// Get the absolute value if negative
abs := new(big.Int).Abs(num)
// Convert to binary string
binStr := abs.Text(2)
// Pad with zeros to the left
if len(binStr) < bitCount {
binStr = fmt.Sprintf("%0*s", bitCount, binStr)
}
// If negative, calculate two's complement
if isNegative {
// Invert bits
inverted := make([]byte, len(binStr))
for i := range binStr {
if binStr[i] == '0' {
inverted[i] = '1'
} else {
inverted[i] = '0'
}
}
// Add 1
carry := byte(1)
for i := len(inverted) - 1; i >= 0; i-- {
inverted[i] += carry
if inverted[i] == '2' {
inverted[i] = '0'
} else {
break
}
}
binStr = string(inverted)
}
bigInt := new(big.Int)
_, _ = bigInt.SetString(binStr, 2)
return bigInt
}
func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) {
defer wg.Done()
@@ -265,32 +385,31 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
results.RunOptimize()
}
x := e.bsi.BitCount()
startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0
endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0
startIsNegative := e.valueOrStart.Sign() == -1
endIsNegative := true
if e.end != nil {
endIsNegative = e.end.Sign() == -1
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq1, eq2 := true, true
lt1, lt2, gt1 := false, false, false
j := e.bsi.BitCount() - 1
isNegative := false
if x == 64 {
isNegative = e.bsi.bA[j].Contains(cID)
j--
}
j := e.bsi.BitCount()
isNegative := e.bsi.IsNegative(cID)
compStartValue := e.valueOrStart
compEndValue := e.end
if isNegative != startIsNegative {
compStartValue = ^e.valueOrStart + 1
compStartValue = twosComplement(e.valueOrStart, e.bsi.BitCount()+1)
}
if isNegative != endIsNegative {
compEndValue = ^e.end + 1
if isNegative != endIsNegative && e.end != nil {
compEndValue = twosComplement(e.end, e.bsi.BitCount()+1)
}
for ; j >= 0; j-- {
sliceContainsBit := e.bsi.bA[j].Contains(cID)
if uint64(compStartValue)&(1<<uint64(j)) > 0 {
if compStartValue.Bit(j) == 1 {
// BIT in value is SET
if !sliceContainsBit {
if eq1 {
@@ -303,7 +422,9 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
}
}
eq1 = false
break
if e.op != RANGE {
break
}
}
}
} else {
@@ -319,6 +440,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
}
}
eq1 = false
if e.op != RANGE {
break
}
@@ -326,7 +448,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
}
}
if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 {
if e.op == RANGE && compEndValue.Bit(j) == 1 {
// BIT in value is SET
if !sliceContainsBit {
if eq2 {
@@ -347,11 +469,9 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
lt2 = true
}
eq2 = false
break
}
}
}
}
switch e.op {
@@ -387,15 +507,24 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
resultsChan <- results
}
// MinMax - Find minimum or maximum value.
// MinMax - Find minimum or maximum int64 value.
func (b *BSI) MinMax(parallelism int, op Operation, foundSet *Bitmap) int64 {
return b.MinMaxBig(parallelism, op, foundSet).Int64()
}
// MinMaxBig - Find minimum or maximum value.
func (b *BSI) MinMaxBig(parallelism int, op Operation, foundSet *Bitmap) *big.Int {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan int64, n)
resultsChan := make(chan *big.Int, n)
if foundSet == nil {
foundSet = &b.eBM
}
card := foundSet.GetCardinality()
x := card / uint64(n)
@@ -418,63 +547,87 @@ func (b *BSI) MinMax(parallelism int, op Operation, foundSet *Bitmap) int64 {
wg.Wait()
close(resultsChan)
var minMax int64
var minMax *big.Int
minSigned, maxSigned := minMaxSignedInt(b.BitCount() + 1)
if op == MAX {
minMax = Min64BitSigned
minMax = minSigned
} else {
minMax = Max64BitSigned
minMax = maxSigned
}
for val := range resultsChan {
if (op == MAX && val > minMax) || (op == MIN && val <= minMax) {
if (op == MAX && val.Cmp(minMax) > 0) || (op == MIN && val.Cmp(minMax) <= 0) {
minMax = val
}
}
return minMax
}
func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg *sync.WaitGroup) {
func minMaxSignedInt(bits int) (*big.Int, *big.Int) {
// Calculate the maximum value
max := new(big.Int).Lsh(big.NewInt(1), uint(bits-1))
max.Sub(max, big.NewInt(1))
// Calculate the minimum value
min := new(big.Int).Neg(max)
min.Sub(min, big.NewInt(1))
return min, max
}
func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan *big.Int, wg *sync.WaitGroup) {
defer wg.Done()
x := b.BitCount()
var value int64 = Max64BitSigned
x := b.BitCount() + 1
var value *big.Int
minSigned, maxSigned := minMaxSignedInt(x)
if op == MAX {
value = Min64BitSigned
value = minSigned
} else {
value = maxSigned
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq := true
lt, gt := false, false
j := b.BitCount() - 1
var cVal int64
valueIsNegative := uint64(value)&(1<<uint64(x-1)) > 0 && bits.Len64(uint64(value)) == 64
isNegative := false
if x == 64 {
isNegative = b.bA[j].Contains(cID)
if isNegative {
cVal |= 1 << uint64(j)
}
j--
}
j := b.BitCount()
cVal := new(big.Int)
valueIsNegative := value.Sign() == -1
isNegative := b.IsNegative(cID)
compValue := value
if isNegative != valueIsNegative {
compValue = ^value + 1
// convert compValue to twos complement
inverted := new(big.Int).Not(compValue)
mask := new(big.Int).Lsh(big.NewInt(1), uint(compValue.BitLen()))
inverted.And(inverted, mask.Sub(mask, big.NewInt(1)))
inverted.Add(inverted, big.NewInt(1))
}
done := false
for ; j >= 0; j-- {
sliceContainsBit := b.bA[j].Contains(cID)
if sliceContainsBit {
cVal |= 1 << uint64(j)
bigBit := big.NewInt(1)
bigBit.Lsh(bigBit, uint(j))
cVal.Or(cVal, bigBit)
if isNegative {
cVal = negativeTwosComplementToInt(cVal)
}
}
if uint64(compValue)&(1<<uint64(j)) > 0 {
if done {
continue
}
if compValue.Bit(j) == 1 {
// BIT in value is SET
if !sliceContainsBit {
if eq {
eq = false
if op == MAX && valueIsNegative && !isNegative {
gt = true
break
done = true
}
if op == MIN && (!valueIsNegative || (valueIsNegative == isNegative)) {
lt = true
@@ -491,11 +644,13 @@ func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg
}
if op == MAX && (valueIsNegative || (valueIsNegative == isNegative)) {
gt = true
done = true
}
}
}
}
}
if lt || gt {
value = cVal
}
@@ -506,19 +661,37 @@ func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg
// Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
// is also returned (for calculating the average).
func (b *BSI) Sum(foundSet *Bitmap) (sum int64, count uint64) {
func (b *BSI) Sum(foundSet *Bitmap) (int64, uint64) {
val, count := b.SumBigValues(foundSet)
return val.Int64(), count
}
// SumBigValues - Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
// is also returned (for calculating the average). This method will sum arbitrarily large values.
func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) {
if foundSet == nil {
foundSet = &b.eBM
}
sum = new(big.Int)
count = foundSet.GetCardinality()
resultsChan := make(chan int64, b.BitCount())
var wg sync.WaitGroup
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
atomic.AddInt64(&sum, int64(foundSet.AndCardinality(&b.bA[j])<<uint(j)))
resultsChan <- int64(foundSet.AndCardinality(&b.bA[j]) << uint(j))
}(i)
}
wg.Wait()
return
close(resultsChan)
for val := range resultsChan {
sum.Add(sum, big.NewInt(val))
}
sum.Sub(sum, big.NewInt(int64(foundSet.AndCardinality(&b.bA[b.BitCount()])<<uint(b.BitCount()))))
return sum, count
}
// Transpose calls b.IntersectAndTranspose(0, b.eBM)
@@ -533,7 +706,9 @@ func (b *BSI) Transpose() *Bitmap {
//
// TODO: This implementation is functional but not performant, needs to be re-written perhaps using SIMD SSE2 instructions.
func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *Bitmap) *Bitmap {
if foundSet == nil {
foundSet = &b.eBM
}
trans := &task{bsi: b}
return parallelExecutor(parallelism, trans, transpose, foundSet)
}
@@ -563,12 +738,12 @@ func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
bits := len(b.bA)
for i := 0; i < len(bsis); i++ {
if len(bsis[i].bA) > bits {
bits = bsis[i].BitCount()
bits = len(bsis[i].bA )
}
}
// Make sure we have enough bit slices
for bits > b.BitCount() {
for bits > len(b.bA) {
bm := Bitmap{}
bm.RunOptimize()
b.bA = append(b.bA, bm)
@@ -725,10 +900,20 @@ func (b *BSI) WriteTo(w io.Writer) (n int64, err error) {
// BatchEqual returns a bitmap containing the column IDs where the values are contained within the list of values provided.
func (b *BSI) BatchEqual(parallelism int, values []int64) *Bitmap {
//convert list of int64 values to big.Int(s)
bigValues := make([]*big.Int, len(values))
for i, v := range values {
bigValues[i] = big.NewInt(v)
}
return b.BatchEqualBig(parallelism, bigValues)
}
valMap := make(map[int64]struct{}, len(values))
// BatchEqualBig returns a bitmap containing the column IDs where the values are contained within the list of values provided.
func (b *BSI) BatchEqualBig(parallelism int, values []*big.Int) *Bitmap {
valMap := make(map[string]struct{}, len(values))
for i := 0; i < len(values); i++ {
valMap[values[i]] = struct{}{}
valMap[string(values[i].Bytes())] = struct{}{}
}
comp := &task{bsi: b, values: valMap}
return parallelExecutor(parallelism, comp, batchEqual, &b.eBM)
@@ -746,8 +931,8 @@ func batchEqual(e *task, batch []uint64, resultsChan chan *Bitmap,
for i := 0; i < len(batch); i++ {
cID := batch[i]
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
if _, yes := e.values[int64(value)]; yes {
if value, ok := e.bsi.GetBigValue(uint64(cID)); ok {
if _, yes := e.values[string(value.Bytes())]; yes {
results.Add(cID)
}
}
@@ -786,8 +971,8 @@ func (b *BSI) ClearValues(foundSet *Bitmap) {
// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained in foundSet
func (b *BSI) NewBSIRetainSet(foundSet *Bitmap) *BSI {
newBSI := NewBSI(b.MaxValue, b.MinValue)
newBSI.bA = make([]Bitmap, b.BitCount())
newBSI := NewDefaultBSI()
newBSI.bA = make([]Bitmap, b.BitCount()+1)
var wg sync.WaitGroup
wg.Add(1)
go func() {
@@ -823,13 +1008,13 @@ func (b *BSI) Add(other *BSI) {
func (b *BSI) addDigit(foundSet *Bitmap, i int) {
if i >= len(b.bA) {
if i >= b.BitCount()+1 || b.BitCount() == 0 {
b.bA = append(b.bA, Bitmap{})
}
carry := And(&b.bA[i], foundSet)
b.bA[i].Xor(foundSet)
if !carry.IsEmpty() {
if i+1 >= len(b.bA) {
if i+1 >= b.BitCount() {
b.bA = append(b.bA, Bitmap{})
}
b.addDigit(carry, i+1)
@@ -841,7 +1026,12 @@ func (b *BSI) addDigit(foundSet *Bitmap, i int) {
// is useful for situations where there is a one-to-many relationship between the vectored integer sets. The resulting BSI
// contains the number of times a particular value appeared in the input BSI.
func (b *BSI) TransposeWithCounts(parallelism int, foundSet, filterSet *Bitmap) *BSI {
if foundSet == nil {
foundSet = &b.eBM
}
if filterSet == nil {
filterSet = &b.eBM
}
return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, filterSet, true)
}
@@ -871,6 +1061,9 @@ func transposeWithCounts(input *BSI, filterSet *Bitmap, batch []uint64, resultsC
// Increment - In-place increment of values in a BSI. Found set select columns for incrementing.
func (b *BSI) Increment(foundSet *Bitmap) {
if foundSet == nil {
foundSet = &b.eBM
}
b.addDigit(foundSet, 0)
b.eBM.Or(foundSet)
}

View File

@@ -1,7 +1,7 @@
package roaring64
import (
"github.com/RoaringBitmap/roaring"
"github.com/RoaringBitmap/roaring/v2"
)
// IntIterable64 allows you to iterate over the values in a Bitmap

View File

@@ -4,7 +4,7 @@ import (
"fmt"
"runtime"
"github.com/RoaringBitmap/roaring"
"github.com/RoaringBitmap/roaring/v2"
)
var defaultWorkerCount = runtime.NumCPU()
@@ -144,6 +144,8 @@ func (c parChunk) size() int {
return c.ra.size()
}
// parNaiveStartAt returns the index of the first key that is inclusive between start and last
// Returns the size if there is no such key
func parNaiveStartAt(ra *roaringArray64, start uint32, last uint32) int {
for idx, key := range ra.keys {
if key >= start && key <= last {
@@ -170,7 +172,6 @@ func orOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
key2 = ra2.getKeyAtIndex(idx2)
for key1 <= last && key2 <= last {
if key1 < key2 {
answer.appendCopy(*ra1, idx1)
idx1++
@@ -188,7 +189,7 @@ func orOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
} else {
c1 := ra1.getContainerAtIndex(idx1)
//answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
// answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
answer.appendContainer(key1, roaring.Or(c1, ra2.getContainerAtIndex(idx2)), false)
idx1++
idx2++
@@ -261,7 +262,7 @@ func iorOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
} else {
c1 := ra1.getWritableContainerAtIndex(idx1)
//ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
// ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
c1.Or(ra2.getContainerAtIndex(idx2))
ra1.setContainerAtIndex(idx1, c1)

View File

@@ -8,12 +8,14 @@ import (
"io"
"strconv"
"github.com/RoaringBitmap/roaring"
"github.com/RoaringBitmap/roaring/internal"
"github.com/RoaringBitmap/roaring/v2"
"github.com/RoaringBitmap/roaring/v2/internal"
)
const serialCookieNoRunContainer = 12346 // only arrays and bitmaps
const serialCookie = 12347 // runs, arrays, and bitmaps
const (
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
serialCookie = 12347 // runs, arrays, and bitmaps
)
// Bitmap represents a compressed bitmap where you can add integers.
type Bitmap struct {
@@ -25,7 +27,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
buf := new(bytes.Buffer)
_, err := rb.WriteTo(buf)
return base64.StdEncoding.EncodeToString(buf.Bytes()), err
}
// FromBase64 deserializes a bitmap from Base64
@@ -52,7 +53,6 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
// implementations (Java, Go, C++) and it has a specification :
// https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations
func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
var n int64
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(rb.highlowcontainer.size()))
@@ -87,11 +87,10 @@ func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
func (rb *Bitmap) FromUnsafeBytes(data []byte) (p int64, err error) {
stream := internal.NewByteBuffer(data)
sizeBuf := make([]byte, 8)
n, err := stream.Read(sizeBuf)
_, err = stream.Read(sizeBuf)
if err != nil {
return 0, err
}
p += int64(n)
size := binary.LittleEndian.Uint64(sizeBuf)
rb.highlowcontainer.resize(0)
@@ -115,17 +114,16 @@ func (rb *Bitmap) FromUnsafeBytes(data []byte) (p int64, err error) {
if err != nil {
return 0, fmt.Errorf("error in bitmap.UnsafeFromBytes: could not read key #%d: %w", i, err)
}
p += 4
rb.highlowcontainer.keys[i] = binary.LittleEndian.Uint32(keyBuf)
rb.highlowcontainer.containers[i] = roaring.NewBitmap()
n, err := rb.highlowcontainer.containers[i].ReadFrom(stream)
if n == 0 || err != nil {
return int64(n), fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
}
p += int64(n)
}
return p, nil
return stream.GetReadBytes(), nil
}
// ReadFrom reads a serialized version of this bitmap from stream.
@@ -167,23 +165,15 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
rb.highlowcontainer.keys[i] = binary.LittleEndian.Uint32(keyBuf)
rb.highlowcontainer.containers[i] = roaring.NewBitmap()
n, err := rb.highlowcontainer.containers[i].ReadFrom(stream)
if n == 0 || err != nil {
return int64(n), fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
}
p += int64(n)
}
return p, nil
}
// FromBuffer creates a bitmap from its serialized version stored in buffer
// func (rb *Bitmap) FromBuffer(data []byte) (p int64, err error) {
//
// // TODO: Add buffer interning as in base roaring package.
// buf := bytes.NewBuffer(data)
// return rb.ReadFrom(buf)
// }
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
// (same as ToBytes)
func (rb *Bitmap) MarshalBinary() ([]byte, error) {
@@ -1251,6 +1241,10 @@ func (rb *Bitmap) GetSerializedSizeInBytes() uint64 {
return rb.highlowcontainer.serializedSizeInBytes()
}
func (rb *Bitmap) Validate() error {
return rb.highlowcontainer.validate()
}
// Roaring32AsRoaring64 inserts a 32-bit roaring bitmap into
// a 64-bit roaring bitmap. No copy is made.
func Roaring32AsRoaring64(bm32 *roaring.Bitmap) *Bitmap {

View File

@@ -1,7 +1,9 @@
package roaring64
import (
"github.com/RoaringBitmap/roaring"
"errors"
"github.com/RoaringBitmap/roaring/v2"
)
type roaringArray64 struct {
@@ -11,6 +13,11 @@ type roaringArray64 struct {
copyOnWrite bool
}
var (
ErrKeySortOrder = errors.New("keys were out of order")
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
)
// runOptimize compresses the element containers to minimize space consumed.
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
@@ -140,7 +147,6 @@ func (ra *roaringArray64) clear() {
}
func (ra *roaringArray64) clone() *roaringArray64 {
sa := roaringArray64{}
sa.copyOnWrite = ra.copyOnWrite
@@ -328,6 +334,15 @@ func (ra *roaringArray64) hasRunCompression() bool {
return false
}
/**
* Find the smallest integer index strictly larger than pos such that array[index].key&gt;=min. If none can
* be found, return size. Based on code by O. Kaser.
*
* @param min minimal value
* @param pos index to exceed
* @return the smallest index greater than pos such that array[index].key is at least as large as
* min, or size if it is not possible.
*/
func (ra *roaringArray64) advanceUntil(min uint32, pos int) int {
lower := pos + 1
@@ -401,3 +416,47 @@ func (ra *roaringArray64) serializedSizeInBytes() uint64 {
}
return answer
}
func (ra *roaringArray64) checkKeysSorted() bool {
if len(ra.keys) == 0 || len(ra.keys) == 1 {
return true
}
previous := ra.keys[0]
for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
next := ra.keys[nextIdx]
if previous >= next {
return false
}
previous = next
}
return true
}
// validate checks the referential integrity
// ensures len(keys) == len(containers), recurses and checks each container type
func (ra *roaringArray64) validate() error {
if !ra.checkKeysSorted() {
return ErrKeySortOrder
}
if len(ra.keys) != len(ra.containers) {
return ErrCardinalityConstraint
}
if len(ra.keys) != len(ra.needCopyOnWrite) {
return ErrCardinalityConstraint
}
for _, maps := range ra.containers {
err := maps.Validate()
if err != nil {
return err
}
if maps.IsEmpty() {
return errors.New("empty container")
}
}
return nil
}

View File

@@ -1,6 +1,6 @@
package roaring64
import "github.com/RoaringBitmap/roaring"
import "github.com/RoaringBitmap/roaring/v2"
func highbits(x uint64) uint32 {
return uint32(x >> 32)

View File

@@ -3,10 +3,11 @@ package roaring
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/RoaringBitmap/roaring/internal"
"github.com/RoaringBitmap/roaring/v2/internal"
)
type container interface {
@@ -30,7 +31,6 @@ type container interface {
iadd(x uint16) bool // inplace, returns true if x was new.
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
iremove(x uint16) bool // inplace, returns true if x was present.
@@ -61,7 +61,6 @@ type container interface {
lazyOR(r container) container
lazyIOR(r container) container
getSizeInBytes() int
//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int
@@ -71,6 +70,14 @@ type container interface {
toEfficientContainer() container
String() string
containerType() contype
safeMinimum() (uint16, error)
safeMaximum() (uint16, error)
nextValue(x uint16) int
previousValue(x uint16) int
nextAbsentValue(x uint16) int
previousAbsentValue(x uint16) int
validate() error
}
type contype uint8
@@ -82,6 +89,11 @@ const (
run32Contype
)
var (
ErrKeySortOrder = errors.New("keys were out of order")
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
)
// careful: range is [firstOfRange,lastOfRange]
func rangeOfOnes(start, last int) container {
if start > MaxUint16 {
@@ -178,7 +190,6 @@ func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
}
}
}
@@ -204,7 +215,6 @@ func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
}
}
}
@@ -239,7 +249,6 @@ func (ra *roaringArray) clear() {
}
func (ra *roaringArray) clone() *roaringArray {
sa := roaringArray{}
sa.copyOnWrite = ra.copyOnWrite
@@ -288,6 +297,8 @@ func (ra *roaringArray) cloneCopyOnWriteContainers() {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
//}
// getContainer returns the container with key `x`
// if no such container exists `nil` is returned
func (ra *roaringArray) getContainer(x uint16) container {
i := ra.binarySearch(0, int64(len(ra.keys)), x)
if i < 0 {
@@ -325,7 +336,6 @@ func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) co
return ra.getContainerAtIndex(pos).or(other)
}
return ra.getContainerAtIndex(pos).ior(other)
}
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
@@ -336,7 +346,10 @@ func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
return ra.containers[i]
}
// getIndex returns the index of the container with key `x`
// if no such container exists a negative value is returned
func (ra *roaringArray) getIndex(x uint16) int {
// Todo : test
// before the binary search, we optimize for frequent cases
size := len(ra.keys)
if (size == 0) || (ra.keys[size-1] == x) {
@@ -396,7 +409,10 @@ func (ra *roaringArray) size() int {
return len(ra.keys)
}
// binarySearch returns the index of the key.
// negative value returned if not found
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
// TODO: add unit tests
low := begin
high := end - 1
for low+16 <= high {
@@ -455,7 +471,6 @@ func (ra *roaringArray) headerSize() uint64 {
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
}
return 4 + 4 + 8*size
}
// should be dirt cheap
@@ -489,7 +504,7 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
nw += 2
// compute isRun bitmap without temporary allocation
var runbitmapslice = buf[nw : nw+isRunSizeInBytes]
runbitmapslice := buf[nw : nw+isRunSizeInBytes]
for i, c := range ra.containers {
switch c.(type) {
case *runContainer16:
@@ -577,7 +592,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
// create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8
isRunBitmap, err = stream.Next(isRunBitmapSize)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
@@ -596,7 +610,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
// descriptive header
buf, err := stream.Next(2 * 2 * int(size))
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
@@ -637,13 +650,11 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container
nr, err := stream.ReadUInt16()
if err != nil {
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
}
buf, err := stream.Next(int(nr) * 4)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
@@ -656,7 +667,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
} else if card > arrayDefaultMaxSize {
// bitmap container
buf, err := stream.Next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
@@ -670,7 +680,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
} else {
// array container
buf, err := stream.Next(card * 2)
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("failed to read array container: %s", err)
}
@@ -696,6 +705,15 @@ func (ra *roaringArray) hasRunCompression() bool {
return false
}
/**
* Find the smallest integer index larger than pos such that array[index].key&gt;=min. If none can
* be found, return size. Based on code by O. Kaser.
*
* @param min minimal value
* @param pos index to exceed
* @return the smallest index greater than pos such that array[index].key is at least as large as
* min, or size if it is not possible.
*/
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
lower := pos + 1
@@ -759,3 +777,44 @@ func (ra *roaringArray) needsCopyOnWrite(i int) bool {
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
ra.needCopyOnWrite[i] = true
}
func (ra *roaringArray) checkKeysSorted() bool {
if len(ra.keys) == 0 || len(ra.keys) == 1 {
return true
}
previous := ra.keys[0]
for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
next := ra.keys[nextIdx]
if previous >= next {
return false
}
previous = next
}
return true
}
// validate checks the referential integrity
// ensures len(keys) == len(containers), recurses and checks each container type
func (ra *roaringArray) validate() error {
if !ra.checkKeysSorted() {
return ErrKeySortOrder
}
if len(ra.keys) != len(ra.containers) {
return ErrCardinalityConstraint
}
if len(ra.keys) != len(ra.needCopyOnWrite) {
return ErrCardinalityConstraint
}
for _, container := range ra.containers {
err := container.validate()
if err != nil {
return err
}
}
return nil
}

View File

@@ -39,9 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import (
"errors"
"fmt"
"sort"
"unsafe"
)
// runContainer16 does run-length encoding of sets of
@@ -59,6 +59,16 @@ type interval16 struct {
length uint16 // length minus 1
}
var (
ErrRunIntervalsEmpty = errors.New("run contained no interval")
ErrRunNonSorted = errors.New("runs were not sorted")
ErrRunIntervalEqual = errors.New("intervals were equal")
ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
ErrRunIntervalSize = errors.New("too many intervals relative to data")
MaxNumIntervals = 2048
MaxIntervalsSum = 2048
)
func newInterval16Range(start, last uint16) interval16 {
if last < start {
panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start))
@@ -201,7 +211,6 @@ func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer
// somewhat efficiently. For reference, see the Java
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192
func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
rc := &runContainer16{}
nbrRuns := bc.numberOfRuns()
if nbrRuns == 0 {
@@ -251,7 +260,6 @@ func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
curWord = curWordWith1s & (curWordWith1s + 1)
// We've lathered and rinsed, so repeat...
}
}
// newRunContainer16FromArray populates a new
@@ -293,7 +301,6 @@ func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 {
// If you have a small number of additions to an already
// big runContainer16, calling Add() may be faster.
func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) {
rc2 := newRunContainer16FromVals(alreadySorted, vals...)
un := rc.union(rc2)
rc.iv = un.iv
@@ -374,7 +381,6 @@ func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) {
// union merges two runContainer16s, producing
// a new runContainer16 with the union of rc and b.
func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
// rc is also known as 'a' here, but golint insisted we
// call it rc for consistency with the rest of the methods.
@@ -457,7 +463,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
break aAdds
}
}
}
if !bDone {
@@ -471,7 +476,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
break bAdds
}
}
}
m = append(m, merged)
@@ -489,7 +493,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
// unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b.
func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
// rc is also known as 'a' here, but golint insisted we
// call it rc for consistency with the rest of the methods.
answer := uint(0)
@@ -528,7 +531,7 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
}
if !mergedUpdated {
// we know that merged is disjoint from cura and curb
//m = append(m, merged)
// m = append(m, merged)
answer += uint(merged.last()) - uint(merged.start) + 1
mergedUsed = false
}
@@ -539,11 +542,11 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
if !canMerge16(cura, curb) {
if cura.start < curb.start {
answer += uint(cura.last()) - uint(cura.start) + 1
//m = append(m, cura)
// m = append(m, cura)
na++
} else {
answer += uint(curb.last()) - uint(curb.start) + 1
//m = append(m, curb)
// m = append(m, curb)
nb++
}
} else {
@@ -574,7 +577,6 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
break aAdds
}
}
}
if !bDone {
@@ -588,10 +590,9 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
break bAdds
}
}
}
//m = append(m, merged)
// m = append(m, merged)
answer += uint(merged.last()) - uint(merged.start) + 1
}
for _, r := range rc.iv[na:] {
@@ -615,7 +616,6 @@ func (rc *runContainer16) indexOfIntervalAtOrAfter(key int, startIndex int) int
// intersect returns a new runContainer16 holding the
// intersection of rc (also known as 'a') and b.
func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
a := rc
numa := int(len(a.iv))
numb := int(len(b.iv))
@@ -645,8 +645,7 @@ func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
toploop:
for acuri < numa && bcuri < numb {
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
if !isOverlap {
switch {
@@ -664,7 +663,6 @@ toploop:
}
bstart = int(b.iv[bcuri].start)
}
} else {
// isOverlap
output = append(output, intersection)
@@ -748,8 +746,7 @@ toploop:
for acuri < numa && bcuri < numb {
pass++
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
if !isOverlap {
switch {
@@ -767,7 +764,6 @@ toploop:
}
bstart = int(b.iv[bcuri].start)
}
} else {
// isOverlap
answer += int(intersection.last()) - int(intersection.start) + 1
@@ -941,7 +937,7 @@ func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (w
// b) whichInterval16 == -1 if key is before our first
// interval16 in rc.iv;
//
// c) whichInterval16 is set to the minimum index of rc.iv
// c) whichInterval16 is set to the maximum index of rc.iv
// which comes strictly before the key;
// so rc.iv[whichInterval16].last < key,
// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start
@@ -1014,10 +1010,10 @@ func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 {
return rc
}
const baseRc16Size = int(unsafe.Sizeof(runContainer16{}))
const perIntervalRc16Size = int(unsafe.Sizeof(interval16{}))
const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0)))
const (
baseRc16Size = 2
perIntervalRc16Size = 4
)
// see also runContainer16SerializedSizeInBytes(numRuns int) int
@@ -1030,7 +1026,7 @@ func (rc *runContainer16) getSizeInBytes() int {
// runContainer16SerializedSizeInBytes returns the number of bytes of disk
// required to hold numRuns in a runContainer16.
func runContainer16SerializedSizeInBytes(numRuns int) int {
return perIntervalRc16Size*numRuns + baseDiskRc16Size
return perIntervalRc16Size*numRuns + baseRc16Size
}
// Add adds a single value k to the set.
@@ -1274,7 +1270,7 @@ func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
break
}
} else {
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
ri.curPosInIndex += uint16(moreVals) // moreVals always fits in uint16
}
}
@@ -1315,7 +1311,7 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
break
}
} else {
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
ri.curPosInIndex += uint16(moreVals) // moreVals always fits in uint16
}
}
@@ -1324,7 +1320,6 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
// remove removes key from the container.
func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
var index int
index, wasPresent, _ = rc.search(int(key))
if !wasPresent {
@@ -1361,7 +1356,7 @@ func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) {
*curPosInIndex--
// if we leave *curIndex alone, then Next() will work properly even after the delete.
default:
//middle
// middle
// split into two, adding an interval16
new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1)
@@ -1376,7 +1371,6 @@ func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) {
*curIndex++
*curPosInIndex = 0
}
}
func have4Overlap16(astart, alast, bstart, blast int) bool {
@@ -1503,6 +1497,26 @@ func (iv interval16) isSuperSetOf(b interval16) bool {
return iv.start <= b.start && b.last() <= iv.last()
}
func (iv interval16) isNonContiguousDisjoint(b interval16) bool {
// cover the zero start case
if iv.start == b.start {
return false
}
nonContiguous1 := uint32(iv.start) == uint32(b.last())+1 || uint32(iv.last()) == uint32(b.start)+1
nonContiguous2 := uint32(b.start) == uint32(iv.last())+1 || uint32(b.last()) == uint32(iv.start)+1
if nonContiguous1 || nonContiguous2 {
return false
}
ivl := iv.last()
bl := b.last()
c1 := iv.start <= b.start && b.start <= ivl
c2 := b.start <= iv.start && iv.start <= bl
return !c1 && !c2
}
func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int) {
isect, isEmpty := intersectInterval16s(iv, del)
@@ -1678,7 +1692,6 @@ func (rc *runContainer16) isubtract(del interval16) {
// port of run_container_andnot from CRoaring...
// https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496
func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 {
if len(b.iv) == 0 || len(rc.iv) == 0 {
return rc
}
@@ -1781,10 +1794,25 @@ func (rc *runContainer16) minimum() uint16 {
return rc.iv[0].start // assume not empty
}
func (rc *runContainer16) safeMinimum() (uint16, error) {
if len(rc.iv) == 0 {
return 0, errors.New("Empty runs")
}
return rc.minimum(), nil
}
func (rc *runContainer16) maximum() uint16 {
return rc.iv[len(rc.iv)-1].last() // assume not empty
}
func (rc *runContainer16) safeMaximum() (uint16, error) {
if len(rc.iv) == 0 {
return 0, errors.New("Empty runs")
}
return rc.maximum(), nil // assume not empty
}
func (rc *runContainer16) isFull() bool {
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
}
@@ -1949,7 +1977,6 @@ func (rc *runContainer16) getManyIterator() manyIterable {
// add the values in the range [firstOfRange, endx). endx
// is still abe to express 2^16 because it is an int not an uint16.
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
if firstOfRange > endx {
panic(fmt.Sprintf("invalid %v = endx > firstOfRange", endx))
}
@@ -2002,7 +2029,6 @@ func (rc *runContainer16) not(firstOfRange, endx int) container {
// makes 2 more passes through the arrays than should be
// strictly necessary. Measure both ways though--this may not matter.
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
if firstOfRange > endx {
panic(fmt.Sprintf("invalid %v = endx > firstOfRange == %v", endx, firstOfRange))
}
@@ -2066,12 +2092,12 @@ func (rc *runContainer16) equals(o container) bool {
rit := rc.getShortIterator()
bit := o.getShortIterator()
//k := 0
// k := 0
for rit.hasNext() {
if bit.next() != rit.next() {
return false
}
//k++
// k++
}
return true
}
@@ -2132,7 +2158,7 @@ func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int
for i := range rc.iv {
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
}
//bc.computeCardinality()
// bc.computeCardinality()
return answer
}
@@ -2150,7 +2176,7 @@ func (rc *runContainer16) orArray(ac *arrayContainer) container {
}
intervals, cardMinusOne := runArrayUnionToRuns(rc, ac)
result := newRunContainer16TakeOwnership(intervals)
if len(intervals) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
if len(intervals) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
return newBitmapContainerFromRun(result)
}
if len(intervals)*2 > 1+int(cardMinusOne) {
@@ -2190,7 +2216,6 @@ func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
}
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
it := bc.getShortIterator()
for it.hasNext() {
rc.Add(it.next())
@@ -2206,11 +2231,11 @@ func (rc *runContainer16) iorArray(ac *arrayContainer) container {
return rc
}
var cardMinusOne uint16
//TODO: perform the union algorithm in-place using rc.iv
// TODO: perform the union algorithm in-place using rc.iv
// this can be done with methods like the in-place array container union
// but maybe lazily moving the remaining elements back.
rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac)
if len(rc.iv) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
if len(rc.iv) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
return newBitmapContainerFromRun(rc)
}
if len(rc.iv)*2 > 1+int(cardMinusOne) {
@@ -2438,12 +2463,8 @@ func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
}
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
rcb := rc.toBitmapContainer()
x2b := x2.toBitmapContainer()
rcb.iandNotBitmapSurely(x2b)
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
rc2 := newRunContainer16FromBitmapContainer(rcb)
rc2 := rc.AndNotRunContainer16(x2)
*rc = *rc2
return rc
}
@@ -2492,7 +2513,7 @@ func (rc *runContainer16) toEfficientContainer() container {
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
card := rc.getCardinality()
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return rc
}
if card <= arrayDefaultMaxSize {
@@ -2511,7 +2532,6 @@ func (rc *runContainer16) toArrayContainer() *arrayContainer {
}
func newRunContainer16FromContainer(c container) *runContainer16 {
switch x := c.(type) {
case *runContainer16:
return x.Clone()
@@ -2622,3 +2642,169 @@ func (rc *runContainer16) addOffset(x uint16) (container, container) {
return low, high
}
// nextValue returns either the `target` if found or the next larger value.
// If the target is in the interior or a run then `target` will be returned
// Ex: If our run structure resmembles [[a,c], [d,f]] with a <= target <= c then `target` will be returned.
// Ex: If c < target < d then d is returned.
// Ex: If target < a then a is returned
// if the target > max, this is out of bounds and -1 is returned
func (rc *runContainer16) nextValue(target uint16) int {
if len(rc.iv) == 0 {
return -1
}
whichIndex, alreadyPresent, _ := rc.search(int(target))
if alreadyPresent {
return int(target)
}
if whichIndex == -1 {
return int(rc.iv[0].start)
}
if whichIndex == len(rc.iv)-1 {
return -1
}
// The if relies on the non-contiguous nature of runs.
// If we have two runs [a,b] and another run [c,d]
// We can rely on the invariant that b+1 < c
// We will return c
possibleNext := whichIndex + 1
if possibleNext < len(rc.iv) {
return int(rc.iv[possibleNext].start)
}
return -1
}
// nextAbsentValue returns the next absent value.
// By construction the next absent value will be located between gaps in runs
//
// Ex: if our runs resemble [[a,b],[c,d]] and a <= target <= b then b+1 will not be equal to c, b+1 will be returned
// Ex: if target < a then target is returned
// Ex: if target > d then target is returned
func (rc *runContainer16) nextAbsentValue(target uint16) int {
whichIndex, alreadyPresent, _ := rc.search(int(target))
if !alreadyPresent {
return int(target)
}
return int(rc.iv[whichIndex].last()) + 1
}
// previousValue will return the previous present value
// If the target is in the interior of a run then `target` will be returned
//
// Example:
// If our run structure resmembles [[a,c], [d,f]] with a <= target <= c then target will be returned.
// If c < target < d then c is returned.
// if target > f then f is returned
// if the target is less than a, this is out of bounds and -1 is returned
func (rc *runContainer16) previousValue(target uint16) int {
whichIndex, alreadyPresent, _ := rc.search(int(target))
if len(rc.iv) == 0 {
return int(target)
}
if alreadyPresent {
return int(target)
}
if whichIndex == -1 {
return -1
}
return int(rc.iv[whichIndex].last())
}
// previousAbsentValue will return the previous absent value
// If the target is in the interior of a run then then the start of the range minus 1 will be returned
//
// Example:
// If our run structure resmembles [[x,z], [a,c], [d,f]] with a <= target <= c then a-1 will be returned.
// if the target < x then target is returned
// if target > f then target is returned
func (rc *runContainer16) previousAbsentValue(target uint16) int {
whichIndex, alreadyPresent, _ := rc.search(int(target))
if !alreadyPresent {
return int(target)
}
return int(rc.iv[whichIndex].start) - 1
}
// isNonContiguousDisjoint returns an error if the intervals overlap e.g have non-empty intersection
func isNonContiguousDisjoint(outer interval16, inner interval16) error {
if !outer.isNonContiguousDisjoint(inner) {
return ErrRunIntervalOverlap
}
return nil
}
// validate checks the run container referential integrity
// Ensures runs are not degenerate, non-contiguous and non-overlapping
func (rc *runContainer16) validate() error {
if rc.getCardinality() == 0 {
return ErrRunIntervalsEmpty
}
intervalsSum := 0
for outeridx := range rc.iv {
// The length being stored is the actual length - 1.
// So we need to add 1 to get the actual length.
// It is not possible to have a run with length 0.
outerInterval := rc.iv[outeridx]
intervalsSum += outerInterval.runlen()
for inneridx := outeridx + 1; inneridx < len(rc.iv); inneridx++ {
innerInterval := rc.iv[inneridx]
if outerInterval.equal(innerInterval) {
return ErrRunIntervalEqual
}
// only check the start of runs
// if the run length overlap the next check will catch that.
if outerInterval.start >= innerInterval.start {
return ErrRunNonSorted
}
err := isNonContiguousDisjoint(outerInterval, innerInterval)
if err != nil {
return err
}
}
}
/*
if number of distinct values in the container >= 2048 then
check that the number of runs is no more than 2047
(otherwise you could use a bitset container)
else
check that the number of runs < (number of distinct values) / 2
(otherwise you could use an array container)
*/
sizeAsRunContainer := runContainer16SerializedSizeInBytes(len(rc.iv))
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
sizeAsArrayContainer := arrayContainerSizeInBytes(intervalsSum)
// this is always ok:
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return nil
}
if sizeAsRunContainer >= sizeAsBitmapContainer {
return ErrRunIntervalSize
}
if sizeAsRunContainer >= sizeAsArrayContainer {
return ErrRunIntervalSize
}
return nil
}

View File

@@ -299,6 +299,15 @@ func (rb *Bitmap) FrozenView(buf []byte) error {
return rb.highlowcontainer.frozenView(buf)
}
func (rb *Bitmap) MustFrozenView(buf []byte) error {
if err := rb.FrozenView(buf); err != nil {
return err
}
err := rb.Validate()
return err
}
/* Verbatim specification from CRoaring.
*
* FROZEN SERIALIZATION FORMAT DESCRIPTION

View File

@@ -1,26 +1,12 @@
package roaring
func equal(a, b []uint16) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
if 0 == len(set2) {
if len(set2) == 0 {
buffer = buffer[:len(set1)]
for k := 0; k < len(set1); k++ {
buffer[k] = set1[k]
}
copy(buffer, set1)
return len(set1)
}
if 0 == len(set1) {
if len(set1) == 0 {
return 0
}
pos := 0
@@ -66,7 +52,6 @@ func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
}
}
return pos
}
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
@@ -135,6 +120,7 @@ func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
return pos
}
// union2by2Cardinality computes the cardinality of the union
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
pos := 0
k1 := 0
@@ -186,8 +172,8 @@ func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
func intersection2by2(
set1 []uint16,
set2 []uint16,
buffer []uint16) int {
buffer []uint16,
) int {
if len(set1)*64 < len(set2) {
return onesidedgallopingintersect2by2(set1, set2, buffer)
} else if len(set2)*64 < len(set1) {
@@ -197,10 +183,11 @@ func intersection2by2(
}
}
// intersection2by2Cardinality computes the cardinality of the intersection
func intersection2by2Cardinality(
set1 []uint16,
set2 []uint16) int {
set2 []uint16,
) int {
if len(set1)*64 < len(set2) {
return onesidedgallopingintersect2by2Cardinality(set1, set2)
} else if len(set2)*64 < len(set1) {
@@ -210,44 +197,45 @@ func intersection2by2Cardinality(
}
}
// intersects2by2 computes whether the two sets intersect
func intersects2by2(
set1 []uint16,
set2 []uint16) bool {
set2 []uint16,
) bool {
// could be optimized if one set is much larger than the other one
if (0 == len(set1)) || (0 == len(set2)) {
if (len(set1) == 0) || (len(set2) == 0) {
return false
}
k1 := 0
k2 := 0
s1 := set1[k1]
s2 := set2[k2]
index1 := 0
index2 := 0
value1 := set1[index1]
value2 := set2[index2]
mainwhile:
for {
if s2 < s1 {
if value2 < value1 {
for {
k2++
if k2 == len(set2) {
index2++
if index2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
value2 = set2[index2]
if value2 >= value1 {
break
}
}
}
if s1 < s2 {
if value1 < value2 {
for {
k1++
if k1 == len(set1) {
index1++
if index1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
value1 = set1[index1]
if value1 >= value2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
return true
@@ -259,9 +247,9 @@ mainwhile:
func localintersect2by2(
set1 []uint16,
set2 []uint16,
buffer []uint16) int {
if (0 == len(set1)) || (0 == len(set2)) {
buffer []uint16,
) int {
if (len(set1) == 0) || (len(set2) == 0) {
return 0
}
k1 := 0
@@ -295,7 +283,6 @@ mainwhile:
break
}
}
} else {
// (set2[k2] == set1[k1])
buffer[pos] = s1
@@ -315,57 +302,57 @@ mainwhile:
return pos
}
// / localintersect2by2Cardinality computes the cardinality of the intersection
func localintersect2by2Cardinality(
set1 []uint16,
set2 []uint16) int {
if (0 == len(set1)) || (0 == len(set2)) {
set2 []uint16,
) int {
if (len(set1) == 0) || (len(set2) == 0) {
return 0
}
k1 := 0
k2 := 0
index1 := 0
index2 := 0
pos := 0
s1 := set1[k1]
s2 := set2[k2]
value1 := set1[index1]
value2 := set2[index2]
mainwhile:
for {
if s2 < s1 {
if value2 < value1 {
for {
k2++
if k2 == len(set2) {
index2++
if index2 == len(set2) {
break mainwhile
}
s2 = set2[k2]
if s2 >= s1 {
value2 = set2[index2]
if value2 >= value1 {
break
}
}
}
if s1 < s2 {
if value1 < value2 {
for {
k1++
if k1 == len(set1) {
index1++
if index1 == len(set1) {
break mainwhile
}
s1 = set1[k1]
if s1 >= s2 {
value1 = set1[index1]
if value1 >= value2 {
break
}
}
} else {
// (set2[k2] == set1[k1])
pos++
k1++
if k1 == len(set1) {
index1++
if index1 == len(set1) {
break
}
s1 = set1[k1]
k2++
if k2 == len(set2) {
value1 = set1[index1]
index2++
if index2 == len(set2) {
break
}
s2 = set2[k2]
value2 = set2[index2]
}
}
return pos
@@ -375,7 +362,8 @@ func advanceUntil(
array []uint16,
pos int,
length int,
min uint16) int {
min uint16,
) int {
lower := pos + 1
if lower >= length || array[lower] >= min {
@@ -423,14 +411,13 @@ func advanceUntil(
}
}
return upper
}
func onesidedgallopingintersect2by2(
smallset []uint16,
largeset []uint16,
buffer []uint16) int {
buffer []uint16,
) int {
if 0 == len(smallset) {
return 0
}
@@ -478,8 +465,8 @@ mainwhile:
func onesidedgallopingintersect2by2Cardinality(
smallset []uint16,
largeset []uint16) int {
largeset []uint16,
) int {
if 0 == len(smallset) {
return 0
}
@@ -548,3 +535,131 @@ func binarySearch(array []uint16, ikey uint16) int {
}
return -(low + 1)
}
// searchResult provides information about a search request.
// The values will depend on the context of the search
type searchResult struct {
value uint16
index int
exactMatch bool
}
// notFound returns a bool depending the search context
// For cases `previousValue` and `nextValue` if target is present in the slice
// this function will return `true` otherwise `false`
// For `nextAbsentValue` and `previousAbsentValue` this will only return `False`
func (sr *searchResult) notFound() bool {
return !sr.exactMatch
}
// outOfBounds indicates whether the target was outside the lower and upper bounds of the container
func (sr *searchResult) outOfBounds() bool {
return sr.index <= -1
}
// binarySearchUntil is a helper function around binarySearchUntilWithBounds
// The user does not have to pass in the lower and upper bound
// The lower bound is taken to be `0` and the upper bound `len(array)-1`
func binarySearchUntil(array []uint16, target uint16) searchResult {
return binarySearchUntilWithBounds(array, target, 0, len(array)-1)
}
// binarySearchUntilWithBounds returns a `searchResult`.
// If an exact match is found the `searchResult{target, <index>, true}` will be returned, where `<index>` is
// `target`s index in `array`, and `result.notFound()` evaluates to `false`.
// If a match is not found, but `target` was in-bounds then the result.index will be the closest smaller value
// Example: [ 8,9,11,12] if the target was 10, then `searchResult{9, 1, false}` will be returned.
// If `target` was out of bounds `searchResult{0, -1, false}` will be returned.
func binarySearchUntilWithBounds(array []uint16, target uint16, lowIndex int, maxIndex int) searchResult {
highIndex := maxIndex
closestIndex := -1
if target < array[lowIndex] {
return searchResult{0, closestIndex, false}
}
if target > array[maxIndex] {
return searchResult{0, len(array), false}
}
for lowIndex <= highIndex {
middleIndex := (lowIndex + highIndex) / 2
middleValue := array[middleIndex]
if middleValue == target {
return searchResult{middleValue, middleIndex, true}
}
if target < middleValue {
if middleIndex > 0 && target > array[middleIndex-1] {
return searchResult{array[middleIndex-1], middleIndex - 1, false}
}
highIndex = middleIndex
} else {
if middleIndex < maxIndex && target < array[middleIndex+1] {
return searchResult{middleValue, middleIndex, false}
}
lowIndex = middleIndex + 1
}
}
return searchResult{array[closestIndex], closestIndex, false}
}
// binarySearchPast is a wrapper around binarySearchPastWithBounds
// The user does not have to pass in the lower and upper bound
// The lower bound is taken to be `0` and the upper bound `len(array)-1`
func binarySearchPast(array []uint16, target uint16) searchResult {
return binarySearchPastWithBounds(array, target, 0, len(array)-1)
}
// binarySearchPastWithBounds looks for the smallest value larger than or equal to `target`
// If `target` is out of bounds a `searchResult` indicating out of bounds is returned
// `target` does not have to exist in the slice.
//
// Example:
// Suppose the slice is [...10,13...] with `target` equal to 11
// The searchResult will have searchResult.value = 13
func binarySearchPastWithBounds(array []uint16, target uint16, lowIndex int, maxIndex int) searchResult {
highIndex := maxIndex
closestIndex := -1
if target < array[lowIndex] {
return searchResult{0, closestIndex, false}
}
if target > array[maxIndex] {
return searchResult{0, len(array), false}
}
for lowIndex <= highIndex {
middleIndex := (lowIndex + highIndex) / 2
middleValue := array[middleIndex]
if middleValue == target {
return searchResult{middleValue, middleIndex, true}
}
if target < middleValue {
if middleIndex > 0 && target > array[middleIndex-1] {
return searchResult{array[middleIndex], middleIndex, false}
}
highIndex = middleIndex
} else {
if middleIndex < maxIndex && target < array[middleIndex+1] {
return searchResult{array[middleIndex+1], middleIndex + 1, false}
}
lowIndex = middleIndex + 1
}
}
return searchResult{array[closestIndex], closestIndex, false}
}

View File

@@ -52,6 +52,7 @@ func fill(arr []uint64, val uint64) {
arr[i] = val
}
}
func fillRange(arr []uint64, start, end int, val uint64) {
for i := start; i < end; i++ {
arr[i] = val
@@ -112,10 +113,19 @@ func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) {
func highbits(x uint32) uint16 {
return uint16(x >> 16)
}
func lowbits(x uint32) uint16 {
return uint16(x & maxLowBit)
}
func combineLoHi16(lob uint16, hob uint16) uint32 {
return combineLoHi32(uint32(lob), uint32(hob))
}
func combineLoHi32(lob uint32, hob uint32) uint32 {
return uint32(lob) | (hob << 16)
}
const maxLowBit = 0xFFFF
func flipBitmapRange(bitmap []uint64, start int, end int) {
@@ -146,7 +156,6 @@ func resetBitmapRange(bitmap []uint64, start int, end int) {
bitmap[i] = 0
}
bitmap[endword] &= ^(^uint64(0) >> (uint(-end) % 64))
}
func setBitmapRange(bitmap []uint64, start int, end int) {
@@ -242,7 +251,6 @@ func selectBitPosition(w uint64, j int) int {
}
}
return seen + int(counter)
}
func panicOn(err error) {

View File

@@ -12,7 +12,7 @@ This library is part of the [awesome go collection](https://github.com/avelino/a
* [beego](https://github.com/beego/beego)
* [CubeFS](https://github.com/cubefs/cubefs)
* [Amazon EKS Distro](https://github.com/aws/eks-distro)
* [sourcegraph](https://github.com/sourcegraph/sourcegraph)
* [sourcegraph](https://github.com/sourcegraph/sourcegraph-public-snapshot)
* [torrent](https://github.com/anacrolix/torrent)
@@ -25,7 +25,7 @@ It provides methods for setting, clearing, flipping, and testing individual inte
But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits.
BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk. On creation, a hint can be given for the number of bits that will be used.
BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk automatically, but `Shrink` and `Compact` methods are available. On creation, a hint can be given for the number of bits that will be used.
Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining.
@@ -69,6 +69,13 @@ func main() {
}
```
If you have Go 1.23 or better, you can iterate over the set bits like so:
```go
for i := range b.EachSet() {}
```
Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc
@@ -125,13 +132,20 @@ E.g.,
## Memory Usage
The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](https://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
## Implementation Note
The `roaring` library allows you to go back and forth between compressed Roaring bitmaps and the conventional bitset instances:
```Go
mybitset := roaringbitmap.ToBitSet()
newroaringbitmap := roaring.FromBitSet(mybitset)
```
Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed.
It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `uint64`). If so, the version will be bumped.
### Goroutine safety
In general, it's not safe to access the same BitSet using different goroutines--they are unsynchronized for performance.
Should you want to access a BitSet from more than one goroutine, you should provide synchronization. Typically this is done by using channels to pass the *BitSet around (in Go style; so there is only ever one owner), or by using `sync.Mutex` to serialize operations on BitSets.
## Installation

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,23 @@
//go:build go1.23
// +build go1.23
package bitset
import (
"iter"
"math/bits"
)
func (b *BitSet) EachSet() iter.Seq[uint] {
return func(yield func(uint) bool) {
for wordIndex, word := range b.set {
idx := 0
for trail := bits.TrailingZeros64(word); trail != 64; trail = bits.TrailingZeros64(word >> idx) {
if !yield(uint(wordIndex<<log2WordSize + idx + trail)) {
return
}
idx += trail + 1
}
}
}
}

8866
vendor/github.com/bits-and-blooms/bitset/pext.gen.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,53 +1,59 @@
package bitset
// bit population count, take from
// https://code.google.com/p/go/issues/detail?id=4988#c11
// credit: https://code.google.com/u/arnehormann/
func popcount(x uint64) (n uint64) {
x -= (x >> 1) & 0x5555555555555555
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
x += x >> 4
x &= 0x0f0f0f0f0f0f0f0f
x *= 0x0101010101010101
return x >> 56
}
import "math/bits"
func popcntSliceGo(s []uint64) uint64 {
cnt := uint64(0)
func popcntSlice(s []uint64) uint64 {
var cnt int
for _, x := range s {
cnt += popcount(x)
cnt += bits.OnesCount64(x)
}
return cnt
return uint64(cnt)
}
func popcntMaskSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] &^ m[i])
func popcntMaskSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
return cnt
for i := range s {
cnt += bits.OnesCount64(s[i] &^ m[i])
}
return uint64(cnt)
}
func popcntAndSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] & m[i])
func popcntAndSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
return cnt
for i := range s {
cnt += bits.OnesCount64(s[i] & m[i])
}
return uint64(cnt)
}
func popcntOrSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] | m[i])
func popcntOrSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
return cnt
for i := range s {
cnt += bits.OnesCount64(s[i] | m[i])
}
return uint64(cnt)
}
func popcntXorSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] ^ m[i])
func popcntXorSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
return cnt
for i := range s {
cnt += bits.OnesCount64(s[i] ^ m[i])
}
return uint64(cnt)
}

View File

@@ -1,62 +0,0 @@
//go:build go1.9
// +build go1.9
package bitset
import "math/bits"
func popcntSlice(s []uint64) uint64 {
var cnt int
for _, x := range s {
cnt += bits.OnesCount64(x)
}
return uint64(cnt)
}
func popcntMaskSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] &^ m[i])
}
return uint64(cnt)
}
func popcntAndSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] & m[i])
}
return uint64(cnt)
}
func popcntOrSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] | m[i])
}
return uint64(cnt)
}
func popcntXorSlice(s, m []uint64) uint64 {
var cnt int
// this explicit check eliminates a bounds check in the loop
if len(m) < len(s) {
panic("mask slice is too short")
}
for i := range s {
cnt += bits.OnesCount64(s[i] ^ m[i])
}
return uint64(cnt)
}

View File

@@ -1,68 +0,0 @@
//go:build !go1.9 && amd64 && !appengine
// +build !go1.9,amd64,!appengine
package bitset
// *** the following functions are defined in popcnt_amd64.s
//go:noescape
func hasAsm() bool
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
var useAsm = hasAsm()
//go:noescape
func popcntSliceAsm(s []uint64) uint64
//go:noescape
func popcntMaskSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntAndSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntOrSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntXorSliceAsm(s, m []uint64) uint64
func popcntSlice(s []uint64) uint64 {
if useAsm {
return popcntSliceAsm(s)
}
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
if useAsm {
return popcntMaskSliceAsm(s, m)
}
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
if useAsm {
return popcntAndSliceAsm(s, m)
}
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
if useAsm {
return popcntOrSliceAsm(s, m)
}
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
if useAsm {
return popcntXorSliceAsm(s, m)
}
return popcntXorSliceGo(s, m)
}

View File

@@ -1,104 +0,0 @@
// +build !go1.9
// +build amd64,!appengine
TEXT ·hasAsm(SB),4,$0-1
MOVQ $1, AX
CPUID
SHRQ $23, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
TEXT ·popcntSliceAsm(SB),4,$0-32
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntSliceEnd
popcntSliceLoop:
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
ADDQ DX, AX
ADDQ $8, SI
LOOP popcntSliceLoop
popcntSliceEnd:
MOVQ AX, ret+24(FP)
RET
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntMaskSliceEnd
MOVQ m+24(FP), DI
popcntMaskSliceLoop:
MOVQ (DI), DX
NOTQ DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntMaskSliceLoop
popcntMaskSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntAndSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntAndSliceEnd
MOVQ m+24(FP), DI
popcntAndSliceLoop:
MOVQ (DI), DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntAndSliceLoop
popcntAndSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntOrSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntOrSliceEnd
MOVQ m+24(FP), DI
popcntOrSliceLoop:
MOVQ (DI), DX
ORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntOrSliceLoop
popcntOrSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntXorSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntXorSliceEnd
MOVQ m+24(FP), DI
popcntXorSliceLoop:
MOVQ (DI), DX
XORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntXorSliceLoop
popcntXorSliceEnd:
MOVQ AX, ret+48(FP)
RET

View File

@@ -1,25 +0,0 @@
//go:build !go1.9 && (!amd64 || appengine)
// +build !go1.9
// +build !amd64 appengine
package bitset
func popcntSlice(s []uint64) uint64 {
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
return popcntXorSliceGo(s, m)
}

47
vendor/github.com/bits-and-blooms/bitset/select.go generated vendored Normal file
View File

@@ -0,0 +1,47 @@
package bitset
import "math/bits"
func select64(w uint64, j uint) uint {
seen := 0
// Divide 64bit
part := w & 0xFFFFFFFF
n := uint(bits.OnesCount64(part))
if n <= j {
part = w >> 32
seen += 32
j -= n
}
ww := part
// Divide 32bit
part = ww & 0xFFFF
n = uint(bits.OnesCount64(part))
if n <= j {
part = ww >> 16
seen += 16
j -= n
}
ww = part
// Divide 16bit
part = ww & 0xFF
n = uint(bits.OnesCount64(part))
if n <= j {
part = ww >> 8
seen += 8
j -= n
}
ww = part
// Lookup in final byte
counter := 0
for ; counter < 8; counter++ {
j -= uint((ww >> counter) & 1)
if j+1 == 0 {
break
}
}
return uint(seen + counter)
}

View File

@@ -1,15 +0,0 @@
//go:build !go1.9
// +build !go1.9
package bitset
var deBruijn = [...]byte{
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
func trailingZeroes64(v uint64) uint {
return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58])
}

View File

@@ -1,10 +0,0 @@
//go:build go1.9
// +build go1.9
package bitset
import "math/bits"
func trailingZeroes64(v uint64) uint {
return uint(bits.TrailingZeros64(v))
}

View File

@@ -3,9 +3,9 @@ sudo: false
language: go
go:
- "1.12.x"
- "1.13.x"
- "1.14.x"
- "1.21.x"
- "1.22.x"
- "1.23.x"
script:
- go get golang.org/x/tools/cmd/cover
@@ -17,9 +17,9 @@ script:
- go vet $(go list ./... | grep -v vendor/)
- go test ./test -v -indexType scorch
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
- docs/project-code-coverage.sh
- docs/build_children.sh
- scripts/project-code-coverage.sh
- scripts/build_children.sh
notifications:
email:
- marty.schoch@gmail.com
- fts-team@couchbase.com

View File

@@ -1,11 +1,11 @@
# ![bleve](docs/bleve.png) bleve
[![Tests](https://github.com/blevesearch/bleve/workflows/Tests/badge.svg?branch=master&event=push)](https://github.com/blevesearch/bleve/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster)
[![Tests](https://github.com/blevesearch/bleve/actions/workflows/tests.yml/badge.svg?branch=master&event=push)](https://github.com/blevesearch/bleve/actions/workflows/tests.yml?query=event%3Apush+branch%3Amaster)
[![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master)
[![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve)
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Go Reference](https://pkg.go.dev/badge/github.com/blevesearch/bleve/v2.svg)](https://pkg.go.dev/github.com/blevesearch/bleve/v2)
[![Join the chat](https://badges.gitter.im/join_chat.svg)](https://app.gitter.im/#/room/#blevesearch_bleve:gitter.im)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)
[![Go Report Card](https://goreportcard.com/badge/github.com/blevesearch/bleve/v2)](https://goreportcard.com/report/github.com/blevesearch/bleve/v2)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
@@ -24,7 +24,8 @@ A modern indexing + search library in GO
* [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
* [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
* [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) scoring
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
* [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models
* Hybrid search: exact + semantic
* Query time boosting
* Search result match highlighting with document fragments
@@ -42,7 +43,7 @@ message := struct{
Body string
}{
Id: "example",
From: "marty.schoch@gmail.com",
From: "xyz@couchbase.com",
Body: "bleve indexing is easy",
}

View File

@@ -101,7 +101,10 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
}
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
if err != nil {
panic(err)
}
}
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {

View File

@@ -34,5 +34,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
}
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -48,5 +48,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
}
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -60,5 +60,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
}
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -43,5 +43,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
}
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
}
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
}
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
}
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
}
func init() {
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -66,5 +66,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
}
func init() {
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -63,7 +63,10 @@ func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache
}
func init() {
registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
err := registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
if err != nil {
panic(err)
}
}
// ----------------------------------------------------------------------------

View File

@@ -63,5 +63,8 @@ func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.
}
func init() {
registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
err := registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -45,5 +45,8 @@ func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *regis
}
func init() {
registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
err := registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -29,5 +29,8 @@ func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.C
}
func init() {
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -340,5 +340,8 @@ func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (
}
func init() {
registry.RegisterTokenMap(StopName, TokenMapConstructor)
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -47,7 +47,10 @@ func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.C
}
func init() {
registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor)
err := registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor)
if err != nil {
panic(err)
}
}
// toLowerDeferredCopy will function exactly like

View File

@@ -49,5 +49,8 @@ func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cac
}
func init() {
registry.RegisterTokenFilter(Name, PorterStemmerConstructor)
err := registry.RegisterTokenFilter(Name, PorterStemmerConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -66,5 +66,8 @@ func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.
}
func init() {
registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
err := registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -45,5 +45,8 @@ func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *regis
}
func init() {
registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
err := registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
if err != nil {
panic(err)
}
}

View File

@@ -115,7 +115,10 @@ func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.
}
func init() {
registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
err := registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
if err != nil {
panic(err)
}
}
func convertType(segmentWordType int) analysis.TokenType {

View File

@@ -106,6 +106,15 @@ type DateTimeParser interface {
ParseDateTime(string) (time.Time, string, error)
}
const SynonymSourceType = "synonym"
type SynonymSourceVisitor func(name string, item SynonymSource) error
type SynonymSource interface {
Analyzer() string
Collection() string
}
type ByteArrayConverter interface {
Convert([]byte) (interface{}, error)
}

View File

@@ -19,7 +19,7 @@ Example Opening New Index, Indexing Data
message := struct{
Id: "example"
From: "marty.schoch@gmail.com",
From: "xyz@couchbase.com",
Body: "bleve indexing is easy",
}

View File

@@ -34,6 +34,7 @@ type Document struct {
Fields []Field `json:"fields"`
CompositeFields []*CompositeField
StoredFieldsSize uint64
indexed bool
}
func (d *Document) StoredFieldsBytes() uint64 {
@@ -48,6 +49,13 @@ func NewDocument(id string) *Document {
}
}
func NewSynonymDocument(id string) *Document {
return &Document{
id: id,
Fields: make([]Field, 0),
}
}
func (d *Document) Size() int {
sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr +
len(d.id)
@@ -133,3 +141,19 @@ func (d *Document) VisitComposite(visitor index.CompositeFieldVisitor) {
func (d *Document) HasComposite() bool {
return len(d.CompositeFields) > 0
}
func (d *Document) VisitSynonymFields(visitor index.SynonymFieldVisitor) {
for _, f := range d.Fields {
if sf, ok := f.(index.SynonymField); ok {
visitor(sf)
}
}
}
func (d *Document) SetIndexed() {
d.indexed = true
}
func (d *Document) Indexed() bool {
return d.indexed
}

View File

@@ -116,13 +116,13 @@ func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
options: DefaultBooleanIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultNumericIndexingOptions)
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultBooleanIndexingOptions)
}
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options index.FieldIndexingOptions) *BooleanField {

View File

@@ -87,13 +87,6 @@ func (n *GeoShapeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
func (n *GeoShapeField) Analyze() {
// compute the bytes representation for the coordinates
tokens := make(analysis.TokenStream, 0)
tokens = append(tokens, &analysis.Token{
Start: 0,
End: len(n.encodedValue),
Term: n.encodedValue,
Position: 1,
Type: analysis.AlphaNumeric,
})
rti := geo.GetSpatialAnalyzerPlugin("s2")
terms := rti.GetIndexTokens(n.shape)
@@ -126,6 +119,10 @@ func (n *GeoShapeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
func (n *GeoShapeField) EncodedShape() []byte {
return n.encodedValue
}
func NewGeoShapeField(name string, arrayPositions []uint64,
coordinates [][][][]float64, typ string) *GeoShapeField {
return NewGeoShapeFieldWithIndexingOptions(name, arrayPositions,

View File

@@ -31,7 +31,7 @@ func init() {
reflectStaticSizeIPField = int(reflect.TypeOf(f).Size())
}
const DefaultIPIndexingOptions = index.StoreField | index.IndexField | index.DocValues | index.IncludeTermVectors
const DefaultIPIndexingOptions = index.StoreField | index.IndexField | index.DocValues
type IPField struct {
name string
@@ -115,7 +115,7 @@ func NewIPFieldFromBytes(name string, arrayPositions []uint64, value []byte) *IP
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
options: DefaultIPIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}

View File

@@ -0,0 +1,149 @@
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
var reflectStaticSizeSynonymField int
func init() {
var f SynonymField
reflectStaticSizeSynonymField = int(reflect.TypeOf(f).Size())
}
const DefaultSynonymIndexingOptions = index.IndexField
type SynonymField struct {
name string
analyzer analysis.Analyzer
options index.FieldIndexingOptions
input []string
synonyms []string
numPlainTextBytes uint64
// populated during analysis
synonymMap map[string][]string
}
func (s *SynonymField) Size() int {
return reflectStaticSizeSynonymField + size.SizeOfPtr +
len(s.name)
}
func (s *SynonymField) Name() string {
return s.name
}
func (s *SynonymField) ArrayPositions() []uint64 {
return nil
}
func (s *SynonymField) Options() index.FieldIndexingOptions {
return s.options
}
func (s *SynonymField) NumPlainTextBytes() uint64 {
return s.numPlainTextBytes
}
func (s *SynonymField) AnalyzedLength() int {
return 0
}
func (s *SynonymField) EncodedFieldType() byte {
return 'y'
}
func (s *SynonymField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return nil
}
func (s *SynonymField) Analyze() {
var analyzedInput []string
if len(s.input) > 0 {
analyzedInput = make([]string, 0, len(s.input))
for _, term := range s.input {
analyzedTerm := analyzeSynonymTerm(term, s.analyzer)
if analyzedTerm != "" {
analyzedInput = append(analyzedInput, analyzedTerm)
}
}
}
analyzedSynonyms := make([]string, 0, len(s.synonyms))
for _, syn := range s.synonyms {
analyzedTerm := analyzeSynonymTerm(syn, s.analyzer)
if analyzedTerm != "" {
analyzedSynonyms = append(analyzedSynonyms, analyzedTerm)
}
}
s.synonymMap = processSynonymData(analyzedInput, analyzedSynonyms)
}
func (s *SynonymField) Value() []byte {
return nil
}
func (s *SynonymField) IterateSynonyms(visitor func(term string, synonyms []string)) {
for term, synonyms := range s.synonymMap {
visitor(term, synonyms)
}
}
func NewSynonymField(name string, analyzer analysis.Analyzer, input []string, synonyms []string) *SynonymField {
return &SynonymField{
name: name,
analyzer: analyzer,
options: DefaultSynonymIndexingOptions,
input: input,
synonyms: synonyms,
}
}
func processSynonymData(input []string, synonyms []string) map[string][]string {
var synonymMap map[string][]string
if len(input) > 0 {
// Map each term to the same list of synonyms.
synonymMap = make(map[string][]string, len(input))
for _, term := range input {
synonymMap[term] = synonyms
}
} else {
synonymMap = make(map[string][]string, len(synonyms))
// Precompute a map where each synonym points to all other synonyms.
for i, elem := range synonyms {
synonymMap[elem] = make([]string, 0, len(synonyms)-1)
for j, otherElem := range synonyms {
if i != j {
synonymMap[elem] = append(synonymMap[elem], otherElem)
}
}
}
}
return synonymMap
}
func analyzeSynonymTerm(term string, analyzer analysis.Analyzer) string {
tokenStream := analyzer.Analyze([]byte(term))
if len(tokenStream) == 1 {
return string(tokenStream[0].Term)
}
return ""
}

View File

@@ -27,6 +27,7 @@ const (
ErrorEmptyID
ErrorIndexReadInconsistency
ErrorTwoPhaseSearchInconsistency
ErrorSynonymSearchNotSupported
)
// Error represents a more strongly typed bleve error for detecting
@@ -49,4 +50,5 @@ var errorMessages = map[Error]string{
ErrorEmptyID: "document ID cannot be empty",
ErrorIndexReadInconsistency: "index read inconsistency detected",
ErrorTwoPhaseSearchInconsistency: "2-phase search failed, likely due to an overlapping topology change",
ErrorSynonymSearchNotSupported: "synonym search not supported",
}

View File

@@ -1,4 +1,4 @@
# geo support in bleve
# Geo spatial search support in bleve
Latest bleve spatial capabilities are powered by spatial hierarchical tokens generated from s2geometry.
You can find more details about the [s2geometry basics here](http://s2geometry.io/), and explore the

View File

@@ -139,7 +139,7 @@ func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, f
var minLonL, maxLonL float64
if minLatL > minLatRad && maxLatL < maxLatRad {
deltaLon := asin(sin(radDistance) / cos(radLat))
deltaLon := math.Asin(math.Sin(radDistance) / math.Cos(radLat))
minLonL = radLon - deltaLon
if minLonL < minLonRad {
minLonL += 2 * math.Pi

View File

@@ -88,11 +88,11 @@ func ParseDistanceUnit(u string) (float64, error) {
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
x1 := lat1 * degreesToRadian
x2 := lat2 * degreesToRadian
h1 := 1 - cos(x1-x2)
h2 := 1 - cos((lon1-lon2)*degreesToRadian)
h := (h1 + cos(x1)*cos(x2)*h2) / 2
h1 := 1 - math.Cos(x1-x2)
h2 := 1 - math.Cos((lon1-lon2)*degreesToRadian)
h := (h1 + math.Cos(x1)*math.Cos(x2)*h2) / 2
avgLat := (x1 + x2) / 2
diameter := earthDiameter(avgLat)
return diameter * asin(math.Min(1, math.Sqrt(h)))
return diameter * math.Asin(math.Min(1, math.Sqrt(h)))
}

View File

@@ -236,14 +236,19 @@ func extract2DCoordinates(thing interface{}) [][]float64 {
func extract3DCoordinates(thing interface{}) (c [][][]float64) {
coords := reflect.ValueOf(thing)
for i := 0; i < coords.Len(); i++ {
vals := coords.Index(i)
if !coords.IsValid() {
return nil
}
edges := vals.Interface()
if es, ok := edges.([]interface{}); ok {
loop := extract2DCoordinates(es)
if len(loop) > 0 {
c = append(c, loop)
if coords.Kind() == reflect.Slice {
for i := 0; i < coords.Len(); i++ {
vals := coords.Index(i)
edges := vals.Interface()
if es, ok := edges.([]interface{}); ok {
loop := extract2DCoordinates(es)
if len(loop) > 0 {
c = append(c, loop)
}
}
}
}

View File

@@ -19,104 +19,16 @@ import (
)
var earthDiameterPerLatitude []float64
var sinTab []float64
var cosTab []float64
var asinTab []float64
var asinDer1DivF1Tab []float64
var asinDer2DivF2Tab []float64
var asinDer3DivF3Tab []float64
var asinDer4DivF4Tab []float64
const radiusTabsSize = (1 << 10) + 1
const radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
const radiusIndexer = 1 / radiusDelta
const sinCosTabsSize = (1 << 11) + 1
const asinTabsSize = (1 << 13) + 1
const oneDivF2 = 1 / 2.0
const oneDivF3 = 1 / 6.0
const oneDivF4 = 1 / 24.0
// 1.57079632673412561417e+00 first 33 bits of pi/2
var pio2Hi = math.Float64frombits(0x3FF921FB54400000)
// 6.07710050650619224932e-11 pi/2 - PIO2_HI
var pio2Lo = math.Float64frombits(0x3DD0B4611A626331)
var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00
var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17
var asinPs0 = math.Float64frombits(0x3fc5555555555555) // 1.66666666666666657415e-01
var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d) // -3.25565818622400915405e-01
var asinPs2 = math.Float64frombits(0x3fc9c1550e884455) // 2.01212532134862925881e-01
var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b) // -4.00555345006794114027e-02
var asinPs4 = math.Float64frombits(0x3f49efe07501b288) // 7.91534994289814532176e-04
var asinPs5 = math.Float64frombits(0x3f023de10dfdf709) // 3.47933107596021167570e-05
var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b) // -2.40339491173441421878e+00
var asinQs2 = math.Float64frombits(0x40002ae59c598ac8) // 2.02094576023350569471e+00
var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159) // -6.88283971605453293030e-01
var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282) // 7.70381505559019352791e-02
var twoPiHi = 4 * pio2Hi
var twoPiLo = 4 * pio2Lo
var sinCosDeltaHi = twoPiHi/sinCosTabsSize - 1
var sinCosDeltaLo = twoPiLo/sinCosTabsSize - 1
var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo)
var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99
var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian)
var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1)
var asinIndexer = 1 / asinDelta
const (
radiusTabsSize = (1 << 10) + 1
radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
radiusIndexer = 1 / radiusDelta
)
func init() {
// initializes the tables used for the sloppy math functions
// sin and cos
sinTab = make([]float64, sinCosTabsSize)
cosTab = make([]float64, sinCosTabsSize)
sinCosPiIndex := (sinCosTabsSize - 1) / 2
sinCosPiMul2Index := 2 * sinCosPiIndex
sinCosPiMul05Index := sinCosPiIndex / 2
sinCosPiMul15Index := 3 * sinCosPiIndex / 2
for i := 0; i < sinCosTabsSize; i++ {
// angle: in [0,2*PI].
angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo
sinAngle := math.Sin(angle)
cosAngle := math.Cos(angle)
// For indexes corresponding to null cosine or sine, we make sure the value is zero
// and not an epsilon. This allows for a much better accuracy for results close to zero.
if i == sinCosPiIndex {
sinAngle = 0.0
} else if i == sinCosPiMul2Index {
sinAngle = 0.0
} else if i == sinCosPiMul05Index {
sinAngle = 0.0
} else if i == sinCosPiMul15Index {
sinAngle = 0.0
}
sinTab[i] = sinAngle
cosTab[i] = cosAngle
}
// asin
asinTab = make([]float64, asinTabsSize)
asinDer1DivF1Tab = make([]float64, asinTabsSize)
asinDer2DivF2Tab = make([]float64, asinTabsSize)
asinDer3DivF3Tab = make([]float64, asinTabsSize)
asinDer4DivF4Tab = make([]float64, asinTabsSize)
for i := 0; i < asinTabsSize; i++ {
// x: in [0,ASIN_MAX_VALUE_FOR_TABS].
x := float64(i) * asinDelta
asinTab[i] = math.Asin(x)
oneMinusXSqInv := 1.0 / (1 - x*x)
oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv)
oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv
oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv
oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv
asinDer1DivF1Tab[i] = oneMinusXSqInv05
asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2
asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3
asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4
}
// earth radius
a := 6378137.0
b := 6356752.31420
@@ -145,68 +57,3 @@ func earthDiameter(lat float64) float64 {
}
return earthDiameterPerLatitude[int(index)]
}
var pio2 = math.Pi / 2
func sin(a float64) float64 {
return cos(a - pio2)
}
// cos is a sloppy math (faster) implementation of math.Cos
func cos(a float64) float64 {
if a < 0.0 {
a = -a
}
if a > sinCosMaxValueForIntModulo {
return math.Cos(a)
}
// index: possibly outside tables range.
index := int(a*sinCosIndexer + 0.5)
delta := (a - float64(index)*sinCosDeltaHi) - float64(index)*sinCosDeltaLo
// Making sure index is within tables range.
// Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo.
index &= (sinCosTabsSize - 2) // index % (SIN_COS_TABS_SIZE-1)
indexCos := cosTab[index]
indexSin := sinTab[index]
return indexCos + delta*(-indexSin+delta*(-indexCos*oneDivF2+delta*(indexSin*oneDivF3+delta*indexCos*oneDivF4)))
}
// asin is a sloppy math (faster) implementation of math.Asin
func asin(a float64) float64 {
var negateResult bool
if a < 0 {
a = -a
negateResult = true
}
if a <= asinMaxValueForTabs {
index := int(a*asinIndexer + 0.5)
delta := a - float64(index)*asinDelta
result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index])))
if negateResult {
return -result
}
return result
}
// value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
// This part is derived from fdlibm.
if a < 1 {
t := (1.0 - a) * 0.5
p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t+asinPs5)))))
q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4)))
s := math.Sqrt(t)
z := s + s*(p/q)
result := asinPio2Hi - ((z + z) - asinPio2Lo)
if negateResult {
return -result
}
return result
}
// value >= 1.0, or value is NaN
if a == 1.0 {
if negateResult {
return -math.Pi / 2
}
return math.Pi / 2
}
return math.NaN()
}

View File

@@ -16,6 +16,7 @@ package bleve
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/index/upsidedown"
@@ -63,6 +64,36 @@ func (b *Batch) Index(id string, data interface{}) error {
return nil
}
func (b *Batch) IndexSynonym(id string, collection string, definition *SynonymDefinition) error {
if id == "" {
return ErrorEmptyID
}
if eventIndex, ok := b.index.(index.EventIndex); ok {
eventIndex.FireIndexEvent()
}
synMap, ok := b.index.Mapping().(mapping.SynonymMapping)
if !ok {
return ErrorSynonymSearchNotSupported
}
if err := definition.Validate(); err != nil {
return err
}
doc := document.NewSynonymDocument(id)
err := synMap.MapSynonymDocument(doc, collection, definition.Input, definition.Synonyms)
if err != nil {
return err
}
b.internal.Update(doc)
b.lastDocSize = uint64(doc.Size() +
len(id) + size.SizeOfString) // overhead from internal
b.totalSize += b.lastDocSize
return nil
}
func (b *Batch) LastDocSize() uint64 {
return b.lastDocSize
}
@@ -323,3 +354,35 @@ type IndexCopyable interface {
// FileSystemDirectory is the default implementation for the
// index.Directory interface.
type FileSystemDirectory string
// SynonymDefinition represents a synonym mapping in Bleve.
// Each instance associates one or more input terms with a list of synonyms,
// defining how terms are treated as equivalent in searches.
type SynonymDefinition struct {
// Input is an optional list of terms for unidirectional synonym mapping.
// When terms are specified in Input, they will map to the terms in Synonyms,
// making the relationship unidirectional (each Input maps to all Synonyms).
// If Input is omitted, the relationship is bidirectional among all Synonyms.
Input []string `json:"input,omitempty"`
// Synonyms is a list of terms that are considered equivalent.
// If Input is specified, each term in Input will map to each term in Synonyms.
// If Input is not specified, the Synonyms list will be treated bidirectionally,
// meaning each term in Synonyms is treated as synonymous with all others.
Synonyms []string `json:"synonyms"`
}
func (sd *SynonymDefinition) Validate() error {
if len(sd.Synonyms) == 0 {
return fmt.Errorf("synonym definition must have at least one synonym")
}
return nil
}
// SynonymIndex supports indexing synonym definitions alongside regular documents.
// Synonyms, grouped by collection name, define term relationships for query expansion in searches.
type SynonymIndex interface {
Index
// IndexSynonym indexes a synonym definition, with the specified id and belonging to the specified collection.
IndexSynonym(id string, collection string, definition *SynonymDefinition) error
}

View File

@@ -19,7 +19,7 @@ import (
"os"
"sync"
"github.com/RoaringBitmap/roaring"
"github.com/RoaringBitmap/roaring/v2"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
bolt "go.etcd.io/bbolt"
@@ -303,7 +303,7 @@ func (o *Builder) Close() error {
}
// fill the root bolt with this fake index snapshot
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin, nil)
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin, nil, nil)
if err != nil {
_ = tx.Rollback()
_ = rootBolt.Close()

Some files were not shown because too many files have changed in this diff Show More