mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-01-04 11:19:39 -06:00
build(deps): bump github.com/blevesearch/bleve/v2 from 2.4.4 to 2.5.0
Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.4.4 to 2.5.0. - [Release notes](https://github.com/blevesearch/bleve/releases) - [Commits](https://github.com/blevesearch/bleve/compare/v2.4.4...v2.5.0) --- updated-dependencies: - dependency-name: github.com/blevesearch/bleve/v2 dependency-version: 2.5.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
26
go.mod
26
go.mod
@@ -11,7 +11,7 @@ require (
|
||||
github.com/Nerzal/gocloak/v13 v13.9.0
|
||||
github.com/bbalet/stopwords v1.0.0
|
||||
github.com/beevik/etree v1.5.0
|
||||
github.com/blevesearch/bleve/v2 v2.4.4
|
||||
github.com/blevesearch/bleve/v2 v2.5.0
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible
|
||||
github.com/coreos/go-oidc/v3 v3.14.1
|
||||
github.com/cs3org/go-cs3apis v0.0.0-20241105092511-3ad35d174fc1
|
||||
@@ -121,7 +121,7 @@ require (
|
||||
github.com/Masterminds/sprig v2.22.0+incompatible // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/ProtonMail/go-crypto v1.1.5 // indirect
|
||||
github.com/RoaringBitmap/roaring v1.9.3 // indirect
|
||||
github.com/RoaringBitmap/roaring/v2 v2.4.5 // indirect
|
||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||
github.com/ajg/form v1.5.1 // indirect
|
||||
github.com/alexedwards/argon2id v1.0.0 // indirect
|
||||
@@ -131,24 +131,24 @@ require (
|
||||
github.com/aws/aws-sdk-go v1.55.6 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/bitly/go-simplejson v0.5.0 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.12.0 // indirect
|
||||
github.com/blevesearch/bleve_index_api v1.1.12 // indirect
|
||||
github.com/bits-and-blooms/bitset v1.22.0 // indirect
|
||||
github.com/blevesearch/bleve_index_api v1.2.7 // indirect
|
||||
github.com/blevesearch/geo v0.1.20 // indirect
|
||||
github.com/blevesearch/go-faiss v1.0.24 // indirect
|
||||
github.com/blevesearch/go-faiss v1.0.25 // indirect
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
|
||||
github.com/blevesearch/gtreap v0.1.1 // indirect
|
||||
github.com/blevesearch/mmap-go v1.0.4 // indirect
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.2.16 // indirect
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.9 // indirect
|
||||
github.com/blevesearch/segment v0.9.1 // indirect
|
||||
github.com/blevesearch/snowballstem v0.9.0 // indirect
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
|
||||
github.com/blevesearch/vellum v1.0.10 // indirect
|
||||
github.com/blevesearch/zapx/v11 v11.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v12 v12.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v13 v13.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
|
||||
github.com/blevesearch/zapx/v15 v15.3.16 // indirect
|
||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b // indirect
|
||||
github.com/blevesearch/vellum v1.1.0 // indirect
|
||||
github.com/blevesearch/zapx/v11 v11.4.1 // indirect
|
||||
github.com/blevesearch/zapx/v12 v12.4.1 // indirect
|
||||
github.com/blevesearch/zapx/v13 v13.4.1 // indirect
|
||||
github.com/blevesearch/zapx/v14 v14.4.1 // indirect
|
||||
github.com/blevesearch/zapx/v15 v15.4.1 // indirect
|
||||
github.com/blevesearch/zapx/v16 v16.2.2 // indirect
|
||||
github.com/bluele/gcache v0.0.2 // indirect
|
||||
github.com/bombsimon/logrusr/v3 v3.1.0 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
|
||||
|
||||
52
go.sum
52
go.sum
@@ -87,8 +87,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE
|
||||
github.com/OpenDNS/vegadns2client v0.0.0-20180418235048-a3fa4a771d87/go.mod h1:iGLljf5n9GjT6kc0HBvyI1nOKnGQbNB66VzSNbK5iks=
|
||||
github.com/ProtonMail/go-crypto v1.1.5 h1:eoAQfK2dwL+tFSFpr7TbOaPNUbPiJj4fLYwwGE1FQO4=
|
||||
github.com/ProtonMail/go-crypto v1.1.5/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE=
|
||||
github.com/RoaringBitmap/roaring v1.9.3 h1:t4EbC5qQwnisr5PrP9nt0IRhRTb9gMUgQF4t4S2OByM=
|
||||
github.com/RoaringBitmap/roaring v1.9.3/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
|
||||
github.com/RoaringBitmap/roaring/v2 v2.4.5 h1:uGrrMreGjvAtTBobc0g5IrW1D5ldxDQYe2JW2gggRdg=
|
||||
github.com/RoaringBitmap/roaring/v2 v2.4.5/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/hVXDS2dXi7/eUFE0=
|
||||
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
|
||||
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
|
||||
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
|
||||
@@ -142,45 +142,46 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
|
||||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
|
||||
github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y=
|
||||
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
|
||||
github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
|
||||
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4=
|
||||
github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
|
||||
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
|
||||
github.com/blevesearch/bleve/v2 v2.4.4 h1:RwwLGjUm54SwyyykbrZs4vc1qjzYic4ZnAnY9TwNl60=
|
||||
github.com/blevesearch/bleve/v2 v2.4.4/go.mod h1:fa2Eo6DP7JR+dMFpQe+WiZXINKSunh7WBtlDGbolKXk=
|
||||
github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+5kexNy1RXfegY=
|
||||
github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
|
||||
github.com/blevesearch/bleve/v2 v2.5.0 h1:HzYqBy/5/M9Ul9ESEmXzN/3Jl7YpmWBdHM/+zzv/3k4=
|
||||
github.com/blevesearch/bleve/v2 v2.5.0/go.mod h1:PcJzTPnEynO15dCf9isxOga7YFRa/cMSsbnRwnszXUk=
|
||||
github.com/blevesearch/bleve_index_api v1.2.7 h1:c8r9vmbaYQroAMSGag7zq5gEVPiuXrUQDqfnj7uYZSY=
|
||||
github.com/blevesearch/bleve_index_api v1.2.7/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0=
|
||||
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
|
||||
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
|
||||
github.com/blevesearch/go-faiss v1.0.24 h1:K79IvKjoKHdi7FdiXEsAhxpMuns0x4fM0BO93bW5jLI=
|
||||
github.com/blevesearch/go-faiss v1.0.24/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
|
||||
github.com/blevesearch/go-faiss v1.0.25 h1:lel1rkOUGbT1CJ0YgzKwC7k+XH0XVBHnCVWahdCXk4U=
|
||||
github.com/blevesearch/go-faiss v1.0.25/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
|
||||
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
|
||||
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
|
||||
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
|
||||
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.2.16 h1:uGvKVvG7zvSxCwcm4/ehBa9cCEuZVE+/zvrSl57QUVY=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.2.16/go.mod h1:VF5oHVbIFTu+znY1v30GjSpT5+9YFs9dV2hjvuh34F0=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.9 h1:X6nJXnNHl7nasXW+U6y2Ns2Aw8F9STszkYkyBfQ+p0o=
|
||||
github.com/blevesearch/scorch_segment_api/v2 v2.3.9/go.mod h1:IrzspZlVjhf4X29oJiEhBxEteTqOY9RlYlk1lCmYHr4=
|
||||
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
|
||||
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
|
||||
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
|
||||
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
|
||||
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
|
||||
github.com/blevesearch/vellum v1.0.10 h1:HGPJDT2bTva12hrHepVT3rOyIKFFF4t7Gf6yMxyMIPI=
|
||||
github.com/blevesearch/vellum v1.0.10/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k=
|
||||
github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk=
|
||||
github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ=
|
||||
github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s=
|
||||
github.com/blevesearch/zapx/v12 v12.3.10/go.mod h1:0yeZg6JhaGxITlsS5co73aqPtM04+ycnI6D1v0mhbCs=
|
||||
github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIqMGahDE8=
|
||||
github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk=
|
||||
github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU=
|
||||
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
|
||||
github.com/blevesearch/zapx/v15 v15.3.16 h1:Ct3rv7FUJPfPk99TI/OofdC+Kpb4IdyfdMH48sb+FmE=
|
||||
github.com/blevesearch/zapx/v15 v15.3.16/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
|
||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b h1:ju9Az5YgrzCeK3M1QwvZIpxYhChkXp7/L0RhDYsxXoE=
|
||||
github.com/blevesearch/zapx/v16 v16.1.9-0.20241217210638-a0519e7caf3b/go.mod h1:BlrYNpOu4BvVRslmIG+rLtKhmjIaRhIbG8sb9scGTwI=
|
||||
github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w=
|
||||
github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
|
||||
github.com/blevesearch/zapx/v11 v11.4.1 h1:qFCPlFbsEdwbbckJkysptSQOsHn4s6ZOHL5GMAIAVHA=
|
||||
github.com/blevesearch/zapx/v11 v11.4.1/go.mod h1:qNOGxIqdPC1MXauJCD9HBG487PxviTUUbmChFOAosGs=
|
||||
github.com/blevesearch/zapx/v12 v12.4.1 h1:K77bhypII60a4v8mwvav7r4IxWA8qxhNjgF9xGdb9eQ=
|
||||
github.com/blevesearch/zapx/v12 v12.4.1/go.mod h1:QRPrlPOzAxBNMI0MkgdD+xsTqx65zbuPr3Ko4Re49II=
|
||||
github.com/blevesearch/zapx/v13 v13.4.1 h1:EnkEMZFUK0lsW/jOJJF2xOcp+W8TjEsyeN5BeAZEYYE=
|
||||
github.com/blevesearch/zapx/v13 v13.4.1/go.mod h1:e6duBMlCvgbH9rkzNMnUa9hRI9F7ri2BRcHfphcmGn8=
|
||||
github.com/blevesearch/zapx/v14 v14.4.1 h1:G47kGCshknBZzZAtjcnIAMn3oNx8XBLxp8DMq18ogyE=
|
||||
github.com/blevesearch/zapx/v14 v14.4.1/go.mod h1:O7sDxiaL2r2PnCXbhh1Bvm7b4sP+jp4unE9DDPWGoms=
|
||||
github.com/blevesearch/zapx/v15 v15.4.1 h1:B5IoTMUCEzFdc9FSQbhVOxAY+BO17c05866fNruiI7g=
|
||||
github.com/blevesearch/zapx/v15 v15.4.1/go.mod h1:b/MreHjYeQoLjyY2+UaM0hGZZUajEbE0xhnr1A2/Q6Y=
|
||||
github.com/blevesearch/zapx/v16 v16.2.2 h1:MifKJVRTEhMTgSlle2bDRTb39BGc9jXFRLPZc6r0Rzk=
|
||||
github.com/blevesearch/zapx/v16 v16.2.2/go.mod h1:B9Pk4G1CqtErgQV9DyCSA9Lb7WZe4olYfGw7fVDZ4sk=
|
||||
github.com/bluele/gcache v0.0.2 h1:WcbfdXICg7G/DGBh1PFfcirkWOQV+v077yF1pSy3DGw=
|
||||
github.com/bluele/gcache v0.0.2/go.mod h1:m15KV+ECjptwSPxKhOhQoAFQVtUFjTVkc3H8o0t/fp0=
|
||||
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
|
||||
@@ -1663,6 +1664,7 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
|
||||
|
||||
13
vendor/github.com/RoaringBitmap/roaring/clz.go
generated
vendored
13
vendor/github.com/RoaringBitmap/roaring/clz.go
generated
vendored
@@ -1,13 +0,0 @@
|
||||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
func countLeadingZeros(x uint64) int {
|
||||
return bits.LeadingZeros64(x)
|
||||
}
|
||||
13
vendor/github.com/RoaringBitmap/roaring/ctz.go
generated
vendored
13
vendor/github.com/RoaringBitmap/roaring/ctz.go
generated
vendored
@@ -1,13 +0,0 @@
|
||||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
func countTrailingZeros(x uint64) int {
|
||||
return bits.TrailingZeros64(x)
|
||||
}
|
||||
@@ -10,7 +10,7 @@
|
||||
This is a go version of the Roaring bitmap data structure.
|
||||
|
||||
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
|
||||
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
|
||||
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Redpanda](https://github.com/redpanda-data/redpanda), [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
|
||||
|
||||
[lucene]: https://lucene.apache.org/
|
||||
[solr]: https://lucene.apache.org/solr/
|
||||
@@ -163,7 +163,7 @@ they include
|
||||
- github.com/philhofer/fwd
|
||||
- github.com/jtolds/gls
|
||||
|
||||
Note that the smat library requires Go 1.6 or better.
|
||||
Note that the smat library requires Go 1.15 or better.
|
||||
|
||||
#### Installation
|
||||
|
||||
@@ -188,7 +188,7 @@ package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"bytes"
|
||||
)
|
||||
|
||||
@@ -249,15 +249,20 @@ consider the following sample of code:
|
||||
buf := new(bytes.Buffer)
|
||||
size,err:=rb.WriteTo(buf)
|
||||
if err != nil {
|
||||
t.Errorf("Failed writing")
|
||||
fmt.Println("Failed writing") // return or panic
|
||||
}
|
||||
newrb:= New()
|
||||
size,err=newrb.ReadFrom(buf)
|
||||
if err != nil {
|
||||
t.Errorf("Failed reading")
|
||||
fmt.Println("Failed reading") // return or panic
|
||||
}
|
||||
// if buf is an untrusted source, you should validate the result
|
||||
// (this adds a bit of complexity but it is necessary for security)
|
||||
if newrb.Validate() != nil {
|
||||
fmt.Println("Failed validation") // return or panic
|
||||
}
|
||||
if ! rb.Equals(newrb) {
|
||||
t.Errorf("Cannot retrieve serialized version")
|
||||
fmt.Println("Cannot retrieve serialized version")
|
||||
}
|
||||
```
|
||||
|
||||
@@ -280,7 +285,7 @@ package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/RoaringBitmap/roaring/roaring64"
|
||||
"github.com/RoaringBitmap/roaring/v2/roaring64"
|
||||
"bytes"
|
||||
)
|
||||
|
||||
@@ -356,7 +361,7 @@ https://coveralls.io/github/RoaringBitmap/roaring?branch=master
|
||||
Type
|
||||
|
||||
go test -bench Benchmark -run -
|
||||
|
||||
|
||||
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
|
||||
run the following:
|
||||
|
||||
@@ -369,9 +374,8 @@ BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
|
||||
|
||||
You can use roaring with gore:
|
||||
|
||||
- go get -u github.com/motemen/gore
|
||||
- go install github.com/x-motemen/gore/cmd/gore@latest
|
||||
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
|
||||
- go get github.com/RoaringBitmap/roaring
|
||||
|
||||
```go
|
||||
$ gore
|
||||
@@ -1,6 +1,7 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
@@ -8,6 +9,11 @@ type arrayContainer struct {
|
||||
content []uint16
|
||||
}
|
||||
|
||||
var (
|
||||
ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
|
||||
ErrArrayInvalidSize = errors.New("invalid array size")
|
||||
)
|
||||
|
||||
func (ac *arrayContainer) String() string {
|
||||
s := "{"
|
||||
for it := ac.getShortIterator(); it.hasNext(); {
|
||||
@@ -26,8 +32,7 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
|
||||
_ = x[len(ac.content)-1+i]
|
||||
_ = ac.content[len(ac.content)-1]
|
||||
for k := 0; k < len(ac.content); k++ {
|
||||
x[k+i] =
|
||||
uint32(ac.content[k]) | mask
|
||||
x[k+i] = uint32(ac.content[k]) | mask
|
||||
}
|
||||
return i + len(ac.content)
|
||||
}
|
||||
@@ -60,10 +65,26 @@ func (ac *arrayContainer) minimum() uint16 {
|
||||
return ac.content[0] // assume not empty
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) safeMinimum() (uint16, error) {
|
||||
if len(ac.content) == 0 {
|
||||
return 0, errors.New("empty array")
|
||||
}
|
||||
|
||||
return ac.minimum(), nil
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) maximum() uint16 {
|
||||
return ac.content[len(ac.content)-1] // assume not empty
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) safeMaximum() (uint16, error) {
|
||||
if len(ac.content) == 0 {
|
||||
return 0, errors.New("empty array")
|
||||
}
|
||||
|
||||
return ac.maximum(), nil
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getSizeInBytes() int {
|
||||
return ac.getCardinality() * 2
|
||||
}
|
||||
@@ -168,7 +189,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
|
||||
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
|
||||
}
|
||||
answer := newArrayContainer()
|
||||
answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure
|
||||
answer.content = make([]uint16, newCardinality, newCardinality) // a hack for sure
|
||||
|
||||
copy(answer.content, ac.content[:startIndex])
|
||||
outPos := startIndex
|
||||
@@ -194,11 +215,9 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
|
||||
}
|
||||
answer.content = answer.content[:newCardinality]
|
||||
return answer
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) equals(o container) bool {
|
||||
|
||||
srb, ok := o.(*arrayContainer)
|
||||
if ok {
|
||||
// Check if the containers are the same object.
|
||||
@@ -239,8 +258,8 @@ func (ac *arrayContainer) toBitmapContainer() *bitmapContainer {
|
||||
bc := newBitmapContainer()
|
||||
bc.loadData(ac)
|
||||
return bc
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) iadd(x uint16) (wasNew bool) {
|
||||
// Special case adding to the end of the container.
|
||||
l := len(ac.content)
|
||||
@@ -352,7 +371,6 @@ func (ac *arrayContainer) ior(a container) container {
|
||||
return ac.iorArray(x)
|
||||
case *bitmapContainer:
|
||||
return a.(*bitmapContainer).orArray(ac)
|
||||
//return ac.iorBitmap(x) // note: this does not make sense
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return x.clone()
|
||||
@@ -589,7 +607,6 @@ func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container {
|
||||
}
|
||||
ac.content = ac.content[:pos]
|
||||
return ac
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) xor(a container) container {
|
||||
@@ -630,7 +647,6 @@ func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
|
||||
length := exclusiveUnion2by2(value1.content, value2.content, answer.content)
|
||||
answer.content = answer.content[:length]
|
||||
return answer
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) andNot(a container) container {
|
||||
@@ -822,7 +838,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
|
||||
} else { // no expansion needed
|
||||
ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
|
||||
if cardinalityChange < 0 {
|
||||
|
||||
for i := startIndex + newValuesInRange; i < newCardinality; i++ {
|
||||
ac.content[i] = ac.content[i-cardinalityChange]
|
||||
}
|
||||
@@ -915,7 +930,6 @@ func (ac *arrayContainer) rank(x uint16) int {
|
||||
return answer + 1
|
||||
}
|
||||
return -answer - 1
|
||||
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) selectInt(x uint16) int {
|
||||
@@ -971,6 +985,179 @@ func (ac *arrayContainer) realloc(size int) {
|
||||
}
|
||||
}
|
||||
|
||||
// previousValue returns either the target if found or the previous smaller present value.
|
||||
// If the target is out of bounds a -1 is returned.
|
||||
// Ex: target=4 ac=[2,3,4,6,7] returns 4
|
||||
// Ex: target=5 ac=[2,3,4,6,7] returns 4
|
||||
// Ex: target=6 ac=[2,3,4,6,7] returns 6
|
||||
// Ex: target=8 ac=[2,3,4,6,7] returns 7
|
||||
// Ex: target=1 ac=[2,3,4,6,7] returns -1
|
||||
// Ex: target=0 ac=[2,3,4,6,7] returns -1
|
||||
func (ac *arrayContainer) previousValue(target uint16) int {
|
||||
result := binarySearchUntil(ac.content, target)
|
||||
|
||||
if result.index == len(ac.content) {
|
||||
return int(ac.maximum())
|
||||
}
|
||||
|
||||
if result.outOfBounds() {
|
||||
return -1
|
||||
}
|
||||
|
||||
return int(result.value)
|
||||
}
|
||||
|
||||
// previousAbsentValue returns either the target if not found or the previous smaller missing value.
|
||||
// If there is no absent value at or below the target (every value from 0 up to the target is present), -1 is returned.
|
||||
// Ex: target=4 ac=[1,2,3,4,6,7] returns 0
|
||||
// Ex: target=5 ac=[1,2,3,4,6,7] returns 5
|
||||
// Ex: target=6 ac=[1,2,3,4,6,7] returns 5
|
||||
// Ex: target=8 ac=[1,2,3,4,6,7] returns 8
|
||||
func (ac *arrayContainer) previousAbsentValue(target uint16) int {
|
||||
cardinality := len(ac.content)
|
||||
|
||||
if cardinality == 0 {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
if target > ac.maximum() {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
result := binarySearchPast(ac.content, target)
|
||||
|
||||
if result.notFound() {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
// If the target was found at index 1, then the next value down must be result.value-1
|
||||
if result.index == 1 {
|
||||
if ac.minimum() != result.value-1 {
|
||||
return int(result.value - 1)
|
||||
}
|
||||
}
|
||||
|
||||
low := -1
|
||||
high := result.index
|
||||
|
||||
// This uses the pigeon-hole principle.
|
||||
// the if statement compares the difference in indices vs
|
||||
// the difference in values. Suppose mid = 10 and result.index = 5
|
||||
// with ac.content[mid] = 100 and target = 10
|
||||
// then we have 5 slots for values but we need to fit in 90 values
|
||||
// so some of the values must be missing
|
||||
for low+1 < high {
|
||||
midIndex := (high + low) >> 1
|
||||
indexDifference := result.index - midIndex
|
||||
valueDifference := target - ac.content[midIndex]
|
||||
if indexDifference < int(valueDifference) {
|
||||
low = midIndex
|
||||
} else {
|
||||
high = midIndex
|
||||
}
|
||||
}
|
||||
|
||||
if high == 0 {
|
||||
return int(ac.minimum()) - 1
|
||||
}
|
||||
|
||||
return int(ac.content[high] - 1)
|
||||
}
|
||||
|
||||
// nextAbsentValue returns either the target if not found or the next larger missing value.
|
||||
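// The result is never negative: every return path converts a uint16 to int.
// NOTE(review): the uint16 "+ 1" wraps to 0 when the run of present values ends at 65535 - confirm callers handle this.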
|
||||
// Ex: target=4 ac=[1,2,3,4,6,7] returns 5
|
||||
// Ex: target=5 ac=[1,2,3,4,6,7] returns 5
|
||||
// Ex: target=0 ac=[1,2,3,4,6,7] returns 0
|
||||
// Ex: target=8 ac=[1,2,3,4,6,7] returns 8
|
||||
func (ac *arrayContainer) nextAbsentValue(target uint16) int {
|
||||
cardinality := len(ac.content)
|
||||
|
||||
if cardinality == 0 {
|
||||
return int(target)
|
||||
}
|
||||
if target < ac.minimum() {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
result := binarySearchPast(ac.content, target)
|
||||
|
||||
if result.notFound() {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
if result.index == cardinality-2 {
|
||||
if ac.maximum() != result.value+1 {
|
||||
return int(result.value + 1)
|
||||
}
|
||||
}
|
||||
|
||||
low := result.index
|
||||
high := len(ac.content)
|
||||
|
||||
// This uses the pigeon-hole principle.
|
||||
// the if statement compares the difference in indices vs
|
||||
// the difference in values. Suppose mid = 10 and result.index = 5
|
||||
// with ac.content[mid] = 100 and target = 10
|
||||
// then we have 5 slots for values but we need to fit in 90 values
|
||||
// so some of the values must be missing
|
||||
for low+1 < high {
|
||||
midIndex := (high + low) >> 1
|
||||
indexDifference := midIndex - result.index
|
||||
valueDifference := ac.content[midIndex] - target
|
||||
if indexDifference < int(valueDifference) {
|
||||
high = midIndex
|
||||
} else {
|
||||
low = midIndex
|
||||
}
|
||||
}
|
||||
|
||||
if low == cardinality-1 {
|
||||
return int(ac.content[cardinality-1] + 1)
|
||||
}
|
||||
|
||||
return int(ac.content[low] + 1)
|
||||
}
|
||||
|
||||
// nextValue returns either the target if found or the next larger value.
|
||||
// if the target is out of bounds a -1 is returned
|
||||
//
|
||||
// Ex: target=4 ac=[1,2,3,4,6,7] returns 4
|
||||
// Ex: target=5 ac=[1,2,3,4,6,7] returns 6
|
||||
// Ex: target=6 ac=[1,2,3,4,6,7] returns 6
|
||||
// Ex: target=0 ac=[1,2,3,4,6,7] returns 1
|
||||
// Ex: target=100 ac=[1,2,3,4,6,7] returns -1
|
||||
func (ac *arrayContainer) nextValue(target uint16) int {
|
||||
cardinality := len(ac.content)
|
||||
if cardinality == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
//if target < ac.minimum() {
|
||||
// return -1
|
||||
//}
|
||||
//if target > ac.maximum() {
|
||||
// return -1
|
||||
// }
|
||||
|
||||
result := binarySearchUntil(ac.content, target)
|
||||
if result.exactMatch {
|
||||
return int(result.value)
|
||||
}
|
||||
|
||||
if !result.exactMatch && result.index == -1 {
|
||||
return int(ac.content[0])
|
||||
}
|
||||
if result.outOfBounds() {
|
||||
return -1
|
||||
}
|
||||
|
||||
if result.index < len(ac.content)-1 {
|
||||
return int(ac.content[result.index+1])
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func newArrayContainer() *arrayContainer {
|
||||
p := new(arrayContainer)
|
||||
return p
|
||||
@@ -1039,15 +1226,12 @@ func (ac *arrayContainer) numberOfRuns() (nr int) {
|
||||
|
||||
// convert to run or array *if needed*
|
||||
func (ac *arrayContainer) toEfficientContainer() container {
|
||||
|
||||
numRuns := ac.numberOfRuns()
|
||||
|
||||
sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
card := ac.getCardinality()
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
|
||||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return newRunContainer16FromArray(ac)
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
@@ -1099,3 +1283,28 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {
|
||||
|
||||
return low, high
|
||||
}
|
||||
|
||||
// validate checks cardinality and sort order of the array container
|
||||
func (ac *arrayContainer) validate() error {
|
||||
cardinality := ac.getCardinality()
|
||||
|
||||
if cardinality <= 0 {
|
||||
return ErrArrayInvalidSize
|
||||
}
|
||||
|
||||
if cardinality > arrayDefaultMaxSize {
|
||||
return ErrArrayInvalidSize
|
||||
}
|
||||
|
||||
previous := ac.content[0]
|
||||
for i := 1; i < len(ac.content); i++ {
|
||||
next := ac.content[i]
|
||||
if previous > next {
|
||||
return ErrArrayIncorrectSort
|
||||
}
|
||||
previous = next
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,7 +1,9 @@
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/bits"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
@@ -56,6 +58,17 @@ func (bc *bitmapContainer) minimum() uint16 {
|
||||
return MaxUint16
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) safeMinimum() (uint16, error) {
|
||||
if len(bc.bitmap) == 0 {
|
||||
return 0, errors.New("Empty bitmap")
|
||||
}
|
||||
val := bc.minimum()
|
||||
if val == MaxUint16 {
|
||||
return 0, errors.New("Empty bitmap")
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// i should be non-zero
|
||||
func clz(i uint64) int {
|
||||
n := 1
|
||||
@@ -94,6 +107,17 @@ func (bc *bitmapContainer) maximum() uint16 {
|
||||
return uint16(0)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) safeMaximum() (uint16, error) {
|
||||
if len(bc.bitmap) == 0 {
|
||||
return 0, errors.New("Empty bitmap")
|
||||
}
|
||||
val := bc.maximum()
|
||||
if val == uint16(0) {
|
||||
return 0, errors.New("Empty bitmap")
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iterate(cb func(x uint16) bool) bool {
|
||||
iterator := bitmapContainerShortIterator{bc, bc.NextSetBit(0)}
|
||||
|
||||
@@ -116,6 +140,7 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 {
|
||||
bcsi.i = bcsi.ptr.NextSetBit(uint(bcsi.i) + 1)
|
||||
return uint16(j)
|
||||
}
|
||||
|
||||
func (bcsi *bitmapContainerShortIterator) hasNext() bool {
|
||||
return bcsi.i >= 0
|
||||
}
|
||||
@@ -201,6 +226,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
|
||||
return n
|
||||
}
|
||||
|
||||
// nextMany64 returns the number of values added to the buffer
|
||||
func (bcmi *bitmapContainerManyIterator) nextMany64(hs uint64, buf []uint64) int {
|
||||
n := 0
|
||||
base := bcmi.base
|
||||
@@ -237,11 +263,10 @@ func (bc *bitmapContainer) getManyIterator() manyIterable {
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getSizeInBytes() int {
|
||||
return len(bc.bitmap) * 8 // + bcBaseBytes
|
||||
return len(bc.bitmap) * 8
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) serializedSizeInBytes() int {
|
||||
//return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
|
||||
return len(bc.bitmap) * 8
|
||||
}
|
||||
|
||||
@@ -313,6 +338,7 @@ func (bc *bitmapContainer) iaddReturnMinimized(i uint16) container {
|
||||
return bc
|
||||
}
|
||||
|
||||
// iadd adds the arg i, returning true if not already present
|
||||
func (bc *bitmapContainer) iadd(i uint16) bool {
|
||||
x := int(i)
|
||||
previous := bc.bitmap[x/64]
|
||||
@@ -441,7 +467,7 @@ func (bc *bitmapContainer) ior(a container) container {
|
||||
if bc.isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
//bc.computeCardinality()
|
||||
// bc.computeCardinality()
|
||||
return bc
|
||||
}
|
||||
panic(fmt.Errorf("unsupported container type %T", a))
|
||||
@@ -516,7 +542,7 @@ func (bc *bitmapContainer) orArray(value2 *arrayContainer) container {
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) orArrayCardinality(value2 *arrayContainer) int {
|
||||
answer := 0
|
||||
answer := bc.getCardinality()
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
// branchless:
|
||||
@@ -819,9 +845,8 @@ func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container {
|
||||
}
|
||||
ac := newArrayContainerSize(newcardinality)
|
||||
fillArrayAND(ac.content, bc.bitmap, value2.bitmap)
|
||||
ac.content = ac.content[:newcardinality] //not sure why i need this
|
||||
ac.content = ac.content[:newcardinality]
|
||||
return ac
|
||||
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool {
|
||||
@@ -842,7 +867,6 @@ func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool {
|
||||
}
|
||||
}
|
||||
return false
|
||||
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
|
||||
@@ -995,7 +1019,7 @@ func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) containe
|
||||
return bc
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) contains(i uint16) bool { //testbit
|
||||
func (bc *bitmapContainer) contains(i uint16) bool { // testbit
|
||||
x := uint(i)
|
||||
w := bc.bitmap[x>>6]
|
||||
mask := uint64(1) << (x & 63)
|
||||
@@ -1051,7 +1075,7 @@ func (bc *bitmapContainer) toArrayContainer() *arrayContainer {
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) fillArray(container []uint16) {
|
||||
//TODO: rewrite in assembly
|
||||
// TODO: rewrite in assembly
|
||||
pos := 0
|
||||
base := 0
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
@@ -1066,6 +1090,7 @@ func (bc *bitmapContainer) fillArray(container []uint16) {
|
||||
}
|
||||
}
|
||||
|
||||
// NextSetBit returns the next set bit e.g the next int packed into the bitmaparray
|
||||
func (bc *bitmapContainer) NextSetBit(i uint) int {
|
||||
var (
|
||||
x = i / 64
|
||||
@@ -1088,12 +1113,22 @@ func (bc *bitmapContainer) NextSetBit(i uint) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
// PrevSetBit returns the previous set bit e.g the previous int packed into the bitmaparray
|
||||
func (bc *bitmapContainer) PrevSetBit(i int) int {
|
||||
if i < 0 {
|
||||
return -1
|
||||
}
|
||||
x := i / 64
|
||||
if x >= len(bc.bitmap) {
|
||||
|
||||
return bc.uPrevSetBit(uint(i))
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) uPrevSetBit(i uint) int {
|
||||
var (
|
||||
x = i >> 6
|
||||
length = uint(len(bc.bitmap))
|
||||
)
|
||||
|
||||
if x >= length {
|
||||
return -1
|
||||
}
|
||||
|
||||
@@ -1103,12 +1138,16 @@ func (bc *bitmapContainer) PrevSetBit(i int) int {
|
||||
|
||||
w = w << uint(63-b)
|
||||
if w != 0 {
|
||||
return i - countLeadingZeros(w)
|
||||
return int(i) - countLeadingZeros(w)
|
||||
}
|
||||
orig := x
|
||||
x--
|
||||
for ; x >= 0; x-- {
|
||||
if x > orig {
|
||||
return -1
|
||||
}
|
||||
for ; x < orig; x-- {
|
||||
if bc.bitmap[x] != 0 {
|
||||
return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x])
|
||||
return int((x*64)+63) - countLeadingZeros(bc.bitmap[x])
|
||||
}
|
||||
}
|
||||
return -1
|
||||
@@ -1141,7 +1180,6 @@ func (bc *bitmapContainer) numberOfRuns() int {
|
||||
|
||||
// convert to run or array *if needed*
|
||||
func (bc *bitmapContainer) toEfficientContainer() container {
|
||||
|
||||
numRuns := bc.numberOfRuns()
|
||||
|
||||
sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
|
||||
@@ -1149,7 +1187,7 @@ func (bc *bitmapContainer) toEfficientContainer() container {
|
||||
card := bc.getCardinality()
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
|
||||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return newRunContainer16FromBitmapContainer(bc)
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
@@ -1159,7 +1197,6 @@ func (bc *bitmapContainer) toEfficientContainer() container {
|
||||
}
|
||||
|
||||
func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
|
||||
|
||||
if len(rc.iv) == 1 {
|
||||
return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last()))
|
||||
}
|
||||
@@ -1169,7 +1206,7 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
|
||||
setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1)
|
||||
bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start)
|
||||
}
|
||||
//bc.computeCardinality()
|
||||
// bc.computeCardinality()
|
||||
return bc
|
||||
}
|
||||
|
||||
@@ -1234,3 +1271,171 @@ func (bc *bitmapContainer) addOffset(x uint16) (container, container) {
|
||||
|
||||
return low, high
|
||||
}
|
||||
|
||||
// nextValue returns either the `target` if found or the next largest value.
|
||||
// if the target is out of bounds a -1 is returned
|
||||
//
|
||||
// Example :
|
||||
// Suppose the bitmap container represents the following slice
|
||||
// [1,2,10,11,100]
|
||||
// target=0 returns 1
|
||||
// target=1 returns 1
|
||||
// target=10 returns 10
|
||||
// target=90 returns 100
|
||||
func (bc *bitmapContainer) nextValue(target uint16) int {
|
||||
if bc.cardinality == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
return bc.NextSetBit(uint(target))
|
||||
}
|
||||
|
||||
// nextAbsentValue returns the next absent value.
|
||||
// if the target is out of bounds a -1 is returned
|
||||
func (bc *bitmapContainer) nextAbsentValue(target uint16) int {
|
||||
if bc.cardinality == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
var (
|
||||
x = target >> 6
|
||||
length = uint(len(bc.bitmap))
|
||||
)
|
||||
if uint(x) >= length {
|
||||
return -1
|
||||
}
|
||||
w := bc.bitmap[x]
|
||||
w = w >> uint(target%64)
|
||||
if w == 0 {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
// Check if all 1's
|
||||
// if statement - we skip the if we have all ones [1,1,1,1...1]
|
||||
if ^w != 0 {
|
||||
|
||||
if countTrailingZeros(w) > 0 {
|
||||
// we have something like [X,Y,Z, 0,0,0]. This means the target bit is zero
|
||||
return int(target)
|
||||
}
|
||||
|
||||
// other wise something like [X,Y,0,1,1,1..1], where x and y can be either 1 or 0.
|
||||
|
||||
trailing := countTrailingOnes(w)
|
||||
return int(target) + trailing
|
||||
|
||||
}
|
||||
x++
|
||||
for ; uint(x) < length; x++ {
|
||||
if bc.bitmap[x] == 0 {
|
||||
return int(x * 64)
|
||||
}
|
||||
if ^bc.bitmap[x] != 0 {
|
||||
trailing := countTrailingOnes(bc.bitmap[x])
|
||||
return int(x*64) + trailing
|
||||
}
|
||||
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// previousValue returns either the `target` if found or the previous largest value.
|
||||
// if the target is out of bounds a -1 is returned
|
||||
|
||||
// Example :
|
||||
// Suppose the bitmap container represents the following slice
|
||||
// [1,2,10,11,100]
|
||||
// target=0 returns -1
|
||||
// target=1 returns 1
|
||||
// target=2 returns 2
|
||||
// target=10 returns 10
|
||||
// target=50 returns 11
|
||||
// target=100 returns 100
|
||||
func (bc *bitmapContainer) previousValue(target uint16) int {
|
||||
if bc.cardinality == 0 {
|
||||
return -1
|
||||
}
|
||||
return bc.uPrevSetBit(uint(target))
|
||||
}
|
||||
|
||||
// previousAbsentValue returns the previous absent value.
|
||||
func (bc *bitmapContainer) previousAbsentValue(target uint16) int {
|
||||
if bc.cardinality == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
var (
|
||||
x = target >> 6
|
||||
length = uint(len(bc.bitmap))
|
||||
)
|
||||
if uint(x) >= length {
|
||||
return -1
|
||||
}
|
||||
w := bc.bitmap[x]
|
||||
shifted := w >> uint(target%64)
|
||||
if shifted == 0 {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
// Check if all 1's
|
||||
// if statement - we skip if we have all ones [1,1,1,1...1] as no value is absent
|
||||
if ^shifted != 0 {
|
||||
|
||||
if countTrailingZeros(shifted) > 0 {
|
||||
// we have something like shifted=[X,Y,Z,..., 0,0,0]. This means the target bit is zero
|
||||
return int(target)
|
||||
}
|
||||
|
||||
// The rotate will rotate the target bit into the leading position.
|
||||
// We know the target bit is not zero because of the countTrailingZero check above
|
||||
// We then shift the target bit out of the way.
|
||||
// Assume a structure like an original structure like [X,Y,Z,..., Target, A, B,C...]
|
||||
// shifted will be [X,Y,Z...Target]
|
||||
// shiftedRotated will be [A,B,C....]
|
||||
// If countLeadingZeros > 0 then A is zero, if not at least A is 1 return
|
||||
// Else count the number of ones's until a 0
|
||||
shiftedRotated := bits.RotateLeft64(w, int(64-uint(target%64))-1) << 1
|
||||
leadingZeros := countLeadingZeros(shiftedRotated)
|
||||
if leadingZeros > 0 {
|
||||
return int(target) - 1
|
||||
}
|
||||
leadingOnes := countLeadingOnes(shiftedRotated)
|
||||
if leadingOnes > 0 {
|
||||
return int(target) - leadingOnes - 1
|
||||
}
|
||||
|
||||
}
|
||||
x++
|
||||
for ; uint(x) < length; x++ {
|
||||
if bc.bitmap[x] == 0 {
|
||||
return int(x * 64)
|
||||
}
|
||||
if ^bc.bitmap[x] != 0 {
|
||||
trailing := countTrailingOnes(bc.bitmap[x])
|
||||
return int(x*64) + trailing
|
||||
}
|
||||
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// validate checks that the container size is non-negative
|
||||
func (bc *bitmapContainer) validate() error {
|
||||
if bc.cardinality < arrayDefaultMaxSize {
|
||||
return fmt.Errorf("bitmap container size was less than: %d", arrayDefaultMaxSize)
|
||||
}
|
||||
|
||||
if maxCapacity < len(bc.bitmap)*64 {
|
||||
return fmt.Errorf("bitmap slize size %d exceeded max capacity %d", maxCapacity, len(bc.bitmap)*64)
|
||||
}
|
||||
|
||||
if bc.cardinality > maxCapacity {
|
||||
return fmt.Errorf("bitmap container size was greater than: %d", maxCapacity)
|
||||
}
|
||||
|
||||
if bc.cardinality != int(popcntSlice(bc.bitmap)) {
|
||||
return fmt.Errorf("bitmap container size %d did not match underlying slice length: %d", bc.cardinality, int(popcntSlice(bc.bitmap)))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
19
vendor/github.com/RoaringBitmap/roaring/v2/clz.go
generated
vendored
Normal file
19
vendor/github.com/RoaringBitmap/roaring/v2/clz.go
generated
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// countLeadingZeros returns the number of leading zero bits in x; the result is 64 for x == 0.
|
||||
func countLeadingZeros(x uint64) int {
|
||||
return bits.LeadingZeros64(x)
|
||||
}
|
||||
|
||||
// countLeadingOnes returns the number of leading one bits in x; the result is 0 for x == 0.
|
||||
func countLeadingOnes(x uint64) int {
|
||||
return bits.LeadingZeros64(^x)
|
||||
}
|
||||
21
vendor/github.com/RoaringBitmap/roaring/v2/ctz.go
generated
vendored
Normal file
21
vendor/github.com/RoaringBitmap/roaring/v2/ctz.go
generated
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// countTrailingZeros returns the number of trailing zero bits in x; the result is 64 for x == 0.
|
||||
func countTrailingZeros(x uint64) int {
|
||||
return bits.TrailingZeros64(x)
|
||||
}
|
||||
|
||||
// countTrailingOnes returns the number of trailing one bits in x
|
||||
// The result is 64 for x == 18,446,744,073,709,551,615 (all 64 bits set).
|
||||
// The result is 0 for x == 0.
|
||||
func countTrailingOnes(x uint64) int {
|
||||
return bits.TrailingZeros64(^x)
|
||||
}
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/internal"
|
||||
"github.com/RoaringBitmap/roaring/v2/internal"
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
)
|
||||
|
||||
@@ -26,7 +26,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
|
||||
buf := new(bytes.Buffer)
|
||||
_, err := rb.WriteTo(buf)
|
||||
return base64.StdEncoding.EncodeToString(buf.Bytes()), err
|
||||
|
||||
}
|
||||
|
||||
// FromBase64 deserializes a bitmap from Base64
|
||||
@@ -54,10 +53,12 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
|
||||
return rb.highlowcontainer.toBytes()
|
||||
}
|
||||
|
||||
const wordSize = uint64(64)
|
||||
const log2WordSize = uint64(6)
|
||||
const capacity = ^uint64(0)
|
||||
const bitmapContainerSize = (1 << 16) / 64 // bitmap size in words
|
||||
const (
|
||||
wordSize = uint64(64)
|
||||
log2WordSize = uint64(6)
|
||||
capacity = ^uint64(0)
|
||||
bitmapContainerSize = (1 << 16) / 64 // bitmap size in words
|
||||
)
|
||||
|
||||
// DenseSize returns the size of the bitmap when stored as a dense bitmap.
|
||||
func (rb *Bitmap) DenseSize() uint64 {
|
||||
@@ -276,14 +277,19 @@ func (rb *Bitmap) Checksum() uint64 {
|
||||
return hash
|
||||
}
|
||||
|
||||
// FromUnsafeBytes reads a serialized version of this bitmap from the byte buffer without copy.
|
||||
// FromUnsafeBytes reads a serialized version of this bitmap from the byte buffer without copy
|
||||
// (for advanced users only, you must be an expert Go programmer!).
|
||||
// E.g., you can use this method to read a serialized bitmap from a memory-mapped file written out
|
||||
// with the WriteTo method.
|
||||
// The format specification is
|
||||
// https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
// It is the caller's responsibility to ensure that the input data is not modified and remains valid for the entire lifetime of this bitmap.
|
||||
// This method avoids small allocations but holds references to the input data buffer. It is GC-friendly, but it may consume more memory eventually.
|
||||
// The containers in the resulting bitmap are immutable containers tied to the provided byte array and they rely on
|
||||
// copy-on-write which means that modifying them creates copies. Thus FromUnsafeBytes is more likely to be appropriate for read-only use cases,
|
||||
// when the resulting bitmap can be considered immutable.
|
||||
//
|
||||
// See also the FromBuffer function.
|
||||
// See also the FromBuffer function. We recommend benchmarking both functions to determine which one is more suitable for your use case.
|
||||
// See https://github.com/RoaringBitmap/roaring/pull/395 for more details.
|
||||
func (rb *Bitmap) FromUnsafeBytes(data []byte, cookieHeader ...byte) (p int64, err error) {
|
||||
stream := internal.NewByteBuffer(data)
|
||||
@@ -291,11 +297,13 @@ func (rb *Bitmap) FromUnsafeBytes(data []byte, cookieHeader ...byte) (p int64, e
|
||||
}
|
||||
|
||||
// ReadFrom reads a serialized version of this bitmap from stream.
|
||||
// E.g., you can use this method to read a serialized bitmap from a file written
|
||||
// with the WriteTo method.
|
||||
// The format is compatible with other RoaringBitmap
|
||||
// implementations (Java, C) and is documented here:
|
||||
// https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
// Since io.Reader is regarded as a stream and cannot be read twice.
|
||||
// So add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom.
|
||||
// Since io.Reader is regarded as a stream and cannot be read twice,
|
||||
// we add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom.
|
||||
// It is not necessary to pass cookieHeader when call roaring.ReadFrom to read the roaring32 data directly.
|
||||
func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
|
||||
stream, ok := reader.(internal.ByteInput)
|
||||
@@ -313,7 +321,18 @@ func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err
|
||||
return
|
||||
}
|
||||
|
||||
// FromBuffer creates a bitmap from its serialized version stored in buffer
|
||||
// MustReadFrom calls ReadFrom internally.
|
||||
// After deserialization Validate will be called.
|
||||
// If the Bitmap fails to validate, a panic with the validation error will be thrown
|
||||
func (rb *Bitmap) MustReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
|
||||
rb.ReadFrom(reader, cookieHeader...)
|
||||
if err := rb.Validate(); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// FromBuffer creates a bitmap from its serialized version stored in buffer (E.g., as written by WriteTo).
|
||||
//
|
||||
// The format specification is available here:
|
||||
// https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
@@ -960,7 +979,6 @@ func (rb *Bitmap) CheckedAdd(x uint32) bool {
|
||||
newac := newArrayContainer()
|
||||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
|
||||
return true
|
||||
|
||||
}
|
||||
|
||||
// AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add)
|
||||
@@ -998,7 +1016,6 @@ func (rb *Bitmap) CheckedRemove(x uint32) bool {
|
||||
return C.getCardinality() < oldcard
|
||||
}
|
||||
return false
|
||||
|
||||
}
|
||||
|
||||
// IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0))
|
||||
@@ -1088,7 +1105,7 @@ main:
|
||||
break main
|
||||
}
|
||||
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else { //s1 > s2
|
||||
} else { // s1 > s2
|
||||
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
@@ -1187,7 +1204,7 @@ main:
|
||||
break main
|
||||
}
|
||||
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else { //s1 > s2
|
||||
} else { // s1 > s2
|
||||
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
@@ -1256,7 +1273,7 @@ main:
|
||||
break main
|
||||
}
|
||||
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else { //s1 > s2
|
||||
} else { // s1 > s2
|
||||
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
@@ -1396,7 +1413,7 @@ main:
|
||||
break main
|
||||
}
|
||||
s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else { //s1 > s2
|
||||
} else { // s1 > s2
|
||||
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
@@ -1584,7 +1601,7 @@ main:
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
} else { //s1 > s2
|
||||
} else { // s1 > s2
|
||||
pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
@@ -1632,7 +1649,6 @@ func BitmapOf(dat ...uint32) *Bitmap {
|
||||
// The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
|
||||
// while uint64(0x100000000) cannot be represented as a 32-bit value.
|
||||
func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
|
||||
|
||||
if rangeEnd > MaxUint32+1 {
|
||||
panic("rangeEnd > MaxUint32+1")
|
||||
}
|
||||
@@ -1869,6 +1885,206 @@ func (rb *Bitmap) CloneCopyOnWriteContainers() {
|
||||
rb.highlowcontainer.cloneCopyOnWriteContainers()
|
||||
}
|
||||
|
||||
// NextValue returns the next largest value in the bitmap, or -1
|
||||
// if none is present. This function should not be used inside
|
||||
// a performance-sensitive loop: prefer iterators if
|
||||
// performance is a concern.
|
||||
func (rb *Bitmap) NextValue(target uint32) int64 {
|
||||
originalKey := highbits(target)
|
||||
query := lowbits(target)
|
||||
var nextValue int64
|
||||
nextValue = -1
|
||||
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
|
||||
for containerIndex < rb.highlowcontainer.size() && nextValue == -1 {
|
||||
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
|
||||
container := rb.highlowcontainer.getContainer(containerKey)
|
||||
// if containerKey > orginalKey then we are past the container which mapped to the orignal key
|
||||
// in that case we can just return the minimum from that container
|
||||
var responseBit int64
|
||||
if containerKey > originalKey {
|
||||
bit, err := container.safeMinimum()
|
||||
if err == nil {
|
||||
responseBit = -1
|
||||
}
|
||||
responseBit = int64(bit)
|
||||
} else {
|
||||
responseBit = int64(container.nextValue(query))
|
||||
}
|
||||
|
||||
if responseBit == -1 {
|
||||
nextValue = -1
|
||||
} else {
|
||||
nextValue = int64(combineLoHi32(uint32(responseBit), uint32(containerKey)))
|
||||
}
|
||||
containerIndex++
|
||||
}
|
||||
|
||||
return nextValue
|
||||
}
|
||||
|
||||
// PreviousValue returns the previous largest value in the bitmap, or -1
|
||||
// if none is present. This function should not be used inside
|
||||
// a performance-sensitive loop: prefer iterators if
|
||||
// performance is a concern.
|
||||
func (rb *Bitmap) PreviousValue(target uint32) int64 {
|
||||
if rb.IsEmpty() {
|
||||
return -1
|
||||
}
|
||||
|
||||
originalKey := highbits(uint32(target))
|
||||
query := lowbits(uint32(target))
|
||||
var prevValue int64
|
||||
prevValue = -1
|
||||
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
|
||||
|
||||
if containerIndex == rb.highlowcontainer.size() {
|
||||
return int64(rb.Maximum())
|
||||
}
|
||||
|
||||
if rb.highlowcontainer.getKeyAtIndex(containerIndex) > originalKey {
|
||||
// target absent, key of first container after target too high
|
||||
containerIndex--
|
||||
}
|
||||
|
||||
for containerIndex != -1 && prevValue == -1 {
|
||||
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
|
||||
container := rb.highlowcontainer.getContainer(containerKey)
|
||||
// if containerKey > originalKey then we are past the container which mapped to the original key
|
||||
// in that case we can just return the minimum from that container
|
||||
var responseBit int
|
||||
if containerKey < originalKey {
|
||||
bit, err := container.safeMaximum()
|
||||
|
||||
if err == nil {
|
||||
responseBit = -1
|
||||
}
|
||||
responseBit = int(bit)
|
||||
} else {
|
||||
responseBit = container.previousValue(query)
|
||||
}
|
||||
|
||||
if responseBit == -1 {
|
||||
prevValue = -1
|
||||
} else {
|
||||
prevValue = int64(combineLoHi32(uint32(responseBit), uint32(containerKey)))
|
||||
}
|
||||
containerIndex--
|
||||
}
|
||||
|
||||
return prevValue
|
||||
}
|
||||
|
||||
// NextAbsentValue returns the next largest missing value in the bitmap, or -1
|
||||
// if none is present. This function should not be used inside
|
||||
// a performance-sensitive loop: prefer iterators if
|
||||
// performance is a concern.
|
||||
func (rb *Bitmap) NextAbsentValue(target uint32) int64 {
|
||||
originalKey := highbits(target)
|
||||
query := lowbits(target)
|
||||
var nextValue int64
|
||||
nextValue = -1
|
||||
|
||||
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
|
||||
if containerIndex == rb.highlowcontainer.size() {
|
||||
// if we are here it means no container found, just return the target
|
||||
return int64(target)
|
||||
}
|
||||
|
||||
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
|
||||
|
||||
keyspace := uint32(containerKey) << 16
|
||||
if target < keyspace {
|
||||
// target is less than the start of the keyspace
|
||||
// that means target cannot be in the keyspace
|
||||
return int64(target)
|
||||
}
|
||||
|
||||
container := rb.highlowcontainer.getContainer(containerKey)
|
||||
nextValue = int64(container.nextAbsentValue(query))
|
||||
for {
|
||||
if nextValue != (1 << 16) {
|
||||
return int64(combineLoHi32(uint32(nextValue), keyspace))
|
||||
}
|
||||
|
||||
if containerIndex == rb.highlowcontainer.size()-1 {
|
||||
val, err := container.safeMaximum()
|
||||
if err == nil {
|
||||
return -1
|
||||
}
|
||||
return int64(val) + 1
|
||||
}
|
||||
containerIndex++
|
||||
nextContainerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
|
||||
if containerKey < nextContainerKey {
|
||||
// There is a gap between keys
|
||||
// Just increment the current key and shift to get HoB
|
||||
return int64(containerKey+1) << 16
|
||||
}
|
||||
containerKey = nextContainerKey
|
||||
container = rb.highlowcontainer.getContainer(containerKey)
|
||||
nextValue = int64(container.nextAbsentValue(0))
|
||||
}
|
||||
}
|
||||
|
||||
// PreviousAbsentValue returns the previous largest missing value in the bitmap, or -1
|
||||
// if none is present. This function should not be used inside
|
||||
// a performance-sensitive loop: prefer iterators if
|
||||
// performance is a concern.
|
||||
func (rb *Bitmap) PreviousAbsentValue(target uint32) int64 {
|
||||
originalKey := highbits(target)
|
||||
query := lowbits(target)
|
||||
var prevValue int64
|
||||
prevValue = -1
|
||||
|
||||
containerIndex := rb.highlowcontainer.advanceUntil(originalKey, -1)
|
||||
|
||||
if containerIndex == rb.highlowcontainer.size() {
|
||||
// if we are here it means no container found, just return the target
|
||||
return int64(target)
|
||||
}
|
||||
|
||||
if containerIndex == -1 {
|
||||
// if we are here it means no container found, just return the target
|
||||
return int64(target)
|
||||
}
|
||||
|
||||
containerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
|
||||
keyspace := uint32(containerKey) << 16
|
||||
if target < keyspace {
|
||||
// target is less than the start of the keyspace
|
||||
// that means target cannot be in the keyspace
|
||||
return int64(target)
|
||||
}
|
||||
|
||||
container := rb.highlowcontainer.getContainer(containerKey)
|
||||
prevValue = int64(container.previousAbsentValue(query))
|
||||
for {
|
||||
if prevValue != -1 {
|
||||
return int64(combineLoHi32(uint32(prevValue), keyspace))
|
||||
}
|
||||
|
||||
if containerIndex == 0 {
|
||||
val, err := container.safeMinimum()
|
||||
if err == nil {
|
||||
// OR panic, Java panics
|
||||
return -1
|
||||
}
|
||||
return int64(val) - 1
|
||||
}
|
||||
containerIndex--
|
||||
nextContainerKey := rb.highlowcontainer.getKeyAtIndex(containerIndex)
|
||||
if nextContainerKey < containerKey-1 {
|
||||
// There is a gap between keys, eg missing container
|
||||
// Just decrement the current key and shift to get HoB of the missing container
|
||||
return (int64(containerKey) << 16) - 1
|
||||
}
|
||||
containerKey = nextContainerKey
|
||||
container = rb.highlowcontainer.getContainer(containerKey)
|
||||
highestPossible16 := (1 << 16) - 1
|
||||
prevValue = int64(container.previousAbsentValue(uint16(highestPossible16)))
|
||||
}
|
||||
}
|
||||
|
||||
// FlipInt calls Flip after casting the parameters (convenience method)
|
||||
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
|
||||
return Flip(bm, uint64(rangeStart), uint64(rangeEnd))
|
||||
@@ -1916,3 +2132,10 @@ func (rb *Bitmap) Stats() Statistics {
|
||||
}
|
||||
return stats
|
||||
}
|
||||
|
||||
// Validate checks if the bitmap is internally consistent.
|
||||
// You may call it after deserialization to check that the bitmap is valid.
|
||||
// This function returns an error if the bitmap is invalid, nil otherwise.
|
||||
func (rb *Bitmap) Validate() error {
|
||||
return rb.highlowcontainer.validate()
|
||||
}
|
||||
@@ -3,17 +3,9 @@ package roaring64
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math/bits"
|
||||
"math/big"
|
||||
"runtime"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
const (
|
||||
// Min64BitSigned - Minimum 64 bit value
|
||||
Min64BitSigned = -9223372036854775808
|
||||
// Max64BitSigned - Maximum 64 bit value
|
||||
Max64BitSigned = 9223372036854775807
|
||||
)
|
||||
|
||||
// BSI is at its simplest is an array of bitmaps that represent an encoded
|
||||
@@ -32,13 +24,16 @@ type BSI struct {
|
||||
runOptimized bool
|
||||
}
|
||||
|
||||
// NewBSI constructs a new BSI. Min/Max values are optional. If set to 0
|
||||
// then the underlying BSI will be automatically sized.
|
||||
// NewBSI constructs a new BSI. Note that it is your responsibility to ensure that
|
||||
// the min/max values are set correctly. Queries CompareValue, MinMax, etc. will not
|
||||
// work correctly if the min/max values are not set correctly.
|
||||
func NewBSI(maxValue int64, minValue int64) *BSI {
|
||||
|
||||
bitsz := bits.Len64(uint64(minValue))
|
||||
if bits.Len64(uint64(maxValue)) > bitsz {
|
||||
bitsz = bits.Len64(uint64(maxValue))
|
||||
bitszmin := big.NewInt(minValue).BitLen() + 1
|
||||
bitszmax := big.NewInt(maxValue).BitLen() + 1
|
||||
bitsz := bitszmin
|
||||
if bitszmax > bitsz {
|
||||
bitsz = bitszmax
|
||||
}
|
||||
ba := make([]Bitmap, bitsz)
|
||||
return &BSI{bA: ba, MaxValue: maxValue, MinValue: minValue}
|
||||
@@ -81,41 +76,97 @@ func (b *BSI) GetCardinality() uint64 {
|
||||
|
||||
// BitCount returns the number of bits needed to represent values.
|
||||
func (b *BSI) BitCount() int {
|
||||
return len(b.bA)
|
||||
return len(b.bA) - 1 // Exclude sign bit
|
||||
}
|
||||
|
||||
// SetValue sets a value for a given columnID.
|
||||
func (b *BSI) SetValue(columnID uint64, value int64) {
|
||||
// isBig reports whether the BSI holds more than 64 bitmaps (bit slices).
|
||||
func (b *BSI) isBig() bool {
|
||||
return len(b.bA) > 64
|
||||
}
|
||||
|
||||
// IsNegative returns true for negative values
|
||||
func (b *BSI) IsNegative(columnID uint64) bool {
|
||||
if len(b.bA) == 0 {
|
||||
return false
|
||||
}
|
||||
return b.bA[b.BitCount()].Contains(columnID)
|
||||
}
|
||||
|
||||
// SetBigValue sets a value that exceeds 64 bits
|
||||
func (b *BSI) SetBigValue(columnID uint64, value *big.Int) {
|
||||
// If max/min values are set to zero then automatically determine bit array size
|
||||
if b.MaxValue == 0 && b.MinValue == 0 {
|
||||
minBits := bits.Len64(uint64(value))
|
||||
minBits := value.BitLen() + 1
|
||||
if minBits == 1 {
|
||||
minBits = 2
|
||||
}
|
||||
for len(b.bA) < minBits {
|
||||
b.bA = append(b.bA, Bitmap{})
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < b.BitCount(); i++ {
|
||||
if uint64(value)&(1<<uint64(i)) > 0 {
|
||||
b.bA[i].Add(columnID)
|
||||
} else {
|
||||
for i := b.BitCount(); i >= 0; i-- {
|
||||
if value.Bit(i) == 0 {
|
||||
b.bA[i].Remove(columnID)
|
||||
} else {
|
||||
b.bA[i].Add(columnID)
|
||||
}
|
||||
}
|
||||
b.eBM.Add(columnID)
|
||||
}
|
||||
|
||||
// SetValue sets a value for a given columnID.
|
||||
func (b *BSI) SetValue(columnID uint64, value int64) {
|
||||
b.SetBigValue(columnID, big.NewInt(value))
|
||||
}
|
||||
|
||||
// GetValue gets the value at the column ID. Second param will be false for non-existent values.
|
||||
func (b *BSI) GetValue(columnID uint64) (value int64, exists bool) {
|
||||
bv, exists := b.GetBigValue(columnID)
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
if !bv.IsInt64() {
|
||||
if bv.Sign() == -1 {
|
||||
msg := fmt.Errorf("can't represent a negative %d bit value as an int64", b.BitCount())
|
||||
panic(msg)
|
||||
}
|
||||
if bv.Sign() == 1 {
|
||||
msg := fmt.Errorf("can't represent a positive %d bit value as an int64", b.BitCount())
|
||||
panic(msg)
|
||||
}
|
||||
}
|
||||
return bv.Int64(), exists
|
||||
}
|
||||
|
||||
// GetBigValue gets the value at the column ID. Second param will be false for non-existent values.
|
||||
func (b *BSI) GetBigValue(columnID uint64) (value *big.Int, exists bool) {
|
||||
exists = b.eBM.Contains(columnID)
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
for i := 0; i < b.BitCount(); i++ {
|
||||
val := big.NewInt(0)
|
||||
for i := b.BitCount(); i >= 0; i-- {
|
||||
if b.bA[i].Contains(columnID) {
|
||||
value |= 1 << i
|
||||
bigBit := big.NewInt(1)
|
||||
bigBit.Lsh(bigBit, uint(i))
|
||||
val.Or(val, bigBit)
|
||||
}
|
||||
}
|
||||
return
|
||||
|
||||
if b.IsNegative(columnID) {
|
||||
val = negativeTwosComplementToInt(val)
|
||||
}
|
||||
return val, exists
|
||||
}
|
||||
|
||||
// negativeTwosComplementToInt reinterprets the bits held in val as a negative
// two's complement number and returns the corresponding signed value.
//
// With n = val.BitLen(), it computes -((^val & (2^n - 1)) + 1). The result is
// written into val itself, and that same pointer is returned.
func negativeTwosComplementToInt(val *big.Int) *big.Int {
	one := big.NewInt(1)
	width := uint(val.BitLen())

	// lowBits = 2^width - 1, a mask covering exactly the bits val occupies.
	lowBits := new(big.Int).Lsh(one, width)
	lowBits.Sub(lowBits, one)

	// Invert within the mask, then add one to obtain the magnitude.
	flipped := new(big.Int).Not(val)
	flipped.And(flipped, lowBits)
	flipped.Add(flipped, one)

	// Neg stores into val and returns it, so the caller's value is updated.
	return val.Neg(flipped)
}
|
||||
|
||||
type action func(t *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup)
|
||||
@@ -235,13 +286,15 @@ const (
|
||||
type task struct {
|
||||
bsi *BSI
|
||||
op Operation
|
||||
valueOrStart int64
|
||||
end int64
|
||||
values map[int64]struct{}
|
||||
valueOrStart *big.Int
|
||||
end *big.Int
|
||||
values map[string]struct{}
|
||||
bits *Bitmap
|
||||
}
|
||||
|
||||
// CompareValue compares value.
|
||||
// Values should be in the range of the BSI (max, min). If the value is outside the range, the result
|
||||
// might be erroneous. The operation parameter indicates the type of comparison to be made.
|
||||
// For all operations with the exception of RANGE, the value to be compared is specified by valueOrStart.
|
||||
// For the RANGE parameter the comparison criteria is >= valueOrStart and <= end.
|
||||
// The parallelism parameter indicates the number of CPU threads to be applied for processing. A value
|
||||
@@ -249,6 +302,26 @@ type task struct {
|
||||
func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int64,
|
||||
foundSet *Bitmap) *Bitmap {
|
||||
|
||||
return b.CompareBigValue(parallelism, op, big.NewInt(valueOrStart), big.NewInt(end), foundSet)
|
||||
}
|
||||
|
||||
// CompareBigValue compares value.
|
||||
// Values should be in the range of the BSI (max, min). If the value is outside the range, the result
|
||||
// might be erroneous. The operation parameter indicates the type of comparison to be made.
|
||||
// For all operations with the exception of RANGE, the value to be compared is specified by valueOrStart.
|
||||
// For the RANGE parameter the comparison criteria is >= valueOrStart and <= end.
|
||||
// The parallelism parameter indicates the number of CPU threads to be applied for processing. A value
|
||||
// of zero indicates that all available CPU resources will be potentially utilized.
|
||||
func (b *BSI) CompareBigValue(parallelism int, op Operation, valueOrStart, end *big.Int,
|
||||
foundSet *Bitmap) *Bitmap {
|
||||
|
||||
if valueOrStart == nil {
|
||||
valueOrStart = b.MinMaxBig(parallelism, MIN, &b.eBM)
|
||||
}
|
||||
if end == nil && op == RANGE {
|
||||
end = b.MinMaxBig(parallelism, MAX, &b.eBM)
|
||||
}
|
||||
|
||||
comp := &task{bsi: b, op: op, valueOrStart: valueOrStart, end: end}
|
||||
if foundSet == nil {
|
||||
return parallelExecutor(parallelism, comp, compareValue, &b.eBM)
|
||||
@@ -256,6 +329,53 @@ func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int6
|
||||
return parallelExecutor(parallelism, comp, compareValue, foundSet)
|
||||
}
|
||||
|
||||
// twosComplement returns the two's complement encoding of num as a
// non-negative big.Int.
//
// A non-negative num is returned as a copy of its own value. For a negative
// num the result is 2^width - |num|, where width is bitCount, or the bit
// length of |num| when the magnitude needs more than bitCount bits. That is
// the same as inverting every bit of a width-bit field and adding one.
//
// The result is always a newly allocated value; num is never modified.
func twosComplement(num *big.Int, bitCount int) *big.Int {
	magnitude := new(big.Int).Abs(num)
	if num.Sign() >= 0 {
		return magnitude
	}

	// Never truncate: a magnitude wider than bitCount keeps its own width.
	width := magnitude.BitLen()
	if bitCount > width {
		width = bitCount
	}

	// Computed arithmetically rather than through a padded binary string, so
	// the result does not depend on string formatting or on parsing succeeding.
	modulus := new(big.Int).Lsh(big.NewInt(1), uint(width))
	return modulus.Sub(modulus, magnitude)
}
|
||||
|
||||
func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) {
|
||||
|
||||
defer wg.Done()
|
||||
@@ -265,32 +385,31 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
|
||||
results.RunOptimize()
|
||||
}
|
||||
|
||||
x := e.bsi.BitCount()
|
||||
startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0
|
||||
endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0
|
||||
startIsNegative := e.valueOrStart.Sign() == -1
|
||||
endIsNegative := true
|
||||
if e.end != nil {
|
||||
endIsNegative = e.end.Sign() == -1
|
||||
}
|
||||
|
||||
for i := 0; i < len(batch); i++ {
|
||||
cID := batch[i]
|
||||
eq1, eq2 := true, true
|
||||
lt1, lt2, gt1 := false, false, false
|
||||
j := e.bsi.BitCount() - 1
|
||||
isNegative := false
|
||||
if x == 64 {
|
||||
isNegative = e.bsi.bA[j].Contains(cID)
|
||||
j--
|
||||
}
|
||||
j := e.bsi.BitCount()
|
||||
isNegative := e.bsi.IsNegative(cID)
|
||||
compStartValue := e.valueOrStart
|
||||
compEndValue := e.end
|
||||
if isNegative != startIsNegative {
|
||||
compStartValue = ^e.valueOrStart + 1
|
||||
compStartValue = twosComplement(e.valueOrStart, e.bsi.BitCount()+1)
|
||||
}
|
||||
if isNegative != endIsNegative {
|
||||
compEndValue = ^e.end + 1
|
||||
if isNegative != endIsNegative && e.end != nil {
|
||||
compEndValue = twosComplement(e.end, e.bsi.BitCount()+1)
|
||||
}
|
||||
|
||||
for ; j >= 0; j-- {
|
||||
sliceContainsBit := e.bsi.bA[j].Contains(cID)
|
||||
|
||||
if uint64(compStartValue)&(1<<uint64(j)) > 0 {
|
||||
if compStartValue.Bit(j) == 1 {
|
||||
// BIT in value is SET
|
||||
if !sliceContainsBit {
|
||||
if eq1 {
|
||||
@@ -303,7 +422,9 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
|
||||
}
|
||||
}
|
||||
eq1 = false
|
||||
break
|
||||
if e.op != RANGE {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -319,6 +440,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
|
||||
}
|
||||
}
|
||||
eq1 = false
|
||||
|
||||
if e.op != RANGE {
|
||||
break
|
||||
}
|
||||
@@ -326,7 +448,7 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
|
||||
}
|
||||
}
|
||||
|
||||
if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 {
|
||||
if e.op == RANGE && compEndValue.Bit(j) == 1 {
|
||||
// BIT in value is SET
|
||||
if !sliceContainsBit {
|
||||
if eq2 {
|
||||
@@ -347,11 +469,9 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
|
||||
lt2 = true
|
||||
}
|
||||
eq2 = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
switch e.op {
|
||||
@@ -387,15 +507,24 @@ func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.Wa
|
||||
resultsChan <- results
|
||||
}
|
||||
|
||||
// MinMax - Find minimum or maximum value.
|
||||
// MinMax - Find minimum or maximum int64 value.
|
||||
func (b *BSI) MinMax(parallelism int, op Operation, foundSet *Bitmap) int64 {
|
||||
return b.MinMaxBig(parallelism, op, foundSet).Int64()
|
||||
}
|
||||
|
||||
// MinMaxBig - Find minimum or maximum value.
|
||||
func (b *BSI) MinMaxBig(parallelism int, op Operation, foundSet *Bitmap) *big.Int {
|
||||
|
||||
var n int = parallelism
|
||||
if n == 0 {
|
||||
n = runtime.NumCPU()
|
||||
}
|
||||
|
||||
resultsChan := make(chan int64, n)
|
||||
resultsChan := make(chan *big.Int, n)
|
||||
|
||||
if foundSet == nil {
|
||||
foundSet = &b.eBM
|
||||
}
|
||||
|
||||
card := foundSet.GetCardinality()
|
||||
x := card / uint64(n)
|
||||
@@ -418,63 +547,87 @@ func (b *BSI) MinMax(parallelism int, op Operation, foundSet *Bitmap) int64 {
|
||||
wg.Wait()
|
||||
|
||||
close(resultsChan)
|
||||
var minMax int64
|
||||
var minMax *big.Int
|
||||
minSigned, maxSigned := minMaxSignedInt(b.BitCount() + 1)
|
||||
if op == MAX {
|
||||
minMax = Min64BitSigned
|
||||
minMax = minSigned
|
||||
} else {
|
||||
minMax = Max64BitSigned
|
||||
minMax = maxSigned
|
||||
}
|
||||
|
||||
for val := range resultsChan {
|
||||
if (op == MAX && val > minMax) || (op == MIN && val <= minMax) {
|
||||
if (op == MAX && val.Cmp(minMax) > 0) || (op == MIN && val.Cmp(minMax) <= 0) {
|
||||
minMax = val
|
||||
}
|
||||
}
|
||||
return minMax
|
||||
}
|
||||
|
||||
func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg *sync.WaitGroup) {
|
||||
// minMaxSignedInt returns, in that order, the minimum and maximum values of a
// signed integer that is bits wide: -(2^(bits-1)) and 2^(bits-1) - 1.
func minMaxSignedInt(bits int) (*big.Int, *big.Int) {
	one := big.NewInt(1)

	// Largest value: 2^(bits-1) - 1.
	upper := new(big.Int).Lsh(one, uint(bits-1))
	upper.Sub(upper, one)

	// Smallest value: -(largest) - 1.
	lower := new(big.Int).Neg(upper)
	lower.Sub(lower, one)

	return lower, upper
}
|
||||
|
||||
func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan *big.Int, wg *sync.WaitGroup) {
|
||||
|
||||
defer wg.Done()
|
||||
|
||||
x := b.BitCount()
|
||||
var value int64 = Max64BitSigned
|
||||
x := b.BitCount() + 1
|
||||
var value *big.Int
|
||||
minSigned, maxSigned := minMaxSignedInt(x)
|
||||
if op == MAX {
|
||||
value = Min64BitSigned
|
||||
value = minSigned
|
||||
} else {
|
||||
value = maxSigned
|
||||
}
|
||||
|
||||
for i := 0; i < len(batch); i++ {
|
||||
cID := batch[i]
|
||||
eq := true
|
||||
lt, gt := false, false
|
||||
j := b.BitCount() - 1
|
||||
var cVal int64
|
||||
valueIsNegative := uint64(value)&(1<<uint64(x-1)) > 0 && bits.Len64(uint64(value)) == 64
|
||||
isNegative := false
|
||||
if x == 64 {
|
||||
isNegative = b.bA[j].Contains(cID)
|
||||
if isNegative {
|
||||
cVal |= 1 << uint64(j)
|
||||
}
|
||||
j--
|
||||
}
|
||||
j := b.BitCount()
|
||||
cVal := new(big.Int)
|
||||
valueIsNegative := value.Sign() == -1
|
||||
isNegative := b.IsNegative(cID)
|
||||
|
||||
compValue := value
|
||||
if isNegative != valueIsNegative {
|
||||
compValue = ^value + 1
|
||||
// convert compValue to twos complement
|
||||
inverted := new(big.Int).Not(compValue)
|
||||
mask := new(big.Int).Lsh(big.NewInt(1), uint(compValue.BitLen()))
|
||||
inverted.And(inverted, mask.Sub(mask, big.NewInt(1)))
|
||||
inverted.Add(inverted, big.NewInt(1))
|
||||
}
|
||||
|
||||
done := false
|
||||
for ; j >= 0; j-- {
|
||||
sliceContainsBit := b.bA[j].Contains(cID)
|
||||
if sliceContainsBit {
|
||||
cVal |= 1 << uint64(j)
|
||||
bigBit := big.NewInt(1)
|
||||
bigBit.Lsh(bigBit, uint(j))
|
||||
cVal.Or(cVal, bigBit)
|
||||
if isNegative {
|
||||
cVal = negativeTwosComplementToInt(cVal)
|
||||
}
|
||||
}
|
||||
if uint64(compValue)&(1<<uint64(j)) > 0 {
|
||||
if done {
|
||||
continue
|
||||
}
|
||||
if compValue.Bit(j) == 1 {
|
||||
// BIT in value is SET
|
||||
if !sliceContainsBit {
|
||||
if eq {
|
||||
eq = false
|
||||
if op == MAX && valueIsNegative && !isNegative {
|
||||
gt = true
|
||||
break
|
||||
done = true
|
||||
}
|
||||
if op == MIN && (!valueIsNegative || (valueIsNegative == isNegative)) {
|
||||
lt = true
|
||||
@@ -491,11 +644,13 @@ func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg
|
||||
}
|
||||
if op == MAX && (valueIsNegative || (valueIsNegative == isNegative)) {
|
||||
gt = true
|
||||
done = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if lt || gt {
|
||||
value = cVal
|
||||
}
|
||||
@@ -506,19 +661,37 @@ func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg
|
||||
|
||||
// Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
|
||||
// is also returned (for calculating the average).
|
||||
func (b *BSI) Sum(foundSet *Bitmap) (sum int64, count uint64) {
|
||||
func (b *BSI) Sum(foundSet *Bitmap) (int64, uint64) {
|
||||
val, count := b.SumBigValues(foundSet)
|
||||
return val.Int64(), count
|
||||
}
|
||||
|
||||
// SumBigValues - Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
|
||||
// is also returned (for calculating the average). This method will sum arbitrarily large values.
|
||||
func (b *BSI) SumBigValues(foundSet *Bitmap) (sum *big.Int, count uint64) {
|
||||
if foundSet == nil {
|
||||
foundSet = &b.eBM
|
||||
}
|
||||
sum = new(big.Int)
|
||||
count = foundSet.GetCardinality()
|
||||
resultsChan := make(chan int64, b.BitCount())
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < b.BitCount(); i++ {
|
||||
wg.Add(1)
|
||||
go func(j int) {
|
||||
defer wg.Done()
|
||||
atomic.AddInt64(&sum, int64(foundSet.AndCardinality(&b.bA[j])<<uint(j)))
|
||||
resultsChan <- int64(foundSet.AndCardinality(&b.bA[j]) << uint(j))
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
return
|
||||
close(resultsChan)
|
||||
|
||||
for val := range resultsChan {
|
||||
sum.Add(sum, big.NewInt(val))
|
||||
}
|
||||
sum.Sub(sum, big.NewInt(int64(foundSet.AndCardinality(&b.bA[b.BitCount()])<<uint(b.BitCount()))))
|
||||
|
||||
return sum, count
|
||||
}
|
||||
|
||||
// Transpose calls b.IntersectAndTranspose(0, b.eBM)
|
||||
@@ -533,7 +706,9 @@ func (b *BSI) Transpose() *Bitmap {
|
||||
//
|
||||
// TODO: This implementation is functional but not performant, needs to be re-written perhaps using SIMD SSE2 instructions.
|
||||
func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *Bitmap) *Bitmap {
|
||||
|
||||
if foundSet == nil {
|
||||
foundSet = &b.eBM
|
||||
}
|
||||
trans := &task{bsi: b}
|
||||
return parallelExecutor(parallelism, trans, transpose, foundSet)
|
||||
}
|
||||
@@ -563,12 +738,12 @@ func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
|
||||
bits := len(b.bA)
|
||||
for i := 0; i < len(bsis); i++ {
|
||||
if len(bsis[i].bA) > bits {
|
||||
bits = bsis[i].BitCount()
|
||||
bits = len(bsis[i].bA )
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we have enough bit slices
|
||||
for bits > b.BitCount() {
|
||||
for bits > len(b.bA) {
|
||||
bm := Bitmap{}
|
||||
bm.RunOptimize()
|
||||
b.bA = append(b.bA, bm)
|
||||
@@ -725,10 +900,20 @@ func (b *BSI) WriteTo(w io.Writer) (n int64, err error) {
|
||||
|
||||
// BatchEqual returns a bitmap containing the column IDs where the values are contained within the list of values provided.
|
||||
func (b *BSI) BatchEqual(parallelism int, values []int64) *Bitmap {
|
||||
//convert list of int64 values to big.Int(s)
|
||||
bigValues := make([]*big.Int, len(values))
|
||||
for i, v := range values {
|
||||
bigValues[i] = big.NewInt(v)
|
||||
}
|
||||
return b.BatchEqualBig(parallelism, bigValues)
|
||||
}
|
||||
|
||||
valMap := make(map[int64]struct{}, len(values))
|
||||
// BatchEqualBig returns a bitmap containing the column IDs where the values are contained within the list of values provided.
|
||||
func (b *BSI) BatchEqualBig(parallelism int, values []*big.Int) *Bitmap {
|
||||
|
||||
valMap := make(map[string]struct{}, len(values))
|
||||
for i := 0; i < len(values); i++ {
|
||||
valMap[values[i]] = struct{}{}
|
||||
valMap[string(values[i].Bytes())] = struct{}{}
|
||||
}
|
||||
comp := &task{bsi: b, values: valMap}
|
||||
return parallelExecutor(parallelism, comp, batchEqual, &b.eBM)
|
||||
@@ -746,8 +931,8 @@ func batchEqual(e *task, batch []uint64, resultsChan chan *Bitmap,
|
||||
|
||||
for i := 0; i < len(batch); i++ {
|
||||
cID := batch[i]
|
||||
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
|
||||
if _, yes := e.values[int64(value)]; yes {
|
||||
if value, ok := e.bsi.GetBigValue(uint64(cID)); ok {
|
||||
if _, yes := e.values[string(value.Bytes())]; yes {
|
||||
results.Add(cID)
|
||||
}
|
||||
}
|
||||
@@ -786,8 +971,8 @@ func (b *BSI) ClearValues(foundSet *Bitmap) {
|
||||
// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained in foundSet
|
||||
func (b *BSI) NewBSIRetainSet(foundSet *Bitmap) *BSI {
|
||||
|
||||
newBSI := NewBSI(b.MaxValue, b.MinValue)
|
||||
newBSI.bA = make([]Bitmap, b.BitCount())
|
||||
newBSI := NewDefaultBSI()
|
||||
newBSI.bA = make([]Bitmap, b.BitCount()+1)
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
@@ -823,13 +1008,13 @@ func (b *BSI) Add(other *BSI) {
|
||||
|
||||
func (b *BSI) addDigit(foundSet *Bitmap, i int) {
|
||||
|
||||
if i >= len(b.bA) {
|
||||
if i >= b.BitCount()+1 || b.BitCount() == 0 {
|
||||
b.bA = append(b.bA, Bitmap{})
|
||||
}
|
||||
carry := And(&b.bA[i], foundSet)
|
||||
b.bA[i].Xor(foundSet)
|
||||
if !carry.IsEmpty() {
|
||||
if i+1 >= len(b.bA) {
|
||||
if i+1 >= b.BitCount() {
|
||||
b.bA = append(b.bA, Bitmap{})
|
||||
}
|
||||
b.addDigit(carry, i+1)
|
||||
@@ -841,7 +1026,12 @@ func (b *BSI) addDigit(foundSet *Bitmap, i int) {
|
||||
// is useful for situations where there is a one-to-many relationship between the vectored integer sets. The resulting BSI
|
||||
// contains the number of times a particular value appeared in the input BSI.
|
||||
func (b *BSI) TransposeWithCounts(parallelism int, foundSet, filterSet *Bitmap) *BSI {
|
||||
|
||||
if foundSet == nil {
|
||||
foundSet = &b.eBM
|
||||
}
|
||||
if filterSet == nil {
|
||||
filterSet = &b.eBM
|
||||
}
|
||||
return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, filterSet, true)
|
||||
}
|
||||
|
||||
@@ -871,6 +1061,9 @@ func transposeWithCounts(input *BSI, filterSet *Bitmap, batch []uint64, resultsC
|
||||
|
||||
// Increment - In-place increment of values in a BSI. Found set select columns for incrementing.
|
||||
func (b *BSI) Increment(foundSet *Bitmap) {
|
||||
if foundSet == nil {
|
||||
foundSet = &b.eBM
|
||||
}
|
||||
b.addDigit(foundSet, 0)
|
||||
b.eBM.Or(foundSet)
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
package roaring64
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
)
|
||||
|
||||
// IntIterable64 allows you to iterate over the values in a Bitmap
|
||||
@@ -4,7 +4,7 @@ import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
)
|
||||
|
||||
var defaultWorkerCount = runtime.NumCPU()
|
||||
@@ -144,6 +144,8 @@ func (c parChunk) size() int {
|
||||
return c.ra.size()
|
||||
}
|
||||
|
||||
// parNaiveStartAt returns the index of the first key that is inclusive between start and last
|
||||
// Returns the size if there is no such key
|
||||
func parNaiveStartAt(ra *roaringArray64, start uint32, last uint32) int {
|
||||
for idx, key := range ra.keys {
|
||||
if key >= start && key <= last {
|
||||
@@ -170,7 +172,6 @@ func orOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
|
||||
for key1 <= last && key2 <= last {
|
||||
|
||||
if key1 < key2 {
|
||||
answer.appendCopy(*ra1, idx1)
|
||||
idx1++
|
||||
@@ -188,7 +189,7 @@ func orOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
|
||||
} else {
|
||||
c1 := ra1.getContainerAtIndex(idx1)
|
||||
|
||||
//answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||||
// answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||||
answer.appendContainer(key1, roaring.Or(c1, ra2.getContainerAtIndex(idx2)), false)
|
||||
idx1++
|
||||
idx2++
|
||||
@@ -261,7 +262,7 @@ func iorOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
|
||||
} else {
|
||||
c1 := ra1.getWritableContainerAtIndex(idx1)
|
||||
|
||||
//ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||||
// ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||||
c1.Or(ra2.getContainerAtIndex(idx2))
|
||||
ra1.setContainerAtIndex(idx1, c1)
|
||||
|
||||
@@ -8,12 +8,14 @@ import (
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/RoaringBitmap/roaring/internal"
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"github.com/RoaringBitmap/roaring/v2/internal"
|
||||
)
|
||||
|
||||
const serialCookieNoRunContainer = 12346 // only arrays and bitmaps
|
||||
const serialCookie = 12347 // runs, arrays, and bitmaps
|
||||
const (
|
||||
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
|
||||
serialCookie = 12347 // runs, arrays, and bitmaps
|
||||
)
|
||||
|
||||
// Bitmap represents a compressed bitmap where you can add integers.
|
||||
type Bitmap struct {
|
||||
@@ -25,7 +27,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
|
||||
buf := new(bytes.Buffer)
|
||||
_, err := rb.WriteTo(buf)
|
||||
return base64.StdEncoding.EncodeToString(buf.Bytes()), err
|
||||
|
||||
}
|
||||
|
||||
// FromBase64 deserializes a bitmap from Base64
|
||||
@@ -52,7 +53,6 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
|
||||
// implementations (Java, Go, C++) and it has a specification :
|
||||
// https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations
|
||||
func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
|
||||
|
||||
var n int64
|
||||
buf := make([]byte, 8)
|
||||
binary.LittleEndian.PutUint64(buf, uint64(rb.highlowcontainer.size()))
|
||||
@@ -87,11 +87,10 @@ func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
|
||||
func (rb *Bitmap) FromUnsafeBytes(data []byte) (p int64, err error) {
|
||||
stream := internal.NewByteBuffer(data)
|
||||
sizeBuf := make([]byte, 8)
|
||||
n, err := stream.Read(sizeBuf)
|
||||
_, err = stream.Read(sizeBuf)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
p += int64(n)
|
||||
size := binary.LittleEndian.Uint64(sizeBuf)
|
||||
|
||||
rb.highlowcontainer.resize(0)
|
||||
@@ -115,17 +114,16 @@ func (rb *Bitmap) FromUnsafeBytes(data []byte) (p int64, err error) {
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error in bitmap.UnsafeFromBytes: could not read key #%d: %w", i, err)
|
||||
}
|
||||
p += 4
|
||||
rb.highlowcontainer.keys[i] = binary.LittleEndian.Uint32(keyBuf)
|
||||
rb.highlowcontainer.containers[i] = roaring.NewBitmap()
|
||||
n, err := rb.highlowcontainer.containers[i].ReadFrom(stream)
|
||||
|
||||
if n == 0 || err != nil {
|
||||
return int64(n), fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
|
||||
}
|
||||
p += int64(n)
|
||||
}
|
||||
|
||||
return p, nil
|
||||
return stream.GetReadBytes(), nil
|
||||
}
|
||||
|
||||
// ReadFrom reads a serialized version of this bitmap from stream.
|
||||
@@ -167,23 +165,15 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
|
||||
rb.highlowcontainer.keys[i] = binary.LittleEndian.Uint32(keyBuf)
|
||||
rb.highlowcontainer.containers[i] = roaring.NewBitmap()
|
||||
n, err := rb.highlowcontainer.containers[i].ReadFrom(stream)
|
||||
|
||||
if n == 0 || err != nil {
|
||||
return int64(n), fmt.Errorf("Could not deserialize bitmap for key #%d: %s", i, err)
|
||||
}
|
||||
p += int64(n)
|
||||
}
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// FromBuffer creates a bitmap from its serialized version stored in buffer
|
||||
// func (rb *Bitmap) FromBuffer(data []byte) (p int64, err error) {
|
||||
//
|
||||
// // TODO: Add buffer interning as in base roaring package.
|
||||
// buf := bytes.NewBuffer(data)
|
||||
// return rb.ReadFrom(buf)
|
||||
// }
|
||||
|
||||
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
|
||||
// (same as ToBytes)
|
||||
func (rb *Bitmap) MarshalBinary() ([]byte, error) {
|
||||
@@ -1251,6 +1241,10 @@ func (rb *Bitmap) GetSerializedSizeInBytes() uint64 {
|
||||
return rb.highlowcontainer.serializedSizeInBytes()
|
||||
}
|
||||
|
||||
// Validate reports the error, if any, returned by validating the bitmap's
// underlying highlowcontainer; all checks are delegated to that container.
func (rb *Bitmap) Validate() error {
|
||||
return rb.highlowcontainer.validate()
|
||||
}
|
||||
|
||||
// Roaring32AsRoaring64 inserts a 32-bit roaring bitmap into
|
||||
// a 64-bit roaring bitmap. No copy is made.
|
||||
func Roaring32AsRoaring64(bm32 *roaring.Bitmap) *Bitmap {
|
||||
@@ -1,7 +1,9 @@
|
||||
package roaring64
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"errors"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
)
|
||||
|
||||
type roaringArray64 struct {
|
||||
@@ -11,6 +13,11 @@ type roaringArray64 struct {
|
||||
copyOnWrite bool
|
||||
}
|
||||
|
||||
var (
|
||||
ErrKeySortOrder = errors.New("keys were out of order")
|
||||
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
|
||||
)
|
||||
|
||||
// runOptimize compresses the element containers to minimize space consumed.
|
||||
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
|
||||
// A: since we aren't changing the logical content, just the representation,
|
||||
@@ -140,7 +147,6 @@ func (ra *roaringArray64) clear() {
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) clone() *roaringArray64 {
|
||||
|
||||
sa := roaringArray64{}
|
||||
sa.copyOnWrite = ra.copyOnWrite
|
||||
|
||||
@@ -328,6 +334,15 @@ func (ra *roaringArray64) hasRunCompression() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the smallest integer index strictly larger than pos such that array[index].key>=min. If none can
|
||||
* be found, return size. Based on code by O. Kaser.
|
||||
*
|
||||
* @param min minimal value
|
||||
* @param pos index to exceed
|
||||
* @return the smallest index greater than pos such that array[index].key is at least as large as
|
||||
* min, or size if it is not possible.
|
||||
*/
|
||||
func (ra *roaringArray64) advanceUntil(min uint32, pos int) int {
|
||||
lower := pos + 1
|
||||
|
||||
@@ -401,3 +416,47 @@ func (ra *roaringArray64) serializedSizeInBytes() uint64 {
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) checkKeysSorted() bool {
|
||||
if len(ra.keys) == 0 || len(ra.keys) == 1 {
|
||||
return true
|
||||
}
|
||||
previous := ra.keys[0]
|
||||
for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
|
||||
next := ra.keys[nextIdx]
|
||||
if previous >= next {
|
||||
return false
|
||||
}
|
||||
previous = next
|
||||
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// validate checks the referential integrity
|
||||
// ensures len(keys) == len(containers), recurses and checks each container type
|
||||
func (ra *roaringArray64) validate() error {
|
||||
if !ra.checkKeysSorted() {
|
||||
return ErrKeySortOrder
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.containers) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.needCopyOnWrite) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
for _, maps := range ra.containers {
|
||||
err := maps.Validate()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if maps.IsEmpty() {
|
||||
return errors.New("empty container")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
package roaring64
|
||||
|
||||
import "github.com/RoaringBitmap/roaring"
|
||||
import "github.com/RoaringBitmap/roaring/v2"
|
||||
|
||||
func highbits(x uint64) uint32 {
|
||||
return uint32(x >> 32)
|
||||
@@ -3,10 +3,11 @@ package roaring
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/internal"
|
||||
"github.com/RoaringBitmap/roaring/v2/internal"
|
||||
)
|
||||
|
||||
type container interface {
|
||||
@@ -30,7 +31,6 @@ type container interface {
|
||||
iadd(x uint16) bool // inplace, returns true if x was new.
|
||||
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
|
||||
|
||||
//addRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
||||
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||
|
||||
iremove(x uint16) bool // inplace, returns true if x was present.
|
||||
@@ -61,7 +61,6 @@ type container interface {
|
||||
lazyOR(r container) container
|
||||
lazyIOR(r container) container
|
||||
getSizeInBytes() int
|
||||
//removeRange(start, final int) container // range is [firstOfRange,lastOfRange) (unused)
|
||||
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
|
||||
selectInt(x uint16) int // selectInt returns the xth integer in the container
|
||||
serializedSizeInBytes() int
|
||||
@@ -71,6 +70,14 @@ type container interface {
|
||||
toEfficientContainer() container
|
||||
String() string
|
||||
containerType() contype
|
||||
|
||||
safeMinimum() (uint16, error)
|
||||
safeMaximum() (uint16, error)
|
||||
nextValue(x uint16) int
|
||||
previousValue(x uint16) int
|
||||
nextAbsentValue(x uint16) int
|
||||
previousAbsentValue(x uint16) int
|
||||
validate() error
|
||||
}
|
||||
|
||||
type contype uint8
|
||||
@@ -82,6 +89,11 @@ const (
|
||||
run32Contype
|
||||
)
|
||||
|
||||
var (
|
||||
ErrKeySortOrder = errors.New("keys were out of order")
|
||||
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
|
||||
)
|
||||
|
||||
// careful: range is [firstOfRange,lastOfRange]
|
||||
func rangeOfOnes(start, last int) container {
|
||||
if start > MaxUint16 {
|
||||
@@ -178,7 +190,6 @@ func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -204,7 +215,6 @@ func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -239,7 +249,6 @@ func (ra *roaringArray) clear() {
|
||||
}
|
||||
|
||||
func (ra *roaringArray) clone() *roaringArray {
|
||||
|
||||
sa := roaringArray{}
|
||||
sa.copyOnWrite = ra.copyOnWrite
|
||||
|
||||
@@ -288,6 +297,8 @@ func (ra *roaringArray) cloneCopyOnWriteContainers() {
|
||||
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
||||
//}
|
||||
|
||||
// getContainer returns the container with key `x`
|
||||
// if no such container exists `nil` is returned
|
||||
func (ra *roaringArray) getContainer(x uint16) container {
|
||||
i := ra.binarySearch(0, int64(len(ra.keys)), x)
|
||||
if i < 0 {
|
||||
@@ -325,7 +336,6 @@ func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) co
|
||||
return ra.getContainerAtIndex(pos).or(other)
|
||||
}
|
||||
return ra.getContainerAtIndex(pos).ior(other)
|
||||
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
|
||||
@@ -336,7 +346,10 @@ func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
// getIndex returns the index of the container with key `x`
|
||||
// if no such container exists a negative value is returned
|
||||
func (ra *roaringArray) getIndex(x uint16) int {
|
||||
// Todo : test
|
||||
// before the binary search, we optimize for frequent cases
|
||||
size := len(ra.keys)
|
||||
if (size == 0) || (ra.keys[size-1] == x) {
|
||||
@@ -396,7 +409,10 @@ func (ra *roaringArray) size() int {
|
||||
return len(ra.keys)
|
||||
}
|
||||
|
||||
// binarySearch returns the index of the key.
|
||||
// negative value returned if not found
|
||||
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
|
||||
// TODO: add unit tests
|
||||
low := begin
|
||||
high := end - 1
|
||||
for low+16 <= high {
|
||||
@@ -455,7 +471,6 @@ func (ra *roaringArray) headerSize() uint64 {
|
||||
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
|
||||
}
|
||||
return 4 + 4 + 8*size
|
||||
|
||||
}
|
||||
|
||||
// should be dirt cheap
|
||||
@@ -489,7 +504,7 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
|
||||
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
|
||||
nw += 2
|
||||
// compute isRun bitmap without temporary allocation
|
||||
var runbitmapslice = buf[nw : nw+isRunSizeInBytes]
|
||||
runbitmapslice := buf[nw : nw+isRunSizeInBytes]
|
||||
for i, c := range ra.containers {
|
||||
switch c.(type) {
|
||||
case *runContainer16:
|
||||
@@ -577,7 +592,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
|
||||
// create is-run-container bitmap
|
||||
isRunBitmapSize := (int(size) + 7) / 8
|
||||
isRunBitmap, err = stream.Next(isRunBitmapSize)
|
||||
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
|
||||
}
|
||||
@@ -596,7 +610,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
|
||||
|
||||
// descriptive header
|
||||
buf, err := stream.Next(2 * 2 * int(size))
|
||||
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
|
||||
}
|
||||
@@ -637,13 +650,11 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
|
||||
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
|
||||
// run container
|
||||
nr, err := stream.ReadUInt16()
|
||||
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
|
||||
}
|
||||
|
||||
buf, err := stream.Next(int(nr) * 4)
|
||||
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
|
||||
}
|
||||
@@ -656,7 +667,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
|
||||
} else if card > arrayDefaultMaxSize {
|
||||
// bitmap container
|
||||
buf, err := stream.Next(arrayDefaultMaxSize * 2)
|
||||
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
|
||||
}
|
||||
@@ -670,7 +680,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
|
||||
} else {
|
||||
// array container
|
||||
buf, err := stream.Next(card * 2)
|
||||
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read array container: %s", err)
|
||||
}
|
||||
@@ -696,6 +705,15 @@ func (ra *roaringArray) hasRunCompression() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the smallest integer index larger than pos such that array[index].key>=min. If none can
|
||||
* be found, return size. Based on code by O. Kaser.
|
||||
*
|
||||
* @param min minimal value
|
||||
* @param pos index to exceed
|
||||
* @return the smallest index greater than pos such that array[index].key is at least as large as
|
||||
* min, or size if it is not possible.
|
||||
*/
|
||||
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
|
||||
lower := pos + 1
|
||||
|
||||
@@ -759,3 +777,44 @@ func (ra *roaringArray) needsCopyOnWrite(i int) bool {
|
||||
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
|
||||
ra.needCopyOnWrite[i] = true
|
||||
}
|
||||
|
||||
func (ra *roaringArray) checkKeysSorted() bool {
|
||||
if len(ra.keys) == 0 || len(ra.keys) == 1 {
|
||||
return true
|
||||
}
|
||||
previous := ra.keys[0]
|
||||
for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
|
||||
next := ra.keys[nextIdx]
|
||||
if previous >= next {
|
||||
return false
|
||||
}
|
||||
previous = next
|
||||
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// validate checks the referential integrity
|
||||
// ensures len(keys) == len(containers), recurses and checks each container type
|
||||
func (ra *roaringArray) validate() error {
|
||||
if !ra.checkKeysSorted() {
|
||||
return ErrKeySortOrder
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.containers) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.needCopyOnWrite) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
for _, container := range ra.containers {
|
||||
err := container.validate()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -39,9 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// runContainer16 does run-length encoding of sets of
|
||||
@@ -59,6 +59,16 @@ type interval16 struct {
|
||||
length uint16 // length minus 1
|
||||
}
|
||||
|
||||
// Errors reported when validating a run container, plus the interval limits
// used by the run-container code.
var (
|
||||
// ErrRunIntervalsEmpty is returned by validate when the container has cardinality 0.
ErrRunIntervalsEmpty = errors.New("run contained no interval")
|
||||
// ErrRunNonSorted is returned by validate when an interval does not start before a later one.
ErrRunNonSorted = errors.New("runs were not sorted")
|
||||
// ErrRunIntervalEqual is returned by validate when two intervals are equal.
ErrRunIntervalEqual = errors.New("intervals were equal")
|
||||
// ErrRunIntervalOverlap is returned when two intervals overlap or touch each other.
ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
|
||||
// ErrRunIntervalSize is returned by validate when the run encoding is not smaller
// than both the bitmap and the array encoding of the same values.
ErrRunIntervalSize = errors.New("too many intervals relative to data")
|
||||
// MaxNumIntervals is the interval count at which orArray/iorArray consider
// converting the result to a bitmap container.
MaxNumIntervals = 2048
|
||||
// MaxIntervalsSum: NOTE(review) its use is not visible in this part of the file — confirm before relying on it.
MaxIntervalsSum = 2048
|
||||
)
|
||||
|
||||
func newInterval16Range(start, last uint16) interval16 {
|
||||
if last < start {
|
||||
panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start))
|
||||
@@ -201,7 +211,6 @@ func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer
|
||||
// somewhat efficiently. For reference, see the Java
|
||||
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192
|
||||
func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
|
||||
|
||||
rc := &runContainer16{}
|
||||
nbrRuns := bc.numberOfRuns()
|
||||
if nbrRuns == 0 {
|
||||
@@ -251,7 +260,6 @@ func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
|
||||
curWord = curWordWith1s & (curWordWith1s + 1)
|
||||
// We've lathered and rinsed, so repeat...
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// newRunContainer16FromArray populates a new
|
||||
@@ -293,7 +301,6 @@ func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 {
|
||||
// If you have a small number of additions to an already
|
||||
// big runContainer16, calling Add() may be faster.
|
||||
func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) {
|
||||
|
||||
rc2 := newRunContainer16FromVals(alreadySorted, vals...)
|
||||
un := rc.union(rc2)
|
||||
rc.iv = un.iv
|
||||
@@ -374,7 +381,6 @@ func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) {
|
||||
// union merges two runContainer16s, producing
|
||||
// a new runContainer16 with the union of rc and b.
|
||||
func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
|
||||
|
||||
// rc is also known as 'a' here, but golint insisted we
|
||||
// call it rc for consistency with the rest of the methods.
|
||||
|
||||
@@ -457,7 +463,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
|
||||
break aAdds
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if !bDone {
|
||||
@@ -471,7 +476,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
|
||||
break bAdds
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
m = append(m, merged)
|
||||
@@ -489,7 +493,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
|
||||
|
||||
// unionCardinality returns the cardinality of the merger of two runContainer16s, the union of rc and b.
|
||||
func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
|
||||
|
||||
// rc is also known as 'a' here, but golint insisted we
|
||||
// call it rc for consistency with the rest of the methods.
|
||||
answer := uint(0)
|
||||
@@ -528,7 +531,7 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
|
||||
}
|
||||
if !mergedUpdated {
|
||||
// we know that merged is disjoint from cura and curb
|
||||
//m = append(m, merged)
|
||||
// m = append(m, merged)
|
||||
answer += uint(merged.last()) - uint(merged.start) + 1
|
||||
mergedUsed = false
|
||||
}
|
||||
@@ -539,11 +542,11 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
|
||||
if !canMerge16(cura, curb) {
|
||||
if cura.start < curb.start {
|
||||
answer += uint(cura.last()) - uint(cura.start) + 1
|
||||
//m = append(m, cura)
|
||||
// m = append(m, cura)
|
||||
na++
|
||||
} else {
|
||||
answer += uint(curb.last()) - uint(curb.start) + 1
|
||||
//m = append(m, curb)
|
||||
// m = append(m, curb)
|
||||
nb++
|
||||
}
|
||||
} else {
|
||||
@@ -574,7 +577,6 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
|
||||
break aAdds
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if !bDone {
|
||||
@@ -588,10 +590,9 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
|
||||
break bAdds
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//m = append(m, merged)
|
||||
// m = append(m, merged)
|
||||
answer += uint(merged.last()) - uint(merged.start) + 1
|
||||
}
|
||||
for _, r := range rc.iv[na:] {
|
||||
@@ -615,7 +616,6 @@ func (rc *runContainer16) indexOfIntervalAtOrAfter(key int, startIndex int) int
|
||||
// intersect returns a new runContainer16 holding the
|
||||
// intersection of rc (also known as 'a') and b.
|
||||
func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
|
||||
|
||||
a := rc
|
||||
numa := int(len(a.iv))
|
||||
numb := int(len(b.iv))
|
||||
@@ -645,8 +645,7 @@ func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
|
||||
toploop:
|
||||
for acuri < numa && bcuri < numb {
|
||||
|
||||
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
|
||||
intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
|
||||
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
|
||||
|
||||
if !isOverlap {
|
||||
switch {
|
||||
@@ -664,7 +663,6 @@ toploop:
|
||||
}
|
||||
bstart = int(b.iv[bcuri].start)
|
||||
}
|
||||
|
||||
} else {
|
||||
// isOverlap
|
||||
output = append(output, intersection)
|
||||
@@ -748,8 +746,7 @@ toploop:
|
||||
for acuri < numa && bcuri < numb {
|
||||
pass++
|
||||
|
||||
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
|
||||
intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
|
||||
isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
|
||||
|
||||
if !isOverlap {
|
||||
switch {
|
||||
@@ -767,7 +764,6 @@ toploop:
|
||||
}
|
||||
bstart = int(b.iv[bcuri].start)
|
||||
}
|
||||
|
||||
} else {
|
||||
// isOverlap
|
||||
answer += int(intersection.last()) - int(intersection.start) + 1
|
||||
@@ -941,7 +937,7 @@ func (rc *runContainer16) searchRange(key int, startIndex int, endxIndex int) (w
|
||||
// b) whichInterval16 == -1 if key is before our first
|
||||
// interval16 in rc.iv;
|
||||
//
|
||||
// c) whichInterval16 is set to the minimum index of rc.iv
|
||||
// c) whichInterval16 is set to the maximum index of rc.iv
|
||||
// which comes strictly before the key;
|
||||
// so rc.iv[whichInterval16].last < key,
|
||||
// and if whichInterval16+1 exists, then key < rc.iv[whichInterval16+1].start
|
||||
@@ -1014,10 +1010,10 @@ func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 {
|
||||
return rc
|
||||
}
|
||||
|
||||
const baseRc16Size = int(unsafe.Sizeof(runContainer16{}))
|
||||
const perIntervalRc16Size = int(unsafe.Sizeof(interval16{}))
|
||||
|
||||
const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0)))
|
||||
const (
|
||||
baseRc16Size = 2
|
||||
perIntervalRc16Size = 4
|
||||
)
|
||||
|
||||
// see also runContainer16SerializedSizeInBytes(numRuns int) int
|
||||
|
||||
@@ -1030,7 +1026,7 @@ func (rc *runContainer16) getSizeInBytes() int {
|
||||
// runContainer16SerializedSizeInBytes returns the number of bytes of disk
|
||||
// required to hold numRuns in a runContainer16.
|
||||
func runContainer16SerializedSizeInBytes(numRuns int) int {
|
||||
return perIntervalRc16Size*numRuns + baseDiskRc16Size
|
||||
return perIntervalRc16Size*numRuns + baseRc16Size
|
||||
}
|
||||
|
||||
// Add adds a single value k to the set.
|
||||
@@ -1274,7 +1270,7 @@ func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
|
||||
ri.curPosInIndex += uint16(moreVals) // moreVals always fits in uint16
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1315,7 +1311,7 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
|
||||
ri.curPosInIndex += uint16(moreVals) // moreVals always fits in uint16
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1324,7 +1320,6 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
|
||||
|
||||
// removeKey removes key from the container.
|
||||
func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
|
||||
|
||||
var index int
|
||||
index, wasPresent, _ = rc.search(int(key))
|
||||
if !wasPresent {
|
||||
@@ -1361,7 +1356,7 @@ func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) {
|
||||
*curPosInIndex--
|
||||
// if we leave *curIndex alone, then Next() will work properly even after the delete.
|
||||
default:
|
||||
//middle
|
||||
// middle
|
||||
// split into two, adding an interval16
|
||||
new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1)
|
||||
|
||||
@@ -1376,7 +1371,6 @@ func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) {
|
||||
*curIndex++
|
||||
*curPosInIndex = 0
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func have4Overlap16(astart, alast, bstart, blast int) bool {
|
||||
@@ -1503,6 +1497,26 @@ func (iv interval16) isSuperSetOf(b interval16) bool {
|
||||
return iv.start <= b.start && b.last() <= iv.last()
|
||||
}
|
||||
|
||||
func (iv interval16) isNonContiguousDisjoint(b interval16) bool {
|
||||
// cover the zero start case
|
||||
if iv.start == b.start {
|
||||
return false
|
||||
}
|
||||
|
||||
nonContiguous1 := uint32(iv.start) == uint32(b.last())+1 || uint32(iv.last()) == uint32(b.start)+1
|
||||
nonContiguous2 := uint32(b.start) == uint32(iv.last())+1 || uint32(b.last()) == uint32(iv.start)+1
|
||||
if nonContiguous1 || nonContiguous2 {
|
||||
return false
|
||||
}
|
||||
ivl := iv.last()
|
||||
bl := b.last()
|
||||
|
||||
c1 := iv.start <= b.start && b.start <= ivl
|
||||
c2 := b.start <= iv.start && iv.start <= bl
|
||||
|
||||
return !c1 && !c2
|
||||
}
|
||||
|
||||
func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int) {
|
||||
isect, isEmpty := intersectInterval16s(iv, del)
|
||||
|
||||
@@ -1678,7 +1692,6 @@ func (rc *runContainer16) isubtract(del interval16) {
|
||||
// port of run_container_andnot from CRoaring...
|
||||
// https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496
|
||||
func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 {
|
||||
|
||||
if len(b.iv) == 0 || len(rc.iv) == 0 {
|
||||
return rc
|
||||
}
|
||||
@@ -1781,10 +1794,25 @@ func (rc *runContainer16) minimum() uint16 {
|
||||
return rc.iv[0].start // assume not empty
|
||||
}
|
||||
|
||||
func (rc *runContainer16) safeMinimum() (uint16, error) {
|
||||
if len(rc.iv) == 0 {
|
||||
return 0, errors.New("Empty runs")
|
||||
}
|
||||
|
||||
return rc.minimum(), nil
|
||||
}
|
||||
|
||||
func (rc *runContainer16) maximum() uint16 {
|
||||
return rc.iv[len(rc.iv)-1].last() // assume not empty
|
||||
}
|
||||
|
||||
func (rc *runContainer16) safeMaximum() (uint16, error) {
|
||||
if len(rc.iv) == 0 {
|
||||
return 0, errors.New("Empty runs")
|
||||
}
|
||||
return rc.maximum(), nil // assume not empty
|
||||
}
|
||||
|
||||
func (rc *runContainer16) isFull() bool {
|
||||
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
|
||||
}
|
||||
@@ -1949,7 +1977,6 @@ func (rc *runContainer16) getManyIterator() manyIterable {
|
||||
// add the values in the range [firstOfRange, endx). endx
|
||||
// is still able to express 2^16 because it is an int not an uint16.
|
||||
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
|
||||
|
||||
if firstOfRange > endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx > firstOfRange", endx))
|
||||
}
|
||||
@@ -2002,7 +2029,6 @@ func (rc *runContainer16) not(firstOfRange, endx int) container {
|
||||
// makes 2 more passes through the arrays than should be
|
||||
// strictly necessary. Measure both ways though--this may not matter.
|
||||
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
|
||||
|
||||
if firstOfRange > endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx > firstOfRange == %v", endx, firstOfRange))
|
||||
}
|
||||
@@ -2066,12 +2092,12 @@ func (rc *runContainer16) equals(o container) bool {
|
||||
rit := rc.getShortIterator()
|
||||
bit := o.getShortIterator()
|
||||
|
||||
//k := 0
|
||||
// k := 0
|
||||
for rit.hasNext() {
|
||||
if bit.next() != rit.next() {
|
||||
return false
|
||||
}
|
||||
//k++
|
||||
// k++
|
||||
}
|
||||
return true
|
||||
}
|
||||
@@ -2132,7 +2158,7 @@ func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int
|
||||
for i := range rc.iv {
|
||||
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
|
||||
}
|
||||
//bc.computeCardinality()
|
||||
// bc.computeCardinality()
|
||||
return answer
|
||||
}
|
||||
|
||||
@@ -2150,7 +2176,7 @@ func (rc *runContainer16) orArray(ac *arrayContainer) container {
|
||||
}
|
||||
intervals, cardMinusOne := runArrayUnionToRuns(rc, ac)
|
||||
result := newRunContainer16TakeOwnership(intervals)
|
||||
if len(intervals) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
|
||||
if len(intervals) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
|
||||
return newBitmapContainerFromRun(result)
|
||||
}
|
||||
if len(intervals)*2 > 1+int(cardMinusOne) {
|
||||
@@ -2190,7 +2216,6 @@ func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
|
||||
|
||||
it := bc.getShortIterator()
|
||||
for it.hasNext() {
|
||||
rc.Add(it.next())
|
||||
@@ -2206,11 +2231,11 @@ func (rc *runContainer16) iorArray(ac *arrayContainer) container {
|
||||
return rc
|
||||
}
|
||||
var cardMinusOne uint16
|
||||
//TODO: perform the union algorithm in-place using rc.iv
|
||||
// TODO: perform the union algorithm in-place using rc.iv
|
||||
// this can be done with methods like the in-place array container union
|
||||
// but maybe lazily moving the remaining elements back.
|
||||
rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac)
|
||||
if len(rc.iv) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
|
||||
if len(rc.iv) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
|
||||
return newBitmapContainerFromRun(rc)
|
||||
}
|
||||
if len(rc.iv)*2 > 1+int(cardMinusOne) {
|
||||
@@ -2438,12 +2463,8 @@ func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
x2b := x2.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(x2b)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
rc2 := rc.AndNotRunContainer16(x2)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
}
|
||||
@@ -2492,7 +2513,7 @@ func (rc *runContainer16) toEfficientContainer() container {
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
card := rc.getCardinality()
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return rc
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
@@ -2511,7 +2532,6 @@ func (rc *runContainer16) toArrayContainer() *arrayContainer {
|
||||
}
|
||||
|
||||
func newRunContainer16FromContainer(c container) *runContainer16 {
|
||||
|
||||
switch x := c.(type) {
|
||||
case *runContainer16:
|
||||
return x.Clone()
|
||||
@@ -2622,3 +2642,169 @@ func (rc *runContainer16) addOffset(x uint16) (container, container) {
|
||||
|
||||
return low, high
|
||||
}
|
||||
|
||||
// nextValue returns either the `target` if found or the next larger value.
|
||||
// If the target is in the interior or a run then `target` will be returned
|
||||
// Ex: If our run structure resmembles [[a,c], [d,f]] with a <= target <= c then `target` will be returned.
|
||||
// Ex: If c < target < d then d is returned.
|
||||
// Ex: If target < a then a is returned
|
||||
// if the target > max, this is out of bounds and -1 is returned
|
||||
func (rc *runContainer16) nextValue(target uint16) int {
|
||||
if len(rc.iv) == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
whichIndex, alreadyPresent, _ := rc.search(int(target))
|
||||
|
||||
if alreadyPresent {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
if whichIndex == -1 {
|
||||
return int(rc.iv[0].start)
|
||||
}
|
||||
|
||||
if whichIndex == len(rc.iv)-1 {
|
||||
return -1
|
||||
}
|
||||
|
||||
// The if relies on the non-contiguous nature of runs.
|
||||
// If we have two runs [a,b] and another run [c,d]
|
||||
// We can rely on the invariant that b+1 < c
|
||||
// We will return c
|
||||
possibleNext := whichIndex + 1
|
||||
if possibleNext < len(rc.iv) {
|
||||
return int(rc.iv[possibleNext].start)
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
// nextAbsentValue returns the next absent value.
|
||||
// By construction the next absent value will be located between gaps in runs
|
||||
//
|
||||
// Ex: if our runs resemble [[a,b],[c,d]] and a <= target <= b then b+1 will not be equal to c, b+1 will be returned
|
||||
// Ex: if target < a then target is returned
|
||||
// Ex: if target > d then target is returned
|
||||
func (rc *runContainer16) nextAbsentValue(target uint16) int {
|
||||
whichIndex, alreadyPresent, _ := rc.search(int(target))
|
||||
|
||||
if !alreadyPresent {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
return int(rc.iv[whichIndex].last()) + 1
|
||||
}
|
||||
|
||||
// previousValue will return the previous present value
|
||||
// If the target is in the interior of a run then `target` will be returned
|
||||
//
|
||||
// Example:
|
||||
// If our run structure resmembles [[a,c], [d,f]] with a <= target <= c then target will be returned.
|
||||
// If c < target < d then c is returned.
|
||||
// if target > f then f is returned
|
||||
// if the target is less than a, this is out of bounds and -1 is returned
|
||||
func (rc *runContainer16) previousValue(target uint16) int {
|
||||
whichIndex, alreadyPresent, _ := rc.search(int(target))
|
||||
|
||||
if len(rc.iv) == 0 {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
if alreadyPresent {
|
||||
return int(target)
|
||||
}
|
||||
if whichIndex == -1 {
|
||||
return -1
|
||||
}
|
||||
|
||||
return int(rc.iv[whichIndex].last())
|
||||
}
|
||||
|
||||
// previousAbsentValue will return the previous absent value
|
||||
// If the target is in the interior of a run then then the start of the range minus 1 will be returned
|
||||
//
|
||||
// Example:
|
||||
// If our run structure resmembles [[x,z], [a,c], [d,f]] with a <= target <= c then a-1 will be returned.
|
||||
// if the target < x then target is returned
|
||||
// if target > f then target is returned
|
||||
func (rc *runContainer16) previousAbsentValue(target uint16) int {
|
||||
whichIndex, alreadyPresent, _ := rc.search(int(target))
|
||||
|
||||
if !alreadyPresent {
|
||||
return int(target)
|
||||
}
|
||||
|
||||
return int(rc.iv[whichIndex].start) - 1
|
||||
}
|
||||
|
||||
// isNonContiguousDisjoint returns an error if the intervals overlap e.g have non-empty intersection
|
||||
func isNonContiguousDisjoint(outer interval16, inner interval16) error {
|
||||
if !outer.isNonContiguousDisjoint(inner) {
|
||||
return ErrRunIntervalOverlap
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// validate checks the run container referential integrity
|
||||
// Ensures runs are not degenerate, non-contiguous and non-overlapping
|
||||
func (rc *runContainer16) validate() error {
|
||||
if rc.getCardinality() == 0 {
|
||||
return ErrRunIntervalsEmpty
|
||||
}
|
||||
|
||||
intervalsSum := 0
|
||||
for outeridx := range rc.iv {
|
||||
// The length being stored is the actual length - 1.
|
||||
// So we need to add 1 to get the actual length.
|
||||
// It is not possible to have a run with length 0.
|
||||
|
||||
outerInterval := rc.iv[outeridx]
|
||||
|
||||
intervalsSum += outerInterval.runlen()
|
||||
for inneridx := outeridx + 1; inneridx < len(rc.iv); inneridx++ {
|
||||
|
||||
innerInterval := rc.iv[inneridx]
|
||||
|
||||
if outerInterval.equal(innerInterval) {
|
||||
return ErrRunIntervalEqual
|
||||
}
|
||||
|
||||
// only check the start of runs
|
||||
// if the run length overlap the next check will catch that.
|
||||
if outerInterval.start >= innerInterval.start {
|
||||
return ErrRunNonSorted
|
||||
}
|
||||
|
||||
err := isNonContiguousDisjoint(outerInterval, innerInterval)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
/*
|
||||
if number of distinct values in the container >= 2048 then
|
||||
check that the number of runs is no more than 2047
|
||||
(otherwise you could use a bitset container)
|
||||
else
|
||||
check that the number of runs < (number of distinct values) / 2
|
||||
(otherwise you could use an array container)
|
||||
*/
|
||||
|
||||
sizeAsRunContainer := runContainer16SerializedSizeInBytes(len(rc.iv))
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(intervalsSum)
|
||||
// this is always ok:
|
||||
if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return nil
|
||||
}
|
||||
if sizeAsRunContainer >= sizeAsBitmapContainer {
|
||||
return ErrRunIntervalSize
|
||||
}
|
||||
if sizeAsRunContainer >= sizeAsArrayContainer {
|
||||
return ErrRunIntervalSize
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -299,6 +299,15 @@ func (rb *Bitmap) FrozenView(buf []byte) error {
|
||||
return rb.highlowcontainer.frozenView(buf)
|
||||
}
|
||||
|
||||
func (rb *Bitmap) MustFrozenView(buf []byte) error {
|
||||
if err := rb.FrozenView(buf); err != nil {
|
||||
return err
|
||||
}
|
||||
err := rb.Validate()
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
/* Verbatim specification from CRoaring.
|
||||
*
|
||||
* FROZEN SERIALIZATION FORMAT DESCRIPTION
|
||||
@@ -1,26 +1,12 @@
|
||||
package roaring
|
||||
|
||||
// equal reports whether a and b have the same length and hold the same
// values in the same order. A nil slice and an empty slice compare equal.
func equal(a, b []uint16) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if v != b[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||
if 0 == len(set2) {
|
||||
if len(set2) == 0 {
|
||||
buffer = buffer[:len(set1)]
|
||||
for k := 0; k < len(set1); k++ {
|
||||
buffer[k] = set1[k]
|
||||
}
|
||||
copy(buffer, set1)
|
||||
return len(set1)
|
||||
}
|
||||
if 0 == len(set1) {
|
||||
if len(set1) == 0 {
|
||||
return 0
|
||||
}
|
||||
pos := 0
|
||||
@@ -66,7 +52,6 @@ func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||
}
|
||||
}
|
||||
return pos
|
||||
|
||||
}
|
||||
|
||||
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||
@@ -135,6 +120,7 @@ func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||
return pos
|
||||
}
|
||||
|
||||
// union2by2Cardinality computes the cardinality of the union
|
||||
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
|
||||
pos := 0
|
||||
k1 := 0
|
||||
@@ -186,8 +172,8 @@ func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
|
||||
func intersection2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
buffer []uint16) int {
|
||||
|
||||
buffer []uint16,
|
||||
) int {
|
||||
if len(set1)*64 < len(set2) {
|
||||
return onesidedgallopingintersect2by2(set1, set2, buffer)
|
||||
} else if len(set2)*64 < len(set1) {
|
||||
@@ -197,10 +183,11 @@ func intersection2by2(
|
||||
}
|
||||
}
|
||||
|
||||
// intersection2by2Cardinality computes the cardinality of the intersection
|
||||
func intersection2by2Cardinality(
|
||||
set1 []uint16,
|
||||
set2 []uint16) int {
|
||||
|
||||
set2 []uint16,
|
||||
) int {
|
||||
if len(set1)*64 < len(set2) {
|
||||
return onesidedgallopingintersect2by2Cardinality(set1, set2)
|
||||
} else if len(set2)*64 < len(set1) {
|
||||
@@ -210,44 +197,45 @@ func intersection2by2Cardinality(
|
||||
}
|
||||
}
|
||||
|
||||
// intersects2by2 computes whether the two sets intersect
|
||||
func intersects2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16) bool {
|
||||
set2 []uint16,
|
||||
) bool {
|
||||
// could be optimized if one set is much larger than the other one
|
||||
if (0 == len(set1)) || (0 == len(set2)) {
|
||||
if (len(set1) == 0) || (len(set2) == 0) {
|
||||
return false
|
||||
}
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
s1 := set1[k1]
|
||||
s2 := set2[k2]
|
||||
index1 := 0
|
||||
index2 := 0
|
||||
value1 := set1[index1]
|
||||
value2 := set2[index2]
|
||||
mainwhile:
|
||||
for {
|
||||
|
||||
if s2 < s1 {
|
||||
if value2 < value1 {
|
||||
for {
|
||||
k2++
|
||||
if k2 == len(set2) {
|
||||
index2++
|
||||
if index2 == len(set2) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = set2[k2]
|
||||
if s2 >= s1 {
|
||||
value2 = set2[index2]
|
||||
if value2 >= value1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if s1 < s2 {
|
||||
if value1 < value2 {
|
||||
for {
|
||||
k1++
|
||||
if k1 == len(set1) {
|
||||
index1++
|
||||
if index1 == len(set1) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = set1[k1]
|
||||
if s1 >= s2 {
|
||||
value1 = set1[index1]
|
||||
if value1 >= value2 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// (set2[k2] == set1[k1])
|
||||
return true
|
||||
@@ -259,9 +247,9 @@ mainwhile:
|
||||
func localintersect2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
buffer []uint16) int {
|
||||
|
||||
if (0 == len(set1)) || (0 == len(set2)) {
|
||||
buffer []uint16,
|
||||
) int {
|
||||
if (len(set1) == 0) || (len(set2) == 0) {
|
||||
return 0
|
||||
}
|
||||
k1 := 0
|
||||
@@ -295,7 +283,6 @@ mainwhile:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// (set2[k2] == set1[k1])
|
||||
buffer[pos] = s1
|
||||
@@ -315,57 +302,57 @@ mainwhile:
|
||||
return pos
|
||||
}
|
||||
|
||||
// localintersect2by2Cardinality computes the cardinality of the intersection
|
||||
func localintersect2by2Cardinality(
|
||||
set1 []uint16,
|
||||
set2 []uint16) int {
|
||||
|
||||
if (0 == len(set1)) || (0 == len(set2)) {
|
||||
set2 []uint16,
|
||||
) int {
|
||||
if (len(set1) == 0) || (len(set2) == 0) {
|
||||
return 0
|
||||
}
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
index1 := 0
|
||||
index2 := 0
|
||||
pos := 0
|
||||
s1 := set1[k1]
|
||||
s2 := set2[k2]
|
||||
value1 := set1[index1]
|
||||
value2 := set2[index2]
|
||||
mainwhile:
|
||||
for {
|
||||
if s2 < s1 {
|
||||
if value2 < value1 {
|
||||
for {
|
||||
k2++
|
||||
if k2 == len(set2) {
|
||||
index2++
|
||||
if index2 == len(set2) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = set2[k2]
|
||||
if s2 >= s1 {
|
||||
value2 = set2[index2]
|
||||
if value2 >= value1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if s1 < s2 {
|
||||
if value1 < value2 {
|
||||
for {
|
||||
k1++
|
||||
if k1 == len(set1) {
|
||||
index1++
|
||||
if index1 == len(set1) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = set1[k1]
|
||||
if s1 >= s2 {
|
||||
value1 = set1[index1]
|
||||
if value1 >= value2 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// (set2[k2] == set1[k1])
|
||||
pos++
|
||||
k1++
|
||||
if k1 == len(set1) {
|
||||
index1++
|
||||
if index1 == len(set1) {
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
k2++
|
||||
if k2 == len(set2) {
|
||||
value1 = set1[index1]
|
||||
index2++
|
||||
if index2 == len(set2) {
|
||||
break
|
||||
}
|
||||
s2 = set2[k2]
|
||||
value2 = set2[index2]
|
||||
}
|
||||
}
|
||||
return pos
|
||||
@@ -375,7 +362,8 @@ func advanceUntil(
|
||||
array []uint16,
|
||||
pos int,
|
||||
length int,
|
||||
min uint16) int {
|
||||
min uint16,
|
||||
) int {
|
||||
lower := pos + 1
|
||||
|
||||
if lower >= length || array[lower] >= min {
|
||||
@@ -423,14 +411,13 @@ func advanceUntil(
|
||||
}
|
||||
}
|
||||
return upper
|
||||
|
||||
}
|
||||
|
||||
func onesidedgallopingintersect2by2(
|
||||
smallset []uint16,
|
||||
largeset []uint16,
|
||||
buffer []uint16) int {
|
||||
|
||||
buffer []uint16,
|
||||
) int {
|
||||
if 0 == len(smallset) {
|
||||
return 0
|
||||
}
|
||||
@@ -478,8 +465,8 @@ mainwhile:
|
||||
|
||||
func onesidedgallopingintersect2by2Cardinality(
|
||||
smallset []uint16,
|
||||
largeset []uint16) int {
|
||||
|
||||
largeset []uint16,
|
||||
) int {
|
||||
if 0 == len(smallset) {
|
||||
return 0
|
||||
}
|
||||
@@ -548,3 +535,131 @@ func binarySearch(array []uint16, ikey uint16) int {
|
||||
}
|
||||
return -(low + 1)
|
||||
}
|
||||
|
||||
// searchResult provides information about a search request.
// The values will depend on the context of the search.
type searchResult struct {
	// value is the element selected by the search; it is 0 when the target
	// fell outside the searched range and no element was selected.
	value uint16
	// index is the position of value in the searched slice. The searches in
	// this file use -1 for "target below the range (or nothing to search)"
	// and len(array) for "target above the range".
	index int
	// exactMatch is true only when value equals the requested target.
	exactMatch bool
}

// notFound reports whether the search missed the target: it returns false
// when the target itself was located (exactMatch is set) and true otherwise.
// NOTE(review): upstream states that for `nextAbsentValue` and
// `previousAbsentValue` this only ever returns false; those callers are not
// visible here, so confirm before relying on it.
func (sr *searchResult) notFound() bool {
	return !sr.exactMatch
}

// outOfBounds reports whether the result carries a negative index, which is
// how the searches in this file flag a target smaller than the first element
// of the searched range (or an empty range). A target larger than the last
// element is reported with index len(array) and is NOT flagged by this method.
func (sr *searchResult) outOfBounds() bool {
	return sr.index <= -1
}

// binarySearchUntil is a helper function around binarySearchUntilWithBounds.
// The user does not have to pass in the lower and upper bound:
// the lower bound is taken to be `0` and the upper bound `len(array)-1`.
// An empty array yields `searchResult{0, -1, false}`.
func binarySearchUntil(array []uint16, target uint16) searchResult {
	return binarySearchUntilWithBounds(array, target, 0, len(array)-1)
}

// binarySearchUntilWithBounds looks for `target` in array[lowIndex..maxIndex]
// (both inclusive) and returns a `searchResult`.
//
// If an exact match is found, `searchResult{target, <index>, true}` is
// returned, where `<index>` is `target`s index in `array`, and
// `result.notFound()` evaluates to `false`.
// If a match is not found but `target` was in-bounds, the result holds the
// closest smaller value.
// Example: [ 8,9,11,12] if the target was 10, then `searchResult{9, 1, false}` will be returned.
// If `target` is smaller than array[lowIndex], or there is nothing to search
// (empty slice or lowIndex > maxIndex), `searchResult{0, -1, false}` is returned.
// If `target` is larger than array[maxIndex], `searchResult{0, len(array), false}` is returned.
func binarySearchUntilWithBounds(array []uint16, target uint16, lowIndex int, maxIndex int) searchResult {
	// Nothing to inspect: report "below range" instead of indexing into an
	// empty slice or an inverted window (this used to panic).
	if len(array) == 0 || lowIndex > maxIndex {
		return searchResult{0, -1, false}
	}
	if target < array[lowIndex] {
		return searchResult{0, -1, false}
	}
	if target > array[maxIndex] {
		return searchResult{0, len(array), false}
	}

	highIndex := maxIndex
	for lowIndex <= highIndex {
		middleIndex := (lowIndex + highIndex) / 2
		middleValue := array[middleIndex]

		switch {
		case middleValue == target:
			return searchResult{middleValue, middleIndex, true}
		case target < middleValue:
			// target lies in the gap directly left of middleIndex, so the
			// left neighbour is the closest smaller value.
			if middleIndex > 0 && target > array[middleIndex-1] {
				return searchResult{array[middleIndex-1], middleIndex - 1, false}
			}
			highIndex = middleIndex
		default:
			// target lies in the gap directly right of middleIndex, so the
			// middle element itself is the closest smaller value.
			if middleIndex < maxIndex && target < array[middleIndex+1] {
				return searchResult{middleValue, middleIndex, false}
			}
			lowIndex = middleIndex + 1
		}
	}

	// Defensive fallback: the range checks above appear to make this
	// unreachable. The previous code indexed array[-1] here, which could only
	// panic; report "not found, below range" instead.
	return searchResult{0, -1, false}
}

// binarySearchPast is a wrapper around binarySearchPastWithBounds.
// The user does not have to pass in the lower and upper bound:
// the lower bound is taken to be `0` and the upper bound `len(array)-1`.
// An empty array yields `searchResult{0, -1, false}`.
func binarySearchPast(array []uint16, target uint16) searchResult {
	return binarySearchPastWithBounds(array, target, 0, len(array)-1)
}

// binarySearchPastWithBounds looks for the smallest value larger than or equal
// to `target` in array[lowIndex..maxIndex] (both inclusive).
// `target` does not have to exist in the slice.
//
// If `target` is smaller than array[lowIndex], or there is nothing to search
// (empty slice or lowIndex > maxIndex), `searchResult{0, -1, false}` is returned.
// If `target` is larger than array[maxIndex], `searchResult{0, len(array), false}` is returned.
//
// Example:
// Suppose the slice is [...10,13...] with `target` equal to 11
// The searchResult will have searchResult.value = 13
func binarySearchPastWithBounds(array []uint16, target uint16, lowIndex int, maxIndex int) searchResult {
	// Nothing to inspect: report "below range" instead of indexing into an
	// empty slice or an inverted window (this used to panic).
	if len(array) == 0 || lowIndex > maxIndex {
		return searchResult{0, -1, false}
	}
	if target < array[lowIndex] {
		return searchResult{0, -1, false}
	}
	if target > array[maxIndex] {
		return searchResult{0, len(array), false}
	}

	highIndex := maxIndex
	for lowIndex <= highIndex {
		middleIndex := (lowIndex + highIndex) / 2
		middleValue := array[middleIndex]

		switch {
		case middleValue == target:
			return searchResult{middleValue, middleIndex, true}
		case target < middleValue:
			// target lies in the gap directly left of middleIndex, so the
			// middle element is the smallest value past the target.
			if middleIndex > 0 && target > array[middleIndex-1] {
				return searchResult{array[middleIndex], middleIndex, false}
			}
			highIndex = middleIndex
		default:
			// target lies in the gap directly right of middleIndex, so the
			// right neighbour is the smallest value past the target.
			if middleIndex < maxIndex && target < array[middleIndex+1] {
				return searchResult{array[middleIndex+1], middleIndex + 1, false}
			}
			lowIndex = middleIndex + 1
		}
	}

	// Defensive fallback: the range checks above appear to make this
	// unreachable. The previous code indexed array[-1] here, which could only
	// panic; report "not found, below range" instead.
	return searchResult{0, -1, false}
}
|
||||
@@ -52,6 +52,7 @@ func fill(arr []uint64, val uint64) {
|
||||
arr[i] = val
|
||||
}
|
||||
}
|
||||
|
||||
func fillRange(arr []uint64, start, end int, val uint64) {
|
||||
for i := start; i < end; i++ {
|
||||
arr[i] = val
|
||||
@@ -112,10 +113,19 @@ func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||
// maxLowBit masks the 16 least-significant bits of a 32-bit value.
const maxLowBit = 0xFFFF

// highbits returns the upper 16 bits of x.
func highbits(x uint32) uint16 {
	upper := x >> 16
	return uint16(upper)
}

// lowbits returns the lower 16 bits of x.
func lowbits(x uint32) uint16 {
	lower := x & maxLowBit
	return uint16(lower)
}

// combineLoHi16 builds a 32-bit value whose lower half is lob and whose
// upper half is hob.
func combineLoHi16(lob uint16, hob uint16) uint32 {
	lo, hi := uint32(lob), uint32(hob)
	return combineLoHi32(lo, hi)
}

// combineLoHi32 ORs lob with hob shifted left by 16 bits. lob is not masked,
// so any bits it carries above bit 15 end up in the result as well.
func combineLoHi32(lob uint32, hob uint32) uint32 {
	shiftedHigh := hob << 16
	return shiftedHigh | lob
}
|
||||
|
||||
func flipBitmapRange(bitmap []uint64, start int, end int) {
|
||||
@@ -146,7 +156,6 @@ func resetBitmapRange(bitmap []uint64, start int, end int) {
|
||||
bitmap[i] = 0
|
||||
}
|
||||
bitmap[endword] &= ^(^uint64(0) >> (uint(-end) % 64))
|
||||
|
||||
}
|
||||
|
||||
func setBitmapRange(bitmap []uint64, start int, end int) {
|
||||
@@ -242,7 +251,6 @@ func selectBitPosition(w uint64, j int) int {
|
||||
}
|
||||
}
|
||||
return seen + int(counter)
|
||||
|
||||
}
|
||||
|
||||
func panicOn(err error) {
|
||||
26
vendor/github.com/bits-and-blooms/bitset/README.md
generated
vendored
26
vendor/github.com/bits-and-blooms/bitset/README.md
generated
vendored
@@ -12,7 +12,7 @@ This library is part of the [awesome go collection](https://github.com/avelino/a
|
||||
* [beego](https://github.com/beego/beego)
|
||||
* [CubeFS](https://github.com/cubefs/cubefs)
|
||||
* [Amazon EKS Distro](https://github.com/aws/eks-distro)
|
||||
* [sourcegraph](https://github.com/sourcegraph/sourcegraph)
|
||||
* [sourcegraph](https://github.com/sourcegraph/sourcegraph-public-snapshot)
|
||||
* [torrent](https://github.com/anacrolix/torrent)
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ It provides methods for setting, clearing, flipping, and testing individual inte
|
||||
|
||||
But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits.
|
||||
|
||||
BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk. On creation, a hint can be given for the number of bits that will be used.
|
||||
BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk automatically, but `Shrink` and `Compact` methods are available. On creation, a hint can be given for the number of bits that will be used.
|
||||
|
||||
Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining.
|
||||
|
||||
@@ -69,6 +69,13 @@ func main() {
|
||||
}
|
||||
```
|
||||
|
||||
If you have Go 1.23 or better, you can iterate over the set bits like so:
|
||||
|
||||
```go
|
||||
for i := range b.EachSet() {}
|
||||
```
|
||||
|
||||
|
||||
|
||||
Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc
|
||||
|
||||
@@ -125,13 +132,20 @@ E.g.,
|
||||
|
||||
## Memory Usage
|
||||
|
||||
The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
|
||||
The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](https://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
|
||||
|
||||
## Implementation Note
|
||||
The `roaring` library allows you to go back and forth between compressed Roaring bitmaps and the conventional bitset instances:
|
||||
```Go
|
||||
mybitset := roaringbitmap.ToBitSet()
|
||||
newroaringbitmap := roaring.FromBitSet(mybitset)
|
||||
```
|
||||
|
||||
Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed.
|
||||
|
||||
It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `uint64`). If so, the version will be bumped.
|
||||
### Goroutine safety
|
||||
|
||||
In general, it's not safe to access the same BitSet using different goroutines--they are unsynchronized for performance.
|
||||
|
||||
Should you want to access a BitSet from more than one goroutine, you should provide synchronization. Typically this is done by using channels to pass the *BitSet around (in Go style; so there is only ever one owner), or by using `sync.Mutex` to serialize operations on BitSets.
|
||||
|
||||
## Installation
|
||||
|
||||
|
||||
801
vendor/github.com/bits-and-blooms/bitset/bitset.go
generated
vendored
801
vendor/github.com/bits-and-blooms/bitset/bitset.go
generated
vendored
File diff suppressed because it is too large
Load Diff
23
vendor/github.com/bits-and-blooms/bitset/bitset_iter.go
generated
vendored
Normal file
23
vendor/github.com/bits-and-blooms/bitset/bitset_iter.go
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
//go:build go1.23
|
||||
// +build go1.23
|
||||
|
||||
package bitset
|
||||
|
||||
import (
|
||||
"iter"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
func (b *BitSet) EachSet() iter.Seq[uint] {
|
||||
return func(yield func(uint) bool) {
|
||||
for wordIndex, word := range b.set {
|
||||
idx := 0
|
||||
for trail := bits.TrailingZeros64(word); trail != 64; trail = bits.TrailingZeros64(word >> idx) {
|
||||
if !yield(uint(wordIndex<<log2WordSize + idx + trail)) {
|
||||
return
|
||||
}
|
||||
idx += trail + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
8866
vendor/github.com/bits-and-blooms/bitset/pext.gen.go
generated
vendored
Normal file
8866
vendor/github.com/bits-and-blooms/bitset/pext.gen.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
76
vendor/github.com/bits-and-blooms/bitset/popcnt.go
generated
vendored
76
vendor/github.com/bits-and-blooms/bitset/popcnt.go
generated
vendored
@@ -1,53 +1,59 @@
|
||||
package bitset
|
||||
|
||||
// bit population count, take from
|
||||
// https://code.google.com/p/go/issues/detail?id=4988#c11
|
||||
// credit: https://code.google.com/u/arnehormann/
|
||||
func popcount(x uint64) (n uint64) {
|
||||
x -= (x >> 1) & 0x5555555555555555
|
||||
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
|
||||
x += x >> 4
|
||||
x &= 0x0f0f0f0f0f0f0f0f
|
||||
x *= 0x0101010101010101
|
||||
return x >> 56
|
||||
}
|
||||
import "math/bits"
|
||||
|
||||
func popcntSliceGo(s []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
var cnt int
|
||||
for _, x := range s {
|
||||
cnt += popcount(x)
|
||||
cnt += bits.OnesCount64(x)
|
||||
}
|
||||
return cnt
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntMaskSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] &^ m[i])
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
return cnt
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] &^ m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntAndSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] & m[i])
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
return cnt
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] & m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntOrSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] | m[i])
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
return cnt
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] | m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntXorSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] ^ m[i])
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
return cnt
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] ^ m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
62
vendor/github.com/bits-and-blooms/bitset/popcnt_19.go
generated
vendored
62
vendor/github.com/bits-and-blooms/bitset/popcnt_19.go
generated
vendored
@@ -1,62 +0,0 @@
|
||||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
package bitset
|
||||
|
||||
import "math/bits"
|
||||
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
var cnt int
|
||||
for _, x := range s {
|
||||
cnt += bits.OnesCount64(x)
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] &^ m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] & m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] | m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] ^ m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
68
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go
generated
vendored
68
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.go
generated
vendored
@@ -1,68 +0,0 @@
|
||||
//go:build !go1.9 && amd64 && !appengine
|
||||
// +build !go1.9,amd64,!appengine
|
||||
|
||||
package bitset
|
||||
|
||||
// *** the following functions are defined in popcnt_amd64.s
|
||||
|
||||
//go:noescape
|
||||
|
||||
func hasAsm() bool
|
||||
|
||||
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
|
||||
var useAsm = hasAsm()
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntSliceAsm(s []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntMaskSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntAndSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntOrSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntXorSliceAsm(s, m []uint64) uint64
|
||||
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntSliceAsm(s)
|
||||
}
|
||||
return popcntSliceGo(s)
|
||||
}
|
||||
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntMaskSliceAsm(s, m)
|
||||
}
|
||||
return popcntMaskSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntAndSliceAsm(s, m)
|
||||
}
|
||||
return popcntAndSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntOrSliceAsm(s, m)
|
||||
}
|
||||
return popcntOrSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntXorSliceAsm(s, m)
|
||||
}
|
||||
return popcntXorSliceGo(s, m)
|
||||
}
|
||||
104
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s
generated
vendored
104
vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s
generated
vendored
@@ -1,104 +0,0 @@
|
||||
// +build !go1.9
|
||||
// +build amd64,!appengine
|
||||
|
||||
TEXT ·hasAsm(SB),4,$0-1
|
||||
MOVQ $1, AX
|
||||
CPUID
|
||||
SHRQ $23, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
|
||||
|
||||
TEXT ·popcntSliceAsm(SB),4,$0-32
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntSliceEnd
|
||||
popcntSliceLoop:
|
||||
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
LOOP popcntSliceLoop
|
||||
popcntSliceEnd:
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntMaskSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntMaskSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
NOTQ DX
|
||||
ANDQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntMaskSliceLoop
|
||||
popcntMaskSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntAndSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntAndSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntAndSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
ANDQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntAndSliceLoop
|
||||
popcntAndSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntOrSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntOrSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntOrSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
ORQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntOrSliceLoop
|
||||
popcntOrSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntXorSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntXorSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntXorSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
XORQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntXorSliceLoop
|
||||
popcntXorSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
25
vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go
generated
vendored
25
vendor/github.com/bits-and-blooms/bitset/popcnt_generic.go
generated
vendored
@@ -1,25 +0,0 @@
|
||||
//go:build !go1.9 && (!amd64 || appengine)
|
||||
// +build !go1.9
|
||||
// +build !amd64 appengine
|
||||
|
||||
package bitset
|
||||
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
return popcntSliceGo(s)
|
||||
}
|
||||
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
return popcntMaskSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
return popcntAndSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
return popcntOrSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
return popcntXorSliceGo(s, m)
|
||||
}
|
||||
47
vendor/github.com/bits-and-blooms/bitset/select.go
generated
vendored
Normal file
47
vendor/github.com/bits-and-blooms/bitset/select.go
generated
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
package bitset
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// select64 returns the position of the j-th set bit of w, counting set bits
// from the least-significant end starting at j = 0.
//
// The word is narrowed 64 -> 32 -> 16 -> 8 bits: at each step the lower half
// is kept if it holds more than j set bits, otherwise its set bits are
// subtracted from j and the search moves to the upper half. The last byte is
// scanned bit by bit. When no bit qualifies the scan runs off the end of that
// byte (for example select64(0, 0) yields 64).
func select64(w uint64, j uint) uint {
	offset := 0
	chunk := w

	for width := uint(32); width >= 8; width >>= 1 {
		low := chunk & (uint64(1)<<width - 1)
		ones := uint(bits.OnesCount64(low))
		if ones <= j {
			// The wanted bit is not in the lower half: skip it.
			chunk >>= width
			offset += int(width)
			j -= ones
		} else {
			chunk = low
		}
	}

	// Walk the remaining byte. j wraps around to the maximum uint exactly
	// when a set bit is consumed while j is 0, which marks the wanted bit.
	bit := 0
	for ; bit < 8; bit++ {
		j -= uint(chunk>>bit) & 1
		if j == ^uint(0) {
			break
		}
	}
	return uint(offset + bit)
}
|
||||
15
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go
generated
vendored
15
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_18.go
generated
vendored
@@ -1,15 +0,0 @@
|
||||
//go:build !go1.9
|
||||
// +build !go1.9
|
||||
|
||||
package bitset
|
||||
|
||||
var deBruijn = [...]byte{
|
||||
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
||||
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
||||
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
||||
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
|
||||
}
|
||||
|
||||
func trailingZeroes64(v uint64) uint {
|
||||
return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58])
|
||||
}
|
||||
10
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go
generated
vendored
10
vendor/github.com/bits-and-blooms/bitset/trailing_zeros_19.go
generated
vendored
@@ -1,10 +0,0 @@
|
||||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
package bitset
|
||||
|
||||
import "math/bits"
|
||||
|
||||
func trailingZeroes64(v uint64) uint {
|
||||
return uint(bits.TrailingZeros64(v))
|
||||
}
|
||||
12
vendor/github.com/blevesearch/bleve/v2/.travis.yml
generated
vendored
12
vendor/github.com/blevesearch/bleve/v2/.travis.yml
generated
vendored
@@ -3,9 +3,9 @@ sudo: false
|
||||
language: go
|
||||
|
||||
go:
|
||||
- "1.12.x"
|
||||
- "1.13.x"
|
||||
- "1.14.x"
|
||||
- "1.21.x"
|
||||
- "1.22.x"
|
||||
- "1.23.x"
|
||||
|
||||
script:
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
@@ -17,9 +17,9 @@ script:
|
||||
- go vet $(go list ./... | grep -v vendor/)
|
||||
- go test ./test -v -indexType scorch
|
||||
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
|
||||
- docs/project-code-coverage.sh
|
||||
- docs/build_children.sh
|
||||
- scripts/project-code-coverage.sh
|
||||
- scripts/build_children.sh
|
||||
|
||||
notifications:
|
||||
email:
|
||||
- marty.schoch@gmail.com
|
||||
- fts-team@couchbase.com
|
||||
|
||||
13
vendor/github.com/blevesearch/bleve/v2/README.md
generated
vendored
13
vendor/github.com/blevesearch/bleve/v2/README.md
generated
vendored
@@ -1,11 +1,11 @@
|
||||
#  bleve
|
||||
|
||||
[](https://github.com/blevesearch/bleve/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster)
|
||||
[](https://github.com/blevesearch/bleve/actions/workflows/tests.yml?query=event%3Apush+branch%3Amaster)
|
||||
[](https://coveralls.io/github/blevesearch/bleve?branch=master)
|
||||
[](https://godoc.org/github.com/blevesearch/bleve)
|
||||
[](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://pkg.go.dev/github.com/blevesearch/bleve/v2)
|
||||
[](https://app.gitter.im/#/room/#blevesearch_bleve:gitter.im)
|
||||
[](https://codebeat.co/projects/github-com-blevesearch-bleve)
|
||||
[](https://goreportcard.com/report/blevesearch/bleve)
|
||||
[](https://goreportcard.com/report/github.com/blevesearch/bleve/v2)
|
||||
[](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
|
||||
[](https://opensource.org/licenses/Apache-2.0)
|
||||
|
||||
@@ -24,7 +24,8 @@ A modern indexing + search library in GO
|
||||
* [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
|
||||
* [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
|
||||
* approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
|
||||
* [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) scoring
|
||||
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
|
||||
* [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models
|
||||
* Hybrid search: exact + semantic
|
||||
* Query time boosting
|
||||
* Search result match highlighting with document fragments
|
||||
@@ -42,7 +43,7 @@ message := struct{
|
||||
Body string
|
||||
}{
|
||||
Id: "example",
|
||||
From: "marty.schoch@gmail.com",
|
||||
From: "xyz@couchbase.com",
|
||||
Body: "bleve indexing is easy",
|
||||
}
|
||||
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
generated
vendored
@@ -101,7 +101,10 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
generated
vendored
@@ -34,5 +34,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
generated
vendored
@@ -48,5 +48,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
generated
vendored
@@ -60,5 +60,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
generated
vendored
@@ -43,5 +43,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,5 +48,8 @@ func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Ca
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
generated
vendored
@@ -66,5 +66,8 @@ func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go
generated
vendored
@@ -63,7 +63,10 @@ func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
|
||||
err := registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
generated
vendored
@@ -63,5 +63,8 @@ func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
|
||||
err := registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
@@ -45,5 +45,8 @@ func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *regis
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
|
||||
err := registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
generated
vendored
@@ -29,5 +29,8 @@ func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.C
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
generated
vendored
@@ -340,5 +340,8 @@ func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
generated
vendored
@@ -47,7 +47,10 @@ func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.C
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor)
|
||||
err := registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// toLowerDeferredCopy will function exactly like
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
generated
vendored
@@ -49,5 +49,8 @@ func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cac
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, PorterStemmerConstructor)
|
||||
err := registry.RegisterTokenFilter(Name, PorterStemmerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
generated
vendored
@@ -66,5 +66,8 @@ func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
|
||||
err := registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
generated
vendored
@@ -45,5 +45,8 @@ func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *regis
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
|
||||
err := registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
5
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
generated
vendored
5
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
generated
vendored
@@ -115,7 +115,10 @@ func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
|
||||
err := registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func convertType(segmentWordType int) analysis.TokenType {
|
||||
|
||||
9
vendor/github.com/blevesearch/bleve/v2/analysis/type.go
generated
vendored
9
vendor/github.com/blevesearch/bleve/v2/analysis/type.go
generated
vendored
@@ -106,6 +106,15 @@ type DateTimeParser interface {
|
||||
ParseDateTime(string) (time.Time, string, error)
|
||||
}
|
||||
|
||||
const SynonymSourceType = "synonym"
|
||||
|
||||
type SynonymSourceVisitor func(name string, item SynonymSource) error
|
||||
|
||||
type SynonymSource interface {
|
||||
Analyzer() string
|
||||
Collection() string
|
||||
}
|
||||
|
||||
type ByteArrayConverter interface {
|
||||
Convert([]byte) (interface{}, error)
|
||||
}
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/doc.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/doc.go
generated
vendored
@@ -19,7 +19,7 @@ Example Opening New Index, Indexing Data
|
||||
|
||||
message := struct{
|
||||
Id: "example"
|
||||
From: "marty.schoch@gmail.com",
|
||||
From: "xyz@couchbase.com",
|
||||
Body: "bleve indexing is easy",
|
||||
}
|
||||
|
||||
|
||||
24
vendor/github.com/blevesearch/bleve/v2/document/document.go
generated
vendored
24
vendor/github.com/blevesearch/bleve/v2/document/document.go
generated
vendored
@@ -34,6 +34,7 @@ type Document struct {
|
||||
Fields []Field `json:"fields"`
|
||||
CompositeFields []*CompositeField
|
||||
StoredFieldsSize uint64
|
||||
indexed bool
|
||||
}
|
||||
|
||||
func (d *Document) StoredFieldsBytes() uint64 {
|
||||
@@ -48,6 +49,13 @@ func NewDocument(id string) *Document {
|
||||
}
|
||||
}
|
||||
|
||||
func NewSynonymDocument(id string) *Document {
|
||||
return &Document{
|
||||
id: id,
|
||||
Fields: make([]Field, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr +
|
||||
len(d.id)
|
||||
@@ -133,3 +141,19 @@ func (d *Document) VisitComposite(visitor index.CompositeFieldVisitor) {
|
||||
func (d *Document) HasComposite() bool {
|
||||
return len(d.CompositeFields) > 0
|
||||
}
|
||||
|
||||
func (d *Document) VisitSynonymFields(visitor index.SynonymFieldVisitor) {
|
||||
for _, f := range d.Fields {
|
||||
if sf, ok := f.(index.SynonymField); ok {
|
||||
visitor(sf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) SetIndexed() {
|
||||
d.indexed = true
|
||||
}
|
||||
|
||||
func (d *Document) Indexed() bool {
|
||||
return d.indexed
|
||||
}
|
||||
|
||||
4
vendor/github.com/blevesearch/bleve/v2/document/field_boolean.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/v2/document/field_boolean.go
generated
vendored
@@ -116,13 +116,13 @@ func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
options: DefaultBooleanIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
|
||||
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultNumericIndexingOptions)
|
||||
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultBooleanIndexingOptions)
|
||||
}
|
||||
|
||||
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options index.FieldIndexingOptions) *BooleanField {
|
||||
|
||||
11
vendor/github.com/blevesearch/bleve/v2/document/field_geoshape.go
generated
vendored
11
vendor/github.com/blevesearch/bleve/v2/document/field_geoshape.go
generated
vendored
@@ -87,13 +87,6 @@ func (n *GeoShapeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
func (n *GeoShapeField) Analyze() {
|
||||
// compute the bytes representation for the coordinates
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(n.encodedValue),
|
||||
Term: n.encodedValue,
|
||||
Position: 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
})
|
||||
|
||||
rti := geo.GetSpatialAnalyzerPlugin("s2")
|
||||
terms := rti.GetIndexTokens(n.shape)
|
||||
@@ -126,6 +119,10 @@ func (n *GeoShapeField) NumPlainTextBytes() uint64 {
|
||||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func (n *GeoShapeField) EncodedShape() []byte {
|
||||
return n.encodedValue
|
||||
}
|
||||
|
||||
func NewGeoShapeField(name string, arrayPositions []uint64,
|
||||
coordinates [][][][]float64, typ string) *GeoShapeField {
|
||||
return NewGeoShapeFieldWithIndexingOptions(name, arrayPositions,
|
||||
|
||||
4
vendor/github.com/blevesearch/bleve/v2/document/field_ip.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/v2/document/field_ip.go
generated
vendored
@@ -31,7 +31,7 @@ func init() {
|
||||
reflectStaticSizeIPField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultIPIndexingOptions = index.StoreField | index.IndexField | index.DocValues | index.IncludeTermVectors
|
||||
const DefaultIPIndexingOptions = index.StoreField | index.IndexField | index.DocValues
|
||||
|
||||
type IPField struct {
|
||||
name string
|
||||
@@ -115,7 +115,7 @@ func NewIPFieldFromBytes(name string, arrayPositions []uint64, value []byte) *IP
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
options: DefaultIPIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
149
vendor/github.com/blevesearch/bleve/v2/document/field_synonym.go
generated
vendored
Normal file
149
vendor/github.com/blevesearch/bleve/v2/document/field_synonym.go
generated
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
// Copyright (c) 2024 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeSynonymField int
|
||||
|
||||
func init() {
|
||||
var f SynonymField
|
||||
reflectStaticSizeSynonymField = int(reflect.TypeOf(f).Size())
|
||||
}
|
||||
|
||||
const DefaultSynonymIndexingOptions = index.IndexField
|
||||
|
||||
type SynonymField struct {
|
||||
name string
|
||||
analyzer analysis.Analyzer
|
||||
options index.FieldIndexingOptions
|
||||
input []string
|
||||
synonyms []string
|
||||
numPlainTextBytes uint64
|
||||
|
||||
// populated during analysis
|
||||
synonymMap map[string][]string
|
||||
}
|
||||
|
||||
func (s *SynonymField) Size() int {
|
||||
return reflectStaticSizeSynonymField + size.SizeOfPtr +
|
||||
len(s.name)
|
||||
}
|
||||
|
||||
func (s *SynonymField) Name() string {
|
||||
return s.name
|
||||
}
|
||||
|
||||
func (s *SynonymField) ArrayPositions() []uint64 {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SynonymField) Options() index.FieldIndexingOptions {
|
||||
return s.options
|
||||
}
|
||||
|
||||
func (s *SynonymField) NumPlainTextBytes() uint64 {
|
||||
return s.numPlainTextBytes
|
||||
}
|
||||
|
||||
func (s *SynonymField) AnalyzedLength() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (s *SynonymField) EncodedFieldType() byte {
|
||||
return 'y'
|
||||
}
|
||||
|
||||
func (s *SynonymField) AnalyzedTokenFrequencies() index.TokenFrequencies {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SynonymField) Analyze() {
|
||||
var analyzedInput []string
|
||||
if len(s.input) > 0 {
|
||||
analyzedInput = make([]string, 0, len(s.input))
|
||||
for _, term := range s.input {
|
||||
analyzedTerm := analyzeSynonymTerm(term, s.analyzer)
|
||||
if analyzedTerm != "" {
|
||||
analyzedInput = append(analyzedInput, analyzedTerm)
|
||||
}
|
||||
}
|
||||
}
|
||||
analyzedSynonyms := make([]string, 0, len(s.synonyms))
|
||||
for _, syn := range s.synonyms {
|
||||
analyzedTerm := analyzeSynonymTerm(syn, s.analyzer)
|
||||
if analyzedTerm != "" {
|
||||
analyzedSynonyms = append(analyzedSynonyms, analyzedTerm)
|
||||
}
|
||||
}
|
||||
s.synonymMap = processSynonymData(analyzedInput, analyzedSynonyms)
|
||||
}
|
||||
|
||||
func (s *SynonymField) Value() []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SynonymField) IterateSynonyms(visitor func(term string, synonyms []string)) {
|
||||
for term, synonyms := range s.synonymMap {
|
||||
visitor(term, synonyms)
|
||||
}
|
||||
}
|
||||
|
||||
func NewSynonymField(name string, analyzer analysis.Analyzer, input []string, synonyms []string) *SynonymField {
|
||||
return &SynonymField{
|
||||
name: name,
|
||||
analyzer: analyzer,
|
||||
options: DefaultSynonymIndexingOptions,
|
||||
input: input,
|
||||
synonyms: synonyms,
|
||||
}
|
||||
}
|
||||
|
||||
func processSynonymData(input []string, synonyms []string) map[string][]string {
|
||||
var synonymMap map[string][]string
|
||||
if len(input) > 0 {
|
||||
// Map each term to the same list of synonyms.
|
||||
synonymMap = make(map[string][]string, len(input))
|
||||
for _, term := range input {
|
||||
synonymMap[term] = synonyms
|
||||
}
|
||||
} else {
|
||||
synonymMap = make(map[string][]string, len(synonyms))
|
||||
// Precompute a map where each synonym points to all other synonyms.
|
||||
for i, elem := range synonyms {
|
||||
synonymMap[elem] = make([]string, 0, len(synonyms)-1)
|
||||
for j, otherElem := range synonyms {
|
||||
if i != j {
|
||||
synonymMap[elem] = append(synonymMap[elem], otherElem)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return synonymMap
|
||||
}
|
||||
|
||||
func analyzeSynonymTerm(term string, analyzer analysis.Analyzer) string {
|
||||
tokenStream := analyzer.Analyze([]byte(term))
|
||||
if len(tokenStream) == 1 {
|
||||
return string(tokenStream[0].Term)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
2
vendor/github.com/blevesearch/bleve/v2/error.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/error.go
generated
vendored
@@ -27,6 +27,7 @@ const (
|
||||
ErrorEmptyID
|
||||
ErrorIndexReadInconsistency
|
||||
ErrorTwoPhaseSearchInconsistency
|
||||
ErrorSynonymSearchNotSupported
|
||||
)
|
||||
|
||||
// Error represents a more strongly typed bleve error for detecting
|
||||
@@ -49,4 +50,5 @@ var errorMessages = map[Error]string{
|
||||
ErrorEmptyID: "document ID cannot be empty",
|
||||
ErrorIndexReadInconsistency: "index read inconsistency detected",
|
||||
ErrorTwoPhaseSearchInconsistency: "2-phase search failed, likely due to an overlapping topology change",
|
||||
ErrorSynonymSearchNotSupported: "synonym search not supported",
|
||||
}
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/geo/README.md
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/geo/README.md
generated
vendored
@@ -1,4 +1,4 @@
|
||||
# geo support in bleve
|
||||
# Geo spatial search support in bleve
|
||||
|
||||
Latest bleve spatial capabilities are powered by spatial hierarchical tokens generated from s2geometry.
|
||||
You can find more details about the [s2geometry basics here](http://s2geometry.io/), and explore the
|
||||
|
||||
2
vendor/github.com/blevesearch/bleve/v2/geo/geo.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/v2/geo/geo.go
generated
vendored
@@ -139,7 +139,7 @@ func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, f
|
||||
|
||||
var minLonL, maxLonL float64
|
||||
if minLatL > minLatRad && maxLatL < maxLatRad {
|
||||
deltaLon := asin(sin(radDistance) / cos(radLat))
|
||||
deltaLon := math.Asin(math.Sin(radDistance) / math.Cos(radLat))
|
||||
minLonL = radLon - deltaLon
|
||||
if minLonL < minLonRad {
|
||||
minLonL += 2 * math.Pi
|
||||
|
||||
8
vendor/github.com/blevesearch/bleve/v2/geo/geo_dist.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/v2/geo/geo_dist.go
generated
vendored
@@ -88,11 +88,11 @@ func ParseDistanceUnit(u string) (float64, error) {
|
||||
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
|
||||
x1 := lat1 * degreesToRadian
|
||||
x2 := lat2 * degreesToRadian
|
||||
h1 := 1 - cos(x1-x2)
|
||||
h2 := 1 - cos((lon1-lon2)*degreesToRadian)
|
||||
h := (h1 + cos(x1)*cos(x2)*h2) / 2
|
||||
h1 := 1 - math.Cos(x1-x2)
|
||||
h2 := 1 - math.Cos((lon1-lon2)*degreesToRadian)
|
||||
h := (h1 + math.Cos(x1)*math.Cos(x2)*h2) / 2
|
||||
avgLat := (x1 + x2) / 2
|
||||
diameter := earthDiameter(avgLat)
|
||||
|
||||
return diameter * asin(math.Min(1, math.Sqrt(h)))
|
||||
return diameter * math.Asin(math.Min(1, math.Sqrt(h)))
|
||||
}
|
||||
|
||||
19
vendor/github.com/blevesearch/bleve/v2/geo/parse.go
generated
vendored
19
vendor/github.com/blevesearch/bleve/v2/geo/parse.go
generated
vendored
@@ -236,14 +236,19 @@ func extract2DCoordinates(thing interface{}) [][]float64 {
|
||||
|
||||
func extract3DCoordinates(thing interface{}) (c [][][]float64) {
|
||||
coords := reflect.ValueOf(thing)
|
||||
for i := 0; i < coords.Len(); i++ {
|
||||
vals := coords.Index(i)
|
||||
if !coords.IsValid() {
|
||||
return nil
|
||||
}
|
||||
|
||||
edges := vals.Interface()
|
||||
if es, ok := edges.([]interface{}); ok {
|
||||
loop := extract2DCoordinates(es)
|
||||
if len(loop) > 0 {
|
||||
c = append(c, loop)
|
||||
if coords.Kind() == reflect.Slice {
|
||||
for i := 0; i < coords.Len(); i++ {
|
||||
vals := coords.Index(i)
|
||||
edges := vals.Interface()
|
||||
if es, ok := edges.([]interface{}); ok {
|
||||
loop := extract2DCoordinates(es)
|
||||
if len(loop) > 0 {
|
||||
c = append(c, loop)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
163
vendor/github.com/blevesearch/bleve/v2/geo/sloppy.go
generated
vendored
163
vendor/github.com/blevesearch/bleve/v2/geo/sloppy.go
generated
vendored
@@ -19,104 +19,16 @@ import (
|
||||
)
|
||||
|
||||
var earthDiameterPerLatitude []float64
|
||||
var sinTab []float64
|
||||
var cosTab []float64
|
||||
var asinTab []float64
|
||||
var asinDer1DivF1Tab []float64
|
||||
var asinDer2DivF2Tab []float64
|
||||
var asinDer3DivF3Tab []float64
|
||||
var asinDer4DivF4Tab []float64
|
||||
|
||||
const radiusTabsSize = (1 << 10) + 1
|
||||
const radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
|
||||
const radiusIndexer = 1 / radiusDelta
|
||||
const sinCosTabsSize = (1 << 11) + 1
|
||||
const asinTabsSize = (1 << 13) + 1
|
||||
const oneDivF2 = 1 / 2.0
|
||||
const oneDivF3 = 1 / 6.0
|
||||
const oneDivF4 = 1 / 24.0
|
||||
|
||||
// 1.57079632673412561417e+00 first 33 bits of pi/2
|
||||
var pio2Hi = math.Float64frombits(0x3FF921FB54400000)
|
||||
|
||||
// 6.07710050650619224932e-11 pi/2 - PIO2_HI
|
||||
var pio2Lo = math.Float64frombits(0x3DD0B4611A626331)
|
||||
|
||||
var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00
|
||||
var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17
|
||||
var asinPs0 = math.Float64frombits(0x3fc5555555555555) // 1.66666666666666657415e-01
|
||||
var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d) // -3.25565818622400915405e-01
|
||||
var asinPs2 = math.Float64frombits(0x3fc9c1550e884455) // 2.01212532134862925881e-01
|
||||
var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b) // -4.00555345006794114027e-02
|
||||
var asinPs4 = math.Float64frombits(0x3f49efe07501b288) // 7.91534994289814532176e-04
|
||||
var asinPs5 = math.Float64frombits(0x3f023de10dfdf709) // 3.47933107596021167570e-05
|
||||
var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b) // -2.40339491173441421878e+00
|
||||
var asinQs2 = math.Float64frombits(0x40002ae59c598ac8) // 2.02094576023350569471e+00
|
||||
var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159) // -6.88283971605453293030e-01
|
||||
var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282) // 7.70381505559019352791e-02
|
||||
|
||||
var twoPiHi = 4 * pio2Hi
|
||||
var twoPiLo = 4 * pio2Lo
|
||||
var sinCosDeltaHi = twoPiHi/sinCosTabsSize - 1
|
||||
var sinCosDeltaLo = twoPiLo/sinCosTabsSize - 1
|
||||
var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo)
|
||||
var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99
|
||||
var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian)
|
||||
|
||||
var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1)
|
||||
var asinIndexer = 1 / asinDelta
|
||||
const (
|
||||
radiusTabsSize = (1 << 10) + 1
|
||||
radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
|
||||
radiusIndexer = 1 / radiusDelta
|
||||
)
|
||||
|
||||
func init() {
|
||||
// initializes the tables used for the sloppy math functions
|
||||
|
||||
// sin and cos
|
||||
sinTab = make([]float64, sinCosTabsSize)
|
||||
cosTab = make([]float64, sinCosTabsSize)
|
||||
sinCosPiIndex := (sinCosTabsSize - 1) / 2
|
||||
sinCosPiMul2Index := 2 * sinCosPiIndex
|
||||
sinCosPiMul05Index := sinCosPiIndex / 2
|
||||
sinCosPiMul15Index := 3 * sinCosPiIndex / 2
|
||||
for i := 0; i < sinCosTabsSize; i++ {
|
||||
// angle: in [0,2*PI].
|
||||
angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo
|
||||
sinAngle := math.Sin(angle)
|
||||
cosAngle := math.Cos(angle)
|
||||
// For indexes corresponding to null cosine or sine, we make sure the value is zero
|
||||
// and not an epsilon. This allows for a much better accuracy for results close to zero.
|
||||
if i == sinCosPiIndex {
|
||||
sinAngle = 0.0
|
||||
} else if i == sinCosPiMul2Index {
|
||||
sinAngle = 0.0
|
||||
} else if i == sinCosPiMul05Index {
|
||||
sinAngle = 0.0
|
||||
} else if i == sinCosPiMul15Index {
|
||||
sinAngle = 0.0
|
||||
}
|
||||
sinTab[i] = sinAngle
|
||||
cosTab[i] = cosAngle
|
||||
}
|
||||
|
||||
// asin
|
||||
asinTab = make([]float64, asinTabsSize)
|
||||
asinDer1DivF1Tab = make([]float64, asinTabsSize)
|
||||
asinDer2DivF2Tab = make([]float64, asinTabsSize)
|
||||
asinDer3DivF3Tab = make([]float64, asinTabsSize)
|
||||
asinDer4DivF4Tab = make([]float64, asinTabsSize)
|
||||
for i := 0; i < asinTabsSize; i++ {
|
||||
// x: in [0,ASIN_MAX_VALUE_FOR_TABS].
|
||||
x := float64(i) * asinDelta
|
||||
asinTab[i] = math.Asin(x)
|
||||
oneMinusXSqInv := 1.0 / (1 - x*x)
|
||||
oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv)
|
||||
oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv
|
||||
oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv
|
||||
oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv
|
||||
asinDer1DivF1Tab[i] = oneMinusXSqInv05
|
||||
asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2
|
||||
asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3
|
||||
asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4
|
||||
}
|
||||
|
||||
// earth radius
|
||||
a := 6378137.0
|
||||
b := 6356752.31420
|
||||
@@ -145,68 +57,3 @@ func earthDiameter(lat float64) float64 {
|
||||
}
|
||||
return earthDiameterPerLatitude[int(index)]
|
||||
}
|
||||
|
||||
var pio2 = math.Pi / 2
|
||||
|
||||
func sin(a float64) float64 {
|
||||
return cos(a - pio2)
|
||||
}
|
||||
|
||||
// cos is a sloppy math (faster) implementation of math.Cos
|
||||
func cos(a float64) float64 {
|
||||
if a < 0.0 {
|
||||
a = -a
|
||||
}
|
||||
if a > sinCosMaxValueForIntModulo {
|
||||
return math.Cos(a)
|
||||
}
|
||||
// index: possibly outside tables range.
|
||||
index := int(a*sinCosIndexer + 0.5)
|
||||
delta := (a - float64(index)*sinCosDeltaHi) - float64(index)*sinCosDeltaLo
|
||||
// Making sure index is within tables range.
|
||||
// Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo.
|
||||
index &= (sinCosTabsSize - 2) // index % (SIN_COS_TABS_SIZE-1)
|
||||
indexCos := cosTab[index]
|
||||
indexSin := sinTab[index]
|
||||
return indexCos + delta*(-indexSin+delta*(-indexCos*oneDivF2+delta*(indexSin*oneDivF3+delta*indexCos*oneDivF4)))
|
||||
}
|
||||
|
||||
// asin is a sloppy math (faster) implementation of math.Asin
|
||||
func asin(a float64) float64 {
|
||||
var negateResult bool
|
||||
if a < 0 {
|
||||
a = -a
|
||||
negateResult = true
|
||||
}
|
||||
if a <= asinMaxValueForTabs {
|
||||
index := int(a*asinIndexer + 0.5)
|
||||
delta := a - float64(index)*asinDelta
|
||||
result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index])))
|
||||
if negateResult {
|
||||
return -result
|
||||
}
|
||||
return result
|
||||
}
|
||||
// value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
|
||||
// This part is derived from fdlibm.
|
||||
if a < 1 {
|
||||
t := (1.0 - a) * 0.5
|
||||
p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t+asinPs5)))))
|
||||
q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4)))
|
||||
s := math.Sqrt(t)
|
||||
z := s + s*(p/q)
|
||||
result := asinPio2Hi - ((z + z) - asinPio2Lo)
|
||||
if negateResult {
|
||||
return -result
|
||||
}
|
||||
return result
|
||||
}
|
||||
// value >= 1.0, or value is NaN
|
||||
if a == 1.0 {
|
||||
if negateResult {
|
||||
return -math.Pi / 2
|
||||
}
|
||||
return math.Pi / 2
|
||||
}
|
||||
return math.NaN()
|
||||
}
|
||||
|
||||
63
vendor/github.com/blevesearch/bleve/v2/index.go
generated
vendored
63
vendor/github.com/blevesearch/bleve/v2/index.go
generated
vendored
@@ -16,6 +16,7 @@ package bleve
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown"
|
||||
|
||||
@@ -63,6 +64,36 @@ func (b *Batch) Index(id string, data interface{}) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Batch) IndexSynonym(id string, collection string, definition *SynonymDefinition) error {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
if eventIndex, ok := b.index.(index.EventIndex); ok {
|
||||
eventIndex.FireIndexEvent()
|
||||
}
|
||||
synMap, ok := b.index.Mapping().(mapping.SynonymMapping)
|
||||
if !ok {
|
||||
return ErrorSynonymSearchNotSupported
|
||||
}
|
||||
|
||||
if err := definition.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
doc := document.NewSynonymDocument(id)
|
||||
err := synMap.MapSynonymDocument(doc, collection, definition.Input, definition.Synonyms)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.internal.Update(doc)
|
||||
|
||||
b.lastDocSize = uint64(doc.Size() +
|
||||
len(id) + size.SizeOfString) // overhead from internal
|
||||
b.totalSize += b.lastDocSize
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Batch) LastDocSize() uint64 {
|
||||
return b.lastDocSize
|
||||
}
|
||||
@@ -323,3 +354,35 @@ type IndexCopyable interface {
|
||||
// FileSystemDirectory is the default implementation for the
|
||||
// index.Directory interface.
|
||||
type FileSystemDirectory string
|
||||
|
||||
// SynonymDefinition represents a synonym mapping in Bleve.
|
||||
// Each instance associates one or more input terms with a list of synonyms,
|
||||
// defining how terms are treated as equivalent in searches.
|
||||
type SynonymDefinition struct {
|
||||
// Input is an optional list of terms for unidirectional synonym mapping.
|
||||
// When terms are specified in Input, they will map to the terms in Synonyms,
|
||||
// making the relationship unidirectional (each Input maps to all Synonyms).
|
||||
// If Input is omitted, the relationship is bidirectional among all Synonyms.
|
||||
Input []string `json:"input,omitempty"`
|
||||
|
||||
// Synonyms is a list of terms that are considered equivalent.
|
||||
// If Input is specified, each term in Input will map to each term in Synonyms.
|
||||
// If Input is not specified, the Synonyms list will be treated bidirectionally,
|
||||
// meaning each term in Synonyms is treated as synonymous with all others.
|
||||
Synonyms []string `json:"synonyms"`
|
||||
}
|
||||
|
||||
func (sd *SynonymDefinition) Validate() error {
|
||||
if len(sd.Synonyms) == 0 {
|
||||
return fmt.Errorf("synonym definition must have at least one synonym")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SynonymIndex supports indexing synonym definitions alongside regular documents.
|
||||
// Synonyms, grouped by collection name, define term relationships for query expansion in searches.
|
||||
type SynonymIndex interface {
|
||||
Index
|
||||
// IndexSynonym indexes a synonym definition, with the specified id and belonging to the specified collection.
|
||||
IndexSynonym(id string, collection string, definition *SynonymDefinition) error
|
||||
}
|
||||
|
||||
4
vendor/github.com/blevesearch/bleve/v2/index/scorch/builder.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/v2/index/scorch/builder.go
generated
vendored
@@ -19,7 +19,7 @@ import (
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
segment "github.com/blevesearch/scorch_segment_api/v2"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
@@ -303,7 +303,7 @@ func (o *Builder) Close() error {
|
||||
}
|
||||
|
||||
// fill the root bolt with this fake index snapshot
|
||||
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin, nil)
|
||||
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin, nil, nil)
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
_ = rootBolt.Close()
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user