build(deps): bump github.com/blevesearch/bleve/v2 from 2.3.10 to 2.4.0

Bumps [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) from 2.3.10 to 2.4.0.
- [Release notes](https://github.com/blevesearch/bleve/releases)
- [Commits](https://github.com/blevesearch/bleve/compare/v2.3.10...v2.4.0)

---
updated-dependencies:
- dependency-name: github.com/blevesearch/bleve/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
dependabot[bot] authored on 2024-04-03 12:57:16 +00:00, committed by Ralf Haferkamp
parent 8f432c4cdd, commit 68e4e81870
105 changed files with 15633 additions and 485 deletions


@@ -0,0 +1,107 @@
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets

# Display general help about this command
help:
	@echo ""
	@echo "The following commands are available:"
	@echo ""
	@echo "    make qa          : Run all the tests"
	@echo "    make test        : Run the unit tests"
	@echo ""
	@echo "    make format      : Format the source code"
	@echo "    make fmtcheck    : Check if the source code has been formatted"
	@echo "    make vet         : Check for suspicious constructs"
	@echo "    make lint        : Check for style errors"
	@echo ""
	@echo "    make deps        : Get the dependencies"
	@echo "    make clean       : Remove any build artifact"
	@echo "    make nuke        : Delete any intermediate file"
	@echo ""
	@echo "    make fuzz-smat   : Fuzz testing with smat"
	@echo "    make fuzz-stream : Fuzz testing with stream deserialization"
	@echo "    make fuzz-buffer : Fuzz testing with buffer deserialization"
	@echo ""

# Alias for the help target
all: help

test:
	go test
	go test -race -run TestConcurrent*

# Format the source code
format:
	@find ./ -type f -name "*.go" -exec gofmt -w {} \;

# Check if the source code has been formatted
fmtcheck:
	@mkdir -p target
	@find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff
	@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }

# Check for suspicious constructs
vet:
	GOPATH=$(GOPATH) go vet ./...

# Check for style errors
lint:
	GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./...

# Alias to run all quality-assurance checks
qa: fmtcheck test vet lint

# --- INSTALL ---

# Get the dependencies
deps:
	GOPATH=$(GOPATH) go get github.com/stretchr/testify
	GOPATH=$(GOPATH) go get github.com/bits-and-blooms/bitset
	GOPATH=$(GOPATH) go get github.com/golang/lint/golint
	GOPATH=$(GOPATH) go get github.com/mschoch/smat
	GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz
	GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build
	GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream
	GOPATH=$(GOPATH) go get github.com/philhofer/fwd
	GOPATH=$(GOPATH) go get github.com/jtolds/gls

fuzz-smat:
	go test -tags=gofuzz -run=TestGenerateSmatCorpus
	go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
	go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200

fuzz-stream:
	go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
	go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200

fuzz-buffer:
	go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
	go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200

# Remove any build artifact
clean:
	GOPATH=$(GOPATH) go clean ./...

# Delete any intermediate file
nuke:
	rm -rf ./target
	GOPATH=$(GOPATH) go clean -i ./...

cover:
	go test -coverprofile=coverage.out
	go tool cover -html=coverage.out

fetch-real-roaring-datasets:
	# pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets
	git submodule init
	git submodule update


@@ -0,0 +1,852 @@
package roaring64
import (
"fmt"
"math/bits"
"runtime"
"sync"
"sync/atomic"
)
const (
// Min64BitSigned - Minimum 64 bit value
Min64BitSigned = -9223372036854775808
// Max64BitSigned - Maximum 64 bit value
Max64BitSigned = 9223372036854775807
)
// A BSI is, at its simplest, an array of bitmaps that together represent an
// encoded binary value. The advantage of a BSI is that comparisons can be made
// across ranges of values, whereas a single bitmap can only represent the
// existence of one value per column ID. Another usage scenario is the storage
// of high-cardinality values.
//
// It is built on the underlying roaring bitmap library. It is not thread safe,
// so upstream concurrency guards must be provided.
type BSI struct {
bA []*Bitmap
eBM *Bitmap // Existence BitMap
MaxValue int64
MinValue int64
runOptimized bool
}
// NewBSI constructs a new BSI. Min/Max values are optional. If set to 0
// then the underlying BSI will be automatically sized.
func NewBSI(maxValue int64, minValue int64) *BSI {
bitsz := bits.Len64(uint64(minValue))
if bits.Len64(uint64(maxValue)) > bitsz {
bitsz = bits.Len64(uint64(maxValue))
}
ba := make([]*Bitmap, bitsz)
for i := 0; i < len(ba); i++ {
ba[i] = NewBitmap()
}
return &BSI{bA: ba, eBM: NewBitmap(), MaxValue: maxValue, MinValue: minValue}
}
// NewDefaultBSI constructs an auto-sized BSI
func NewDefaultBSI() *BSI {
return NewBSI(int64(0), int64(0))
}
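// Illustrative sketch (not part of the upstream file): minimal BSI usage with
// the API defined above. Values are stored per column ID in bit-sliced form.
func exampleBSIBasic() {
	b := NewDefaultBSI() // auto-sized bit slices
	b.SetValue(1, 42)    // column 1 holds the value 42
	b.SetValue(2, -7)    // negative values use two's complement across 64 slices
	if v, ok := b.GetValue(1); ok {
		fmt.Println(v) // prints 42
	}
	if _, ok := b.GetValue(99); !ok {
		fmt.Println("column 99 has no value") // existence is tracked by the EBM
	}
}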
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (b *BSI) RunOptimize() {
b.eBM.RunOptimize()
for i := 0; i < len(b.bA); i++ {
b.bA[i].RunOptimize()
}
b.runOptimized = true
}
// HasRunCompression returns true if RunOptimize has been called on this BSI
func (b *BSI) HasRunCompression() bool {
return b.runOptimized
}
// GetExistenceBitmap returns a pointer to the underlying existence bitmap of the BSI
func (b *BSI) GetExistenceBitmap() *Bitmap {
return b.eBM
}
// ValueExists tests whether the value exists.
func (b *BSI) ValueExists(columnID uint64) bool {
return b.eBM.Contains(uint64(columnID))
}
// GetCardinality returns a count of unique column IDs for which a value has been set.
func (b *BSI) GetCardinality() uint64 {
return b.eBM.GetCardinality()
}
// BitCount returns the number of bits needed to represent values.
func (b *BSI) BitCount() int {
return len(b.bA)
}
// SetValue sets a value for a given columnID.
func (b *BSI) SetValue(columnID uint64, value int64) {
// If max/min values are set to zero then automatically determine bit array size
if b.MaxValue == 0 && b.MinValue == 0 {
ba := make([]*Bitmap, bits.Len64(uint64(value)))
for i := len(ba) - b.BitCount(); i > 0; i-- {
b.bA = append(b.bA, NewBitmap())
if b.runOptimized {
b.bA[i].RunOptimize()
}
}
}
var wg sync.WaitGroup
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
if uint64(value)&(1<<uint64(j)) > 0 {
b.bA[j].Add(uint64(columnID))
} else {
b.bA[j].Remove(uint64(columnID))
}
}(i)
}
wg.Wait()
b.eBM.Add(uint64(columnID))
}
// GetValue gets the value at the column ID. The second return value is false for non-existent values.
func (b *BSI) GetValue(columnID uint64) (int64, bool) {
value := int64(0)
exists := b.eBM.Contains(uint64(columnID))
if !exists {
return value, exists
}
for i := 0; i < b.BitCount(); i++ {
if b.bA[i].Contains(uint64(columnID)) {
value |= (1 << uint64(i))
}
}
return int64(value), exists
}
type action func(t *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup)
func parallelExecutor(parallelism int, t *task, e action, foundSet *Bitmap) *Bitmap {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan *Bitmap, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint64
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint64, x+remainder)
} else {
batch = make([]uint64, x)
}
iter.NextMany(batch)
wg.Add(1)
go e(t, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
ba := make([]*Bitmap, 0)
for bm := range resultsChan {
ba = append(ba, bm)
}
return ParOr(0, ba...)
}
type bsiAction func(input *BSI, filterSet *Bitmap, batch []uint64, resultsChan chan *BSI, wg *sync.WaitGroup)
func parallelExecutorBSIResults(parallelism int, input *BSI, e bsiAction, foundSet, filterSet *Bitmap, sumResults bool) *BSI {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan *BSI, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint64
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint64, x+remainder)
} else {
batch = make([]uint64, x)
}
iter.NextMany(batch)
wg.Add(1)
go e(input, filterSet, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
ba := make([]*BSI, 0)
for bm := range resultsChan {
ba = append(ba, bm)
}
results := NewDefaultBSI()
if sumResults {
for _, v := range ba {
results.Add(v)
}
} else {
results.ParOr(0, ba...)
}
return results
}
// Operation identifier
type Operation int
const (
// LT less than
LT Operation = 1 + iota
// LE less than or equal
LE
// EQ equal
EQ
// GE greater than or equal
GE
// GT greater than
GT
// RANGE range
RANGE
// MIN find minimum
MIN
// MAX find maximum
MAX
)
type task struct {
bsi *BSI
op Operation
valueOrStart int64
end int64
values map[int64]struct{}
bits *Bitmap
}
// CompareValue returns a bitmap of column IDs whose values satisfy the comparison.
// For all operations except RANGE, the value to compare against is specified by valueOrStart.
// For RANGE, the comparison criterion is valueOrStart <= value <= end.
// The parallelism parameter indicates the number of CPU threads to be applied for
// processing. A value of zero means all available CPU resources may be used.
func (b *BSI) CompareValue(parallelism int, op Operation, valueOrStart, end int64,
foundSet *Bitmap) *Bitmap {
comp := &task{bsi: b, op: op, valueOrStart: valueOrStart, end: end}
if foundSet == nil {
return parallelExecutor(parallelism, comp, compareValue, b.eBM)
}
return parallelExecutor(parallelism, comp, compareValue, foundSet)
}
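// Illustrative sketch (not part of the upstream file): a RANGE query across
// all existing columns. Passing a nil foundSet falls back to the existence bitmap.
func exampleCompareValueRange() {
	b := NewDefaultBSI()
	b.SetValue(1, 10)
	b.SetValue(2, 20)
	b.SetValue(3, 30)
	hits := b.CompareValue(0, RANGE, 15, 25, nil) // columns with 15 <= value <= 25
	fmt.Println(hits.GetCardinality())            // prints 1 (only column 2 qualifies)
}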
func compareValue(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) {
defer wg.Done()
results := NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
x := e.bsi.BitCount()
startIsNegative := x == 64 && uint64(e.valueOrStart)&(1<<uint64(x-1)) > 0
endIsNegative := x == 64 && uint64(e.end)&(1<<uint64(x-1)) > 0
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq1, eq2 := true, true
lt1, lt2, gt1 := false, false, false
j := e.bsi.BitCount() - 1
isNegative := false
if x == 64 {
isNegative = e.bsi.bA[j].Contains(cID)
j--
}
compStartValue := e.valueOrStart
compEndValue := e.end
if isNegative != startIsNegative {
compStartValue = ^e.valueOrStart + 1
}
if isNegative != endIsNegative {
compEndValue = ^e.end + 1
}
for ; j >= 0; j-- {
sliceContainsBit := e.bsi.bA[j].Contains(cID)
if uint64(compStartValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq1 {
if (e.op == GT || e.op == GE || e.op == RANGE) && startIsNegative && !isNegative {
gt1 = true
}
if e.op == LT || e.op == LE {
if !startIsNegative || (startIsNegative == isNegative) {
lt1 = true
}
}
eq1 = false
break
}
}
} else {
// BIT in value is CLEAR
if sliceContainsBit {
if eq1 {
if (e.op == LT || e.op == LE) && isNegative && !startIsNegative {
lt1 = true
}
if e.op == GT || e.op == GE || e.op == RANGE {
if startIsNegative || (startIsNegative == isNegative) {
gt1 = true
}
}
eq1 = false
if e.op != RANGE {
break
}
}
}
}
if e.op == RANGE && uint64(compEndValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq2 {
if !endIsNegative || (endIsNegative == isNegative) {
lt2 = true
}
eq2 = false
if startIsNegative && !endIsNegative {
break
}
}
}
} else if e.op == RANGE {
// BIT in value is CLEAR
if sliceContainsBit {
if eq2 {
if isNegative && !endIsNegative {
lt2 = true
}
eq2 = false
break
}
}
}
}
switch e.op {
case LT:
if lt1 {
results.Add(cID)
}
case LE:
if lt1 || (eq1 && (!startIsNegative || (startIsNegative && isNegative))) {
results.Add(cID)
}
case EQ:
if eq1 {
results.Add(cID)
}
case GE:
if gt1 || (eq1 && (startIsNegative || (!startIsNegative && !isNegative))) {
results.Add(cID)
}
case GT:
if gt1 {
results.Add(cID)
}
case RANGE:
if (eq1 || gt1) && (eq2 || lt2) {
results.Add(cID)
}
default:
panic(fmt.Sprintf("Operation [%v] not supported here", e.op))
}
}
resultsChan <- results
}
// MinMax - Find minimum or maximum value.
func (b *BSI) MinMax(parallelism int, op Operation, foundSet *Bitmap) int64 {
var n int = parallelism
if n == 0 {
n = runtime.NumCPU()
}
resultsChan := make(chan int64, n)
card := foundSet.GetCardinality()
x := card / uint64(n)
remainder := card - (x * uint64(n))
var batch []uint64
var wg sync.WaitGroup
iter := foundSet.ManyIterator()
for i := 0; i < n; i++ {
if i == n-1 {
batch = make([]uint64, x+remainder)
} else {
batch = make([]uint64, x)
}
iter.NextMany(batch)
wg.Add(1)
go b.minOrMax(op, batch, resultsChan, &wg)
}
wg.Wait()
close(resultsChan)
var minMax int64
if op == MAX {
minMax = Min64BitSigned
} else {
minMax = Max64BitSigned
}
for val := range resultsChan {
if (op == MAX && val > minMax) || (op == MIN && val <= minMax) {
minMax = val
}
}
return minMax
}
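// Illustrative sketch (not part of the upstream file): locating extrema over a
// found set; MIN and MAX handle negative (two's complement) values as well.
func exampleMinMax() {
	b := NewDefaultBSI()
	b.SetValue(1, -5)
	b.SetValue(2, 9)
	fmt.Println(b.MinMax(0, MIN, b.GetExistenceBitmap())) // prints -5
	fmt.Println(b.MinMax(0, MAX, b.GetExistenceBitmap())) // prints 9
}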
func (b *BSI) minOrMax(op Operation, batch []uint64, resultsChan chan int64, wg *sync.WaitGroup) {
defer wg.Done()
x := b.BitCount()
var value int64 = Max64BitSigned
if op == MAX {
value = Min64BitSigned
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
eq := true
lt, gt := false, false
j := b.BitCount() - 1
var cVal int64
valueIsNegative := uint64(value)&(1<<uint64(x-1)) > 0 && bits.Len64(uint64(value)) == 64
isNegative := false
if x == 64 {
isNegative = b.bA[j].Contains(cID)
if isNegative {
cVal |= 1 << uint64(j)
}
j--
}
compValue := value
if isNegative != valueIsNegative {
compValue = ^value + 1
}
for ; j >= 0; j-- {
sliceContainsBit := b.bA[j].Contains(cID)
if sliceContainsBit {
cVal |= 1 << uint64(j)
}
if uint64(compValue)&(1<<uint64(j)) > 0 {
// BIT in value is SET
if !sliceContainsBit {
if eq {
eq = false
if op == MAX && valueIsNegative && !isNegative {
gt = true
break
}
if op == MIN && (!valueIsNegative || (valueIsNegative == isNegative)) {
lt = true
}
}
}
} else {
// BIT in value is CLEAR
if sliceContainsBit {
if eq {
eq = false
if op == MIN && isNegative && !valueIsNegative {
lt = true
}
if op == MAX && (valueIsNegative || (valueIsNegative == isNegative)) {
gt = true
}
}
}
}
}
if lt || gt {
value = cVal
}
}
resultsChan <- value
}
// Sum all values contained within the foundSet. As a convenience, the cardinality of the foundSet
// is also returned (for calculating the average).
//
func (b *BSI) Sum(foundSet *Bitmap) (sum int64, count uint64) {
count = foundSet.GetCardinality()
var wg sync.WaitGroup
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
atomic.AddInt64(&sum, int64(foundSet.AndCardinality(b.bA[j])<<uint(j)))
}(i)
}
wg.Wait()
return
}
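// Illustrative sketch (not part of the upstream file): Sum also returns the
// found set's cardinality, which makes computing an average a one-liner.
func exampleAverage() {
	b := NewDefaultBSI()
	b.SetValue(1, 10)
	b.SetValue(2, 30)
	sum, count := b.Sum(b.GetExistenceBitmap())
	fmt.Println(sum / int64(count)) // prints 20
}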
// Transpose calls b.IntersectAndTranspose(0, b.eBM)
func (b *BSI) Transpose() *Bitmap {
return b.IntersectAndTranspose(0, b.eBM)
}
// IntersectAndTranspose is a matrix transpose function. It returns a bitmap in
// which this BSI's values are represented as column IDs. This is accomplished by
// iterating over the foundSet and including only the column IDs present in both
// the foundSet and this BSI. It can be useful for mapping one set of integers
// onto another.
//
// TODO: This implementation is functional but not performant; it should perhaps
// be re-written using SIMD SSE2 instructions.
//
func (b *BSI) IntersectAndTranspose(parallelism int, foundSet *Bitmap) *Bitmap {
trans := &task{bsi: b}
return parallelExecutor(parallelism, trans, transpose, foundSet)
}
func transpose(e *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup) {
defer wg.Done()
results := NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
for _, cID := range batch {
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
results.Add(uint64(value))
}
}
resultsChan <- results
}
// ParOr is intended primarily to be a concatenation function to be used during bulk load operations.
// Care should be taken to make sure that columnIDs do not overlap (unless overlapping values are
// identical).
func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
// Consolidate sets
bits := len(b.bA)
for i := 0; i < len(bsis); i++ {
if len(bsis[i].bA) > bits {
bits = bsis[i].BitCount()
}
}
// Make sure we have enough bit slices
for bits > b.BitCount() {
newBm := NewBitmap()
if b.runOptimized {
newBm.RunOptimize()
}
b.bA = append(b.bA, newBm)
}
a := make([][]*Bitmap, bits)
for i := range a {
a[i] = make([]*Bitmap, 0)
for _, x := range bsis {
if len(x.bA) > i {
a[i] = append(a[i], x.bA[i])
} else {
a[i] = []*Bitmap{NewBitmap()}
if b.runOptimized {
a[i][0].RunOptimize()
}
}
}
}
// Consolidate existence bit maps
ebms := make([]*Bitmap, len(bsis))
for i := range ebms {
ebms[i] = bsis[i].eBM
}
// First merge all the bit slices from all bsi maps that exist in target
var wg sync.WaitGroup
for i := 0; i < bits; i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
x := []*Bitmap{b.bA[j]}
x = append(x, a[j]...)
b.bA[j] = ParOr(parallelism, x...)
}(i)
}
wg.Wait()
// merge all the EBM maps
x := []*Bitmap{b.eBM}
x = append(x, ebms...)
b.eBM = ParOr(parallelism, x...)
}
// UnmarshalBinary de-serializes a BSI. The value at bitData[0] is the EBM. The
// remaining elements are in least- to most-significant order, starting at bitData[1] (bit position 0).
func (b *BSI) UnmarshalBinary(bitData [][]byte) error {
for i := 1; i < len(bitData); i++ {
if bitData[i] == nil || len(bitData[i]) == 0 {
continue
}
if b.BitCount() < i {
newBm := NewBitmap()
if b.runOptimized {
newBm.RunOptimize()
}
b.bA = append(b.bA, newBm)
}
if err := b.bA[i-1].UnmarshalBinary(bitData[i]); err != nil {
return err
}
if b.runOptimized {
b.bA[i-1].RunOptimize()
}
}
// First element of bitData is the EBM
if bitData[0] == nil {
b.eBM = NewBitmap()
if b.runOptimized {
b.eBM.RunOptimize()
}
return nil
}
if err := b.eBM.UnmarshalBinary(bitData[0]); err != nil {
return err
}
if b.runOptimized {
b.eBM.RunOptimize()
}
return nil
}
// MarshalBinary serializes a BSI
func (b *BSI) MarshalBinary() ([][]byte, error) {
var err error
data := make([][]byte, b.BitCount()+1)
// Add extra element for EBM (BitCount() + 1)
for i := 1; i < b.BitCount()+1; i++ {
data[i], err = b.bA[i-1].MarshalBinary()
if err != nil {
return nil, err
}
}
// Marshal EBM
data[0], err = b.eBM.MarshalBinary()
if err != nil {
return nil, err
}
return data, nil
}
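// Illustrative sketch (not part of the upstream file): a serialization round
// trip. data[0] holds the EBM; data[1:] hold bit slices, least significant first.
func exampleMarshalRoundTrip() error {
	b := NewDefaultBSI()
	b.SetValue(5, 3)
	data, err := b.MarshalBinary()
	if err != nil {
		return err
	}
	b2 := NewDefaultBSI()
	if err := b2.UnmarshalBinary(data); err != nil {
		return err
	}
	v, _ := b2.GetValue(5)
	fmt.Println(v) // prints 3
	return nil
}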
// BatchEqual returns a bitmap containing the column IDs whose values are contained in the provided list.
func (b *BSI) BatchEqual(parallelism int, values []int64) *Bitmap {
valMap := make(map[int64]struct{}, len(values))
for i := 0; i < len(values); i++ {
valMap[values[i]] = struct{}{}
}
comp := &task{bsi: b, values: valMap}
return parallelExecutor(parallelism, comp, batchEqual, b.eBM)
}
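// Illustrative sketch (not part of the upstream file): membership testing
// against a list of values rather than a single comparison.
func exampleBatchEqual() {
	b := NewDefaultBSI()
	b.SetValue(1, 10)
	b.SetValue(2, 20)
	b.SetValue(3, 10)
	matches := b.BatchEqual(0, []int64{10, 99})
	fmt.Println(matches.GetCardinality()) // prints 2 (columns 1 and 3)
}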
func batchEqual(e *task, batch []uint64, resultsChan chan *Bitmap,
wg *sync.WaitGroup) {
defer wg.Done()
results := NewBitmap()
if e.bsi.runOptimized {
results.RunOptimize()
}
for i := 0; i < len(batch); i++ {
cID := batch[i]
if value, ok := e.bsi.GetValue(uint64(cID)); ok {
if _, yes := e.values[int64(value)]; yes {
results.Add(cID)
}
}
}
resultsChan <- results
}
// ClearBits clears the bits in the target that are also present in the found set.
func ClearBits(foundSet, target *Bitmap) {
iter := foundSet.Iterator()
for iter.HasNext() {
cID := iter.Next()
target.Remove(cID)
}
}
// ClearValues removes the values found in foundSet
func (b *BSI) ClearValues(foundSet *Bitmap) {
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
ClearBits(foundSet, b.eBM)
}()
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
ClearBits(foundSet, b.bA[j])
}(i)
}
wg.Wait()
}
// NewBSIRetainSet - Construct a new BSI from a clone of an existing BSI, retaining only the values contained in foundSet
func (b *BSI) NewBSIRetainSet(foundSet *Bitmap) *BSI {
newBSI := NewBSI(b.MaxValue, b.MinValue)
newBSI.bA = make([]*Bitmap, b.BitCount())
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
newBSI.eBM = b.eBM.Clone()
newBSI.eBM.And(foundSet)
}()
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
newBSI.bA[j] = b.bA[j].Clone()
newBSI.bA[j].And(foundSet)
}(i)
}
wg.Wait()
return newBSI
}
// Clone performs a deep copy of BSI contents.
func (b *BSI) Clone() *BSI {
return b.NewBSIRetainSet(b.eBM)
}
// Add - In-place, column-wise sum of the contents of another BSI with this BSI.
func (b *BSI) Add(other *BSI) {
b.eBM.Or(other.eBM)
for i := 0; i < len(other.bA); i++ {
b.addDigit(other.bA[i], i)
}
}
func (b *BSI) addDigit(foundSet *Bitmap, i int) {
if i >= len(b.bA) {
b.bA = append(b.bA, NewBitmap())
}
carry := And(b.bA[i], foundSet)
b.bA[i].Xor(foundSet)
if !carry.IsEmpty() {
if i+1 >= len(b.bA) {
b.bA = append(b.bA, NewBitmap())
}
b.addDigit(carry, i+1)
}
}
// TransposeWithCounts is a matrix transpose function that returns a BSI whose
// column IDs are defined by the values contained within the input BSI. Because
// different column IDs can hold the same value, TransposeWithCounts is useful
// where there is a one-to-many relationship between the vectored integer sets.
// The resulting BSI stores the number of times each particular value appeared
// in the input BSI.
func (b *BSI) TransposeWithCounts(parallelism int, foundSet, filterSet *Bitmap) *BSI {
return parallelExecutorBSIResults(parallelism, b, transposeWithCounts, foundSet, filterSet, true)
}
func transposeWithCounts(input *BSI, filterSet *Bitmap, batch []uint64, resultsChan chan *BSI, wg *sync.WaitGroup) {
defer wg.Done()
results := NewDefaultBSI()
if input.runOptimized {
results.RunOptimize()
}
for _, cID := range batch {
if value, ok := input.GetValue(uint64(cID)); ok {
if !filterSet.Contains(uint64(value)) {
continue
}
if val, ok2 := results.GetValue(uint64(value)); !ok2 {
results.SetValue(uint64(value), 1)
} else {
val++
results.SetValue(uint64(value), val)
}
}
}
resultsChan <- results
}
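// Illustrative sketch (not part of the upstream file): building a value
// histogram. The filterSet restricts which *values* are counted; BitmapOf is
// assumed to be the package's variadic constructor.
func exampleTransposeWithCounts() {
	b := NewDefaultBSI()
	b.SetValue(1, 10)
	b.SetValue(2, 10)
	b.SetValue(3, 20)
	counts := b.TransposeWithCounts(0, b.GetExistenceBitmap(), BitmapOf(10, 20))
	n, _ := counts.GetValue(10)
	fmt.Println(n) // prints 2 (the value 10 appears in two columns)
}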
// Increment - In-place increment of values in a BSI. The foundSet selects the columns to be incremented.
func (b *BSI) Increment(foundSet *Bitmap) {
b.addDigit(foundSet, 0)
}
// IncrementAll - In-place increment of all values in a BSI.
func (b *BSI) IncrementAll() {
b.Increment(b.GetExistenceBitmap())
}
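// Illustrative sketch (not part of the upstream file): Increment is a
// ripple-carry add of 1 via addDigit, starting at bit slice 0.
func exampleIncrement() {
	b := NewDefaultBSI()
	b.SetValue(7, 1)
	b.IncrementAll()
	v, _ := b.GetValue(7)
	fmt.Println(v) // prints 2
}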


@@ -0,0 +1,31 @@
package roaring64
// FastAnd computes the intersection between many bitmaps quickly
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
// of manually calling "And" many times.
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
answer := And(bitmaps[0], bitmaps[1])
for _, bm := range bitmaps[2:] {
answer.And(bm)
}
return answer
}
// FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly.
func FastOr(bitmaps ...*Bitmap) *Bitmap {
if len(bitmaps) == 0 {
return NewBitmap()
} else if len(bitmaps) == 1 {
return bitmaps[0].Clone()
}
answer := Or(bitmaps[0], bitmaps[1])
for _, bm := range bitmaps[2:] {
answer.Or(bm)
}
return answer
}
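// Illustrative sketch (not part of the upstream file): aggregating several
// bitmaps in one call instead of chaining And/Or by hand.
func exampleFastAggregation(a, b, c *Bitmap) (unionCard, intersectionCard uint64) {
	u := FastOr(a, b, c)  // single pass over all three operands
	x := FastAnd(a, b, c) // the intersection narrows with each operand
	return u.GetCardinality(), x.GetCardinality()
}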


@@ -0,0 +1,169 @@
package roaring64
import (
"github.com/RoaringBitmap/roaring"
)
// IntIterable64 allows you to iterate over the values in a Bitmap
type IntIterable64 interface {
HasNext() bool
Next() uint64
}
// IntPeekable64 allows you to look at the next value without advancing the
// iterator, and to advance as long as the next value is smaller than minval
type IntPeekable64 interface {
IntIterable64
// PeekNext peeks the next value without advancing the iterator
PeekNext() uint64
// AdvanceIfNeeded advances as long as the next value is smaller than minval
AdvanceIfNeeded(minval uint64)
}
type intIterator struct {
pos int
hs uint64
iter roaring.IntPeekable
highlowcontainer *roaringArray64
}
// HasNext returns true if there are more integers to iterate over
func (ii *intIterator) HasNext() bool {
return ii.pos < ii.highlowcontainer.size()
}
func (ii *intIterator) init() {
if ii.highlowcontainer.size() > ii.pos {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).Iterator()
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
}
}
// Next returns the next integer
func (ii *intIterator) Next() uint64 {
lowbits := ii.iter.Next()
x := uint64(lowbits) | ii.hs
if !ii.iter.HasNext() {
ii.pos = ii.pos + 1
ii.init()
}
return x
}
// PeekNext peeks the next value without advancing the iterator
func (ii *intIterator) PeekNext() uint64 {
return uint64(ii.iter.PeekNext()&maxLowBit) | ii.hs
}
// AdvanceIfNeeded advances as long as the next value is smaller than minval
func (ii *intIterator) AdvanceIfNeeded(minval uint64) {
to := minval >> 32
for ii.HasNext() && (ii.hs>>32) < to {
ii.pos++
ii.init()
}
if ii.HasNext() && (ii.hs>>32) == to {
ii.iter.AdvanceIfNeeded(lowbits(minval))
if !ii.iter.HasNext() {
ii.pos++
ii.init()
}
}
}
func newIntIterator(a *Bitmap) *intIterator {
p := new(intIterator)
p.pos = 0
p.highlowcontainer = &a.highlowcontainer
p.init()
return p
}
type intReverseIterator struct {
pos int
hs uint64
iter roaring.IntIterable
highlowcontainer *roaringArray64
}
// HasNext returns true if there are more integers to iterate over
func (ii *intReverseIterator) HasNext() bool {
return ii.pos >= 0
}
func (ii *intReverseIterator) init() {
if ii.pos >= 0 {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).ReverseIterator()
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
} else {
ii.iter = nil
}
}
// Next returns the next integer
func (ii *intReverseIterator) Next() uint64 {
x := uint64(ii.iter.Next()) | ii.hs
if !ii.iter.HasNext() {
ii.pos = ii.pos - 1
ii.init()
}
return x
}
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
p := new(intReverseIterator)
p.highlowcontainer = &a.highlowcontainer
p.pos = a.highlowcontainer.size() - 1
p.init()
return p
}
// ManyIntIterable64 allows you to iterate over the values in a Bitmap
type ManyIntIterable64 interface {
// NextMany fills the given buffer with values and returns how many values were written
NextMany([]uint64) int
}
type manyIntIterator struct {
pos int
hs uint64
iter roaring.ManyIntIterable
highlowcontainer *roaringArray64
}
func (ii *manyIntIterator) init() {
if ii.highlowcontainer.size() > ii.pos {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).ManyIterator()
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
} else {
ii.iter = nil
}
}
func (ii *manyIntIterator) NextMany(buf []uint64) int {
n := 0
for n < len(buf) {
if ii.iter == nil {
break
}
moreN := ii.iter.NextMany64(ii.hs, buf[n:])
n += moreN
if moreN == 0 {
ii.pos = ii.pos + 1
ii.init()
}
}
return n
}
func newManyIntIterator(a *Bitmap) *manyIntIterator {
p := new(manyIntIterator)
p.pos = 0
p.highlowcontainer = &a.highlowcontainer
p.init()
return p
}
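// Illustrative sketch (not part of the upstream file): draining a bitmap
// through its ManyIntIterable64 with a fixed-size buffer, the same pattern
// parallelExecutor uses in this package to build batches.
func collectAll(bm *Bitmap) []uint64 {
	out := make([]uint64, 0, bm.GetCardinality())
	buf := make([]uint64, 64)
	it := bm.ManyIterator()
	for n := it.NextMany(buf); n > 0; n = it.NextMany(buf) {
		out = append(out, buf[:n]...)
	}
	return out
}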


@@ -0,0 +1,292 @@
package roaring64
import (
"fmt"
"runtime"
"github.com/RoaringBitmap/roaring"
)
var defaultWorkerCount = runtime.NumCPU()
// ParOr computes the union (OR) of all provided bitmaps in parallel,
// where the parameter "parallelism" determines how many workers are to be used
// (if it is set to 0, a default number of workers is chosen)
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
var lKey uint32 = maxUint32
var hKey uint32
bitmapsFiltered := bitmaps[:0]
for _, b := range bitmaps {
if !b.IsEmpty() {
bitmapsFiltered = append(bitmapsFiltered, b)
}
}
bitmaps = bitmapsFiltered
for _, b := range bitmaps {
lKey = minOfUint32(lKey, b.highlowcontainer.keys[0])
hKey = maxOfUint32(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
}
if lKey == maxUint32 && hKey == 0 {
return New()
} else if len(bitmaps) == 1 {
return bitmaps[0]
}
// Compute the key range in uint64 so that it cannot overflow; an overflowed
// value could later yield a channel of size 0 which, on some systems, would
// block indefinitely.
keyRange := uint64(hKey) - uint64(lKey) + 1
if keyRange == 1 {
// Revert to FastOr: all bitmaps share a single key, so no
// container-level aggregation parallelism is achievable.
return FastOr(bitmaps...)
}
if parallelism == 0 {
parallelism = defaultWorkerCount
}
// We cannot use int since int is 32-bit on 32-bit systems.
var chunkSize int64
var chunkCount int64
if int64(parallelism)*4 > int64(keyRange) {
chunkSize = 1
chunkCount = int64(keyRange)
} else {
chunkCount = int64(parallelism) * 4
chunkSize = (int64(keyRange) + chunkCount - 1) / chunkCount
}
if chunkCount*chunkSize < int64(keyRange) {
// it's fine to panic to indicate an implementation error
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
}
chunks := make([]*roaringArray64, chunkCount)
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
orFunc := func() {
for spec := range chunkSpecChan {
ra := orOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
for _, b := range bitmaps[2:] {
ra = iorOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
}
chunkChan <- parChunk{ra, spec.idx}
}
}
for i := 0; i < parallelism; i++ {
go orFunc()
}
go func() {
for i := int64(0); i < chunkCount; i++ {
spec := parChunkSpec{
start: uint32(int64(lKey) + i*chunkSize),
end: uint32(minOfInt64(int64(lKey)+(i+1)*chunkSize-1, int64(hKey))),
idx: int(i),
}
chunkSpecChan <- spec
}
}()
chunksRemaining := chunkCount
for chunk := range chunkChan {
chunks[chunk.idx] = chunk.ra
chunksRemaining--
if chunksRemaining == 0 {
break
}
}
close(chunkChan)
close(chunkSpecChan)
containerCount := 0
for _, chunk := range chunks {
containerCount += chunk.size()
}
result := Bitmap{
roaringArray64{
containers: make([]*roaring.Bitmap, containerCount),
keys: make([]uint32, containerCount),
needCopyOnWrite: make([]bool, containerCount),
},
}
resultOffset := 0
for _, chunk := range chunks {
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
resultOffset += chunk.size()
}
return &result
}
type parChunkSpec struct {
start uint32
end uint32
idx int
}
type parChunk struct {
ra *roaringArray64
idx int
}
func (c parChunk) size() int {
return c.ra.size()
}
func parNaiveStartAt(ra *roaringArray64, start uint32, last uint32) int {
for idx, key := range ra.keys {
if key >= start && key <= last {
return idx
} else if key > last {
break
}
}
return ra.size()
}
func orOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
answer := &roaringArray64{}
length1 := ra1.size()
length2 := ra2.size()
idx1 := parNaiveStartAt(ra1, start, last)
idx2 := parNaiveStartAt(ra2, start, last)
var key1 uint32
var key2 uint32
if idx1 < length1 && idx2 < length2 {
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
for key1 <= last && key2 <= last {
if key1 < key2 {
answer.appendCopy(*ra1, idx1)
idx1++
if idx1 == length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
} else if key1 > key2 {
answer.appendCopy(*ra2, idx2)
idx2++
if idx2 == length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
} else {
c1 := ra1.getContainerAtIndex(idx1)
//answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
answer.appendContainer(key1, roaring.Or(c1, ra2.getContainerAtIndex(idx2)), false)
idx1++
idx2++
if idx1 == length1 || idx2 == length2 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
}
}
}
if idx2 < length2 {
key2 = ra2.getKeyAtIndex(idx2)
for key2 <= last {
answer.appendCopy(*ra2, idx2)
idx2++
if idx2 == length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
}
}
if idx1 < length1 {
key1 = ra1.getKeyAtIndex(idx1)
for key1 <= last {
answer.appendCopy(*ra1, idx1)
idx1++
if idx1 == length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
}
}
return answer
}
func iorOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
length1 := ra1.size()
length2 := ra2.size()
idx1 := 0
idx2 := parNaiveStartAt(ra2, start, last)
var key1 uint32
var key2 uint32
if idx1 < length1 && idx2 < length2 {
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
for key1 <= last && key2 <= last {
if key1 < key2 {
idx1++
if idx1 >= length1 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
} else if key1 > key2 {
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
ra1.needCopyOnWrite[idx1] = true
idx2++
idx1++
length1++
if idx2 >= length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
} else {
c1 := ra1.getWritableContainerAtIndex(idx1)
//ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
c1.Or(ra2.getContainerAtIndex(idx2))
ra1.setContainerAtIndex(idx1, c1)
ra1.needCopyOnWrite[idx1] = false
idx1++
idx2++
if idx1 >= length1 || idx2 >= length2 {
break
}
key1 = ra1.getKeyAtIndex(idx1)
key2 = ra2.getKeyAtIndex(idx2)
}
}
}
if idx2 < length2 {
key2 = ra2.getKeyAtIndex(idx2)
for key2 <= last {
ra1.appendCopy(*ra2, idx2)
idx2++
if idx2 >= length2 {
break
}
key2 = ra2.getKeyAtIndex(idx2)
}
}
return ra1
}
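// Illustrative sketch (not part of the upstream file): a parallel union.
// Parallelism 0 selects defaultWorkerCount workers; BitmapOf is assumed to be
// the package's variadic constructor.
func exampleParOr() {
	a := BitmapOf(1, 1<<35)
	b := BitmapOf(2, 1<<36)
	fmt.Println(ParOr(0, a, b).GetCardinality()) // prints 4
}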

File diff suppressed because it is too large.


@@ -0,0 +1,398 @@
package roaring64
import "github.com/RoaringBitmap/roaring"
type roaringArray64 struct {
keys []uint32
containers []*roaring.Bitmap
needCopyOnWrite []bool
copyOnWrite bool
}
// runOptimize compresses the element containers to minimize space consumed.
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
// we don't bother to check the needCopyOnWrite bits. We replace
// (possibly all) elements of ra.containers in-place with space
// optimized versions.
func (ra *roaringArray64) runOptimize() {
for i := range ra.containers {
ra.containers[i].RunOptimize()
}
}
func (ra *roaringArray64) appendContainer(key uint32, value *roaring.Bitmap, mustCopyOnWrite bool) {
ra.keys = append(ra.keys, key)
ra.containers = append(ra.containers, value)
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
}
func (ra *roaringArray64) appendWithoutCopy(sa roaringArray64, startingindex int) {
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
}
func (ra *roaringArray64) appendCopy(sa roaringArray64, startingindex int) {
// copy-on-write only if both sides request it, or if we already have a lightweight copy
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
if !copyonwrite {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].Clone(), copyonwrite)
} else {
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
if !sa.needsCopyOnWrite(startingindex) {
sa.setNeedsCopyOnWrite(startingindex)
}
}
}
func (ra *roaringArray64) appendWithoutCopyMany(sa roaringArray64, startingindex, end int) {
for i := startingindex; i < end; i++ {
ra.appendWithoutCopy(sa, i)
}
}
func (ra *roaringArray64) appendCopyMany(sa roaringArray64, startingindex, end int) {
for i := startingindex; i < end; i++ {
ra.appendCopy(sa, i)
}
}
func (ra *roaringArray64) appendCopiesUntil(sa roaringArray64, stoppingKey uint32) {
// copy-on-write only if both sides request it, or if we already have a lightweight copy
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
for i := 0; i < sa.size(); i++ {
if sa.keys[i] >= stoppingKey {
break
}
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
if thiscopyonewrite {
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
if !sa.needsCopyOnWrite(i) {
sa.setNeedsCopyOnWrite(i)
}
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].Clone(), thiscopyonewrite)
}
}
}
func (ra *roaringArray64) appendCopiesAfter(sa roaringArray64, beforeStart uint32) {
// copy-on-write only if both sides request it, or if we already have a lightweight copy
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
startLocation := sa.getIndex(beforeStart)
if startLocation >= 0 {
startLocation++
} else {
startLocation = -startLocation - 1
}
for i := startLocation; i < sa.size(); i++ {
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
if thiscopyonewrite {
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
if !sa.needsCopyOnWrite(i) {
sa.setNeedsCopyOnWrite(i)
}
} else {
// since there is no copy-on-write, we need to clone the container (this is important)
ra.appendContainer(sa.keys[i], sa.containers[i].Clone(), thiscopyonewrite)
}
}
}
func (ra *roaringArray64) removeIndexRange(begin, end int) {
if end <= begin {
return
}
r := end - begin
copy(ra.keys[begin:], ra.keys[end:])
copy(ra.containers[begin:], ra.containers[end:])
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
ra.resize(len(ra.keys) - r)
}
func (ra *roaringArray64) resize(newsize int) {
for k := newsize; k < len(ra.containers); k++ {
ra.containers[k] = nil
}
ra.keys = ra.keys[:newsize]
ra.containers = ra.containers[:newsize]
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
}
func (ra *roaringArray64) clear() {
ra.resize(0)
ra.copyOnWrite = false
}
func (ra *roaringArray64) clone() *roaringArray64 {
sa := roaringArray64{}
sa.copyOnWrite = ra.copyOnWrite
// this is where copyOnWrite is used.
if ra.copyOnWrite {
sa.keys = make([]uint32, len(ra.keys))
copy(sa.keys, ra.keys)
sa.containers = make([]*roaring.Bitmap, len(ra.containers))
copy(sa.containers, ra.containers)
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
ra.markAllAsNeedingCopyOnWrite()
sa.markAllAsNeedingCopyOnWrite()
// the underlying containers are now shared; both sides must copy before writing
} else {
// make a full copy
sa.keys = make([]uint32, len(ra.keys))
copy(sa.keys, ra.keys)
sa.containers = make([]*roaring.Bitmap, len(ra.containers))
for i := range sa.containers {
sa.containers[i] = ra.containers[i].Clone()
}
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
}
return &sa
}
// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray64) cloneCopyOnWriteContainers() {
for i, needCopyOnWrite := range ra.needCopyOnWrite {
if needCopyOnWrite {
ra.containers[i] = ra.containers[i].Clone()
ra.needCopyOnWrite[i] = false
}
}
}
// unused function:
// func (ra *roaringArray64) containsKey(x uint32) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
// }
func (ra *roaringArray64) getContainer(x uint32) *roaring.Bitmap {
i := ra.binarySearch(0, int64(len(ra.keys)), x)
if i < 0 {
return nil
}
return ra.containers[i]
}
func (ra *roaringArray64) getContainerAtIndex(i int) *roaring.Bitmap {
return ra.containers[i]
}
func (ra *roaringArray64) getWritableContainerAtIndex(i int) *roaring.Bitmap {
if ra.needCopyOnWrite[i] {
ra.containers[i] = ra.containers[i].Clone()
ra.needCopyOnWrite[i] = false
}
return ra.containers[i]
}
func (ra *roaringArray64) getIndex(x uint32) int {
// before the binary search, we optimize for frequent cases
size := len(ra.keys)
if (size == 0) || (ra.keys[size-1] == x) {
return size - 1
}
return ra.binarySearch(0, int64(size), x)
}
func (ra *roaringArray64) getKeyAtIndex(i int) uint32 {
return ra.keys[i]
}
func (ra *roaringArray64) insertNewKeyValueAt(i int, key uint32, value *roaring.Bitmap) {
ra.keys = append(ra.keys, 0)
ra.containers = append(ra.containers, nil)
copy(ra.keys[i+1:], ra.keys[i:])
copy(ra.containers[i+1:], ra.containers[i:])
ra.keys[i] = key
ra.containers[i] = value
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
ra.needCopyOnWrite[i] = false
}
func (ra *roaringArray64) remove(key uint32) bool {
i := ra.binarySearch(0, int64(len(ra.keys)), key)
if i >= 0 { // the key exists
ra.removeAtIndex(i)
return true
}
return false
}
func (ra *roaringArray64) removeAtIndex(i int) {
copy(ra.keys[i:], ra.keys[i+1:])
copy(ra.containers[i:], ra.containers[i+1:])
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
ra.resize(len(ra.keys) - 1)
}
func (ra *roaringArray64) setContainerAtIndex(i int, c *roaring.Bitmap) {
ra.containers[i] = c
}
func (ra *roaringArray64) replaceKeyAndContainerAtIndex(i int, key uint32, c *roaring.Bitmap, mustCopyOnWrite bool) {
ra.keys[i] = key
ra.containers[i] = c
ra.needCopyOnWrite[i] = mustCopyOnWrite
}
func (ra *roaringArray64) size() int {
return len(ra.keys)
}
func (ra *roaringArray64) binarySearch(begin, end int64, ikey uint32) int {
low := begin
high := end - 1
for low+16 <= high {
middleIndex := low + (high-low)/2 // avoid overflow
middleValue := ra.keys[middleIndex]
if middleValue < ikey {
low = middleIndex + 1
} else if middleValue > ikey {
high = middleIndex - 1
} else {
return int(middleIndex)
}
}
for ; low <= high; low++ {
val := ra.keys[low]
if val >= ikey {
if val == ikey {
return int(low)
}
break
}
}
return -int(low + 1)
}
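// Illustrative note (not part of the upstream file): binarySearch follows the
// Java convention. A hit returns the matching index, while a miss returns
// -(insertionPoint)-1, which callers such as appendCopiesAfter decode back
// into an insertion point via -result-1.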
func (ra *roaringArray64) equals(o interface{}) bool {
srb, ok := o.(roaringArray64)
if ok {
if srb.size() != ra.size() {
return false
}
for i, k := range ra.keys {
if k != srb.keys[i] {
return false
}
}
for i, c := range ra.containers {
if !c.Equals(srb.containers[i]) {
return false
}
}
return true
}
return false
}
func (ra *roaringArray64) hasRunCompression() bool {
for _, c := range ra.containers {
if c.HasRunCompression() {
return true
}
}
return false
}
func (ra *roaringArray64) advanceUntil(min uint32, pos int) int {
lower := pos + 1
if lower >= len(ra.keys) || ra.keys[lower] >= min {
return lower
}
spansize := 1
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
spansize *= 2
}
var upper int
if lower+spansize < len(ra.keys) {
upper = lower + spansize
} else {
upper = len(ra.keys) - 1
}
if ra.keys[upper] == min {
return upper
}
if ra.keys[upper] < min {
// the array has no item >= min
return len(ra.keys)
}
// we know that the next-smallest span was too small
lower += (spansize >> 1)
mid := 0
for lower+1 != upper {
mid = (lower + upper) >> 1
if ra.keys[mid] == min {
return mid
} else if ra.keys[mid] < min {
lower = mid
} else {
upper = mid
}
}
return upper
}
func (ra *roaringArray64) markAllAsNeedingCopyOnWrite() {
for i := range ra.needCopyOnWrite {
ra.needCopyOnWrite[i] = true
}
}
func (ra *roaringArray64) needsCopyOnWrite(i int) bool {
return ra.needCopyOnWrite[i]
}
func (ra *roaringArray64) setNeedsCopyOnWrite(i int) {
ra.needCopyOnWrite[i] = true
}
// should be dirt cheap
func (ra *roaringArray64) serializedSizeInBytes() uint64 {
answer := uint64(8)
for _, c := range ra.containers {
answer += 4
answer += c.GetSerializedSizeInBytes()
}
return answer
}


@@ -0,0 +1,49 @@
package roaring64
import "github.com/RoaringBitmap/roaring"
func highbits(x uint64) uint32 {
return uint32(x >> 32)
}
func lowbits(x uint64) uint32 {
return uint32(x & maxLowBit)
}
const maxLowBit = roaring.MaxUint32
const maxUint32 = roaring.MaxUint32
func minOfInt64(a, b int64) int64 {
if a < b {
return a
}
return b
}
func minOfInt(a, b int) int {
if a < b {
return a
}
return b
}
func maxOfInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxOfUint32(a, b uint32) uint32 {
if a > b {
return a
}
return b
}
func minOfUint32(a, b uint32) uint32 {
if a < b {
return a
}
return b
}