Change list/blob chunk values to be their length, not cumulative length.

This is simpler for chunking, since it no longer needs to "normalize"
the values when re-chunking. It's a bit less efficient because instead
of binary searching we need to do a linear search through the chunk values.
Benjamin Kalman
2015-12-09 10:38:06 -08:00
parent f04aa8ebc0
commit 5ab90e8ae1
11 changed files with 228 additions and 145 deletions
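For illustration only, here is a minimal sketch of the trade-off described in the commit message. The names `chunk`, `seekCumulative`, and `seekByLength` are hypothetical stand-ins, not the actual `metaTuple`/`sequenceCursor` API: with cumulative values the chunk containing an absolute offset can be found by binary search over the running totals, while with per-chunk lengths the cursor has to walk the entries and accumulate the offset as it goes (as `seekLinear` does in the diff below).

```go
package main

import (
	"fmt"
	"sort"
)

// Hypothetical, simplified stand-in for a meta sequence entry.
type chunk struct {
	value uint64 // before this commit: cumulative length; after: this chunk's own length
}

// Old scheme: values are cumulative lengths, so the chunk containing
// |offset| can be found with a binary search over the running totals.
func seekCumulative(chunks []chunk, offset uint64) (idx int, chunkStart uint64) {
	idx = sort.Search(len(chunks), func(i int) bool {
		return offset < chunks[i].value
	})
	if idx == len(chunks) {
		idx-- // clamp to the last chunk, as the real cursor does
	}
	if idx > 0 {
		chunkStart = chunks[idx-1].value // start = previous cumulative total
	}
	return
}

// New scheme: values are per-chunk lengths, so finding the chunk containing
// |offset| means scanning the entries and summing lengths (linear search).
func seekByLength(chunks []chunk, offset uint64) (idx int, chunkStart uint64) {
	for idx = 0; idx < len(chunks)-1; idx++ {
		next := chunkStart + chunks[idx].value
		if offset < next {
			break
		}
		chunkStart = next
	}
	return
}

func main() {
	// The same three chunks of lengths 3, 4, and 5, in both encodings.
	cumulative := []chunk{{3}, {7}, {12}}
	lengths := []chunk{{3}, {4}, {5}}

	i, start := seekCumulative(cumulative, 8)
	fmt.Println(i, start) // 2 7

	i, start = seekByLength(lengths, 8)
	fmt.Println(i, start) // 2 7
}
```

The payoff for giving up the binary search is the simplification the message refers to: because each value is already just that chunk's length, re-chunking no longer has to subtract out running totals, so the normalizeMetaSequenceChunk/normalizeChunkFn machinery is removed below.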

View File

@@ -13,8 +13,9 @@ import (
// It implements the Blob interface.
type compoundBlob struct {
metaSequenceObject
ref *ref.Ref
cs chunks.ChunkStore
length uint64
ref *ref.Ref
cs chunks.ChunkStore
}
func newCompoundBlob(tuples metaSequenceData, cs chunks.ChunkStore) compoundBlob {
@@ -23,7 +24,7 @@ func newCompoundBlob(tuples metaSequenceData, cs chunks.ChunkStore) compoundBlob
func buildCompoundBlob(tuples metaSequenceData, t Type, cs chunks.ChunkStore) Value {
d.Chk.True(t.Equals(typeForBlob))
return compoundBlob{metaSequenceObject{tuples, typeForBlob}, &ref.Ref{}, cs}
return compoundBlob{metaSequenceObject{tuples, typeForBlob}, tuples.uint64ValuesSum(), &ref.Ref{}, cs}
}
func init() {
@@ -31,10 +32,9 @@ func init() {
}
func (cb compoundBlob) Reader() io.ReadSeeker {
length := uint64(cb.lastTuple().value.(Uint64))
cursor, v := newMetaSequenceCursor(cb, cb.cs)
reader := v.(blobLeaf).Reader()
return &compoundBlobReader{cursor: cursor, currentReader: reader, length: length, cs: cb.cs}
return &compoundBlobReader{cursor: cursor, currentReader: reader, length: cb.Len(), cs: cb.cs}
}
func (cb compoundBlob) Equals(other Value) bool {
@@ -46,7 +46,7 @@ func (cb compoundBlob) Ref() ref.Ref {
}
func (cb compoundBlob) Len() uint64 {
return cb.tuples[len(cb.tuples)-1].uint64Value()
return cb.length
}
type compoundBlobReader struct {
@@ -100,20 +100,15 @@ func (cbr *compoundBlobReader) Seek(offset int64, whence int) (int64, error) {
seekAbs := uint64(abs)
chunkStart := cbr.cursor.seek(func(carry interface{}, mt sequenceItem) bool {
return seekAbs < uint64(carry.(Uint64))+uint64(mt.(metaTuple).value.(Uint64))
}, func(carry interface{}, prev, current sequenceItem) interface{} {
pv := uint64(0)
if prev != nil {
pv = uint64(prev.(metaTuple).value.(Uint64))
}
return Uint64(uint64(carry.(Uint64)) + pv)
}, Uint64(0))
chunkStart := cbr.cursor.seekLinear(func(carry interface{}, mt sequenceItem) (bool, interface{}) {
offset := carry.(uint64) + mt.(metaTuple).uint64Value()
return seekAbs < offset, offset
}, uint64(0))
cbr.chunkStart = uint64(chunkStart.(Uint64))
cbr.chunkStart = chunkStart.(uint64)
cbr.chunkOffset = seekAbs - cbr.chunkStart
cbr.currentReader = nil
return int64(seekAbs), nil
return abs, nil
}
func (cbr *compoundBlobReader) updateReader() {

View File

@@ -15,12 +15,10 @@ import (
func getTestCompoundBlob(datas ...string) compoundBlob {
tuples := make([]metaTuple, len(datas))
length := uint64(0)
ms := chunks.NewMemoryStore()
for i, s := range datas {
b := NewBlob(bytes.NewBufferString(s), ms)
length += uint64(len(s))
tuples[i] = metaTuple{WriteValue(b, ms), Uint64(length)}
tuples[i] = metaTuple{WriteValue(b, ms), Uint64(len(s))}
}
return newCompoundBlob(tuples, ms)
}
@@ -28,7 +26,7 @@ func getTestCompoundBlob(datas ...string) compoundBlob {
func getRandomReader() io.ReadSeeker {
length := int(5e5)
s := rand.NewSource(42)
buff := make([]byte, 5e5, 5e5)
buff := make([]byte, length)
for i := 0; i < length; i++ {
buff[i] = byte(s.Int63() & 0xff)
}
@@ -132,7 +130,7 @@ func TestCompoundBlobReaderSeek(t *testing.T) {
n, err = r.Seek(-1, 1)
assert.NoError(err)
// assert.Equal(int64(3), n)
assert.Equal(int64(3), n)
n2, err = r.Read(p)
assert.NoError(err)

View File

@@ -16,12 +16,13 @@ const (
type compoundList struct {
metaSequenceObject
ref *ref.Ref
cs chunks.ChunkStore
length uint64
ref *ref.Ref
cs chunks.ChunkStore
}
func buildCompoundList(tuples metaSequenceData, t Type, cs chunks.ChunkStore) Value {
cl := compoundList{metaSequenceObject{tuples, t}, &ref.Ref{}, cs}
cl := compoundList{metaSequenceObject{tuples, t}, tuples.uint64ValuesSum(), &ref.Ref{}, cs}
return valueFromType(cs, cl, t)
}
@@ -46,7 +47,7 @@ func (cl compoundList) Ref() ref.Ref {
}
func (cl compoundList) Len() uint64 {
return cl.tuples[len(cl.tuples)-1].uint64Value()
return cl.length
}
func (cl compoundList) Empty() bool {
@@ -54,26 +55,21 @@ func (cl compoundList) Empty() bool {
return false
}
// Returns a cursor pointing to the deepest metaTuple containing |idx| within |cl|, the list leaf that it points to, and the offset within the list that the leaf starts at.
func (cl compoundList) cursorAt(idx uint64) (*sequenceCursor, listLeaf, uint64) {
d.Chk.True(idx <= cl.Len())
cursor, leaf := newMetaSequenceCursor(cl, cl.cs)
chunkStart := cursor.seek(func(carry interface{}, mt sequenceItem) bool {
return idx < uint64(carry.(Uint64))+uint64(mt.(metaTuple).value.(Uint64))
}, func(carry interface{}, prev, current sequenceItem) interface{} {
pv := uint64(0)
if prev != nil {
pv = uint64(prev.(metaTuple).value.(Uint64))
}
return Uint64(uint64(carry.(Uint64)) + pv)
}, Uint64(0))
chunkStart := cursor.seekLinear(func(carry interface{}, mt sequenceItem) (bool, interface{}) {
offset := carry.(uint64) + mt.(metaTuple).uint64Value()
return idx < offset, offset
}, uint64(0))
current := cursor.current().(metaTuple)
if current.ref != leaf.Ref() {
if current := cursor.current().(metaTuple); current.ref != leaf.Ref() {
leaf = readMetaTupleValue(cursor.current(), cl.cs)
}
return cursor, leaf.(listLeaf), uint64(chunkStart.(Uint64))
return cursor, leaf.(listLeaf), chunkStart.(uint64)
}
func (cl compoundList) Get(idx uint64) Value {
@@ -102,6 +98,7 @@ func (cl compoundList) Set(idx uint64, v Value) List {
}
func (cl compoundList) Append(vs ...Value) List {
// TODO: add short circuitry to immediately create a cursor pointing to the end of the list.
seq := cl.sequenceChunkerAtIndex(cl.Len())
for _, v := range vs {
seq.Append(v)
@@ -118,7 +115,7 @@ func (cl compoundList) sequenceChunkerAtIndex(idx uint64) *sequenceChunker {
return list, len(list.values)
}}
return newSequenceChunker(cur, makeListLeafChunkFn(cl.t, cl.cs), newMetaSequenceChunkFn(cl.t, cl.cs), normalizeChunkNoop, normalizeMetaSequenceChunk, newListLeafBoundaryChecker(), newMetaSequenceBoundaryChecker)
return newSequenceChunker(cur, makeListLeafChunkFn(cl.t, cl.cs), newMetaSequenceChunkFn(cl.t, cl.cs), newListLeafBoundaryChecker(), newMetaSequenceBoundaryChecker)
}
func (cl compoundList) Filter(cb listFilterCallback) List {

View File

@@ -10,16 +10,12 @@ import (
type testSimpleList []Value
func (tsl testSimpleList) Get(idx uint64) Value {
return tsl[idx]
}
func getTestSimpleListLen() int {
return int(listPattern * 50)
func getTestSimpleListLen() uint64 {
return uint64(listPattern) * 200
}
func getTestSimpleList() testSimpleList {
length := getTestSimpleListLen()
length := int(getTestSimpleListLen())
s := rand.NewSource(42)
values := make([]Value, length)
for i := 0; i < length; i++ {
@@ -38,8 +34,9 @@ func TestCompoundListGet(t *testing.T) {
tr := MakeCompoundType(ListKind, MakePrimitiveType(Int64Kind))
cl := NewTypedList(cs, tr, simpleList...)
for i, v := range simpleList {
assert.Equal(v, cl.Get(uint64(i)))
// Incrementing by len(simpleList)/10 because Get() is too slow to run on every index.
for i := 0; i < len(simpleList); i += len(simpleList) / 10 {
assert.Equal(simpleList[i], cl.Get(uint64(i)))
}
}
@@ -53,15 +50,12 @@ func TestCompoundListIter(t *testing.T) {
cl := NewTypedList(cs, tr, simpleList...)
expectIdx := uint64(0)
endAt := uint64(listPattern)
endAt := getTestSimpleListLen() / 2
cl.Iter(func(v Value, idx uint64) bool {
assert.Equal(expectIdx, idx)
expectIdx += 1
assert.Equal(simpleList.Get(idx), v)
if expectIdx == endAt {
return true
}
return false
expectIdx++
assert.Equal(simpleList[idx], v)
return expectIdx == endAt
})
assert.Equal(endAt, expectIdx)
@@ -80,20 +74,34 @@ func TestCompoundListIterAll(t *testing.T) {
cl.IterAll(func(v Value, idx uint64) {
assert.Equal(expectIdx, idx)
expectIdx += 1
assert.Equal(simpleList.Get(idx), v)
assert.Equal(simpleList[idx], v)
})
assert.Equal(getTestSimpleListLen(), expectIdx)
}
func TestCompoundListCurAt(t *testing.T) {
func TestCompoundListLen(t *testing.T) {
assert := assert.New(t)
listLen := func(at int, next func(*sequenceCursor) bool) (size int) {
cs := chunks.NewMemoryStore()
tr := MakeCompoundType(ListKind, MakePrimitiveType(Int64Kind))
cl := NewTypedList(cs, tr, getTestSimpleList()...).(compoundList)
assert.Equal(getTestSimpleListLen(), cl.Len())
cl = NewTypedList(cs, tr, append(getTestSimpleList(), getTestSimpleList()...)...).(compoundList)
assert.Equal(getTestSimpleListLen()*2, cl.Len())
}
func TestCompoundListCursorAt(t *testing.T) {
assert := assert.New(t)
listLen := func(at uint64, next func(*sequenceCursor) bool) (size uint64) {
cs := chunks.NewMemoryStore()
tr := MakeCompoundType(ListKind, MakePrimitiveType(Int64Kind))
cl := NewTypedList(cs, tr, getTestSimpleList()...).(compoundList)
cur, _, _ := cl.cursorAt(uint64(at))
cur, _, _ := cl.cursorAt(at)
for {
size += int(readMetaTupleValue(cur.current(), cs).(List).Len())
size += readMetaTupleValue(cur.current(), cs).(List).Len()
if !next(cur) {
return
}
@@ -134,31 +142,31 @@ func TestCompoundListAppend(t *testing.T) {
expected := getTestSimpleList()
assert.Equal(expected, compoundToSimple(cl))
assert.Equal(getTestSimpleListLen(), int(cl.Len()))
assert.Equal(getTestSimpleListLen(), cl.Len())
assert.True(newCompoundList(expected).Equals(cl))
expected = append(expected, Int64(42))
assert.Equal(expected, compoundToSimple(cl2))
assert.Equal(getTestSimpleListLen()+1, int(cl2.Len()))
assert.Equal(getTestSimpleListLen()+1, cl2.Len())
assert.True(newCompoundList(expected).Equals(cl2))
expected = append(expected, Int64(43))
assert.Equal(expected, compoundToSimple(cl3))
assert.Equal(getTestSimpleListLen()+2, int(cl3.Len()))
assert.Equal(getTestSimpleListLen()+2, cl3.Len())
assert.True(newCompoundList(expected).Equals(cl3))
expected = append(expected, getTestSimpleList()...)
assert.Equal(expected, compoundToSimple(cl4))
assert.Equal(2*getTestSimpleListLen()+2, int(cl4.Len()))
assert.Equal(2*getTestSimpleListLen()+2, cl4.Len())
assert.True(newCompoundList(expected).Equals(cl4))
expected = append(expected, Int64(44), Int64(45))
assert.Equal(expected, compoundToSimple(cl5))
assert.Equal(2*getTestSimpleListLen()+4, int(cl5.Len()))
assert.Equal(2*getTestSimpleListLen()+4, cl5.Len())
assert.True(newCompoundList(expected).Equals(cl5))
expected = append(expected, getTestSimpleList()...)
assert.Equal(expected, compoundToSimple(cl6))
assert.Equal(3*getTestSimpleListLen()+4, int(cl6.Len()))
assert.Equal(3*getTestSimpleListLen()+4, cl6.Len())
assert.True(newCompoundList(expected).Equals(cl6))
}

View File

@@ -47,20 +47,20 @@ func (cm compoundMap) Empty() bool {
func (cm compoundMap) findLeaf(key Value) (*sequenceCursor, mapLeaf) {
cursor, leaf := newMetaSequenceCursor(cm, cm.cs)
var seekFn sequenceCursorSeekCompareFn
var seekFn sequenceCursorSeekBinaryCompareFn
if orderedSequenceByIndexedType(cm.t) {
orderedKey := key.(OrderedValue)
seekFn = func(carry interface{}, mt sequenceItem) bool {
seekFn = func(mt sequenceItem) bool {
return !mt.(metaTuple).value.(OrderedValue).Less(orderedKey)
}
} else {
seekFn = func(carry interface{}, mt sequenceItem) bool {
seekFn = func(mt sequenceItem) bool {
return !mt.(metaTuple).value.(Ref).TargetRef().Less(key.Ref())
}
}
cursor.seek(seekFn, nil, nil)
cursor.seekBinary(seekFn)
current := cursor.current().(metaTuple)
if current.ref != leaf.Ref() {

View File

@@ -71,20 +71,20 @@ func (cs compoundSet) Filter(cb setFilterCallback) Set {
func (cs compoundSet) findLeaf(key Value) (*sequenceCursor, setLeaf) {
cursor, leaf := newMetaSequenceCursor(cs, cs.cs)
var seekFn sequenceCursorSeekCompareFn
var seekFn sequenceCursorSeekBinaryCompareFn
if orderedSequenceByIndexedType(cs.t) {
orderedKey := key.(OrderedValue)
seekFn = func(carry interface{}, mt sequenceItem) bool {
seekFn = func(mt sequenceItem) bool {
return !mt.(metaTuple).value.(OrderedValue).Less(orderedKey)
}
} else {
seekFn = func(carry interface{}, mt sequenceItem) bool {
seekFn = func(mt sequenceItem) bool {
return !mt.(metaTuple).value.(Ref).TargetRef().Less(key.Ref())
}
}
cursor.seek(seekFn, nil, nil)
cursor.seekBinary(seekFn)
current := cursor.current().(metaTuple)
if current.ref != leaf.Ref() {

View File

@@ -21,7 +21,6 @@ type metaSequence interface {
data() metaSequenceData
tupleAt(idx int) metaTuple
tupleSlice(to int) []metaTuple
lastTuple() metaTuple
tupleCount() int
}
@@ -36,6 +35,13 @@ func (mt metaTuple) uint64Value() uint64 {
type metaSequenceData []metaTuple
func (msd metaSequenceData) uint64ValuesSum() (sum uint64) {
for _, mt := range msd {
sum += mt.uint64Value()
}
return
}
func (msd metaSequenceData) last() metaTuple {
return msd[len(msd)-1]
}
@@ -61,10 +67,6 @@ func (ms metaSequenceObject) data() metaSequenceData {
return ms.tuples
}
func (ms metaSequenceObject) lastTuple() metaTuple {
return ms.tuples.last()
}
func (ms metaSequenceObject) ChildValues() []Value {
leafType := ms.t.Desc.(CompoundDesc).ElemTypes[0]
refOfLeafType := MakeCompoundType(RefKind, leafType)
@@ -121,30 +123,16 @@ func newOrderedMetaSequenceBoundaryChecker() boundaryChecker {
func newMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
return func(items []sequenceItem) (sequenceItem, Value) {
tuples := make(metaSequenceData, len(items))
offsetSum := uint64(0)
for i, v := range items {
mt := v.(metaTuple)
offsetSum += mt.uint64Value()
tuples[i] = metaTuple{mt.ref, Uint64(offsetSum)}
tuples[i] = v.(metaTuple)
}
meta := newMetaSequenceFromData(tuples, t, cs)
ref := WriteValue(meta, cs)
return metaTuple{ref, Uint64(offsetSum)}, meta
return metaTuple{ref, Uint64(tuples.uint64ValuesSum())}, meta
}
}
func normalizeMetaSequenceChunk(in []sequenceItem) (out []sequenceItem) {
offset := uint64(0)
for _, v := range in {
mt := v.(metaTuple)
out = append(out, metaTuple{mt.ref, Uint64(mt.uint64Value() - offset)})
offset = mt.uint64Value()
}
return
}
// Creates a sequenceCursor pointing to the first metaTuple in a metaSequence, and returns that cursor plus the leaf Value referenced from that metaTuple.
func newMetaSequenceCursor(root metaSequence, cs chunks.ChunkStore) (*sequenceCursor, Value) {
d.Chk.NotNil(root)

View File

@@ -33,21 +33,21 @@ func TestMeta(t *testing.T) {
mtr := l0.Type()
m0 := compoundList{metaSequenceObject{metaSequenceData{{lr0, Uint64(1)}, {lr1, Uint64(2)}}, mtr}, &ref.Ref{}, cs}
m0 := compoundList{metaSequenceObject{metaSequenceData{{lr0, Uint64(1)}, {lr1, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
lm0 := WriteValue(m0, cs)
m1 := compoundList{metaSequenceObject{metaSequenceData{{lr2, Uint64(1)}, {lr3, Uint64(2)}}, mtr}, &ref.Ref{}, cs}
m1 := compoundList{metaSequenceObject{metaSequenceData{{lr2, Uint64(1)}, {lr3, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
lm1 := WriteValue(m1, cs)
m2 := compoundList{metaSequenceObject{metaSequenceData{{lr4, Uint64(1)}, {lr5, Uint64(2)}}, mtr}, &ref.Ref{}, cs}
m2 := compoundList{metaSequenceObject{metaSequenceData{{lr4, Uint64(1)}, {lr5, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
lm2 := WriteValue(m2, cs)
m3 := compoundList{metaSequenceObject{metaSequenceData{{lr6, Uint64(1)}, {lr7, Uint64(2)}}, mtr}, &ref.Ref{}, cs}
m3 := compoundList{metaSequenceObject{metaSequenceData{{lr6, Uint64(1)}, {lr7, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
lm3 := WriteValue(m3, cs)
m00 := compoundList{metaSequenceObject{metaSequenceData{{lm0, Uint64(2)}, {lm1, Uint64(4)}}, mtr}, &ref.Ref{}, cs}
m00 := compoundList{metaSequenceObject{metaSequenceData{{lm0, Uint64(2)}, {lm1, Uint64(4)}}, mtr}, 0, &ref.Ref{}, cs}
lm00 := WriteValue(m00, cs)
m01 := compoundList{metaSequenceObject{metaSequenceData{{lm2, Uint64(2)}, {lm3, Uint64(4)}}, mtr}, &ref.Ref{}, cs}
m01 := compoundList{metaSequenceObject{metaSequenceData{{lm2, Uint64(2)}, {lm3, Uint64(4)}}, mtr}, 0, &ref.Ref{}, cs}
lm01 := WriteValue(m01, cs)
rootList := compoundList{metaSequenceObject{metaSequenceData{{lm00, Uint64(4)}, {lm01, Uint64(8)}}, mtr}, &ref.Ref{}, cs}
rootList := compoundList{metaSequenceObject{metaSequenceData{{lm00, Uint64(4)}, {lm01, Uint64(8)}}, mtr}, 0, &ref.Ref{}, cs}
rootRef := WriteValue(rootList, cs)
rootList = ReadValue(rootRef, cs).(compoundList)

View File

@@ -18,7 +18,6 @@ type sequenceChunker struct {
parent *sequenceChunker
current, pendingFirst []sequenceItem
makeChunk, parentMakeChunk makeChunkFn
nzeChunk, parentNzeChunk normalizeChunkFn
boundaryChk boundaryChecker
newBoundaryChecker newBoundaryCheckerFn
}
@@ -26,22 +25,13 @@ type sequenceChunker struct {
// makeChunkFn takes a sequence of items to chunk, and returns the result of chunking those items, a tuple of a reference to that chunk which can itself be chunked + its underlying value.
type makeChunkFn func(values []sequenceItem) (sequenceItem, Value)
// normalizeChunkFn takes a sequence of existing items, and returns a sequence equivalent as though it had never gone through the chunking progress.
type normalizeChunkFn func(values []sequenceItem) []sequenceItem
func normalizeChunkNoop(si []sequenceItem) []sequenceItem {
return si
}
func newEmptySequenceChunker(makeChunk, parentMakeChunk makeChunkFn, boundaryChk boundaryChecker, newBoundaryChecker newBoundaryCheckerFn) *sequenceChunker {
return newSequenceChunker(nil, makeChunk, parentMakeChunk, normalizeChunkNoop, normalizeChunkNoop, boundaryChk, newBoundaryChecker)
return newSequenceChunker(nil, makeChunk, parentMakeChunk, boundaryChk, newBoundaryChecker)
}
func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChunkFn, nzeChunk, parentNzeChunk normalizeChunkFn, boundaryChk boundaryChecker, newBoundaryChecker newBoundaryCheckerFn) *sequenceChunker {
func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChunkFn, boundaryChk boundaryChecker, newBoundaryChecker newBoundaryCheckerFn) *sequenceChunker {
d.Chk.NotNil(makeChunk)
d.Chk.NotNil(parentMakeChunk)
d.Chk.NotNil(nzeChunk)
d.Chk.NotNil(parentNzeChunk)
d.Chk.NotNil(boundaryChk)
d.Chk.NotNil(newBoundaryChecker)
@@ -50,7 +40,6 @@ func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChun
nil,
[]sequenceItem{}, nil,
makeChunk, parentMakeChunk,
nzeChunk, parentNzeChunk,
boundaryChk,
newBoundaryChecker,
}
@@ -65,7 +54,7 @@ func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChun
boundaryChk.Write(item)
}
// Reconstruct this entire chunk.
seq.current = nzeChunk(cur.maxNPrevItems(cur.indexInChunk()))
seq.current = cur.maxNPrevItems(cur.indexInChunk())
}
return seq
@@ -91,7 +80,7 @@ func (seq *sequenceChunker) createParent() {
if seq.cur != nil && seq.cur.parent != nil {
curParent = seq.cur.parent.clone()
}
seq.parent = newSequenceChunker(curParent, seq.parentMakeChunk, seq.parentMakeChunk, seq.parentNzeChunk, seq.parentNzeChunk, seq.newBoundaryChecker(), seq.newBoundaryChecker)
seq.parent = newSequenceChunker(curParent, seq.parentMakeChunk, seq.parentMakeChunk, seq.newBoundaryChecker(), seq.newBoundaryChecker)
}
func (seq *sequenceChunker) commitPendingFirst() {

View File

@@ -97,37 +97,48 @@ func (cur *sequenceCursor) clone() *sequenceCursor {
return &sequenceCursor{parent, cur.item, cur.idx, cur.length, cur.getItem, cur.readChunk}
}
type sequenceCursorSeekCompareFn func(carry interface{}, item sequenceItem) bool
type sequenceCursorSeekBinaryCompareFn func(item sequenceItem) bool
type sequenceCursorSeekStepFn func(carry interface{}, prev, current sequenceItem) interface{}
// Seeks the cursor to the first position in the sequence where |compare| returns true. During seeking, the caller can build up an arbitrary carry value, passed to |compare| and |step|. The carry value is initialized as |carry|, but will be replaced with the return value of |step|.
func (cur *sequenceCursor) seek(compare sequenceCursorSeekCompareFn, step sequenceCursorSeekStepFn, carry interface{}) interface{} {
// seekBinary seeks the cursor to the first position in the sequence where |compare| returns true. This uses a binary search, so the cursor items must be sorted relative to |compare|. seekBinary will not seek past the end of the cursor.
func (cur *sequenceCursor) seekBinary(compare sequenceCursorSeekBinaryCompareFn) {
d.Chk.NotNil(compare)
if cur.parent != nil {
carry = cur.parent.seek(compare, step, carry)
cur.parent.seekBinary(compare)
cur.item, cur.length = cur.readChunk(cur.parent.current())
}
cur.idx = sort.Search(cur.length, func(i int) bool {
return compare(carry, cur.getItem(cur.item, i))
return compare(cur.getItem(cur.item, i))
})
if cur.idx == cur.length {
cur.idx = cur.length - 1
}
}
var prev sequenceItem
if cur.idx > 0 {
prev = cur.getItem(cur.item, cur.idx-1)
type sequenceCursorSeekLinearStepFn func(carryIn interface{}, item sequenceItem) (found bool, carryOut interface{})
// seekLinear seeks the cursor to the first position in the sequence where |step| returns true. This uses a linear search, so there is no ordering restriction. The carry value is initialized as |carry|, but will be replaced with the return value of successive calls to |step|, including when |step| is called on ancestor cursors. The return value is the carry value when seeking stopped. seekLinear will not seek past the end of the cursor.
func (cur *sequenceCursor) seekLinear(step sequenceCursorSeekLinearStepFn, carry interface{}) interface{} {
d.Chk.NotNil(step)
if cur.parent != nil {
carry = cur.parent.seekLinear(step, carry)
cur.item, cur.length = cur.readChunk(cur.parent.current())
}
if step == nil {
return nil
cur.idx = 0
for i := 0; i < cur.length-1; i++ {
found, carryOut := step(carry, cur.getItem(cur.item, i))
if found {
break
}
carry = carryOut
cur.idx++
}
return step(carry, prev, cur.getItem(cur.item, cur.idx))
return carry
}
// Returns a slice of the previous |n| items in |cur|, excluding the current item in |cur|. Does not modify |cur|.

View File

@@ -20,6 +20,27 @@ func newTestSequenceCursor(items [][]int) *sequenceCursor {
}}
}
// TODO: Convert all tests to use newTestSequenceCursor3.
func newTestSequenceCursor3(items [][][]int) *sequenceCursor {
top := &sequenceCursor{nil, items, 0, len(items), func(item sequenceItem, idx int) sequenceItem {
return item.([][][]int)[idx] // item should be == items
}, func(item sequenceItem) (sequenceItem, int) {
panic("not reachable")
}}
middle := &sequenceCursor{top, items[0], 0, len(items[0]), func(item sequenceItem, idx int) sequenceItem {
return item.([][]int)[idx]
}, func(item sequenceItem) (sequenceItem, int) {
return item, len(item.([][]int))
}}
return &sequenceCursor{middle, items[0][0], 0, len(items[0][0]), func(item sequenceItem, idx int) sequenceItem {
return item.([]int)[idx]
}, func(item sequenceItem) (sequenceItem, int) {
return item, len(item.([]int))
}}
}
func TestTestCursor(t *testing.T) {
assert := assert.New(t)
@@ -206,7 +227,7 @@ func TestCursorGetMaxNPrevItemsWithMultiItemSequence(t *testing.T) {
assert.Equal([]sequenceItem{100, 101, 102, 103, 104, 105, 106, 107}, cur.maxNPrevItems(9))
}
func TestCursorSeek(t *testing.T) {
func TestCursorSeekBinary(t *testing.T) {
assert := assert.New(t)
var cur *sequenceCursor
@@ -218,28 +239,16 @@ func TestCursorSeek(t *testing.T) {
}
assertSeeksTo := func(expected sequenceItem, seekTo int) {
// The value being carried around here is the level of the tree being seeked in. The seek is initialized with 0, so carry value passed to the comparison function on the first level should be 0. Subsequent steps increment this number, so 1 should be passed into the comparison function for the second level. When the seek exits, the final step should increment it again, so the result should be 2.
result := cur.seek(func(carry interface{}, val sequenceItem) bool {
cur.seekBinary(func(val sequenceItem) bool {
switch val := val.(type) {
case []int:
assert.Equal(0, carry)
return val[len(val)-1] >= seekTo
case int:
assert.Equal(1, carry)
return val >= seekTo
default:
panic("illegal")
}
}, func(carry interface{}, prev, current sequenceItem) interface{} {
switch current.(type) {
case []int:
assert.Equal(0, carry)
case int:
assert.Equal(1, carry)
}
return carry.(int) + 1
}, 0)
assert.Equal(2, result)
})
assert.Equal(expected, cur.current())
}
@@ -266,3 +275,91 @@ func TestCursorSeek(t *testing.T) {
}
assertSeeksTo(sequenceItem(107), 108)
}
func TestCursorSeekLinear(t *testing.T) {
assert := assert.New(t)
var cur *sequenceCursor
assertSeeksTo := func(reset bool, expectedPos sequenceItem, expectedSumUpto, seekTo int) {
if reset {
cur = newTestSequenceCursor3(
[][][]int{
[][]int{
[]int{100, 101, 102, 103},
[]int{104, 105, 106, 107},
},
[][]int{
[]int{108, 109, 110, 111},
[]int{112, 113, 114, 115},
},
},
)
}
sumUpto := cur.seekLinear(func(carry interface{}, item sequenceItem) (bool, interface{}) {
switch item := item.(type) {
case [][]int:
last := item[len(item)-1]
return seekTo <= last[len(last)-1], carry
case []int:
return seekTo <= item[len(item)-1], carry
case int:
return seekTo <= item, item + carry.(int)
}
panic("illegal")
}, 0)
pos, _ := cur.maybeCurrent()
assert.Equal(expectedPos, pos)
assert.Equal(expectedSumUpto, sumUpto)
}
// Test seeking immediately to values on cursor construction.
assertSeeksTo(true, sequenceItem(100), 0, 99)
assertSeeksTo(true, sequenceItem(100), 0, 100)
assertSeeksTo(true, sequenceItem(101), 100, 101)
assertSeeksTo(true, sequenceItem(102), 201, 102)
assertSeeksTo(true, sequenceItem(103), 303, 103)
assertSeeksTo(true, sequenceItem(104), 0, 104)
assertSeeksTo(true, sequenceItem(105), 104, 105)
assertSeeksTo(true, sequenceItem(106), 209, 106)
assertSeeksTo(true, sequenceItem(107), 315, 107)
assertSeeksTo(true, sequenceItem(108), 0, 108)
assertSeeksTo(true, sequenceItem(109), 108, 109)
assertSeeksTo(true, sequenceItem(110), 217, 110)
assertSeeksTo(true, sequenceItem(111), 327, 111)
assertSeeksTo(true, sequenceItem(112), 0, 112)
assertSeeksTo(true, sequenceItem(113), 112, 113)
assertSeeksTo(true, sequenceItem(114), 225, 114)
assertSeeksTo(true, sequenceItem(115), 339, 115)
assertSeeksTo(true, sequenceItem(115), 339, 116)
// Test reusing an existing cursor to seek all over the place.
assertSeeksTo(false, sequenceItem(100), 0, 99)
assertSeeksTo(false, sequenceItem(100), 0, 100)
assertSeeksTo(false, sequenceItem(101), 100, 101)
assertSeeksTo(false, sequenceItem(102), 201, 102)
assertSeeksTo(false, sequenceItem(103), 303, 103)
assertSeeksTo(false, sequenceItem(104), 0, 104)
assertSeeksTo(false, sequenceItem(105), 104, 105)
assertSeeksTo(false, sequenceItem(106), 209, 106)
assertSeeksTo(false, sequenceItem(107), 315, 107)
assertSeeksTo(false, sequenceItem(108), 0, 108)
assertSeeksTo(false, sequenceItem(109), 108, 109)
assertSeeksTo(false, sequenceItem(110), 217, 110)
assertSeeksTo(false, sequenceItem(111), 327, 111)
assertSeeksTo(false, sequenceItem(112), 0, 112)
assertSeeksTo(false, sequenceItem(113), 112, 113)
assertSeeksTo(false, sequenceItem(114), 225, 114)
assertSeeksTo(false, sequenceItem(115), 339, 115)
assertSeeksTo(false, sequenceItem(115), 339, 116)
}