Fix sequence chunker bug triggered by repeatedly removing last element (#1844)

Fix sequence chunker bug triggered by repeatedly removing last element

The bug is that we sometimes create a prollytree whose root meta sequence
node has only a single item. This is never the canonical representation
of a prollytree.

I reworked the sequence chunker to take a different approach to corner
cases. Instead of trying to be smart and avoid this case up front (which
clearly didn't work properly), it is now more liberal about creating
unnecessary nodes, and then fixes them up in the finalisation step.
This commit is contained in:
Ben Kalman
2016-06-20 18:35:46 -07:00
committed by GitHub
parent afaec8a6ad
commit 1b9ea570ae
16 changed files with 350 additions and 221 deletions
+6 -4
View File
@@ -149,14 +149,16 @@ func newBlobLeafBoundaryChecker() boundaryChecker {
}
func newBlobLeafChunkFn(vr ValueReader, sink ValueWriter) makeChunkFn {
return func(items []sequenceItem) (metaTuple, Collection) {
return func(items []sequenceItem) (metaTuple, sequence) {
buff := make([]byte, len(items))
for i, v := range items {
buff[i] = v.(byte)
}
blob := newBlob(newBlobLeafSequence(vr, buff))
seq := newBlobLeafSequence(vr, buff)
blob := newBlob(seq)
var ref Ref
var child Collection
if sink != nil {
@@ -168,7 +170,7 @@ func newBlobLeafChunkFn(vr ValueReader, sink ValueWriter) makeChunkFn {
child = blob
}
return newMetaTuple(ref, Number(len(buff)), uint64(len(buff)), child), blob
return newMetaTuple(ref, Number(len(buff)), uint64(len(buff)), child), seq
}
}
@@ -189,6 +191,6 @@ func NewStreamingBlob(r io.Reader, vrw ValueReadWriter) Blob {
break
}
}
return seq.Done().(Blob)
return newBlob(seq.Done().(indexedSequence))
}
+6 -5
View File
@@ -110,7 +110,7 @@ func newIndexedMetaSequenceBoundaryChecker() boundaryChecker {
// If |sink| is not nil, chunks will be eagerly written as they're created. Otherwise they are
// written when the root is written.
func newIndexedMetaSequenceChunkFn(kind NomsKind, source ValueReader, sink ValueWriter) makeChunkFn {
return func(items []sequenceItem) (metaTuple, Collection) {
return func(items []sequenceItem) (metaTuple, sequence) {
tuples := make(metaSequenceData, len(items))
numLeaves := uint64(0)
@@ -121,17 +121,18 @@ func newIndexedMetaSequenceChunkFn(kind NomsKind, source ValueReader, sink Value
}
var col Collection
var metaSeq indexedMetaSequence
if kind == ListKind {
metaSeq := newListMetaSequence(tuples, source)
metaSeq = newListMetaSequence(tuples, source)
col = newList(metaSeq)
} else {
d.Chk.True(BlobKind == kind)
metaSeq := newBlobMetaSequence(tuples, source)
metaSeq = newBlobMetaSequence(tuples, source)
col = newBlob(metaSeq)
}
if sink != nil {
return newMetaTuple(sink.WriteValue(col), Number(tuples.uint64ValuesSum()), numLeaves, nil), col
return newMetaTuple(sink.WriteValue(col), Number(tuples.uint64ValuesSum()), numLeaves, nil), metaSeq
}
return newMetaTuple(NewRef(col), Number(tuples.uint64ValuesSum()), numLeaves, col), col
return newMetaTuple(NewRef(col), Number(tuples.uint64ValuesSum()), numLeaves, col), metaSeq
}
}
+8 -6
View File
@@ -32,7 +32,7 @@ func NewList(values ...Value) List {
for _, v := range values {
seq.Append(v)
}
return seq.Done().(List)
return newList(seq.Done().(indexedSequence))
}
// NewStreamingList creates a new List with type t, populated with values, chunking if and when needed. As chunks are created, they're written to vrw -- including the root chunk of the list. Once the caller has closed values, she can read the completed List from the returned channel.
@@ -43,7 +43,7 @@ func NewStreamingList(vrw ValueReadWriter, values <-chan Value) <-chan List {
for v := range values {
seq.Append(v)
}
out <- seq.Done().(List)
out <- newList(seq.Done().(indexedSequence))
close(out)
}()
return out
@@ -151,7 +151,7 @@ func (l List) Splice(idx uint64, deleteCount uint64, vs ...Value) List {
for _, v := range vs {
ch.Append(v)
}
return ch.Done().(List)
return newList(ch.Done().(indexedSequence))
}
func (l List) Insert(idx uint64, vs ...Value) List {
@@ -159,6 +159,7 @@ func (l List) Insert(idx uint64, vs ...Value) List {
}
// Remove returns the List produced by splicing end-start elements out of l,
// beginning at index start. It asserts that start does not exceed end.
func (l List) Remove(start uint64, end uint64) List {
	d.Chk.True(start <= end)
	deleteCount := end - start
	return l.Splice(start, deleteCount)
}
@@ -222,14 +223,15 @@ func newListLeafBoundaryChecker() boundaryChecker {
// If |sink| is not nil, chunks will be eagerly written as they're created. Otherwise they are
// written when the root is written.
func makeListLeafChunkFn(vr ValueReader, sink ValueWriter) makeChunkFn {
return func(items []sequenceItem) (metaTuple, Collection) {
return func(items []sequenceItem) (metaTuple, sequence) {
values := make([]Value, len(items))
for i, v := range items {
values[i] = v.(Value)
}
list := newList(newListLeafSequence(vr, values...))
seq := newListLeafSequence(vr, values...)
list := newList(seq)
var ref Ref
var child Collection
@@ -242,6 +244,6 @@ func makeListLeafChunkFn(vr ValueReader, sink ValueWriter) makeChunkFn {
child = list
}
return newMetaTuple(ref, Number(len(values)), uint64(len(values)), child), list
return newMetaTuple(ref, Number(len(values)), uint64(len(values)), child), seq
}
}
+20 -1
View File
@@ -208,7 +208,10 @@ func getTestListLen() uint64 {
}
func getTestList() testList {
length := int(getTestListLen())
return getTestListWithLen(int(getTestListLen()))
}
func getTestListWithLen(length int) testList {
s := rand.NewSource(42)
values := make([]Value, length)
for i := 0; i < length; i++ {
@@ -541,6 +544,22 @@ func TestListRemoveRanges(t *testing.T) {
}
}
// TestListRemoveAtEnd repeatedly removes the final element of a list and, after
// every removal, checks that the result equals a list built directly from the
// remaining prefix of the original values.
func TestListRemoveAtEnd(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping test in short mode.")
	}
	assert := assert.New(t)

	source := getTestListWithLen(testListSize / 10)
	actual := source.toList()
	// |remaining| is the number of elements still in |actual| before each removal.
	for remaining := len(source); remaining > 0; remaining-- {
		last := remaining - 1
		actual = actual.Remove(uint64(last), uint64(remaining))
		expected := source[0:last].toList()
		assert.True(expected.Equals(actual))
	}
}
func TestListSet(t *testing.T) {
if testing.Short() {
t.Skip("Skipping test in short mode.")
+6 -5
View File
@@ -34,7 +34,7 @@ func NewMap(kv ...Value) Map {
seq.Append(entry)
}
return seq.Done().(Map)
return newMap(seq.Done().(orderedSequence))
}
func (m Map) Diff(last Map) (added []Value, removed []Value, modified []Value) {
@@ -149,7 +149,7 @@ func (m Map) splice(cur *sequenceCursor, deleteCount uint64, vs ...mapEntry) Map
for _, v := range vs {
ch.Append(v)
}
return ch.Done().(Map)
return newMap(ch.Done().(orderedSequence))
}
func (m Map) getCursorAtValue(v Value) (cur *sequenceCursor, found bool) {
@@ -234,14 +234,15 @@ func newMapLeafBoundaryChecker() boundaryChecker {
}
func makeMapLeafChunkFn(vr ValueReader) makeChunkFn {
return func(items []sequenceItem) (metaTuple, Collection) {
return func(items []sequenceItem) (metaTuple, sequence) {
mapData := make([]mapEntry, len(items), len(items))
for i, v := range items {
mapData[i] = v.(mapEntry)
}
m := newMap(newMapLeafSequence(vr, mapData...))
seq := newMapLeafSequence(vr, mapData...)
m := newMap(seq)
var indexValue Value
if len(mapData) > 0 {
@@ -251,6 +252,6 @@ func makeMapLeafChunkFn(vr ValueReader) makeChunkFn {
}
}
return newMetaTuple(NewRef(m), indexValue, uint64(len(items)), m), m
return newMetaTuple(NewRef(m), indexValue, uint64(len(items)), m), seq
}
}
+5 -4
View File
@@ -146,7 +146,7 @@ func newOrderedMetaSequenceBoundaryChecker() boundaryChecker {
}
func newOrderedMetaSequenceChunkFn(kind NomsKind, vr ValueReader) makeChunkFn {
return func(items []sequenceItem) (metaTuple, Collection) {
return func(items []sequenceItem) (metaTuple, sequence) {
tuples := make(metaSequenceData, len(items))
numLeaves := uint64(0)
@@ -156,16 +156,17 @@ func newOrderedMetaSequenceChunkFn(kind NomsKind, vr ValueReader) makeChunkFn {
numLeaves += mt.numLeaves
}
var metaSeq orderedMetaSequence
var col Collection
if kind == SetKind {
metaSeq := newSetMetaSequence(tuples, vr)
metaSeq = newSetMetaSequence(tuples, vr)
col = newSet(metaSeq)
} else {
d.Chk.True(MapKind == kind)
metaSeq := newMapMetaSequence(tuples, vr)
metaSeq = newMapMetaSequence(tuples, vr)
col = newMap(metaSeq)
}
return newMetaTuple(NewRef(col), tuples.last().value, numLeaves, col), col
return newMetaTuple(NewRef(col), tuples.last().value, numLeaves, col), metaSeq
}
}
+111 -94
View File
@@ -17,17 +17,17 @@ type newBoundaryCheckerFn func() boundaryChecker
type sequenceChunker struct {
cur *sequenceCursor
isOnChunkBoundary bool
parent *sequenceChunker
current []sequenceItem
lastSeq sequence
makeChunk, parentMakeChunk makeChunkFn
boundaryChk boundaryChecker
newBoundaryChecker newBoundaryCheckerFn
used bool
done bool
}
// makeChunkFn takes a sequence of items to chunk, and returns the result of chunking those items: a metaTuple referencing the new chunk (which can itself be chunked by a parent), plus the chunk's underlying sequence.
type makeChunkFn func(values []sequenceItem) (metaTuple, Collection)
type makeChunkFn func(values []sequenceItem) (metaTuple, sequence)
func newEmptySequenceChunker(makeChunk, parentMakeChunk makeChunkFn, boundaryChk boundaryChecker, newBoundaryChecker newBoundaryCheckerFn) *sequenceChunker {
return newSequenceChunker(nil, makeChunk, parentMakeChunk, boundaryChk, newBoundaryChecker)
@@ -40,11 +40,11 @@ func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChun
d.Chk.True(boundaryChk != nil)
d.Chk.True(newBoundaryChecker != nil)
seq := &sequenceChunker{
sc := &sequenceChunker{
cur,
false,
nil,
[]sequenceItem{},
nil,
makeChunk, parentMakeChunk,
boundaryChk,
newBoundaryChecker,
@@ -52,9 +52,8 @@ func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChun
}
if cur != nil {
// Eagerly create a chunker for each level of the existing tree, but note that we may not necessarily need them all, since chunk boundaries may change such that the tree ends up shallower. The |seq.used| flag accounts for that case.
if cur.parent != nil {
seq.createParent()
sc.createParent()
}
// Number of previous items which must be hashed into the boundary checker.
@@ -81,7 +80,7 @@ func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChun
backIdx := len(prev) - i
if appendPenultimate && backIdx == 1 {
// Test the penultimate item for a boundary.
seq.Append(item)
sc.Append(item)
continue
}
@@ -90,108 +89,126 @@ func newSequenceChunker(cur *sequenceCursor, makeChunk, parentMakeChunk makeChun
}
if backIdx <= primeCurrentCount {
seq.current = append(seq.current, item)
sc.current = append(sc.current, item)
}
}
}
seq.used = len(seq.current) > 0
return sc
}
// Append adds |item| to the items pending in the current chunk and writes it to
// the boundary checker. When the checker reports a boundary, the pending items
// are handed to handleChunkBoundary, which is allowed to create a parent chunker.
func (sc *sequenceChunker) Append(item sequenceItem) {
	d.Chk.True(item != nil)
	sc.current = append(sc.current, item)
	if atBoundary := sc.boundaryChk.Write(item); atBoundary {
		sc.handleChunkBoundary(true)
	}
}
// Skip advances the chunker's cursor by one item without appending that item.
func (sc *sequenceChunker) Skip() {
	if !sc.cur.advance() {
		return
	}
	if sc.cur.indexInChunk() != 0 {
		return
	}
	// Advancing moved our cursor into the next chunk. We need to advance our parent's cursor, so that when our parent writes out the remaining chunks it doesn't include the chunk that we skipped.
	sc.skipParentIfExists()
}
// skipParentIfExists calls Skip on the parent chunker, but only when there is
// a parent and that parent has a cursor to advance.
func (sc *sequenceChunker) skipParentIfExists() {
	if p := sc.parent; p != nil && p.cur != nil {
		p.Skip()
	}
}
// createParent builds the parent chunker for |sc|. It asserts that no parent
// exists yet. The parent uses parentMakeChunk for both its own chunks and its
// parent's, and gets a fresh boundary checker from newBoundaryChecker.
func (sc *sequenceChunker) createParent() {
	d.Chk.True(sc.parent == nil)

	var parentCur *sequenceCursor
	if cur := sc.cur; cur != nil && cur.parent != nil {
		// Clone the parent cursor because otherwise calling cur.advance() will affect our parent - and vice versa - in surprising ways. Instead, Skip moves forward our parent's cursor if we advance across a boundary.
		parentCur = cur.parent.clone()
	}

	sc.parent = newSequenceChunker(parentCur, sc.parentMakeChunk, sc.parentMakeChunk, sc.newBoundaryChecker(), sc.newBoundaryChecker)
}
// handleChunkBoundary turns the pending items into a chunk, records the
// resulting sequence in lastSeq, and clears the pending items. The chunk's
// metaTuple is appended to the parent chunker if there is one; when there is
// none, a parent is created first only if |createParentIfNil| is true.
// It asserts that there is at least one pending item.
func (sc *sequenceChunker) handleChunkBoundary(createParentIfNil bool) {
	d.Chk.NotEmpty(sc.current)

	chunk, seq := sc.makeChunk(sc.current)
	sc.current, sc.lastSeq = []sequenceItem{}, seq

	if createParentIfNil && sc.parent == nil {
		sc.createParent()
	}
	if sc.parent == nil {
		// No parent and not allowed to create one: the chunk is only remembered via lastSeq.
		return
	}
	sc.parent.Append(chunk)
}
// Done finishes chunking and returns the canonical sequence for this chunker's parent chain. It asserts that it has not been called before on this chunker.
//
// It works in three passes over the parent chain: finalize any chunkers that have a cursor, flush each chunker's pending items into a chunk, then select the sequence to return.
func (sc *sequenceChunker) Done() sequence {
d.Chk.False(sc.done)
sc.done = true
// Finalize every chunker in the chain that has a cursor.
for s := sc; s != nil; s = s.parent {
if s.cur != nil {
s.finalizeCursor()
}
}
// Chunkers will probably have current items which didn't hit a chunk boundary. Pretend they end on chunk boundaries for now.
for s := sc; s != nil; s = s.parent {
if len(s.current) > 0 {
// Don't create a new parent if we haven't chunked.
s.handleChunkBoundary(s.lastSeq != nil)
}
}
// The rest of this code figures out which sequence in the parent chain is canonical. That is:
// * It's empty, or
// * It never chunked, so it's not a prollytree, or
// * It chunked, so it's a prollytree, but it must have at least 2 children (or it could have been represented as that 1 child).
//
// Examples of when we may have constructed non-canonical sequences:
// * If the previous tree (i.e. its cursor) was deeper, we will have created empty parents.
// * If the last appended item was on a chunk boundary, there may be a sequence with a single chunk.
// Firstly, follow up the parent chain to find the highest chunker which did chunk.
var seq sequence
for s := sc; s != nil; s = s.parent {
if s.lastSeq != nil {
seq = s.lastSeq
}
}
// No chunker in the chain produced a chunk, so build the result from an empty item slice.
if seq == nil {
_, seq = sc.makeChunk([]sequenceItem{})
return seq
}
// Lastly, step back down to find a meta sequence with more than 1 child.
for seq.seqLen() <= 1 {
d.Chk.NotEqual(0, seq.seqLen())
ms, ok := seq.(metaSequence)
if !ok {
// Not a meta sequence, so there is no child to descend into; return it as is.
break
}
seq = ms.getChildSequence(0)
}
return seq
}
func (seq *sequenceChunker) Append(item sequenceItem) {
d.Chk.True(item != nil)
// Check |isOnChunkBoundary| immediately, because it's effectively a continuation from the last call to Append. Specifically, this happens when the last call to Append created the first chunk boundary, which delayed creating the parent until absolutely necessary. Otherwise, we will be in a state where a parent has only a single item, which is invalid.
if seq.isOnChunkBoundary {
seq.createParent()
seq.handleChunkBoundary()
seq.isOnChunkBoundary = false
}
seq.current = append(seq.current, item)
seq.used = true
if seq.boundaryChk.Write(item) {
seq.handleChunkBoundary()
}
}
func (seq *sequenceChunker) Skip() {
if seq.cur.advance() && seq.cur.indexInChunk() == 0 {
// Advancing moved our cursor into the next chunk. We need to advance our parent's cursor, so that when our parent writes out the remaining chunks it doesn't include the chunk that we skipped.
seq.skipParentIfExists()
}
}
func (seq *sequenceChunker) skipParentIfExists() {
if seq.parent != nil && seq.parent.cur != nil {
seq.parent.Skip()
}
}
func (seq *sequenceChunker) createParent() {
d.Chk.True(seq.parent == nil)
var parent *sequenceCursor
if seq.cur != nil && seq.cur.parent != nil {
// Clone the parent cursor because otherwise calling cur.advance() will affect our parent - and vice versa - in surprising ways. Instead, Skip moves forward our parent's cursor if we advance across a boundary.
parent = seq.cur.parent.clone()
}
seq.parent = newSequenceChunker(parent, seq.parentMakeChunk, seq.parentMakeChunk, seq.newBoundaryChecker(), seq.newBoundaryChecker)
}
func (seq *sequenceChunker) handleChunkBoundary() {
d.Chk.NotEmpty(seq.current)
if seq.parent == nil {
// Wait until there is a parent.
d.Chk.False(seq.isOnChunkBoundary)
seq.isOnChunkBoundary = true
} else {
chunk, _ := seq.makeChunk(seq.current)
seq.parent.Append(chunk)
seq.current = []sequenceItem{}
}
}
func (seq *sequenceChunker) Done() Value {
if seq.cur != nil {
seq.finalizeCursor()
}
if seq.isRoot() {
_, done := seq.makeChunk(seq.current)
d.Chk.True(done != nil)
return done
}
if len(seq.current) > 0 {
seq.handleChunkBoundary()
}
return seq.parent.Done()
}
func (seq *sequenceChunker) isRoot() bool {
for ancstr := seq.parent; ancstr != nil; ancstr = ancstr.parent {
if ancstr.used {
return false
}
}
return true
}
func (seq *sequenceChunker) finalizeCursor() {
if !seq.cur.valid() {
func (sc *sequenceChunker) finalizeCursor() {
if !sc.cur.valid() {
// The cursor is past the end, and due to the way cursors work, the parent cursor will actually point to its last chunk. We need to force it to point past the end so that our parent's Done() method doesn't add the last chunk twice.
seq.skipParentIfExists()
sc.skipParentIfExists()
return
}
// Append the rest of the values in the sequence, up to the window size, plus the rest of that chunk. It needs to be the full window size because anything that was appended/skipped between chunker construction and finalization will have changed the hash state.
fzr := seq.cur.clone()
for i := 0; i < seq.boundaryChk.WindowSize() || fzr.indexInChunk() > 0; i++ {
fzr := sc.cur.clone()
for i := 0; i < sc.boundaryChk.WindowSize() || fzr.indexInChunk() > 0; i++ {
if i == 0 || fzr.indexInChunk() == 0 {
// Every time we step into a chunk from the original sequence, that chunk will no longer exist in the new sequence. The parent must be instructed to skip it.
seq.skipParentIfExists()
sc.skipParentIfExists()
}
seq.Append(fzr.current())
sc.Append(fzr.current())
if !fzr.advance() {
break
}
+6 -5
View File
@@ -34,7 +34,7 @@ func NewSet(v ...Value) Set {
seq.Append(v)
}
return seq.Done().(Set)
return newSet(seq.Done().(orderedSequence))
}
func (s Set) Diff(last Set) (added []Value, removed []Value) {
@@ -146,7 +146,7 @@ func (s Set) splice(cur *sequenceCursor, deleteCount uint64, vs ...Value) Set {
ch.Append(v)
}
ns := ch.Done().(Set)
ns := newSet(ch.Done().(orderedSequence))
return ns
}
@@ -211,14 +211,15 @@ func newSetLeafBoundaryChecker() boundaryChecker {
}
func makeSetLeafChunkFn(vr ValueReader) makeChunkFn {
return func(items []sequenceItem) (metaTuple, Collection) {
return func(items []sequenceItem) (metaTuple, sequence) {
setData := make([]Value, len(items), len(items))
for i, v := range items {
setData[i] = v.(Value)
}
set := newSet(newSetLeafSequence(vr, setData...))
seq := newSetLeafSequence(vr, setData...)
set := newSet(seq)
var indexValue Value
if len(setData) > 0 {
@@ -228,6 +229,6 @@ func makeSetLeafChunkFn(vr ValueReader) makeChunkFn {
}
}
return newMetaTuple(NewRef(set), indexValue, uint64(len(items)), set), set
return newMetaTuple(NewRef(set), indexValue, uint64(len(items)), set), seq
}
}
+2 -2
View File
@@ -156,7 +156,7 @@ function newBlobLeafChunkFn(vr: ?ValueReader, vw: ?ValueWriter): makeChunkFn {
} else {
mt = new MetaTuple(new Ref(blob), items.length, items.length, blob);
}
return [mt, blob];
return [mt, blobLeaf];
};
}
@@ -187,7 +187,7 @@ export class BlobWriter {
close() {
assert(this._state === 'writable');
this._blob = this._chunker.doneSync();
this._blob = Blob.fromSequence(this._chunker.doneSync());
this._state = 'closed';
}
+11
View File
@@ -203,6 +203,17 @@ suite('List', () => {
assert.strictEqual(listOfNRef, s.hash.toString());
});
// Repeatedly removes the last element of a list and checks after each removal
// that the result equals a list built from the remaining prefix.
test('LONG: remove at end', async() => {
  const values = intSequence(testListSize / 20);
  let list = new List(values);
  for (let end = values.length; end > 0; end--) {
    const last = end - 1;
    list = await list.remove(last, end);
    const expected = new List(values.slice(0, last));
    assert.isTrue(equals(expected, list));
  }
});
test('LONG: splice', async () => {
const nums = intSequence(testListSize);
let s = new List(nums);
+9 -7
View File
@@ -35,14 +35,15 @@ const listPattern = ((1 << 6) | 0) - 1;
function newListLeafChunkFn<T: Value>(vr: ?ValueReader, vw: ?ValueWriter): makeChunkFn {
return (items: Array<T>) => {
const list = List.fromSequence(newListLeafSequence(vr, items));
const seq = newListLeafSequence(vr, items);
const list = List.fromSequence(seq);
let mt;
if (vw) {
mt = new MetaTuple(vw.writeValue(list), items.length, items.length, null);
} else {
mt = new MetaTuple(new Ref(list), items.length, items.length, list);
}
return [mt, list];
return [mt, seq];
};
}
@@ -54,13 +55,14 @@ function newListLeafBoundaryChecker<T: Value>(): BoundaryChecker<T> {
export default class List<T: Value> extends Collection<IndexedSequence> {
constructor(values: Array<T> = []) {
const self = chunkSequenceSync(
const seq = chunkSequenceSync(
values,
newListLeafChunkFn(null, null),
newIndexedMetaSequenceChunkFn(Kind.List, null, null),
newListLeafBoundaryChecker(),
newIndexedMetaSequenceBoundaryChecker);
super(self.sequence);
invariant(seq instanceof IndexedSequence);
super(seq);
}
async get(idx: number): Promise<T> {
@@ -74,7 +76,7 @@ export default class List<T: Value> extends Collection<IndexedSequence> {
chunkSequence(cursor, insert, deleteCount, newListLeafChunkFn(vr, null),
newIndexedMetaSequenceChunkFn(Kind.List, vr, null),
newListLeafBoundaryChecker(),
newIndexedMetaSequenceBoundaryChecker));
newIndexedMetaSequenceBoundaryChecker)).then(s => List.fromSequence(s));
}
insert(idx: number, ...values: Array<T>): Promise<List<T>> {
@@ -175,7 +177,7 @@ type ListWriterState = 'writable' | 'closed';
export class ListWriter<T: Value> {
_state: ListWriterState;
_list: ?List<T>;
_chunker: SequenceChunker<List<T>, T, ListLeafSequence<T>>;
_chunker: SequenceChunker<T, ListLeafSequence<T>>;
constructor(vrw: ?ValueReadWriter) {
this._state = 'writable';
@@ -191,7 +193,7 @@ export class ListWriter<T: Value> {
close() {
assert(this._state === 'writable');
this._list = this._chunker.doneSync();
this._list = List.fromSequence(this._chunker.doneSync());
this._state = 'closed';
}
+8 -5
View File
@@ -4,6 +4,7 @@
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0
import {invariant} from './assert.js';
import BuzHashBoundaryChecker from './buzhash-boundary-checker.js';
import Ref from './ref.js';
import type {ValueReader} from './value-store.js';
@@ -44,9 +45,10 @@ function newMapLeafChunkFn<K: Value, V: Value>(vr: ?ValueReader):
}
}
const nm = Map.fromSequence(newMapLeafSequence(vr, items));
const seq = newMapLeafSequence(vr, items);
const nm = Map.fromSequence(seq);
const mt = new MetaTuple(new Ref(nm), indexValue, items.length, nm);
return [mt, nm];
return [mt, seq];
};
}
@@ -88,13 +90,14 @@ function buildMapData<K: Value, V: Value>(
export default class Map<K: Value, V: Value> extends
Collection<OrderedSequence> {
constructor(kvs: Array<MapEntry<K, V>> = []) {
const self = chunkSequenceSync(
const seq = chunkSequenceSync(
buildMapData(kvs),
newMapLeafChunkFn(null),
newOrderedMetaSequenceChunkFn(Kind.Map, null),
newMapLeafBoundaryChecker(),
newOrderedMetaSequenceBoundaryChecker);
super(self.sequence);
invariant(seq instanceof OrderedSequence);
super(seq);
}
async has(key: K): Promise<boolean> {
@@ -152,7 +155,7 @@ export default class Map<K: Value, V: Value> extends
return chunkSequence(cursor, insert, remove, newMapLeafChunkFn(vr),
newOrderedMetaSequenceChunkFn(Kind.Map, vr),
newMapLeafBoundaryChecker(),
newOrderedMetaSequenceBoundaryChecker);
newOrderedMetaSequenceBoundaryChecker).then(s => Map.fromSequence(s));
}
async set(key: K, value: V): Promise<Map<K, V>> {
+34 -6
View File
@@ -48,6 +48,10 @@ export class MetaTuple<K> {
return c.sequence;
});
}
getSequenceSync(): Sequence {
return notNull(this.child).sequence;
}
}
// The elemTypes of the collection inside the Ref<Collection<?, ?>>
@@ -125,6 +129,15 @@ export class IndexedMetaSequence extends IndexedSequence<MetaTuple<number>> {
return mt.getSequence(this.vr);
}
getChildSequenceSync(idx: number): ?Sequence {
if (!this.isMeta) {
return null;
}
const mt = this.items[idx];
return mt.getSequenceSync();
}
// Returns the sequences pointed to by all items[i], s.t. start <= i < end, and returns the
// concatentation as one long composite sequence
getCompositeChildSequence(start: number, length: number):
@@ -195,6 +208,15 @@ export class OrderedMetaSequence<K: Value> extends OrderedSequence<K, MetaTuple<
return mt.getSequence(this.vr);
}
getChildSequenceSync(idx: number): ?Sequence {
if (!this.isMeta) {
return null;
}
const mt = this.items[idx];
return mt.getSequenceSync();
}
getKey(idx: number): K {
return this.items[idx].value;
}
@@ -209,14 +231,17 @@ export function newOrderedMetaSequenceChunkFn(kind: NomsKind, vr: ?ValueReader):
return (tuples: Array<MetaTuple>) => {
const numLeaves = tuples.reduce((l, mt) => l + mt.numLeaves, 0);
const last = tuples[tuples.length - 1];
let seq: OrderedMetaSequence;
let col: Collection;
if (kind === Kind.Map) {
col = Map.fromSequence(newMapMetaSequence(vr, tuples));
seq = newMapMetaSequence(vr, tuples);
col = Map.fromSequence(seq);
} else {
invariant(kind === Kind.Set);
col = Set.fromSequence(newSetMetaSequence(vr, tuples));
seq = newSetMetaSequence(vr, tuples);
col = Set.fromSequence(seq);
}
return [new MetaTuple(new Ref(col), last.value, numLeaves, col), col];
return [new MetaTuple(new Ref(col), last.value, numLeaves, col), seq];
};
}
@@ -237,12 +262,15 @@ export function newIndexedMetaSequenceChunkFn(kind: NomsKind, vr: ?ValueReader,
invariant(mt.value === mt.numLeaves);
return l + mt.value;
}, 0);
let seq: IndexedMetaSequence;
let col: Collection;
if (kind === Kind.List) {
col = List.fromSequence(newListMetaSequence(vr, tuples));
seq = newListMetaSequence(vr, tuples);
col = List.fromSequence(seq);
} else {
invariant(kind === Kind.Blob);
col = Blob.fromSequence(newBlobMetaSequence(vr, tuples));
seq = newBlobMetaSequence(vr, tuples);
col = Blob.fromSequence(seq);
}
let mt;
if (vw) {
@@ -250,7 +278,7 @@ export function newIndexedMetaSequenceChunkFn(kind: NomsKind, vr: ?ValueReader,
} else {
mt = new MetaTuple(new Ref(col), sum, sum, col);
}
return [mt, col];
return [mt, seq];
};
}
+107 -72
View File
@@ -6,27 +6,26 @@
import type Sequence from './sequence.js'; // eslint-disable-line no-unused-vars
import {invariant, notNull} from './assert.js';
import type Collection from './collection.js';
import type {MetaSequence, MetaTuple} from './meta-sequence.js';
import type {SequenceCursor} from './sequence.js';
export type BoundaryChecker<T> = {
write: (item: T) => boolean;
windowSize: number;
}
};
export type NewBoundaryCheckerFn = () => BoundaryChecker<MetaTuple>;
export type makeChunkFn<T: Collection> = (items: Array<any>) => [MetaTuple, T];
export type makeChunkFn<T, S: Sequence> = (items: Array<T>) => [MetaTuple, S];
export async function chunkSequence<C: Collection, S>(
export async function chunkSequence<T, S: Sequence<T>>(
cursor: SequenceCursor,
insert: Array<S>,
insert: Array<T>,
remove: number,
makeChunk: makeChunkFn<C>,
parentMakeChunk: makeChunkFn<C>,
boundaryChecker: BoundaryChecker<S>,
newBoundaryChecker: NewBoundaryCheckerFn): Promise<C> {
makeChunk: makeChunkFn<T, S>,
parentMakeChunk: makeChunkFn<MetaTuple, MetaSequence>,
boundaryChecker: BoundaryChecker<T>,
newBoundaryChecker: NewBoundaryCheckerFn): Promise<Sequence> {
const chunker = new SequenceChunker(cursor, makeChunk, parentMakeChunk, boundaryChecker,
newBoundaryChecker);
@@ -49,12 +48,12 @@ export async function chunkSequence<C: Collection, S>(
// Like |chunkSequence|, but without an existing cursor (implying this is a new collection), so it
// can be synchronous. Necessary for constructing collections without a Promises or async/await.
// There is no equivalent in the Go code because Go is already synchronous.
export function chunkSequenceSync<C: Collection, S>(
insert: Array<S>,
makeChunk: makeChunkFn<C>,
parentMakeChunk: makeChunkFn<C>,
boundaryChecker: BoundaryChecker<S>,
newBoundaryChecker: NewBoundaryCheckerFn): C {
export function chunkSequenceSync<T, S: Sequence<T>>(
insert: Array<T>,
makeChunk: makeChunkFn<T, S>,
parentMakeChunk: makeChunkFn<MetaTuple, MetaSequence>,
boundaryChecker: BoundaryChecker<T>,
newBoundaryChecker: NewBoundaryCheckerFn): Sequence {
const chunker = new SequenceChunker(null, makeChunk, parentMakeChunk, boundaryChecker,
newBoundaryChecker);
@@ -64,30 +63,30 @@ export function chunkSequenceSync<C: Collection, S>(
return chunker.doneSync();
}
export default class SequenceChunker<C: Collection, S, U: Sequence> {
_cursor: ?SequenceCursor<S, U>;
_isOnChunkBoundary: boolean;
_parent: ?SequenceChunker<C, MetaTuple, MetaSequence>;
_current: Array<S>;
_makeChunk: makeChunkFn<C>;
_parentMakeChunk: makeChunkFn<C>;
_boundaryChecker: BoundaryChecker<S>;
export default class SequenceChunker<T, S: Sequence<T>> {
_cursor: ?SequenceCursor<T, S>;
_parent: ?SequenceChunker<MetaTuple, MetaSequence>;
_current: Array<T>;
_lastSeq: ?S;
_makeChunk: makeChunkFn<T, S>;
_parentMakeChunk: makeChunkFn<MetaTuple, MetaSequence>;
_boundaryChecker: BoundaryChecker<T>;
_newBoundaryChecker: NewBoundaryCheckerFn;
_used: boolean;
_done: boolean;
constructor(cursor: ?SequenceCursor, makeChunk: makeChunkFn,
parentMakeChunk: makeChunkFn,
boundaryChecker: BoundaryChecker<S>,
boundaryChecker: BoundaryChecker<T>,
newBoundaryChecker: NewBoundaryCheckerFn) {
this._cursor = cursor;
this._isOnChunkBoundary = false;
this._parent = null;
this._current = [];
this._lastSeq = null;
this._makeChunk = makeChunk;
this._parentMakeChunk = parentMakeChunk;
this._boundaryChecker = boundaryChecker;
this._newBoundaryChecker = newBoundaryChecker;
this._used = false;
this._done = false;
}
async resume(): Promise<void> {
@@ -131,20 +130,12 @@ export default class SequenceChunker<C: Collection, S, U: Sequence> {
this._current.push(item);
}
}
this._used = this._current.length > 0;
}
append(item: S) {
if (this._isOnChunkBoundary) {
this.createParent();
this.handleChunkBoundary();
this._isOnChunkBoundary = false;
}
append(item: T) {
this._current.push(item);
this._used = true;
if (this._boundaryChecker.write(item)) {
this.handleChunkBoundary();
this.handleChunkBoundary(true);
}
}
@@ -172,63 +163,88 @@ export default class SequenceChunker<C: Collection, S, U: Sequence> {
this._newBoundaryChecker);
}
handleChunkBoundary() {
handleChunkBoundary(createParentIfNil: boolean) {
invariant(this._current.length > 0);
const parent = this._parent;
if (!parent) {
invariant(!this._isOnChunkBoundary);
this._isOnChunkBoundary = true;
} else {
invariant(this._current.length > 0);
const chunk = this._makeChunk(this._current)[0];
parent.append(chunk);
this._current = [];
const [chunk, seq] = this._makeChunk(this._current);
this._current = [];
this._lastSeq = seq;
if (!this._parent && createParentIfNil) {
this.createParent();
}
if (this._parent) {
this._parent.append(chunk);
}
}
async done(): Promise<C> {
if (this._cursor) {
await this.finalizeCursor();
async done(): Promise<Sequence> {
invariant(!this._done);
this._done = true;
for (let s = this; s; s = s._parent) {
if (s._cursor) {
await s.finalizeCursor();
}
}
if (this.isRoot()) {
return this._makeChunk(this._current)[1];
// Chunkers will probably have current items which didn't hit a chunk boundary. Pretend they end
// on chunk boundaries for now.
this.finalizeChunkBoundaries();
// The rest of this code figures out which sequence in the parent chain is canonical. That is:
// * It's empty, or
// * It never chunked, so it's not a prollytree, or
// * It chunked, so it's a prollytree, but it must have at least 2 children (or it could have
// been represented as that 1 child).
//
// Examples of when we may have constructed non-canonical sequences:
// * If the previous tree (i.e. its cursor) was deeper, we will have created empty parents.
// * If the last appended item was on a chunk boundary, there may be a sequence with a single
// chunk.
// Firstly, follow up the parent chain to find the highest chunker which did chunk.
let seq = this.findRoot();
if (!seq) {
seq = this._makeChunk([])[1];
return seq;
}
if (this._current.length > 0) {
this.handleChunkBoundary();
// Lastly, step back down to find a meta sequence with more than 1 child.
while (seq.length <= 1) {
invariant(seq.length !== 0);
if (!seq.isMeta) {
break;
}
seq = notNull(await seq.getChildSequence(0));
}
invariant(this._parent);
return this._parent.done();
return notNull(seq); // flow should not need this notNull
}
// Like |done|, but assumes there is no cursor, so it can be synchronous. Necessary for
// constructing collections without Promises or async/await. There is no equivalent in the Go
// code because Go is already synchronous.
doneSync(): C {
doneSync(): Sequence {
invariant(!this._cursor);
invariant(!this._done);
this._done = true;
if (this.isRoot()) {
return this._makeChunk(this._current)[1];
this.finalizeChunkBoundaries();
let seq = this.findRoot();
if (!seq) {
seq = this._makeChunk([])[1];
return seq;
}
if (this._current.length > 0) {
this.handleChunkBoundary();
}
invariant(this._parent);
return this._parent.doneSync();
}
isRoot(): boolean {
for (let ancestor = this._parent; ancestor; ancestor = ancestor._parent) {
if (ancestor._used) {
return false;
while (seq.length <= 1) {
invariant(seq.length !== 0);
if (!seq.isMeta) {
break;
}
seq = notNull(seq.getChildSequenceSync(0));
}
return true;
return notNull(seq); // flow should not need this notNull
}
async finalizeCursor(): Promise<void> {
@@ -250,4 +266,23 @@ export default class SequenceChunker<C: Collection, S, U: Sequence> {
}
}
}
finalizeChunkBoundaries() {
for (let s = this; s; s = s._parent) {
if (s._current.length > 0) {
// Don't create a new parent if we haven't chunked.
s.handleChunkBoundary(Boolean(s._lastSeq));
}
}
}
findRoot(): ?Sequence {
let root = null;
for (let s = this; s; s = s._parent) {
if (s._lastSeq) {
root = s._lastSeq;
}
}
return root;
}
}
+4
View File
@@ -43,6 +43,10 @@ export default class Sequence<T> {
return Promise.resolve(null);
}
// Base implementation of the synchronous child lookup: always returns null and ignores |idx|.
getChildSequenceSync(idx: number): ?Sequence { // eslint-disable-line no-unused-vars
return null;
}
get chunks(): Array<Ref> {
return [];
}
+7 -5
View File
@@ -41,9 +41,10 @@ function newSetLeafChunkFn<T:Value>(vr: ?ValueReader): makeChunkFn {
}
}
const ns = Set.fromSequence(newSetLeafSequence(vr, items));
const seq = newSetLeafSequence(vr, items);
const ns = Set.fromSequence(seq);
const mt = new MetaTuple(new Ref(ns), indexValue, items.length, ns);
return [mt, ns];
return [mt, seq];
};
}
@@ -68,13 +69,14 @@ export function newSetLeafSequence<K: Value>(
export default class Set<T: Value> extends Collection<OrderedSequence> {
constructor(values: Array<T> = []) {
const self = chunkSequenceSync(
const seq = chunkSequenceSync(
buildSetData(values),
newSetLeafChunkFn(null),
newOrderedMetaSequenceChunkFn(Kind.Set, null),
newSetLeafBoundaryChecker(),
newOrderedMetaSequenceBoundaryChecker);
super(self.sequence);
invariant(seq instanceof OrderedSequence);
super(seq);
}
async has(key: T): Promise<boolean> {
@@ -118,7 +120,7 @@ export default class Set<T: Value> extends Collection<OrderedSequence> {
return chunkSequence(cursor, insert, remove, newSetLeafChunkFn(vr),
newOrderedMetaSequenceChunkFn(Kind.Set, vr),
newSetLeafBoundaryChecker(),
newOrderedMetaSequenceBoundaryChecker);
newOrderedMetaSequenceBoundaryChecker).then(s => Set.fromSequence(s));
}
async add(value: T): Promise<Set<T>> {