mirror of
https://github.com/dolthub/dolt.git
synced 2026-02-11 10:33:08 -06:00
Lazily write sequence chunks.
Instead of writing sequence chunks as soon as they're created (as a result of hitting chunk boundaries), only write them once they're referenced - which only happens if those chunks are themselves chunked. The effect of this is root chunks of collections/blobs aren't written until they're committed, which makes the XML importer run twice as fast on a month of MLB data - 60s instead of 120s, with --ldb-dump-stats showing a PutCount of 21,272 instead of 342,254. In the future it should be possible to avoid writing *any* chunks until the root is committed, which will improve incremental update performance, but that's a larger change (issue #710). This change fixes issue #832.
This commit is contained in:
@@ -60,20 +60,18 @@ func TestStructWithRef(t *testing.T) {
|
||||
cs := chunks.NewMemoryStore()
|
||||
|
||||
set := gen.SetOfFloat32Def{0: true, 1: true, 2: true}.New(cs)
|
||||
types.WriteValue(set, cs)
|
||||
|
||||
str := gen.StructWithRefDef{
|
||||
R: set.Ref(),
|
||||
}.New(cs)
|
||||
types.WriteValue(str, cs)
|
||||
|
||||
r := str.R()
|
||||
r2 := gen.NewRefOfSetOfFloat32(set.Ref())
|
||||
assert.True(r.Equals(r2))
|
||||
|
||||
assert.True(r2.TargetValue(cs).Equals(set))
|
||||
|
||||
types.WriteValue(str, cs)
|
||||
assert.True(r2.TargetValue(cs).Equals(set))
|
||||
|
||||
types.WriteValue(set, cs)
|
||||
set2 := r2.TargetValue(cs)
|
||||
assert.True(set.Equals(set2))
|
||||
|
||||
|
||||
@@ -49,13 +49,12 @@ func newBlobLeafChunkFn(cs chunks.ChunkStore) makeChunkFn {
|
||||
}
|
||||
|
||||
leaf := newBlobLeaf(buff)
|
||||
ref := WriteValue(leaf, cs)
|
||||
return metaTuple{ref, Uint64(uint64(len(buff)))}, leaf
|
||||
return metaTuple{leaf, leaf.Ref(), Uint64(uint64(len(buff)))}, leaf
|
||||
}
|
||||
}
|
||||
|
||||
func NewBlob(r io.Reader, cs chunks.ChunkStore) Blob {
|
||||
seq := newEmptySequenceChunker(newBlobLeafChunkFn(cs), newMetaSequenceChunkFn(typeForBlob, cs), newBlobLeafBoundaryChecker(), newMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(newBlobLeafChunkFn(cs), newIndexedMetaSequenceChunkFn(typeForBlob, cs), newBlobLeafBoundaryChecker(), newIndexedMetaSequenceBoundaryChecker)
|
||||
buf := []byte{0}
|
||||
for {
|
||||
n, err := r.Read(buf)
|
||||
|
||||
@@ -34,7 +34,7 @@ func init() {
|
||||
func (cb compoundBlob) Reader() io.ReadSeeker {
|
||||
cursor, v := newMetaSequenceCursor(cb, cb.cs)
|
||||
reader := v.(blobLeaf).Reader()
|
||||
return &compoundBlobReader{cursor: cursor, currentReader: reader, length: cb.Len(), cs: cb.cs}
|
||||
return &compoundBlobReader{cursor, reader, 0, 0, cb.Len(), cb.cs}
|
||||
}
|
||||
|
||||
func (cb compoundBlob) Equals(other Value) bool {
|
||||
|
||||
@@ -18,7 +18,8 @@ func getTestCompoundBlob(datas ...string) compoundBlob {
|
||||
ms := chunks.NewMemoryStore()
|
||||
for i, s := range datas {
|
||||
b := NewBlob(bytes.NewBufferString(s), ms)
|
||||
tuples[i] = metaTuple{WriteValue(b, ms), Uint64(len(s))}
|
||||
r := WriteValue(b, ms)
|
||||
tuples[i] = metaTuple{b, r, Uint64(len(s))}
|
||||
}
|
||||
return newCompoundBlob(tuples, ms)
|
||||
}
|
||||
@@ -70,29 +71,29 @@ func TestCompoundBlobReader(t *testing.T) {
|
||||
}
|
||||
assert := assert.New(t)
|
||||
|
||||
test := func(b compoundBlob) {
|
||||
bs, err := ioutil.ReadAll(b.Reader())
|
||||
assert.NoError(err)
|
||||
assert.Equal("helloworld", string(bs))
|
||||
|
||||
ab := getRandomBlob(t)
|
||||
bs, err = ioutil.ReadAll(ab.Reader())
|
||||
assert.NoError(err)
|
||||
r := getRandomReader()
|
||||
bs2, err := ioutil.ReadAll(r)
|
||||
assert.Equal(bs2, bs)
|
||||
testByteRange(assert, 200453, 100232, r, ab.Reader())
|
||||
testByteRange(assert, 100, 10, r, ab.Reader())
|
||||
testByteRange(assert, 2340, 2630, r, ab.Reader())
|
||||
testByteRange(assert, 432423, 50000, r, ab.Reader())
|
||||
testByteRange(assert, 1, 10, r, ab.Reader())
|
||||
}
|
||||
|
||||
cb := getTestCompoundBlob("hello", "world")
|
||||
bs, err := ioutil.ReadAll(cb.Reader())
|
||||
assert.NoError(err)
|
||||
assert.Equal("helloworld", string(bs))
|
||||
|
||||
ab := getRandomBlob(t)
|
||||
bs, err = ioutil.ReadAll(ab.Reader())
|
||||
assert.NoError(err)
|
||||
r := getRandomReader()
|
||||
bs2, err := ioutil.ReadAll(r)
|
||||
assert.Equal(bs2, bs)
|
||||
testByteRange(assert, 200453, 100232, r, ab.Reader())
|
||||
testByteRange(assert, 100, 10, r, ab.Reader())
|
||||
testByteRange(assert, 2340, 2630, r, ab.Reader())
|
||||
testByteRange(assert, 432423, 50000, r, ab.Reader())
|
||||
testByteRange(assert, 1, 10, r, ab.Reader())
|
||||
|
||||
ref := WriteValue(cb, cb.cs.(chunks.ChunkStore))
|
||||
cb2 := ReadValue(ref, cb.cs)
|
||||
bs3, err := ioutil.ReadAll(cb2.(Blob).Reader())
|
||||
assert.NoError(err)
|
||||
assert.Equal("helloworld", string(bs3))
|
||||
test(cb)
|
||||
|
||||
r := WriteValue(cb, cb.cs)
|
||||
test(ReadValue(r, cb.cs).(compoundBlob))
|
||||
}
|
||||
|
||||
type testBlob struct {
|
||||
@@ -187,7 +188,7 @@ func TestCompoundBlobChunks(t *testing.T) {
|
||||
|
||||
bl1 := newBlobLeaf([]byte("hello"))
|
||||
bl2 := newBlobLeaf([]byte("world"))
|
||||
cb = newCompoundBlob([]metaTuple{{WriteValue(bl1, cs), Uint64(uint64(5))}, {WriteValue(bl2, cs), Uint64(uint64(10))}}, cs)
|
||||
cb = newCompoundBlob([]metaTuple{{bl1, bl1.Ref(), Uint64(uint64(5))}, {bl2, bl2.Ref(), Uint64(uint64(10))}}, cs)
|
||||
assert.Equal(2, len(cb.Chunks()))
|
||||
}
|
||||
|
||||
@@ -213,13 +214,13 @@ func TestCompoundBlobSameChunksWithPrefix(t *testing.T) {
|
||||
assert.Equal(cb2.Len(), cb1.Len()+uint64(6))
|
||||
assert.Equal(2, len(cb1.tuples))
|
||||
assert.Equal(2, len(cb2.tuples))
|
||||
assert.NotEqual(cb1.tuples[0].ref, cb2.tuples[0].ref)
|
||||
assert.Equal(cb1.tuples[1].ref, cb2.tuples[1].ref)
|
||||
assert.NotEqual(cb1.tuples[0].childRef, cb2.tuples[0].childRef)
|
||||
assert.Equal(cb1.tuples[1].childRef, cb2.tuples[1].childRef)
|
||||
|
||||
tuples1 := ReadValue(cb1.tuples[0].ref, cb1.cs).(compoundBlob).tuples
|
||||
tuples2 := ReadValue(cb2.tuples[0].ref, cb2.cs).(compoundBlob).tuples
|
||||
assert.NotEqual(tuples1[0].ref, tuples2[0].ref)
|
||||
assert.Equal(tuples1[1].ref, tuples2[1].ref)
|
||||
tuples1 := cb1.tuples[0].child.(compoundBlob).tuples
|
||||
tuples2 := cb2.tuples[0].child.(compoundBlob).tuples
|
||||
assert.NotEqual(tuples1[0].childRef, tuples2[0].childRef)
|
||||
assert.Equal(tuples1[1].childRef, tuples2[1].childRef)
|
||||
}
|
||||
|
||||
func TestCompoundBlobSameChunksWithSuffix(t *testing.T) {
|
||||
@@ -244,14 +245,14 @@ func TestCompoundBlobSameChunksWithSuffix(t *testing.T) {
|
||||
assert.Equal(cb2.Len(), cb1.Len()+uint64(6))
|
||||
assert.Equal(2, len(cb1.tuples))
|
||||
assert.Equal(len(cb1.tuples), len(cb2.tuples))
|
||||
assert.Equal(cb1.tuples[0].ref, cb2.tuples[0].ref)
|
||||
assert.NotEqual(cb1.tuples[1].ref, cb2.tuples[1].ref)
|
||||
assert.Equal(cb1.tuples[0].childRef, cb2.tuples[0].childRef)
|
||||
assert.NotEqual(cb1.tuples[1].childRef, cb2.tuples[1].childRef)
|
||||
|
||||
tuples1 := ReadValue(cb1.tuples[1].ref, cb1.cs).(compoundBlob).tuples
|
||||
tuples2 := ReadValue(cb2.tuples[1].ref, cb2.cs).(compoundBlob).tuples
|
||||
assert.Equal(tuples1[0].ref, tuples2[0].ref)
|
||||
assert.Equal(tuples1[len(tuples1)-2].ref, tuples2[len(tuples2)-2].ref)
|
||||
assert.NotEqual(tuples1[len(tuples1)-1].ref, tuples2[len(tuples2)-1].ref)
|
||||
tuples1 := cb1.tuples[1].child.(compoundBlob).tuples
|
||||
tuples2 := cb2.tuples[1].child.(compoundBlob).tuples
|
||||
assert.Equal(tuples1[0].childRef, tuples2[0].childRef)
|
||||
assert.Equal(tuples1[len(tuples1)-2].childRef, tuples2[len(tuples2)-2].childRef)
|
||||
assert.NotEqual(tuples1[len(tuples1)-1].childRef, tuples2[len(tuples2)-1].childRef)
|
||||
}
|
||||
|
||||
func printBlob(b Blob, indent int) {
|
||||
@@ -263,7 +264,7 @@ func printBlob(b Blob, indent int) {
|
||||
fmt.Printf("%scompoundBlob, len: %d, chunks: %d\n", indentString, b.Len(), len(b.tuples))
|
||||
indent++
|
||||
for _, t := range b.tuples {
|
||||
printBlob(ReadValue(t.ref, b.cs).(Blob), indent)
|
||||
printBlob(ReadValue(t.childRef, b.cs).(Blob), indent)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,8 +67,8 @@ func (cl compoundList) cursorAt(idx uint64) (*sequenceCursor, listLeaf, uint64)
|
||||
return idx < offset, offset
|
||||
}, uint64(0))
|
||||
|
||||
if current := cursor.current().(metaTuple); current.ref != valueFromType(cl.cs, leaf, leaf.Type()).Ref() {
|
||||
leaf = readMetaTupleValue(cursor.current(), cl.cs)
|
||||
if current := cursor.current().(metaTuple); current.childRef != valueFromType(cl.cs, leaf, leaf.Type()).Ref() {
|
||||
leaf = readMetaTupleValue(current, cl.cs)
|
||||
}
|
||||
|
||||
return cursor, leaf.(listLeaf), chunkStart.(uint64)
|
||||
@@ -190,11 +190,11 @@ func (cl compoundList) sequenceCursorAtIndex(idx uint64) *sequenceCursor {
|
||||
|
||||
func (cl compoundList) sequenceChunkerAtIndex(idx uint64) *sequenceChunker {
|
||||
cur := cl.sequenceCursorAtIndex(idx)
|
||||
return newSequenceChunker(cur, makeListLeafChunkFn(cl.t, cl.cs), newMetaSequenceChunkFn(cl.t, cl.cs), newListLeafBoundaryChecker(), newMetaSequenceBoundaryChecker)
|
||||
return newSequenceChunker(cur, makeListLeafChunkFn(cl.t, cl.cs), newIndexedMetaSequenceChunkFn(cl.t, cl.cs), newListLeafBoundaryChecker(), newIndexedMetaSequenceBoundaryChecker)
|
||||
}
|
||||
|
||||
func (cl compoundList) Filter(cb listFilterCallback) List {
|
||||
seq := newEmptySequenceChunker(makeListLeafChunkFn(cl.t, cl.cs), newMetaSequenceChunkFn(cl.t, cl.cs), newListLeafBoundaryChecker(), newMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(makeListLeafChunkFn(cl.t, cl.cs), newIndexedMetaSequenceChunkFn(cl.t, cl.cs), newListLeafBoundaryChecker(), newIndexedMetaSequenceBoundaryChecker)
|
||||
cl.IterAll(func(v Value, idx uint64) {
|
||||
if cb(v, idx) {
|
||||
seq.Append(v)
|
||||
@@ -263,7 +263,6 @@ func makeListLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
}
|
||||
|
||||
list := valueFromType(cs, newListLeaf(cs, t, values...), t)
|
||||
ref := WriteValue(list, cs)
|
||||
return metaTuple{ref, Uint64(len(values))}, list
|
||||
return metaTuple{list, list.Ref(), Uint64(len(values))}, list
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,15 +80,21 @@ func TestCompoundListGet(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
cs := chunks.NewMemoryStore()
|
||||
|
||||
simpleList := getTestSimpleList()
|
||||
tr := MakeCompoundType(ListKind, MakePrimitiveType(Int64Kind))
|
||||
cl := NewTypedList(cs, tr, simpleList...)
|
||||
|
||||
// Incrementing by len(simpleList)/10 because Get() is too slow to run on every index.
|
||||
for i := 0; i < len(simpleList); i += len(simpleList) / 10 {
|
||||
assert.Equal(simpleList[i], cl.Get(uint64(i)))
|
||||
testGet := func(cl compoundList) {
|
||||
// Incrementing by len(simpleList)/10 because Get() is too slow to run on every index.
|
||||
for i := 0; i < len(simpleList); i += len(simpleList) / 10 {
|
||||
assert.Equal(simpleList[i], cl.Get(uint64(i)))
|
||||
}
|
||||
}
|
||||
|
||||
tr := MakeCompoundType(ListKind, MakePrimitiveType(Int64Kind))
|
||||
cl := NewTypedList(cs, tr, simpleList...).(compoundList)
|
||||
testGet(cl)
|
||||
|
||||
r := WriteValue(cl, cs)
|
||||
testGet(ReadValue(r, cs).(compoundList))
|
||||
}
|
||||
|
||||
func TestCompoundListIter(t *testing.T) {
|
||||
|
||||
@@ -110,7 +110,7 @@ func (cm compoundMap) sequenceChunkerAtKey(k Value) (*sequenceChunker, bool) {
|
||||
return otherLeaf, len(otherLeaf.data)
|
||||
}}
|
||||
|
||||
seq := newSequenceChunker(cur, makeMapLeafChunkFn(cm.t, cm.cs), newMapMetaSequenceChunkFn(cm.t, cm.cs), newMapLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
seq := newSequenceChunker(cur, makeMapLeafChunkFn(cm.t, cm.cs), newOrderedMetaSequenceChunkFn(cm.t, cm.cs), newMapLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
found := idx < len(leaf.data) && leaf.data[idx].key.Equals(k)
|
||||
return seq, found
|
||||
}
|
||||
@@ -124,7 +124,7 @@ func (cm compoundMap) IterAllP(concurrency int, f mapIterAllCallback) {
|
||||
}
|
||||
|
||||
func (cm compoundMap) Filter(cb mapFilterCallback) Map {
|
||||
seq := newEmptySequenceChunker(makeMapLeafChunkFn(cm.t, cm.cs), newMapMetaSequenceChunkFn(cm.t, cm.cs), newMapLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(makeMapLeafChunkFn(cm.t, cm.cs), newOrderedMetaSequenceChunkFn(cm.t, cm.cs), newMapLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
|
||||
cm.IterAll(func(k, v Value) {
|
||||
if cb(k, v) {
|
||||
@@ -164,21 +164,6 @@ func (cm compoundMap) IterAll(cb mapIterAllCallback) {
|
||||
})
|
||||
}
|
||||
|
||||
func newMapMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
return func(items []sequenceItem) (sequenceItem, Value) {
|
||||
tuples := make(metaSequenceData, len(items))
|
||||
|
||||
for i, v := range items {
|
||||
tuples[i] = v.(metaTuple)
|
||||
}
|
||||
|
||||
lastIndex := tuples.last().value
|
||||
meta := newMetaSequenceFromData(tuples, t, cs)
|
||||
ref := WriteValue(meta, cs)
|
||||
return metaTuple{ref, lastIndex}, meta
|
||||
}
|
||||
}
|
||||
|
||||
func (cm compoundMap) elemTypes() []Type {
|
||||
return cm.Type().Desc.(CompoundDesc).ElemTypes
|
||||
}
|
||||
|
||||
@@ -121,10 +121,16 @@ func TestCompoundMapHas(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
doTest := func(tm testMap) {
|
||||
m := tm.toCompoundMap(chunks.NewMemoryStore())
|
||||
cs := chunks.NewMemoryStore()
|
||||
m := tm.toCompoundMap(cs)
|
||||
r := WriteValue(m, cs)
|
||||
m2 := ReadValue(r, cs).(compoundMap)
|
||||
for _, entry := range tm.entries {
|
||||
assert.True(m.Has(entry.key))
|
||||
assert.True(m.Get(entry.key).Equals(entry.value))
|
||||
k, v := entry.key, entry.value
|
||||
assert.True(m.Has(k))
|
||||
assert.True(m.Get(k).Equals(v))
|
||||
assert.True(m2.Has(k))
|
||||
assert.True(m2.Get(k).Equals(v))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -109,7 +109,7 @@ func newSetSequenceCursorAtPosition(metaCur *sequenceCursor, leaf setLeaf, idx i
|
||||
|
||||
func (cs compoundSet) sequenceChunkerAtValue(v Value) (*sequenceChunker, bool) {
|
||||
cur, found := cs.sequenceCursorAtValue(v)
|
||||
seq := newSequenceChunker(cur, makeSetLeafChunkFn(cs.t, cs.cs), newSetMetaSequenceChunkFn(cs.t, cs.cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
seq := newSequenceChunker(cur, makeSetLeafChunkFn(cs.t, cs.cs), newOrderedMetaSequenceChunkFn(cs.t, cs.cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
return seq, found
|
||||
}
|
||||
|
||||
@@ -131,7 +131,7 @@ func (cs compoundSet) Subtract(others ...Set) Set {
|
||||
}
|
||||
|
||||
func (cs compoundSet) Filter(cb setFilterCallback) Set {
|
||||
seq := newEmptySequenceChunker(makeSetLeafChunkFn(cs.t, cs.cs), newSetMetaSequenceChunkFn(cs.t, cs.cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(makeSetLeafChunkFn(cs.t, cs.cs), newOrderedMetaSequenceChunkFn(cs.t, cs.cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
|
||||
cs.IterAll(func(v Value) {
|
||||
if cb(v) {
|
||||
@@ -179,18 +179,3 @@ func (cs compoundSet) IterAllP(concurrency int, f setIterAllCallback) {
|
||||
return false
|
||||
})
|
||||
}
|
||||
|
||||
func newSetMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
return func(items []sequenceItem) (sequenceItem, Value) {
|
||||
tuples := make(metaSequenceData, len(items))
|
||||
|
||||
for i, v := range items {
|
||||
tuples[i] = v.(metaTuple)
|
||||
}
|
||||
|
||||
lastValue := tuples[len(tuples)-1].value
|
||||
meta := newMetaSequenceFromData(tuples, t, cs)
|
||||
ref := WriteValue(meta, cs)
|
||||
return metaTuple{ref, lastValue}, meta
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,9 +98,13 @@ func TestCompoundSetHas(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
doTest := func(ts testSet) {
|
||||
set := ts.toCompoundSet(chunks.NewMemoryStore())
|
||||
cs := chunks.NewMemoryStore()
|
||||
set := ts.toCompoundSet(cs)
|
||||
r := WriteValue(set, cs)
|
||||
set2 := ReadValue(r, cs).(compoundSet)
|
||||
for _, v := range ts.values {
|
||||
assert.True(set.Has(v))
|
||||
assert.True(set2.Has(v))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -178,7 +178,7 @@ func (r *jsonArrayReader) maybeReadMetaSequence(t Type, pkg *Package) (Value, bo
|
||||
for !r2.atEnd() {
|
||||
ref := r2.readRef()
|
||||
v := r2.readValueWithoutTag(indexType, pkg)
|
||||
data = append(data, metaTuple{ref, v})
|
||||
data = append(data, metaTuple{nil, ref, v})
|
||||
}
|
||||
|
||||
t = fixupType(t, pkg)
|
||||
|
||||
@@ -130,11 +130,11 @@ func TestReadCompoundList(t *testing.T) {
|
||||
cs := chunks.NewMemoryStore()
|
||||
|
||||
tr := MakeCompoundType(ListKind, MakePrimitiveType(Int32Kind))
|
||||
r1 := newListLeaf(cs, tr, Int32(0)).Ref()
|
||||
r2 := newListLeaf(cs, tr, Int32(1), Int32(2), Int32(3)).Ref()
|
||||
l2 := buildCompoundList([]metaTuple{{r1, Uint64(1)}, {r2, Uint64(4)}}, tr, cs)
|
||||
leaf1 := newListLeaf(cs, tr, Int32(0))
|
||||
leaf2 := newListLeaf(cs, tr, Int32(1), Int32(2), Int32(3))
|
||||
l2 := buildCompoundList([]metaTuple{{leaf1, leaf1.Ref(), Uint64(1)}, {leaf2, leaf2.Ref(), Uint64(4)}}, tr, cs)
|
||||
|
||||
a := parseJson(`[%d, %d, true, ["%s", "1", "%s", "4"]]`, ListKind, Int32Kind, r1, r2)
|
||||
a := parseJson(`[%d, %d, true, ["%s", "1", "%s", "4"]]`, ListKind, Int32Kind, leaf1.Ref(), leaf2.Ref())
|
||||
r := newJsonArrayReader(a, cs)
|
||||
l := r.readTopLevelValue()
|
||||
|
||||
@@ -210,7 +210,7 @@ func TestReadCompoundBlob(t *testing.T) {
|
||||
m := r.readTopLevelValue()
|
||||
_, ok := m.(compoundBlob)
|
||||
assert.True(ok)
|
||||
m2 := newCompoundBlob([]metaTuple{{r1, Uint64(20)}, {r2, Uint64(40)}, {r3, Uint64(60)}}, cs)
|
||||
m2 := newCompoundBlob([]metaTuple{{nil, r1, Uint64(20)}, {nil, r2, Uint64(40)}, {nil, r3, Uint64(60)}}, cs)
|
||||
|
||||
assert.True(m.Type().Equals(m2.Type()))
|
||||
assert.Equal(m.Ref().String(), m2.Ref().String())
|
||||
|
||||
@@ -103,7 +103,7 @@ func (w *jsonArrayWriter) maybeWriteMetaSequence(v Value, tr Type, pkg *Package)
|
||||
w2 := newJsonArrayWriter(w.cs)
|
||||
indexType := indexTypeForMetaSequence(tr)
|
||||
for _, tuple := range ms.(metaSequence).data() {
|
||||
w2.writeRef(tuple.ref)
|
||||
w2.writeRef(tuple.childRef)
|
||||
w2.writeValue(tuple.value, indexType, pkg)
|
||||
}
|
||||
w.write(w2.toArray())
|
||||
|
||||
@@ -139,7 +139,7 @@ func TestWriteCompoundBlob(t *testing.T) {
|
||||
r2 := ref.Parse("sha1-0000000000000000000000000000000000000002")
|
||||
r3 := ref.Parse("sha1-0000000000000000000000000000000000000003")
|
||||
|
||||
v := newCompoundBlob([]metaTuple{{r1, Uint64(20)}, {r2, Uint64(40)}, {r3, Uint64(60)}}, cs)
|
||||
v := newCompoundBlob([]metaTuple{{nil, r1, Uint64(20)}, {nil, r2, Uint64(40)}, {nil, r3, Uint64(60)}}, cs)
|
||||
w := newJsonArrayWriter(cs)
|
||||
w.writeTopLevelValue(v)
|
||||
|
||||
@@ -326,13 +326,13 @@ func TestWriteCompoundList(t *testing.T) {
|
||||
cs := chunks.NewMemoryStore()
|
||||
|
||||
ltr := MakeCompoundType(ListKind, MakePrimitiveType(Int32Kind))
|
||||
r1 := newListLeaf(cs, ltr, Int32(0)).Ref()
|
||||
r2 := newListLeaf(cs, ltr, Int32(1), Int32(2), Int32(3)).Ref()
|
||||
cl := buildCompoundList([]metaTuple{{r1, Uint64(1)}, {r2, Uint64(4)}}, ltr, cs)
|
||||
leaf1 := newListLeaf(cs, ltr, Int32(0))
|
||||
leaf2 := newListLeaf(cs, ltr, Int32(1), Int32(2), Int32(3))
|
||||
cl := buildCompoundList([]metaTuple{{leaf1, leaf1.Ref(), Uint64(1)}, {leaf2, leaf2.Ref(), Uint64(4)}}, ltr, cs)
|
||||
|
||||
w := newJsonArrayWriter(cs)
|
||||
w.writeTopLevelValue(cl)
|
||||
assert.EqualValues([]interface{}{ListKind, Int32Kind, true, []interface{}{r1.String(), "1", r2.String(), "4"}}, w.toArray())
|
||||
assert.EqualValues([]interface{}{ListKind, Int32Kind, true, []interface{}{leaf1.Ref().String(), "1", leaf2.Ref().String(), "4"}}, w.toArray())
|
||||
}
|
||||
|
||||
func TestWriteListOfValue(t *testing.T) {
|
||||
|
||||
@@ -61,7 +61,7 @@ func TestValueEquals(t *testing.T) {
|
||||
ms := chunks.NewMemoryStore()
|
||||
b1 := NewBlob(bytes.NewBufferString("hi"), ms)
|
||||
b2 := NewBlob(bytes.NewBufferString("bye"), ms)
|
||||
return newCompoundBlob([]metaTuple{{WriteValue(b1, ms), Uint64(uint64(2))}, {WriteValue(b2, ms), Uint64(uint64(5))}}, ms)
|
||||
return newCompoundBlob([]metaTuple{{b1, b1.Ref(), Uint64(uint64(2))}, {b2, b2.Ref(), Uint64(uint64(5))}}, ms)
|
||||
},
|
||||
func() Value { return NewList(cs) },
|
||||
func() Value { return NewList(cs, NewString("foo")) },
|
||||
|
||||
@@ -44,16 +44,16 @@ func TestEnsureRef(t *testing.T) {
|
||||
}()
|
||||
|
||||
bl := newBlobLeaf([]byte("hi"))
|
||||
cb := newCompoundBlob([]metaTuple{{WriteValue(bl, cs), Uint64(2)}}, cs)
|
||||
cb := newCompoundBlob([]metaTuple{{bl, bl.Ref(), Uint64(2)}}, cs)
|
||||
|
||||
ll := newListLeaf(cs, listType, NewString("foo"))
|
||||
cl := buildCompoundList([]metaTuple{{WriteValue(ll, cs), Uint64(1)}}, listType, cs)
|
||||
cl := buildCompoundList([]metaTuple{{ll, ll.Ref(), Uint64(1)}}, listType, cs)
|
||||
|
||||
ml := newMapLeaf(cs, mapType, mapEntry{NewString("foo"), NewString("bar")})
|
||||
cm := buildCompoundMap([]metaTuple{{WriteValue(ml, cs), NewString("foo")}}, mapType, cs)
|
||||
cm := buildCompoundMap([]metaTuple{{ml, ml.Ref(), NewString("foo")}}, mapType, cs)
|
||||
|
||||
sl := newSetLeaf(cs, setType, NewString("foo"))
|
||||
cps := buildCompoundSet([]metaTuple{{WriteValue(sl, cs), NewString("foo")}}, setType, cs)
|
||||
cps := buildCompoundSet([]metaTuple{{sl, sl.Ref(), NewString("foo")}}, setType, cs)
|
||||
|
||||
count = byte(1)
|
||||
values := []Value{
|
||||
|
||||
29
types/indexed_sequences.go
Normal file
29
types/indexed_sequences.go
Normal file
@@ -0,0 +1,29 @@
|
||||
package types
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
)
|
||||
|
||||
func newIndexedMetaSequenceBoundaryChecker() boundaryChecker {
|
||||
return newBuzHashBoundaryChecker(objectWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
|
||||
digest := item.(metaTuple).childRef.Digest()
|
||||
return digest[:]
|
||||
})
|
||||
}
|
||||
|
||||
func newIndexedMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
return func(items []sequenceItem) (sequenceItem, Value) {
|
||||
tuples := make(metaSequenceData, len(items))
|
||||
for i, v := range items {
|
||||
mt := v.(metaTuple)
|
||||
tuples[i] = mt
|
||||
// Immediately write intermediate chunks. It would be better to defer writing any chunks until commit, see https://github.com/attic-labs/noms/issues/710.
|
||||
WriteValue(mt.child, cs)
|
||||
}
|
||||
|
||||
meta := newMetaSequenceFromData(tuples, t, cs)
|
||||
return metaTuple{meta, meta.Ref(), Uint64(tuples.uint64ValuesSum())}, meta
|
||||
}
|
||||
}
|
||||
@@ -35,7 +35,7 @@ func NewList(cs chunks.ChunkStore, v ...Value) List {
|
||||
}
|
||||
|
||||
func NewTypedList(cs chunks.ChunkStore, t Type, values ...Value) List {
|
||||
seq := newEmptySequenceChunker(makeListLeafChunkFn(t, cs), newMetaSequenceChunkFn(t, cs), newListLeafBoundaryChecker(), newMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(makeListLeafChunkFn(t, cs), newIndexedMetaSequenceChunkFn(t, cs), newListLeafBoundaryChecker(), newIndexedMetaSequenceBoundaryChecker)
|
||||
for _, v := range values {
|
||||
seq.Append(v)
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ func NewTypedMap(cs chunks.ChunkStore, t Type, kv ...Value) Map {
|
||||
}
|
||||
|
||||
func newTypedMap(cs chunks.ChunkStore, t Type, entries ...mapEntry) Map {
|
||||
seq := newEmptySequenceChunker(makeMapLeafChunkFn(t, cs), newMapMetaSequenceChunkFn(t, cs), newMapLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(makeMapLeafChunkFn(t, cs), newOrderedMetaSequenceChunkFn(t, cs), newMapLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
|
||||
for _, entry := range entries {
|
||||
seq.Append(entry)
|
||||
|
||||
@@ -244,7 +244,6 @@ func makeMapLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
}
|
||||
|
||||
mapLeaf := valueFromType(cs, newMapLeaf(cs, t, mapData...), t)
|
||||
ref := WriteValue(mapLeaf, cs)
|
||||
|
||||
var indexValue Value
|
||||
if len(mapData) > 0 {
|
||||
@@ -256,6 +255,6 @@ func makeMapLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
}
|
||||
}
|
||||
|
||||
return metaTuple{ref, indexValue}, mapLeaf
|
||||
return metaTuple{mapLeaf, mapLeaf.Ref(), indexValue}, mapLeaf
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
package types
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/d"
|
||||
"github.com/attic-labs/noms/ref"
|
||||
@@ -15,7 +13,6 @@ const (
|
||||
)
|
||||
|
||||
// metaSequence is a logical abstraction, but has no concrete "base" implementation. A Meta Sequence is a non-leaf (internal) node of a "probably" tree, which results from the chunking of an ordered or unordered sequence of values.
|
||||
|
||||
type metaSequence interface {
|
||||
Value
|
||||
data() metaSequenceData
|
||||
@@ -24,9 +21,11 @@ type metaSequence interface {
|
||||
tupleCount() int
|
||||
}
|
||||
|
||||
// metaTuple is a node in a "probably" tree, consisting of data in the node (either tree leaves or other metaSequences), and a Value annotation for exploring the tree (e.g. the largest item if this an ordered sequence).
|
||||
type metaTuple struct {
|
||||
ref ref.Ref
|
||||
value Value
|
||||
child Value // may be nil if the child data hasn't been read yet
|
||||
childRef ref.Ref
|
||||
value Value
|
||||
}
|
||||
|
||||
func (mt metaTuple) uint64Value() uint64 {
|
||||
@@ -72,14 +71,14 @@ func (ms metaSequenceObject) ChildValues() []Value {
|
||||
refOfLeafType := MakeCompoundType(RefKind, leafType)
|
||||
res := make([]Value, len(ms.tuples))
|
||||
for i, t := range ms.tuples {
|
||||
res[i] = refFromType(t.ref, refOfLeafType)
|
||||
res[i] = refFromType(t.childRef, refOfLeafType)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func (ms metaSequenceObject) Chunks() (chunks []ref.Ref) {
|
||||
for _, tuple := range ms.tuples {
|
||||
chunks = append(chunks, tuple.ref)
|
||||
chunks = append(chunks, tuple.childRef)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -106,33 +105,6 @@ func newMetaSequenceFromData(tuples metaSequenceData, t Type, cs chunks.ChunkSto
|
||||
panic("not reachable")
|
||||
}
|
||||
|
||||
func newMetaSequenceBoundaryChecker() boundaryChecker {
|
||||
return newBuzHashBoundaryChecker(objectWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
|
||||
digest := item.(metaTuple).ref.Digest()
|
||||
return digest[:]
|
||||
})
|
||||
}
|
||||
|
||||
func newOrderedMetaSequenceBoundaryChecker() boundaryChecker {
|
||||
return newBuzHashBoundaryChecker(orderedSequenceWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
|
||||
digest := item.(metaTuple).ref.Digest()
|
||||
return digest[:]
|
||||
})
|
||||
}
|
||||
|
||||
func newMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
return func(items []sequenceItem) (sequenceItem, Value) {
|
||||
tuples := make(metaSequenceData, len(items))
|
||||
for i, v := range items {
|
||||
tuples[i] = v.(metaTuple)
|
||||
}
|
||||
|
||||
meta := newMetaSequenceFromData(tuples, t, cs)
|
||||
ref := WriteValue(meta, cs)
|
||||
return metaTuple{ref, Uint64(tuples.uint64ValuesSum())}, meta
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a sequenceCursor pointing to the first metaTuple in a metaSequence, and returns that cursor plus the leaf Value referenced from that metaTuple.
|
||||
func newMetaSequenceCursor(root metaSequence, cs chunks.ChunkStore) (*sequenceCursor, Value) {
|
||||
d.Chk.NotNil(root)
|
||||
@@ -161,8 +133,13 @@ func newMetaSequenceCursor(root metaSequence, cs chunks.ChunkStore) (*sequenceCu
|
||||
}
|
||||
|
||||
func readMetaTupleValue(item sequenceItem, cs chunks.ChunkStore) Value {
|
||||
v := ReadValue(item.(metaTuple).ref, cs)
|
||||
return internalValueFromType(v, v.Type())
|
||||
mt := item.(metaTuple)
|
||||
if mt.child == nil {
|
||||
child := ReadValue(mt.childRef, cs)
|
||||
d.Chk.NotNil(child)
|
||||
mt.child = internalValueFromType(child, child.Type())
|
||||
}
|
||||
return internalValueFromType(mt.child, mt.child.Type())
|
||||
}
|
||||
|
||||
func iterateMetaSequenceLeaf(ms metaSequence, cs chunks.ChunkStore, cb func(Value) bool) {
|
||||
|
||||
@@ -33,21 +33,21 @@ func TestMeta(t *testing.T) {
|
||||
|
||||
mtr := l0.Type()
|
||||
|
||||
m0 := compoundList{metaSequenceObject{metaSequenceData{{lr0, Uint64(1)}, {lr1, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
m0 := compoundList{metaSequenceObject{metaSequenceData{{l0, lr0, Uint64(1)}, {l1, lr1, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
lm0 := WriteValue(m0, cs)
|
||||
m1 := compoundList{metaSequenceObject{metaSequenceData{{lr2, Uint64(1)}, {lr3, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
m1 := compoundList{metaSequenceObject{metaSequenceData{{l2, lr2, Uint64(1)}, {l3, lr3, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
lm1 := WriteValue(m1, cs)
|
||||
m2 := compoundList{metaSequenceObject{metaSequenceData{{lr4, Uint64(1)}, {lr5, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
m2 := compoundList{metaSequenceObject{metaSequenceData{{l4, lr4, Uint64(1)}, {l5, lr5, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
lm2 := WriteValue(m2, cs)
|
||||
m3 := compoundList{metaSequenceObject{metaSequenceData{{lr6, Uint64(1)}, {lr7, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
m3 := compoundList{metaSequenceObject{metaSequenceData{{l6, lr6, Uint64(1)}, {l7, lr7, Uint64(2)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
lm3 := WriteValue(m3, cs)
|
||||
|
||||
m00 := compoundList{metaSequenceObject{metaSequenceData{{lm0, Uint64(2)}, {lm1, Uint64(4)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
m00 := compoundList{metaSequenceObject{metaSequenceData{{m0, lm0, Uint64(2)}, {m1, lm1, Uint64(4)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
lm00 := WriteValue(m00, cs)
|
||||
m01 := compoundList{metaSequenceObject{metaSequenceData{{lm2, Uint64(2)}, {lm3, Uint64(4)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
m01 := compoundList{metaSequenceObject{metaSequenceData{{m2, lm2, Uint64(2)}, {m3, lm3, Uint64(4)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
lm01 := WriteValue(m01, cs)
|
||||
|
||||
rootList := compoundList{metaSequenceObject{metaSequenceData{{lm00, Uint64(4)}, {lm01, Uint64(8)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
rootList := compoundList{metaSequenceObject{metaSequenceData{{m00, lm00, Uint64(4)}, {m01, lm01, Uint64(8)}}, mtr}, 0, &ref.Ref{}, cs}
|
||||
rootRef := WriteValue(rootList, cs)
|
||||
|
||||
rootList = ReadValue(rootRef, cs).(compoundList)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package types
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
"sort"
|
||||
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
@@ -29,8 +30,8 @@ func findLeafInOrderedSequence(ms metaSequence, t Type, key Value, getValues get
|
||||
})
|
||||
}
|
||||
|
||||
if current := cursor.current().(metaTuple); current.ref != valueFromType(cs, leaf, leaf.Type()).Ref() {
|
||||
leaf = readMetaTupleValue(cursor.current(), cs)
|
||||
if current := cursor.current().(metaTuple); current.childRef != valueFromType(cs, leaf, leaf.Type()).Ref() {
|
||||
leaf = readMetaTupleValue(current, cs)
|
||||
}
|
||||
|
||||
if leafData := getValues(leaf); isSequenceOrderedByIndexedType(t) {
|
||||
@@ -47,3 +48,27 @@ func findLeafInOrderedSequence(ms metaSequence, t Type, key Value, getValues get
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func newOrderedMetaSequenceBoundaryChecker() boundaryChecker {
|
||||
return newBuzHashBoundaryChecker(orderedSequenceWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
|
||||
digest := item.(metaTuple).childRef.Digest()
|
||||
return digest[:]
|
||||
})
|
||||
}
|
||||
|
||||
func newOrderedMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
return func(items []sequenceItem) (sequenceItem, Value) {
|
||||
tuples := make(metaSequenceData, len(items))
|
||||
|
||||
for i, v := range items {
|
||||
mt := v.(metaTuple)
|
||||
tuples[i] = mt
|
||||
// Immediately write intermediate chunks. It would be better to defer writing any chunks until commit, see https://github.com/attic-labs/noms/issues/710.
|
||||
WriteValue(mt.child, cs)
|
||||
}
|
||||
|
||||
lastValue := tuples[len(tuples)-1].value
|
||||
meta := newMetaSequenceFromData(tuples, t, cs)
|
||||
return metaTuple{meta, meta.Ref(), lastValue}, meta
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ func NewTypedSet(cs chunks.ChunkStore, t Type, v ...Value) Set {
|
||||
}
|
||||
|
||||
func newTypedSet(cs chunks.ChunkStore, t Type, data ...Value) Set {
|
||||
seq := newEmptySequenceChunker(makeSetLeafChunkFn(t, cs), newSetMetaSequenceChunkFn(t, cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(makeSetLeafChunkFn(t, cs), newOrderedMetaSequenceChunkFn(t, cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
|
||||
for _, v := range data {
|
||||
seq.Append(v)
|
||||
@@ -55,7 +55,7 @@ func setUnion(set Set, cs chunks.ChunkStore, others []Set) Set {
|
||||
assertSetsSameType(set, others...)
|
||||
|
||||
tr := set.Type()
|
||||
seq := newEmptySequenceChunker(makeSetLeafChunkFn(tr, cs), newSetMetaSequenceChunkFn(tr, cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
seq := newEmptySequenceChunker(makeSetLeafChunkFn(tr, cs), newOrderedMetaSequenceChunkFn(tr, cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
|
||||
var lessFunction func(a, b sequenceItem) bool
|
||||
if isSequenceOrderedByIndexedType(tr) {
|
||||
|
||||
@@ -207,7 +207,6 @@ func makeSetLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
}
|
||||
|
||||
setLeaf := valueFromType(cs, newSetLeaf(cs, t, setData...), t)
|
||||
ref := WriteValue(setLeaf, cs)
|
||||
|
||||
var indexValue Value
|
||||
if len(setData) > 0 {
|
||||
@@ -219,7 +218,7 @@ func makeSetLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
}
|
||||
}
|
||||
|
||||
return metaTuple{ref, indexValue}, setLeaf
|
||||
return metaTuple{setLeaf, setLeaf.Ref(), indexValue}, setLeaf
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user