Don't calculate metaSequence chunk refs until necessary.

This saves a lot of work for the XML importer.
This commit is contained in:
Benjamin Kalman
2016-01-07 12:29:44 -08:00
parent f93bc427f9
commit e27980dbd3
13 changed files with 48 additions and 34 deletions

View File

@@ -5,6 +5,7 @@ import (
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/d"
"github.com/attic-labs/noms/ref"
)
const (
@@ -49,7 +50,7 @@ func newBlobLeafChunkFn(cs chunks.ChunkStore) makeChunkFn {
}
leaf := newBlobLeaf(buff)
return metaTuple{leaf, leaf.Ref(), Uint64(uint64(len(buff)))}, leaf
return metaTuple{leaf, ref.Ref{}, Uint64(uint64(len(buff)))}, leaf
}
}

View File

@@ -11,6 +11,7 @@ import (
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/stretchr/testify/assert"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/ref"
)
func getTestCompoundBlob(datas ...string) compoundBlob {
@@ -188,7 +189,7 @@ func TestCompoundBlobChunks(t *testing.T) {
bl1 := newBlobLeaf([]byte("hello"))
bl2 := newBlobLeaf([]byte("world"))
cb = newCompoundBlob([]metaTuple{{bl1, bl1.Ref(), Uint64(uint64(5))}, {bl2, bl2.Ref(), Uint64(uint64(10))}}, cs)
cb = newCompoundBlob([]metaTuple{{bl1, ref.Ref{}, Uint64(uint64(5))}, {bl2, ref.Ref{}, Uint64(uint64(10))}}, cs)
assert.Equal(2, len(cb.Chunks()))
}
@@ -214,13 +215,13 @@ func TestCompoundBlobSameChunksWithPrefix(t *testing.T) {
assert.Equal(cb2.Len(), cb1.Len()+uint64(6))
assert.Equal(2, len(cb1.tuples))
assert.Equal(2, len(cb2.tuples))
assert.NotEqual(cb1.tuples[0].childRef, cb2.tuples[0].childRef)
assert.Equal(cb1.tuples[1].childRef, cb2.tuples[1].childRef)
assert.NotEqual(cb1.tuples[0].ChildRef(), cb2.tuples[0].ChildRef())
assert.Equal(cb1.tuples[1].ChildRef(), cb2.tuples[1].ChildRef())
tuples1 := cb1.tuples[0].child.(compoundBlob).tuples
tuples2 := cb2.tuples[0].child.(compoundBlob).tuples
assert.NotEqual(tuples1[0].childRef, tuples2[0].childRef)
assert.Equal(tuples1[1].childRef, tuples2[1].childRef)
assert.NotEqual(tuples1[0].ChildRef(), tuples2[0].ChildRef())
assert.Equal(tuples1[1].ChildRef(), tuples2[1].ChildRef())
}
func TestCompoundBlobSameChunksWithSuffix(t *testing.T) {
@@ -245,14 +246,14 @@ func TestCompoundBlobSameChunksWithSuffix(t *testing.T) {
assert.Equal(cb2.Len(), cb1.Len()+uint64(6))
assert.Equal(2, len(cb1.tuples))
assert.Equal(len(cb1.tuples), len(cb2.tuples))
assert.Equal(cb1.tuples[0].childRef, cb2.tuples[0].childRef)
assert.NotEqual(cb1.tuples[1].childRef, cb2.tuples[1].childRef)
assert.Equal(cb1.tuples[0].ChildRef(), cb2.tuples[0].ChildRef())
assert.NotEqual(cb1.tuples[1].ChildRef(), cb2.tuples[1].ChildRef())
tuples1 := cb1.tuples[1].child.(compoundBlob).tuples
tuples2 := cb2.tuples[1].child.(compoundBlob).tuples
assert.Equal(tuples1[0].childRef, tuples2[0].childRef)
assert.Equal(tuples1[len(tuples1)-2].childRef, tuples2[len(tuples2)-2].childRef)
assert.NotEqual(tuples1[len(tuples1)-1].childRef, tuples2[len(tuples2)-1].childRef)
assert.Equal(tuples1[0].ChildRef(), tuples2[0].ChildRef())
assert.Equal(tuples1[len(tuples1)-2].ChildRef(), tuples2[len(tuples2)-2].ChildRef())
assert.NotEqual(tuples1[len(tuples1)-1].ChildRef(), tuples2[len(tuples2)-1].ChildRef())
}
func printBlob(b Blob, indent int) {
@@ -264,7 +265,7 @@ func printBlob(b Blob, indent int) {
fmt.Printf("%scompoundBlob, len: %d, chunks: %d\n", indentString, b.Len(), len(b.tuples))
indent++
for _, t := range b.tuples {
printBlob(ReadValue(t.childRef, b.cs).(Blob), indent)
printBlob(ReadValue(t.ChildRef(), b.cs).(Blob), indent)
}
}
}

View File

@@ -67,7 +67,7 @@ func (cl compoundList) cursorAt(idx uint64) (*sequenceCursor, listLeaf, uint64)
return idx < offset, offset
}, uint64(0))
if current := cursor.current().(metaTuple); current.childRef != valueFromType(cl.cs, leaf, leaf.Type()).Ref() {
if current := cursor.current().(metaTuple); current.ChildRef() != valueFromType(cl.cs, leaf, leaf.Type()).Ref() {
leaf = readMetaTupleValue(current, cl.cs)
}
@@ -263,6 +263,6 @@ func makeListLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
}
list := valueFromType(cs, newListLeaf(cs, t, values...), t)
return metaTuple{list, list.Ref(), Uint64(len(values))}, list
return metaTuple{list, ref.Ref{}, Uint64(len(values))}, list
}
}

View File

@@ -132,7 +132,7 @@ func TestReadCompoundList(t *testing.T) {
tr := MakeCompoundType(ListKind, MakePrimitiveType(Int32Kind))
leaf1 := newListLeaf(cs, tr, Int32(0))
leaf2 := newListLeaf(cs, tr, Int32(1), Int32(2), Int32(3))
l2 := buildCompoundList([]metaTuple{{leaf1, leaf1.Ref(), Uint64(1)}, {leaf2, leaf2.Ref(), Uint64(4)}}, tr, cs)
l2 := buildCompoundList([]metaTuple{{leaf1, ref.Ref{}, Uint64(1)}, {leaf2, ref.Ref{}, Uint64(4)}}, tr, cs)
a := parseJson(`[%d, %d, true, ["%s", "1", "%s", "4"]]`, ListKind, Int32Kind, leaf1.Ref(), leaf2.Ref())
r := newJsonArrayReader(a, cs)

View File

@@ -103,7 +103,7 @@ func (w *jsonArrayWriter) maybeWriteMetaSequence(v Value, tr Type, pkg *Package)
w2 := newJsonArrayWriter(w.cs)
indexType := indexTypeForMetaSequence(tr)
for _, tuple := range ms.(metaSequence).data() {
w2.writeRef(tuple.childRef)
w2.writeRef(tuple.ChildRef())
w2.writeValue(tuple.value, indexType, pkg)
}
w.write(w2.toArray())

View File

@@ -328,7 +328,7 @@ func TestWriteCompoundList(t *testing.T) {
ltr := MakeCompoundType(ListKind, MakePrimitiveType(Int32Kind))
leaf1 := newListLeaf(cs, ltr, Int32(0))
leaf2 := newListLeaf(cs, ltr, Int32(1), Int32(2), Int32(3))
cl := buildCompoundList([]metaTuple{{leaf1, leaf1.Ref(), Uint64(1)}, {leaf2, leaf2.Ref(), Uint64(4)}}, ltr, cs)
cl := buildCompoundList([]metaTuple{{leaf1, ref.Ref{}, Uint64(1)}, {leaf2, ref.Ref{}, Uint64(4)}}, ltr, cs)
w := newJsonArrayWriter(cs)
w.writeTopLevelValue(cl)

View File

@@ -6,6 +6,7 @@ import (
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/stretchr/testify/assert"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/ref"
)
func TestValueEquals(t *testing.T) {
@@ -61,7 +62,7 @@ func TestValueEquals(t *testing.T) {
ms := chunks.NewMemoryStore()
b1 := NewBlob(bytes.NewBufferString("hi"), ms)
b2 := NewBlob(bytes.NewBufferString("bye"), ms)
return newCompoundBlob([]metaTuple{{b1, b1.Ref(), Uint64(uint64(2))}, {b2, b2.Ref(), Uint64(uint64(5))}}, ms)
return newCompoundBlob([]metaTuple{{b1, ref.Ref{}, Uint64(uint64(2))}, {b2, ref.Ref{}, Uint64(uint64(5))}}, ms)
},
func() Value { return NewList(cs) },
func() Value { return NewList(cs, NewString("foo")) },

View File

@@ -44,16 +44,16 @@ func TestEnsureRef(t *testing.T) {
}()
bl := newBlobLeaf([]byte("hi"))
cb := newCompoundBlob([]metaTuple{{bl, bl.Ref(), Uint64(2)}}, cs)
cb := newCompoundBlob([]metaTuple{{bl, ref.Ref{}, Uint64(2)}}, cs)
ll := newListLeaf(cs, listType, NewString("foo"))
cl := buildCompoundList([]metaTuple{{ll, ll.Ref(), Uint64(1)}}, listType, cs)
cl := buildCompoundList([]metaTuple{{ll, ref.Ref{}, Uint64(1)}}, listType, cs)
ml := newMapLeaf(cs, mapType, mapEntry{NewString("foo"), NewString("bar")})
cm := buildCompoundMap([]metaTuple{{ml, ml.Ref(), NewString("foo")}}, mapType, cs)
cm := buildCompoundMap([]metaTuple{{ml, ref.Ref{}, NewString("foo")}}, mapType, cs)
sl := newSetLeaf(cs, setType, NewString("foo"))
cps := buildCompoundSet([]metaTuple{{sl, sl.Ref(), NewString("foo")}}, setType, cs)
cps := buildCompoundSet([]metaTuple{{sl, ref.Ref{}, NewString("foo")}}, setType, cs)
count = byte(1)
values := []Value{

View File

@@ -4,11 +4,12 @@ import (
"crypto/sha1"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/ref"
)
func newIndexedMetaSequenceBoundaryChecker() boundaryChecker {
return newBuzHashBoundaryChecker(objectWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
digest := item.(metaTuple).childRef.Digest()
digest := item.(metaTuple).ChildRef().Digest()
return digest[:]
})
}
@@ -24,6 +25,6 @@ func newIndexedMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
}
meta := newMetaSequenceFromData(tuples, t, cs)
return metaTuple{meta, meta.Ref(), Uint64(tuples.uint64ValuesSum())}, meta
return metaTuple{meta, ref.Ref{}, Uint64(tuples.uint64ValuesSum())}, meta
}
}

View File

@@ -255,6 +255,6 @@ func makeMapLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
}
}
return metaTuple{mapLeaf, mapLeaf.Ref(), indexValue}, mapLeaf
return metaTuple{mapLeaf, ref.Ref{}, indexValue}, mapLeaf
}
}

View File

@@ -23,11 +23,20 @@ type metaSequence interface {
// metaTuple is a node in a "probably" tree, consisting of data in the node (either tree leaves or other metaSequences), and a Value annotation for exploring the tree (e.g. the largest item if this is an ordered sequence).
type metaTuple struct {
child Value // may be nil if the child data hasn't been read yet
childRef ref.Ref
child Value // nil if the child data hasn't been read
childRef ref.Ref // may be empty if |child| is non-nil; call ChildRef() instead of accessing |childRef| directly
value Value
}
// ChildRef returns the ref of this tuple's child. When |child| is non-nil the
// ref is obtained from child.Ref(), so |childRef| does not need to have been
// populated. Otherwise the stored |childRef| is returned, after checking with
// d.Chk.False that it is not empty.
func (mt metaTuple) ChildRef() ref.Ref {
	if mt.child != nil {
		return mt.child.Ref()
	}
	// No child value is held, so the stored ref is the only source and must be set.
	d.Chk.False(mt.childRef.IsEmpty())
	return mt.childRef
}
// uint64Value returns |value| converted to a plain uint64. |value| is
// type-asserted to Uint64, so the call panics if it holds any other type.
func (mt metaTuple) uint64Value() uint64 {
	n := mt.value.(Uint64)
	return uint64(n)
}
@@ -71,14 +80,14 @@ func (ms metaSequenceObject) ChildValues() []Value {
refOfLeafType := MakeCompoundType(RefKind, leafType)
res := make([]Value, len(ms.tuples))
for i, t := range ms.tuples {
res[i] = refFromType(t.childRef, refOfLeafType)
res[i] = refFromType(t.ChildRef(), refOfLeafType)
}
return res
}
func (ms metaSequenceObject) Chunks() (chunks []ref.Ref) {
for _, tuple := range ms.tuples {
chunks = append(chunks, tuple.childRef)
chunks = append(chunks, tuple.ChildRef())
}
return
}
@@ -135,6 +144,7 @@ func newMetaSequenceCursor(root metaSequence, cs chunks.ChunkStore) (*sequenceCu
func readMetaTupleValue(item sequenceItem, cs chunks.ChunkStore) Value {
mt := item.(metaTuple)
if mt.child == nil {
d.Chk.False(mt.childRef.IsEmpty())
mt.child = ReadValue(mt.childRef, cs)
d.Chk.NotNil(mt.child)
}

View File

@@ -5,6 +5,7 @@ import (
"sort"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/ref"
)
func isSequenceOrderedByIndexedType(t Type) bool {
@@ -30,7 +31,7 @@ func findLeafInOrderedSequence(ms metaSequence, t Type, key Value, getValues get
})
}
if current := cursor.current().(metaTuple); current.childRef != valueFromType(cs, leaf, leaf.Type()).Ref() {
if current := cursor.current().(metaTuple); current.ChildRef() != valueFromType(cs, leaf, leaf.Type()).Ref() {
leaf = readMetaTupleValue(current, cs)
}
@@ -51,7 +52,7 @@ func findLeafInOrderedSequence(ms metaSequence, t Type, key Value, getValues get
func newOrderedMetaSequenceBoundaryChecker() boundaryChecker {
return newBuzHashBoundaryChecker(orderedSequenceWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
digest := item.(metaTuple).childRef.Digest()
digest := item.(metaTuple).ChildRef().Digest()
return digest[:]
})
}
@@ -67,8 +68,7 @@ func newOrderedMetaSequenceChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
WriteValue(mt.child, cs)
}
lastValue := tuples[len(tuples)-1].value
meta := newMetaSequenceFromData(tuples, t, cs)
return metaTuple{meta, meta.Ref(), lastValue}, meta
return metaTuple{meta, ref.Ref{}, tuples.last().value}, meta
}
}

View File

@@ -218,7 +218,7 @@ func makeSetLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
}
}
return metaTuple{setLeaf, setLeaf.Ref(), indexValue}, setLeaf
return metaTuple{setLeaf, ref.Ref{}, indexValue}, setLeaf
}
}