From 40b28f94e528df4721772bf99e63d2d7ed7471c2 Mon Sep 17 00:00:00 2001 From: zcstarr Date: Fri, 30 Sep 2016 16:53:00 -0700 Subject: [PATCH] Refactor Chunks and ChildValues API to work iteratively (#2599) * Refactors Chunks and ChildValues API to be iterative change also exposes WalkValues which replaces SomeP and AllP --- go/datas/database_common.go | 10 +- go/datas/pull.go | 13 +- go/types/blob.go | 7 +- go/types/blob_leaf_sequence.go | 3 +- go/types/bool.go | 6 +- go/types/collection_test.go | 6 +- go/types/encoding_test.go | 10 +- go/types/list.go | 10 +- go/types/list_leaf_sequence.go | 5 +- go/types/map.go | 9 +- go/types/map_leaf_sequence.go | 7 +- go/types/map_test.go | 6 +- go/types/meta_sequence.go | 11 +- go/types/number.go | 6 +- go/types/ref.go | 17 +- go/types/ref_test.go | 4 +- go/types/sequence.go | 2 +- go/types/sequence_cursor_test.go | 4 +- go/types/set.go | 9 +- go/types/set_leaf_sequence.go | 5 +- go/types/set_test.go | 8 +- go/types/string.go | 6 +- go/types/struct.go | 12 +- go/types/struct_test.go | 11 +- go/types/type.go | 8 +- go/types/value.go | 13 +- go/types/value_store.go | 7 +- go/walk/walk.go | 260 ++--------------------------- go/walk/walk_test.go | 120 ++++++------- samples/go/nomdex/nomdex_update.go | 5 +- samples/go/photo-index/main.go | 5 +- 31 files changed, 193 insertions(+), 412 deletions(-) diff --git a/go/datas/database_common.go b/go/datas/database_common.go index 635c462a32..288ed6c9bf 100644 --- a/go/datas/database_common.go +++ b/go/datas/database_common.go @@ -193,6 +193,14 @@ func (dbc *databaseCommon) validateRefAsCommit(r types.Ref) types.Struct { return v.(types.Struct) } +func getNumValues(v types.Value) (count int) { + count = 0 + v.WalkValues(func(v types.Value) { + count++ + }) + return +} + func buildNewCommit(ds Dataset, v types.Value, opts CommitOptions) types.Struct { parents := opts.Parents if (parents == types.Set{}) { @@ -205,7 +213,7 @@ func buildNewCommit(ds Dataset, v types.Value, opts CommitOptions) types.Struct meta := opts.Meta // Ideally, would like to do 'if meta == types.Struct{}' but types.Struct is not comparable in Go // since it contains a slice. - if meta.Type() == nil && len(meta.ChildValues()) == 0 { + if meta.Type() == nil && getNumValues(meta) == 0 { meta = types.EmptyStruct } return NewCommit(v, parents, meta) diff --git a/go/datas/pull.go b/go/datas/pull.go index 86e4cb20c9..548432a081 100644 --- a/go/datas/pull.go +++ b/go/datas/pull.go @@ -237,6 +237,13 @@ func sendWork(ch chan<- types.Ref, refs types.RefSlice) { type hintCache map[hash.Hash]hash.Hash +func getChunks(v types.Value) (chunks []types.Ref) { + v.WalkRefs(func(ref types.Ref) { + chunks = append(chunks, ref) + }) + return +} + func traverseSource(srcRef types.Ref, srcDB, sinkDB Database, estimateBytesWritten bool) traverseSourceResult { h := srcRef.TargetHash() if !sinkDB.has(h) { @@ -251,7 +258,7 @@ func traverseSource(srcRef types.Ref, srcDB, sinkDB Database, estimateBytesWritt // write size is implementation specific. bytesWritten = len(snappy.Encode(nil, c.Data())) } - ts := traverseSourceResult{traverseResult{h, v.Chunks(), len(c.Data())}, bytesWritten} + ts := traverseSourceResult{traverseResult{h, getChunks(v), len(c.Data())}, bytesWritten} return ts } return traverseSourceResult{} @@ -259,7 +266,7 @@ func traverseSource(srcRef types.Ref, srcDB, sinkDB Database, estimateBytesWritt func traverseSink(sinkRef types.Ref, db Database) traverseResult { if sinkRef.Height() > 1 { - return traverseResult{sinkRef.TargetHash(), sinkRef.TargetValue(db).Chunks(), 0} + return traverseResult{sinkRef.TargetHash(), getChunks(sinkRef.TargetValue(db)), 0} } return traverseResult{} } @@ -272,7 +279,7 @@ func traverseCommon(comRef, sinkHead types.Ref, db Database) traverseResult { if comRef.Equals(sinkHead) { exclusionSet = commit.Get(ParentsField).(types.Set) } - chunks := types.RefSlice(commit.Chunks()) + chunks := types.RefSlice(getChunks(commit)) for i := 0; i < len(chunks); { if exclusionSet.Has(chunks[i]) { end := len(chunks) - 1 diff --git a/go/types/blob.go b/go/types/blob.go index 3ff3b2a390..b1f1c1d68b 100644 --- a/go/types/blob.go +++ b/go/types/blob.go @@ -103,12 +103,11 @@ func (b Blob) Hash() hash.Hash { return *b.h } -func (b Blob) ChildValues() []Value { - return []Value{} +func (b Blob) WalkValues(cb ValueCallback) { } -func (b Blob) Chunks() []Ref { - return b.seq.Chunks() +func (b Blob) WalkRefs(cb RefCallback) { + b.seq.WalkRefs(cb) } func (b Blob) Type() *Type { diff --git a/go/types/blob_leaf_sequence.go b/go/types/blob_leaf_sequence.go index f8a13b2a6e..bde256621d 100644 --- a/go/types/blob_leaf_sequence.go +++ b/go/types/blob_leaf_sequence.go @@ -26,6 +26,5 @@ func (bl blobLeafSequence) getItem(idx int) sequenceItem { return bl.data[idx] } -func (bl blobLeafSequence) Chunks() []Ref { - return []Ref{} +func (bl blobLeafSequence) WalkRefs(cb RefCallback) { } diff --git a/go/types/bool.go b/go/types/bool.go index d71f258245..1e10dd9934 100644 --- a/go/types/bool.go +++ b/go/types/bool.go @@ -27,12 +27,10 @@ func (v Bool) Hash() hash.Hash { return getHash(v) } -func (v Bool) ChildValues() []Value { - return nil +func (v Bool) WalkValues(cb ValueCallback) { } -func (v Bool) Chunks() []Ref { - return nil +func (v Bool) WalkRefs(cb RefCallback) { } func (v Bool) Type() *Type { diff --git a/go/types/collection_test.go b/go/types/collection_test.go index 4f955ef08d..2e732b75a9 100644 --- a/go/types/collection_test.go +++ b/go/types/collection_test.go @@ -43,7 +43,7 @@ func (suite *collectionTestSuite) TestEquals() { } func (suite *collectionTestSuite) TestChunkCountAndType() { - chunks := suite.col.Chunks() + chunks := getChunks(suite.col) suite.Equal(suite.expectChunkCount, len(chunks)) refType := MakeRefType(suite.expectType) for _, r := range chunks { @@ -62,12 +62,12 @@ func (suite *collectionTestSuite) TestRoundTripAndValidate() { func (suite *collectionTestSuite) TestPrependChunkDiff() { v2 := suite.prependOne() - suite.Equal(suite.expectPrependChunkDiff, chunkDiffCount(suite.col.Chunks(), v2.Chunks())) + suite.Equal(suite.expectPrependChunkDiff, chunkDiffCount(getChunks(suite.col), getChunks(v2))) } func (suite *collectionTestSuite) TestAppendChunkDiff() { v2 := suite.appendOne() - suite.Equal(suite.expectAppendChunkDiff, chunkDiffCount(suite.col.Chunks(), v2.Chunks())) + suite.Equal(suite.expectAppendChunkDiff, chunkDiffCount(getChunks(suite.col), getChunks(v2))) } func deriveCollectionHeight(c Collection) uint64 { diff --git a/go/types/encoding_test.go b/go/types/encoding_test.go index e2b081b238..ac65098e13 100644 --- a/go/types/encoding_test.go +++ b/go/types/encoding_test.go @@ -589,11 +589,11 @@ func TestWriteEmptyUnionList(t *testing.T) { type bogusType int -func (bg bogusType) Equals(other Value) bool { return false } -func (bg bogusType) Less(other Value) bool { return false } -func (bg bogusType) Hash() hash.Hash { return hash.Hash{} } -func (bg bogusType) ChildValues() []Value { return ValueSlice{} } -func (bg bogusType) Chunks() []Ref { return RefSlice{} } +func (bg bogusType) Equals(other Value) bool { return false } +func (bg bogusType) Less(other Value) bool { return false } +func (bg bogusType) Hash() hash.Hash { return hash.Hash{} } +func (bg bogusType) WalkValues(cb ValueCallback) {} +func (bg bogusType) WalkRefs(cb RefCallback) {} func (bg bogusType) Type() *Type { return MakeCycleType(0) } diff --git a/go/types/list.go b/go/types/list.go index 2fabe61c51..847acc5596 100644 --- a/go/types/list.go +++ b/go/types/list.go @@ -91,16 +91,14 @@ func (l List) Hash() hash.Hash { return *l.h } -func (l List) ChildValues() []Value { - values := make([]Value, l.Len()) +func (l List) WalkValues(cb ValueCallback) { l.IterAll(func(v Value, idx uint64) { - values[idx] = v + cb(v) }) - return values } -func (l List) Chunks() []Ref { - return l.seq.Chunks() +func (l List) WalkRefs(cb RefCallback) { + l.seq.WalkRefs(cb) } func (l List) Type() *Type { diff --git a/go/types/list_leaf_sequence.go b/go/types/list_leaf_sequence.go index 280c1ec507..111bd3ca5f 100644 --- a/go/types/list_leaf_sequence.go +++ b/go/types/list_leaf_sequence.go @@ -31,9 +31,8 @@ func (ll listLeafSequence) getItem(idx int) sequenceItem { return ll.values[idx] } -func (ll listLeafSequence) Chunks() (chunks []Ref) { +func (ll listLeafSequence) WalkRefs(cb RefCallback) { for _, v := range ll.values { - chunks = append(chunks, v.Chunks()...) + v.WalkRefs(cb) } - return } diff --git a/go/types/map.go b/go/types/map.go index 6592f79edc..5307f3fa13 100644 --- a/go/types/map.go +++ b/go/types/map.go @@ -107,15 +107,16 @@ func (m Map) Hash() hash.Hash { return *m.h } -func (m Map) ChildValues() (values []Value) { +func (m Map) WalkValues(cb ValueCallback) { m.IterAll(func(k, v Value) { - values = append(values, k, v) + cb(k) + cb(v) }) return } -func (m Map) Chunks() []Ref { - return m.seq.Chunks() +func (m Map) WalkRefs(cb RefCallback) { + m.seq.WalkRefs(cb) } func (m Map) Type() *Type { diff --git a/go/types/map_leaf_sequence.go b/go/types/map_leaf_sequence.go index f6303d8b58..7349406438 100644 --- a/go/types/map_leaf_sequence.go +++ b/go/types/map_leaf_sequence.go @@ -50,12 +50,11 @@ func (ml mapLeafSequence) getItem(idx int) sequenceItem { return ml.data[idx] } -func (ml mapLeafSequence) Chunks() (chunks []Ref) { +func (ml mapLeafSequence) WalkRefs(cb RefCallback) { for _, entry := range ml.data { - chunks = append(chunks, entry.key.Chunks()...) - chunks = append(chunks, entry.value.Chunks()...) + entry.key.WalkRefs(cb) + entry.value.WalkRefs(cb) } - return } func (ml mapLeafSequence) getCompareFn(other sequence) compareFn { diff --git a/go/types/map_test.go b/go/types/map_test.go index 0cdd4657da..d018126b31 100644 --- a/go/types/map_test.go +++ b/go/types/map_test.go @@ -1046,15 +1046,15 @@ func TestMapChunks(t *testing.T) { assert := assert.New(t) l1 := NewMap(Number(0), Number(1)) - c1 := l1.Chunks() + c1 := getChunks(l1) assert.Len(c1, 0) l2 := NewMap(NewRef(Number(0)), Number(1)) - c2 := l2.Chunks() + c2 := getChunks(l2) assert.Len(c2, 1) l3 := NewMap(Number(0), NewRef(Number(1))) - c3 := l3.Chunks() + c3 := getChunks(l3) assert.Len(c3, 1) } diff --git a/go/types/meta_sequence.go b/go/types/meta_sequence.go index 6f559bb23a..1bd4127b38 100644 --- a/go/types/meta_sequence.go +++ b/go/types/meta_sequence.go @@ -126,12 +126,10 @@ func (ms metaSequence) valueReader() ValueReader { return ms.vr } -func (ms metaSequence) Chunks() []Ref { - chunks := make([]Ref, len(ms.tuples)) - for i, tuple := range ms.tuples { - chunks[i] = tuple.ref +func (ms metaSequence) WalkRefs(cb RefCallback) { + for _, tuple := range ms.tuples { + cb(tuple.ref) } - return chunks } func (ms metaSequence) Type() *Type { @@ -263,8 +261,7 @@ func (es emptySequence) valueReader() ValueReader { return nil } -func (es emptySequence) Chunks() (chunks []Ref) { - return +func (es emptySequence) WalkRefs(cb RefCallback) { } func (es emptySequence) Type() *Type { diff --git a/go/types/number.go b/go/types/number.go index 29747528bf..88e89d5e29 100644 --- a/go/types/number.go +++ b/go/types/number.go @@ -27,12 +27,10 @@ func (v Number) Hash() hash.Hash { return getHash(v) } -func (v Number) ChildValues() []Value { - return nil +func (v Number) WalkValues(cb ValueCallback) { } -func (v Number) Chunks() []Ref { - return nil +func (v Number) WalkRefs(cb RefCallback) { } func (v Number) Type() *Type { diff --git a/go/types/ref.go b/go/types/ref.go index 0fab84dd4f..9b707d9b0a 100644 --- a/go/types/ref.go +++ b/go/types/ref.go @@ -28,13 +28,11 @@ func constructRef(t *Type, target hash.Hash, height uint64) Ref { } func maxChunkHeight(v Value) (max uint64) { - if chunks := v.Chunks(); chunks != nil { - for _, r := range chunks { - if height := r.Height(); height > max { - max = height - } + v.WalkRefs(func(r Ref) { + if height := r.Height(); height > max { + max = height } - } + }) return } @@ -67,12 +65,11 @@ func (r Ref) Hash() hash.Hash { return *r.h } -func (r Ref) ChildValues() []Value { - return nil +func (r Ref) WalkValues(cb ValueCallback) { } -func (r Ref) Chunks() (chunks []Ref) { - return append(chunks, r) +func (r Ref) WalkRefs(cb RefCallback) { + cb(r) } func (r Ref) Type() *Type { diff --git a/go/types/ref_test.go b/go/types/ref_test.go index a788d075e4..b16bccb767 100644 --- a/go/types/ref_test.go +++ b/go/types/ref_test.go @@ -48,6 +48,6 @@ func TestRefChunks(t *testing.T) { l := NewList() r := NewRef(l) - assert.Len(r.Chunks(), 1) - assert.Equal(r, r.Chunks()[0]) + assert.Len(getChunks(r), 1) + assert.Equal(r, getChunks(r)[0]) } diff --git a/go/types/sequence.go b/go/types/sequence.go index 9df13d5e2c..0f0ad00318 100644 --- a/go/types/sequence.go +++ b/go/types/sequence.go @@ -13,7 +13,7 @@ type sequence interface { seqLen() int numLeaves() uint64 valueReader() ValueReader - Chunks() []Ref + WalkRefs(cb RefCallback) Type() *Type getCompareFn(other sequence) compareFn getChildSequence(idx int) sequence diff --git a/go/types/sequence_cursor_test.go b/go/types/sequence_cursor_test.go index 1d8fdbd60a..04fca623ac 100644 --- a/go/types/sequence_cursor_test.go +++ b/go/types/sequence_cursor_test.go @@ -57,11 +57,11 @@ func (ts testSequence) Hash() hash.Hash { panic("not reached") } -func (ts testSequence) ChildValues() []Value { +func (ts testSequence) WalkValues(cb ValueCallback) { panic("not reached") } -func (ts testSequence) Chunks() []Ref { +func (ts testSequence) WalkRefs(cb RefCallback) { panic("not reached") } diff --git a/go/types/set.go b/go/types/set.go index 46f1987a52..9652cede64 100644 --- a/go/types/set.go +++ b/go/types/set.go @@ -92,15 +92,14 @@ func (s Set) Hash() hash.Hash { return *s.h } -func (s Set) ChildValues() (values []Value) { +func (s Set) WalkValues(cb ValueCallback) { s.IterAll(func(v Value) { - values = append(values, v) + cb(v) }) - return } -func (s Set) Chunks() []Ref { - return s.seq.Chunks() +func (s Set) WalkRefs(cb RefCallback) { + s.seq.WalkRefs(cb) } func (s Set) Type() *Type { diff --git a/go/types/set_leaf_sequence.go b/go/types/set_leaf_sequence.go index ffd83a2227..de4994e8af 100644 --- a/go/types/set_leaf_sequence.go +++ b/go/types/set_leaf_sequence.go @@ -24,11 +24,10 @@ func (sl setLeafSequence) getItem(idx int) sequenceItem { return sl.data[idx] } -func (sl setLeafSequence) Chunks() (chunks []Ref) { +func (sl setLeafSequence) WalkRefs(cb RefCallback) { for _, v := range sl.data { - chunks = append(chunks, v.Chunks()...) + v.WalkRefs(cb) } - return } func (sl setLeafSequence) getCompareFn(other sequence) compareFn { diff --git a/go/types/set_test.go b/go/types/set_test.go index 82d6f53b96..16bc65d3b6 100644 --- a/go/types/set_test.go +++ b/go/types/set_test.go @@ -817,11 +817,11 @@ func TestSetChunks(t *testing.T) { assert := assert.New(t) l1 := NewSet(Number(0)) - c1 := l1.Chunks() + c1 := getChunks(l1) assert.Len(c1, 0) l2 := NewSet(NewRef(Number(0))) - c2 := l2.Chunks() + c2 := getChunks(l2) assert.Len(c2, 1) } @@ -834,8 +834,8 @@ func TestSetChunks2(t *testing.T) { vs := NewTestValueStore() doTest := func(ts testSet) { set := ts.toSet() - set2chunks := vs.ReadValue(vs.WriteValue(set).TargetHash()).Chunks() - for i, r := range set.Chunks() { + set2chunks := getChunks(vs.ReadValue(vs.WriteValue(set).TargetHash())) + for i, r := range getChunks(set) { assert.True(r.Type().Equals(set2chunks[i].Type()), "%s != %s", r.Type().Describe(), set2chunks[i].Type().Describe()) } } diff --git a/go/types/string.go b/go/types/string.go index 25ed1f8a3e..34540546da 100644 --- a/go/types/string.go +++ b/go/types/string.go @@ -25,12 +25,10 @@ func (s String) Hash() hash.Hash { return getHash(s) } -func (s String) ChildValues() []Value { - return nil +func (s String) WalkValues(cb ValueCallback) { } -func (s String) Chunks() []Ref { - return nil +func (s String) WalkRefs(cb RefCallback) { } func (s String) Type() *Type { diff --git a/go/types/struct.go b/go/types/struct.go index 179b179fe9..5780c1e1c0 100644 --- a/go/types/struct.go +++ b/go/types/struct.go @@ -76,17 +76,17 @@ func (s Struct) Hash() hash.Hash { return *s.h } -func (s Struct) ChildValues() []Value { - return s.values +func (s Struct) WalkValues(cb ValueCallback) { + for _, v := range s.values { + cb(v) + } } -func (s Struct) Chunks() (chunks []Ref) { - chunks = append(chunks, s.t.Chunks()...) +func (s Struct) WalkRefs(cb RefCallback) { for _, v := range s.values { - chunks = append(chunks, v.Chunks()...) + v.WalkRefs(cb) } - return } func (s Struct) Type() *Type { diff --git a/go/types/struct_test.go b/go/types/struct_test.go index d33419a682..52bc3905d3 100644 --- a/go/types/struct_test.go +++ b/go/types/struct_test.go @@ -10,6 +10,13 @@ import ( "github.com/attic-labs/testify/assert" ) +func getChunks(v Value) (chunks []Ref) { + v.WalkRefs(func(r Ref) { + chunks = append(chunks, r) + }) + return +} + func TestGenericStructEquals(t *testing.T) { assert := assert.New(t) @@ -34,8 +41,8 @@ func TestGenericStructChunks(t *testing.T) { s1 := NewStructWithType(typ, ValueSlice{NewRef(b)}) - assert.Len(s1.Chunks(), 1) - assert.Equal(b.Hash(), s1.Chunks()[0].TargetHash()) + assert.Len(getChunks(s1), 1) + assert.Equal(b.Hash(), getChunks(s1)[0].TargetHash()) } func TestGenericStructNew(t *testing.T) { diff --git a/go/types/type.go b/go/types/type.go index e577c31d7f..446b7a6305 100644 --- a/go/types/type.go +++ b/go/types/type.go @@ -83,15 +83,15 @@ func (t *Type) Hash() hash.Hash { return *t.h } -func (t *Type) ChildValues() (res []Value) { +func (t *Type) WalkValues(cb ValueCallback) { switch desc := t.Desc.(type) { case CompoundDesc: for _, t := range desc.ElemTypes { - res = append(res, t) + cb(t) } case StructDesc: desc.IterFields(func(name string, t *Type) { - res = append(res, t) + cb(t) }) case PrimitiveDesc: // Nothing, these have no child values @@ -101,7 +101,7 @@ func (t *Type) ChildValues() (res []Value) { return } -func (t *Type) Chunks() (chunks []Ref) { +func (t *Type) WalkRefs(cb RefCallback) { return } diff --git a/go/types/value.go b/go/types/value.go index 310652db7f..4f42f1f8f6 100644 --- a/go/types/value.go +++ b/go/types/value.go @@ -8,6 +8,9 @@ import ( "github.com/attic-labs/noms/go/hash" ) +type ValueCallback func(v Value) +type RefCallback func(ref Ref) + // Value is the interface all Noms values implement. type Value interface { // Equals determines if two different Noms values represents the same underlying value. @@ -24,13 +27,13 @@ type Value interface { // same hash they must be equal. Hash() hash.Hash - // ChildValues returns the immediate children of this value in the DAG, if any, not including - // Type(). - ChildValues() []Value + // WalkValues iterates over the immediate children of this value in the DAG, if any, not including + // Type() + WalkValues(ValueCallback) - // Chunks returns the refs to the underlying chunks. If this value is a collection that has been + // WalkRefs iterates over the refs to the underlying chunks. If this value is a collection that has been // chunked then this will return the refs of th sub trees of the prolly-tree. - Chunks() []Ref + WalkRefs(RefCallback) // Type returns the type of the Noms value. All Noms values carry their runtime Noms type. Type() *Type diff --git a/go/types/value_store.go b/go/types/value_store.go index 3d41673442..8ddfebcd38 100644 --- a/go/types/value_store.go +++ b/go/types/value_store.go @@ -138,12 +138,12 @@ func (lvs *ValueStore) Close() error { // cacheChunks looks at the Chunks reachable from v and, for each one checks if there's a hint in the cache. If there isn't, or if the hint is a self-reference, the chunk gets r set as its new hint. func (lvs *ValueStore) cacheChunks(v Value, r hash.Hash) { - for _, reachable := range v.Chunks() { + v.WalkRefs(func(reachable Ref) { hash := reachable.TargetHash() if cur := lvs.check(hash); cur == nil || cur.Hint().IsEmpty() || cur.Hint() == hash { lvs.set(hash, hintedChunk{getTargetType(reachable), r}) } - } + }) } func (lvs *ValueStore) isPresent(r hash.Hash) (present bool) { @@ -188,7 +188,7 @@ func (lvs *ValueStore) opCache() opCache { func (lvs *ValueStore) checkChunksInCache(v Value, readValues bool) Hints { hints := map[hash.Hash]struct{}{} - for _, reachable := range v.Chunks() { + collectHints := func(reachable Ref) { // First, check the type cache to see if reachable is already known to be valid. targetHash := reachable.TargetHash() entry := lvs.check(targetHash) @@ -212,6 +212,7 @@ func (lvs *ValueStore) checkChunksInCache(v Value, readValues bool) Hints { targetType := getTargetType(reachable) d.PanicIfTrue(!entry.Type().Equals(targetType), "Value to write contains ref %s, which points to a value of a different type: %+v != %+v", reachable.TargetHash(), entry.Type(), targetType) } + v.WalkRefs(collectHints) return hints } diff --git a/go/walk/walk.go b/go/walk/walk.go index c9a0648837..30ed1a975c 100644 --- a/go/walk/walk.go +++ b/go/walk/walk.go @@ -6,272 +6,46 @@ package walk import ( - "fmt" - "sync" - - "github.com/attic-labs/noms/go/chunks" "github.com/attic-labs/noms/go/d" "github.com/attic-labs/noms/go/hash" "github.com/attic-labs/noms/go/types" ) -// SomeCallback takes a types.Value and returns a bool indicating whether the current walk should skip the tree descending from value. If |v| is a top-level value in a Chunk, then |r| will be the Ref which referenced it (otherwise |r| is nil). -type SomeCallback func(v types.Value, r *types.Ref) bool - -// AllCallback takes a types.Value and processes it. If |v| is a top-level value in a Chunk, then |r| will be the Ref which referenced it (otherwise |r| is nil). -type AllCallback func(v types.Value, r *types.Ref) - -// SomeP recursively walks over all types.Values reachable from r and calls cb on them. If cb ever returns true, the walk will stop recursing on the current ref. If |concurrency| > 1, it is the callers responsibility to make ensure that |cb| is threadsafe. -func SomeP(v types.Value, vr types.ValueReader, cb SomeCallback, concurrency int) { - doTreeWalkP(v, vr, cb, concurrency) -} - -// AllP recursively walks over all types.Values reachable from r and calls cb on them. If |concurrency| > 1, it is the callers responsibility to make ensure that |cb| is threadsafe. -func AllP(v types.Value, vr types.ValueReader, cb AllCallback, concurrency int) { - doTreeWalkP(v, vr, func(v types.Value, r *types.Ref) (skip bool) { - cb(v, r) - return - }, concurrency) -} - -func doTreeWalkP(v types.Value, vr types.ValueReader, cb SomeCallback, concurrency int) { - rq := newRefQueue() - f := newFailure() +type SkipValueCallback func(v types.Value) bool +// WalkValues recursively walks over all types. Values reachable from r and calls cb on them. +func WalkValues(target types.Value, vr types.ValueReader, cb SkipValueCallback) { + var processRef func(r types.Ref) + var processVal func(v types.Value) visited := map[hash.Hash]bool{} - mu := sync.Mutex{} - wg := sync.WaitGroup{} - var processVal func(v types.Value, r *types.Ref) - processVal = func(v types.Value, r *types.Ref) { - if cb(v, r) { + processVal = func(v types.Value) { + if cb(v) { return } - if sr, ok := v.(types.Ref); ok { - wg.Add(1) - rq.tail() <- sr + processRef(sr) } else { - switch coll := v.(type) { - case types.List: - coll.IterAll(func(c types.Value, index uint64) { - processVal(c, nil) - }) - case types.Set: - coll.IterAll(func(c types.Value) { - processVal(c, nil) - }) - case types.Map: - coll.IterAll(func(k, c types.Value) { - processVal(k, nil) - processVal(c, nil) - }) - default: - for _, c := range v.ChildValues() { - processVal(c, nil) - } - } + v.WalkValues(processVal) } } - processRef := func(r types.Ref) { - defer wg.Done() - - mu.Lock() - skip := visited[r.TargetHash()] - visited[r.TargetHash()] = true - mu.Unlock() - - if skip || f.didFail() { + processRef = func(r types.Ref) { + target := r.TargetHash() + if visited[target] { return } - - target := r.TargetHash() + visited[target] = true v := vr.ReadValue(target) if v == nil { - f.fail(fmt.Errorf("Attempt to visit absent ref:%s", target.String())) - return - } - processVal(v, &r) - } - - iter := func() { - for r := range rq.head() { - processRef(r) - } - } - - for i := 0; i < concurrency; i++ { - go iter() - } - - processVal(v, nil) - wg.Wait() - - rq.close() - - f.checkNotFailed() -} - -// SomeChunksStopCallback is called for every unique types.Ref |r|. Return true to stop walking beyond |r|. -type SomeChunksStopCallback func(r types.Ref) bool - -// SomeChunksChunkCallback is called for every unique chunks.Chunk |c| which wasn't stopped from SomeChunksStopCallback. |r| is a types.Ref referring to |c|. -type SomeChunksChunkCallback func(r types.Ref, c chunks.Chunk) - -// SomeChunksP invokes callbacks on every unique chunk reachable from |r| in top-down order. Callbacks are invoked only once for each chunk regardless of how many times the chunk appears. -// -// |stopCb| is invoked for the types.Ref of every chunk. It can return true to stop SomeChunksP from descending any further. -// |chunkCb| is optional, invoked with the chunks.Chunk referenced by |stopCb| if it didn't return true. -func SomeChunksP(r types.Ref, bs types.BatchStore, stopCb SomeChunksStopCallback, chunkCb SomeChunksChunkCallback, concurrency int) { - rq := newRefQueue() - wg := sync.WaitGroup{} - mu := sync.Mutex{} - visitedRefs := map[hash.Hash]bool{} - - walkChunk := func(r types.Ref) { - defer wg.Done() - - tr := r.TargetHash() - - mu.Lock() - visited := visitedRefs[tr] - visitedRefs[tr] = true - mu.Unlock() - - if visited || stopCb(r) { + d.Chk.Fail("Attempt to visit absent ref:%s", target.String()) return } - // Try to avoid the cost of reading |c|. It's only necessary if the caller wants to know about every chunk, or if we need to descend below |c| (ref height > 1). - var c chunks.Chunk - - if chunkCb != nil || r.Height() > 1 { - c = bs.Get(tr) - d.PanicIfTrue(c.IsEmpty()) - - if chunkCb != nil { - chunkCb(r, c) - } - } - - if r.Height() == 1 { - return - } - - v := types.DecodeValue(c, nil) - for _, r1 := range v.Chunks() { - wg.Add(1) - rq.tail() <- r1 - } + processVal(v) } - iter := func() { - for r := range rq.head() { - walkChunk(r) - } - } + //Process initial value + processVal(target) - for i := 0; i < concurrency; i++ { - go iter() - } - - wg.Add(1) - rq.tail() <- r - wg.Wait() - rq.close() -} - -// refQueue emulates a buffered channel of refs of unlimited size. -type refQueue struct { - head func() <-chan types.Ref - tail func() chan<- types.Ref - close func() -} - -func newRefQueue() refQueue { - head := make(chan types.Ref, 64) - tail := make(chan types.Ref, 64) - done := make(chan struct{}) - buff := []types.Ref{} - - push := func(r types.Ref) { - buff = append(buff, r) - } - - pop := func() types.Ref { - d.PanicIfFalse(len(buff) > 0) - r := buff[0] - buff = buff[1:] - return r - } - - go func() { - loop: - for { - if len(buff) == 0 { - select { - case r := <-tail: - push(r) - case <-done: - break loop - } - } else { - first := buff[0] - select { - case r := <-tail: - push(r) - case head <- first: - r := pop() - d.PanicIfFalse(r == first) - case <-done: - break loop - } - } - } - }() - - return refQueue{ - func() <-chan types.Ref { - return head - }, - func() chan<- types.Ref { - return tail - }, - func() { - close(head) - done <- struct{}{} - }, - } -} - -type failure struct { - err error - mu *sync.Mutex -} - -func newFailure() *failure { - return &failure{ - mu: &sync.Mutex{}, - } -} - -func (f *failure) fail(err error) { - f.mu.Lock() - defer f.mu.Unlock() - if f.err == nil { // only capture first error - f.err = err - } -} - -func (f *failure) didFail() bool { - f.mu.Lock() - defer f.mu.Unlock() - return f.err != nil -} - -func (f *failure) checkNotFailed() { - f.mu.Lock() - defer f.mu.Unlock() - d.Chk.NoError(f.err) } diff --git a/go/walk/walk_test.go b/go/walk/walk_test.go index 76192750e5..fcba99090e 100644 --- a/go/walk/walk_test.go +++ b/go/walk/walk_test.go @@ -28,14 +28,23 @@ func (suite *WalkAllTestSuite) SetupTest() { suite.vs = types.NewTestValueStore() } -func (suite *WalkAllTestSuite) walkWorker(r types.Ref, expected int) { +func (suite *WalkAllTestSuite) walkWorker(v types.Value, expected int) { actual := 0 - AllP(r, suite.vs, func(c types.Value, r *types.Ref) { + WalkValues(v, suite.vs, func(c types.Value) bool { actual++ - }, 1) + return false + }) suite.Equal(expected, actual) } +func (suite *WalkAllTestSuite) TestWalkValuesDuplicates() { + + dup := suite.NewList(types.Number(9), types.Number(10), types.Number(11), types.Number(12), types.Number(13)) + l := suite.NewList(types.Number(8), dup, dup) + + suite.walkWorker(l, 11) +} + func (suite *WalkAllTestSuite) TestWalkPrimitives() { suite.walkWorker(suite.vs.WriteValue(types.Number(0.0)), 2) suite.walkWorker(suite.vs.WriteValue(types.String("hello")), 2) @@ -50,6 +59,54 @@ func (suite *WalkAllTestSuite) TestWalkComposites() { suite.walkWorker(suite.NewMap(types.Number(8), types.Bool(true), types.Number(0), types.Bool(false)), 6) } +func (suite *WalkTestSuite) skipWorker(composite types.Value) (reached []types.Value) { + WalkValues(composite, suite.vs, func(v types.Value) bool { + suite.False(v.Equals(suite.deadValue), "Should never have reached %+v", suite.deadValue) + reached = append(reached, v) + return v.Equals(suite.mustSkip) + }) + return +} + +// Skipping a sub-tree must allow other items in the list to be processed. +func (suite *WalkTestSuite) TestSkipListElement() { + wholeList := types.NewList(suite.mustSkip, suite.shouldSee, suite.shouldSee) + reached := suite.skipWorker(wholeList) + for _, v := range []types.Value{wholeList, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { + suite.Contains(reached, v, "Doesn't contain %+v", v) + } + suite.Len(reached, 6) +} + +func (suite *WalkTestSuite) TestSkipSetElement() { + wholeSet := types.NewSet(suite.mustSkip, suite.shouldSee).Insert(suite.shouldSee) + reached := suite.skipWorker(wholeSet) + for _, v := range []types.Value{wholeSet, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { + suite.Contains(reached, v, "Doesn't contain %+v", v) + } + suite.Len(reached, 4) +} + +func (suite *WalkTestSuite) TestSkipMapValue() { + shouldAlsoSeeItem := types.String("Also good") + shouldAlsoSee := types.NewSet(shouldAlsoSeeItem) + wholeMap := types.NewMap(suite.shouldSee, suite.mustSkip, shouldAlsoSee, suite.shouldSee) + reached := suite.skipWorker(wholeMap) + for _, v := range []types.Value{wholeMap, suite.shouldSee, suite.shouldSeeItem, suite.mustSkip, shouldAlsoSee, shouldAlsoSeeItem} { + suite.Contains(reached, v, "Doesn't contain %+v", v) + } + suite.Len(reached, 8) +} + +func (suite *WalkTestSuite) TestSkipMapKey() { + wholeMap := types.NewMap(suite.mustSkip, suite.shouldSee, suite.shouldSee, suite.shouldSee) + reached := suite.skipWorker(wholeMap) + for _, v := range []types.Value{wholeMap, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { + suite.Contains(reached, v, "Doesn't contain %+v", v) + } + suite.Len(reached, 8) +} + func (suite *WalkAllTestSuite) NewList(vs ...types.Value) types.Ref { v := types.NewList(vs...) return suite.vs.WriteValue(v) @@ -101,60 +158,3 @@ func (suite *WalkTestSuite) SetupTest() { suite.deadValue = types.Number(0xDEADBEEF) suite.mustSkip = types.NewList(suite.deadValue) } - -func (suite *WalkTestSuite) TestStopWalkImmediately() { - actual := 0 - SomeP(types.NewList(types.NewSet(), types.NewList()), suite.vs, func(v types.Value, r *types.Ref) bool { - actual++ - return true - }, 1) - suite.Equal(1, actual) -} - -func (suite *WalkTestSuite) skipWorker(composite types.Value) (reached []types.Value) { - SomeP(composite, suite.vs, func(v types.Value, r *types.Ref) bool { - suite.False(v.Equals(suite.deadValue), "Should never have reached %+v", suite.deadValue) - reached = append(reached, v) - return v.Equals(suite.mustSkip) - }, 1) - return -} - -// Skipping a sub-tree must allow other items in the list to be processed. -func (suite *WalkTestSuite) SkipTestSkipListElement() { - wholeList := types.NewList(suite.mustSkip, suite.shouldSee, suite.shouldSee) - reached := suite.skipWorker(wholeList) - for _, v := range []types.Value{wholeList, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { - suite.Contains(reached, v, "Doesn't contain %+v", v) - } - suite.Len(reached, 6) -} - -func (suite *WalkTestSuite) SkipTestSkipSetElement() { - wholeSet := types.NewSet(suite.mustSkip, suite.shouldSee).Insert(suite.shouldSee) - reached := suite.skipWorker(wholeSet) - for _, v := range []types.Value{wholeSet, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { - suite.Contains(reached, v, "Doesn't contain %+v", v) - } - suite.Len(reached, 4) -} - -func (suite *WalkTestSuite) SkipTestSkipMapValue() { - shouldAlsoSeeItem := types.String("Also good") - shouldAlsoSee := types.NewSet(shouldAlsoSeeItem) - wholeMap := types.NewMap(suite.shouldSee, suite.mustSkip, shouldAlsoSee, suite.shouldSee) - reached := suite.skipWorker(wholeMap) - for _, v := range []types.Value{wholeMap, suite.shouldSee, suite.shouldSeeItem, suite.mustSkip, shouldAlsoSee, shouldAlsoSeeItem} { - suite.Contains(reached, v, "Doesn't contain %+v", v) - } - suite.Len(reached, 8) -} - -func (suite *WalkTestSuite) SkipTestSkipMapKey() { - wholeMap := types.NewMap(suite.mustSkip, suite.shouldSee, suite.shouldSee, suite.shouldSee) - reached := suite.skipWorker(wholeMap) - for _, v := range []types.Value{wholeMap, suite.mustSkip, suite.shouldSee, suite.shouldSeeItem} { - suite.Contains(reached, v, "Doesn't contain %+v", v) - } - suite.Len(reached, 8) -} diff --git a/samples/go/nomdex/nomdex_update.go b/samples/go/nomdex/nomdex_update.go index e03e83c26b..9ec325947d 100644 --- a/samples/go/nomdex/nomdex_update.go +++ b/samples/go/nomdex/nomdex_update.go @@ -164,7 +164,7 @@ func addElementsToGraphBuilder(gb *types.GraphBuilder, db datas.Database, rootOb } index := Index{m: IndexMap{}} - walk.AllP(rootObject, db, func(v types.Value, r *types.Ref) { + walk.WalkValues(rootObject, db, func(v types.Value) bool { typ := v.Type() typeCacheMutex.Lock() hasPath, ok := typeCache[typ] @@ -182,7 +182,8 @@ func addElementsToGraphBuilder(gb *types.GraphBuilder, db datas.Database, rootOb typeCacheMutex.Unlock() } } - }, 4) + return false + }) status.Done() } diff --git a/samples/go/photo-index/main.go b/samples/go/photo-index/main.go index 14e913d35c..b066bccb28 100644 --- a/samples/go/photo-index/main.go +++ b/samples/go/photo-index/main.go @@ -26,7 +26,6 @@ func main() { func index() (win bool) { var dbStr = flag.String("db", "", "input database spec") var outDSStr = flag.String("out-ds", "", "output dataset to write to - if empty, defaults to input dataset") - var parallelism = flag.Int("parallelism", 16, "number of parallel goroutines to search") flag.Usage = usage flag.Parse(false) @@ -96,7 +95,7 @@ func index() (win bool) { byFace := types.NewGraphBuilder(db, types.MapKind, true) for _, v := range inputs { - walk.SomeP(v, db, func(cv types.Value, _ *types.Ref) (stop bool) { + walk.WalkValues(v, db, func(cv types.Value) (stop bool) { if types.IsSubtype(photoType, cv.Type()) { s := cv.(types.Struct) @@ -130,7 +129,7 @@ func index() (win bool) { stop = true } return - }, *parallelism) + }) } outDS, err = db.CommitValue(outDS, types.NewStruct("", types.StructData{