Merge pull request #788 from arv/set-union

Implement Set Union
This commit is contained in:
Erik Arvidsson
2015-12-17 10:34:41 -05:00
5 changed files with 156 additions and 16 deletions

View File

@@ -10,7 +10,7 @@ func assertType(t Type, v ...Value) {
}
}
func assertSetsSameType(s setLeaf, v ...Set) {
func assertSetsSameType(s Set, v ...Set) {
if s.elemType().Kind() != ValueKind {
t := s.Type()
for _, v := range v {

View File

@@ -89,23 +89,38 @@ func (cs compoundSet) Remove(values ...Value) Set {
return res.Remove(tail...)
}
func (cs compoundSet) sequenceChunkerAtValue(v Value) (*sequenceChunker, bool) {
// sequenceCursorAtValue builds a cursor positioned where cs.findLeaf locates v.
// The second result reports whether the item at that position equals v.
func (cs compoundSet) sequenceCursorAtValue(v Value) (*sequenceCursor, bool) {
	metaCur, leaf, idx := cs.findLeaf(v)
	cur := &sequenceCursor{metaCur, leaf, idx, len(leaf.data), func(otherLeaf sequenceItem, idx int) sequenceItem {
		// Item accessor: index into the data of whichever leaf is handed in.
		return otherLeaf.(setLeaf).data[idx]
	}, func(mt sequenceItem) (sequenceItem, int) {
		// Leaf loader: read the leaf a meta tuple refers to from the chunk
		// store and report how many items it holds.
		otherLeaf := readMetaTupleValue(mt, cs.cs).(setLeaf)
		return otherLeaf, len(otherLeaf.data)
	}}
	// Bounds-check idx before comparing so an index past the end of the leaf
	// is reported as "not found" rather than indexing out of range.
	found := idx < len(leaf.data) && leaf.data[idx].Equals(v)
	return cur, found
}
// sequenceChunkerAtValue wraps the cursor returned by sequenceCursorAtValue in
// a sequence chunker configured with the set leaf/meta chunk functions and
// boundary checkers. The bool is the "found" flag from sequenceCursorAtValue.
func (cs compoundSet) sequenceChunkerAtValue(v Value) (*sequenceChunker, bool) {
	cursor, ok := cs.sequenceCursorAtValue(v)
	return newSequenceChunker(
		cursor,
		makeSetLeafChunkFn(cs.t, cs.cs),
		newSetMetaSequenceChunkFn(cs.t, cs.cs),
		newSetLeafBoundaryChecker(),
		newOrderedMetaSequenceBoundaryChecker,
	), ok
}
// elemType returns the first element type recorded in the compound
// descriptor of the set's type.
func (cs compoundSet) elemType() Type {
	desc := cs.t.Desc.(CompoundDesc)
	return desc.ElemTypes[0]
}
// sequenceCursorAtFirst returns a cursor positioned at the value reported by
// cs.First(). It looks that value up again via sequenceCursorAtValue and
// checks that the lookup succeeded.
func (cs compoundSet) sequenceCursorAtFirst() *sequenceCursor {
	// TODO: This can be done more efficiently - Bug 795
	cursor, ok := cs.sequenceCursorAtValue(cs.First())
	d.Chk.True(ok)
	return cursor
}
// Union returns the union of cs and others. The work is delegated to
// setUnion, which is given cs's chunk store.
func (cs compoundSet) Union(others ...Set) Set {
	return setUnion(cs, cs.cs, others)
}
func (cs compoundSet) Subtract(others ...Set) Set {

View File

@@ -282,3 +282,53 @@ func TestCompoundSetFilter(t *testing.T) {
doTest(getTestRefToNativeOrderSet(2))
doTest(getTestRefToValueOrderSet(2))
}
// TestCompoundSetUnion checks Union on compound sets: identity cases (no
// arguments, self, empty set on either side) and a union of three randomly
// chosen, disjoint subsets compared against a set built from all chosen values.
func TestCompoundSetUnion(t *testing.T) {
	assert := assert.New(t)
	ms := chunks.NewMemoryStore()

	doTest := func(ts testSet) {
		cs := ts.toCompoundSet(ms)

		// Union with nothing, with an equal set, and with several equal sets
		// must all leave the set unchanged.
		noArgs := cs.Union()
		assert.True(cs.Equals(noArgs))
		withOne := cs.Union(noArgs)
		assert.True(cs.Equals(withOne))
		withTwo := cs.Union(noArgs, withOne)
		assert.True(cs.Equals(withTwo))

		// The empty set is neutral on either side.
		emptySet := NewTypedSet(ms, ts.tr)
		csThenEmpty := cs.Union(emptySet)
		assert.True(cs.Equals(csThenEmpty))
		emptyThenCs := emptySet.Union(cs)
		assert.True(cs.Equals(emptyThenCs))

		// Fixed seed keeps the partition reproducible.
		r := rand.New(rand.NewSource(123))
		n := len(ts.values)
		first := make([]Value, 0, n)
		second := make([]Value, 0, n)
		third := make([]Value, 0, n)
		all := make([]Value, 0, n)
		for _, v := range ts.values {
			// Cases are tried top to bottom, so each value lands in at most
			// one subset; values that match no case are left out entirely.
			switch {
			case r.Intn(3) == 0:
				first = append(first, v)
			case r.Intn(3) == 0:
				second = append(second, v)
			case r.Intn(3) == 0:
				third = append(third, v)
			default:
				continue
			}
			all = append(all, v)
		}

		s1 := NewTypedSet(ms, ts.tr, first...)
		s2 := NewTypedSet(ms, ts.tr, second...)
		s3 := NewTypedSet(ms, ts.tr, third...)
		sAll := NewTypedSet(ms, ts.tr, all...)

		assert.True(s1.Union(s2, s3).Equals(sAll))
	}

	doTest(getTestNativeOrderSet(16))
	doTest(getTestRefValueOrderSet(2))
	doTest(getTestRefToNativeOrderSet(2))
	doTest(getTestRefToValueOrderSet(2))
}

View File

@@ -1,6 +1,9 @@
package types
import "github.com/attic-labs/noms/chunks"
import (
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/d"
)
type Set interface {
Value
@@ -16,6 +19,8 @@ type Set interface {
IterAll(cb setIterAllCallback)
IterAllP(concurrency int, f setIterAllCallback)
Filter(cb setFilterCallback) Set
elemType() Type
sequenceCursorAtFirst() *sequenceCursor
}
// indexOfSetFn is the signature of a function that takes a set's data and a
// value and returns an int.
// NOTE(review): presumably the result is the position of v within m — confirm
// against the implementations.
type indexOfSetFn func(m setData, v Value) int
@@ -42,3 +47,66 @@ func newTypedSet(cs chunks.ChunkStore, t Type, data ...Value) Set {
return seq.Done().(Set)
}
// setUnion merges set with every set in others and returns the result as a
// new Set built with a fresh sequence chunker over cs. With no others, set is
// returned unchanged. All sets are first checked with assertSetsSameType.
//
// One cursor is opened at the start of each non-empty input. On every step
// the cursor holding the smallest current item is chosen, that item is
// appended unless it equals the previously appended one, and the cursor is
// advanced; exhausted cursors are dropped.
func setUnion(set Set, cs chunks.ChunkStore, others []Set) Set {
	// TODO: This can be done more efficiently by realizing that if two sets have the same meta tuple we only have to traverse one of the subtrees. Bug 794
	if len(others) == 0 {
		return set
	}

	assertSetsSameType(set, others...)

	tr := set.Type()
	seq := newEmptySequenceChunker(makeSetLeafChunkFn(tr, cs), newSetMetaSequenceChunkFn(tr, cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)

	// Pick the comparison matching how the sequence is ordered: by the values
	// themselves, or otherwise by their refs.
	var less func(a, b sequenceItem) bool
	if isSequenceOrderedByIndexedType(tr) {
		less = func(a, b sequenceItem) bool {
			return a.(OrderedValue).Less(b.(OrderedValue))
		}
	} else {
		less = func(a, b sequenceItem) bool {
			return a.(Value).Ref().Less(b.(Value).Ref())
		}
	}

	// Open a cursor on each non-empty input, the receiver set first.
	cursors := make(map[*sequenceCursor]bool, len(others)+1)
	if !set.Empty() {
		cursors[set.sequenceCursorAtFirst()] = true
	}
	for _, other := range others {
		if other.Empty() {
			continue
		}
		cursors[other.sequenceCursorAtFirst()] = true
	}

	var last Value
	for len(cursors) > 0 {
		// Find the cursor whose current item is smallest.
		var minCursor *sequenceCursor
		var minItem sequenceItem
		for c := range cursors {
			item := c.current()
			if minCursor == nil || less(item, minItem) {
				minCursor, minItem = c, item
			}
		}
		d.Chk.NotNil(minCursor)

		// Don't add same value twice
		if last == nil || !last.Equals(minItem.(Value)) {
			seq.Append(minItem)
			last = minItem.(Value)
		}

		// Drop the cursor once it has no more items.
		if !minCursor.advance() {
			delete(cursors, minCursor)
		}
	}

	return seq.Done().(Set)
}

View File

@@ -57,15 +57,7 @@ func (s setLeaf) Remove(values ...Value) Set {
}
// Union returns the union of s and others. The work is delegated to setUnion,
// which is given s's chunk store; setUnion performs the same-type check on
// the inputs itself.
func (s setLeaf) Union(others ...Set) Set {
	return setUnion(s, s.cs, others)
}
func (s setLeaf) Subtract(others ...Set) Set {
@@ -241,3 +233,18 @@ func makeSetLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
return metaTuple{ref, indexValue}, setLeaf
}
}
// sequenceCursorAtFirst returns a cursor over s.data starting at index 0,
// with no parent cursor. Items are read straight from s.data; the callback
// for loading another sequence panics because it is not expected to be called.
func (s setLeaf) sequenceCursorAtFirst() *sequenceCursor {
	// Ignores the parent argument: the leaf's own data is the only sequence.
	itemAt := func(parent sequenceItem, idx int) sequenceItem {
		return s.data[idx]
	}
	loadSequence := func(reference sequenceItem) (sequence sequenceItem, length int) {
		panic("unreachable")
	}
	return &sequenceCursor{nil, s.data, 0, len(s.data), itemAt, loadSequence}
}