mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-28 18:59:00 -06:00
@@ -10,7 +10,7 @@ func assertType(t Type, v ...Value) {
|
||||
}
|
||||
}
|
||||
|
||||
func assertSetsSameType(s setLeaf, v ...Set) {
|
||||
func assertSetsSameType(s Set, v ...Set) {
|
||||
if s.elemType().Kind() != ValueKind {
|
||||
t := s.Type()
|
||||
for _, v := range v {
|
||||
|
||||
@@ -89,23 +89,38 @@ func (cs compoundSet) Remove(values ...Value) Set {
|
||||
return res.Remove(tail...)
|
||||
}
|
||||
|
||||
func (cs compoundSet) sequenceChunkerAtValue(v Value) (*sequenceChunker, bool) {
|
||||
func (cs compoundSet) sequenceCursorAtValue(v Value) (*sequenceCursor, bool) {
|
||||
metaCur, leaf, idx := cs.findLeaf(v)
|
||||
|
||||
cur := &sequenceCursor{metaCur, leaf, idx, len(leaf.data), func(otherLeaf sequenceItem, idx int) sequenceItem {
|
||||
return otherLeaf.(setLeaf).data[idx]
|
||||
}, func(mt sequenceItem) (sequenceItem, int) {
|
||||
otherLeaf := readMetaTupleValue(mt, cs.cs).(setLeaf)
|
||||
return otherLeaf, len(otherLeaf.data)
|
||||
}}
|
||||
|
||||
seq := newSequenceChunker(cur, makeSetLeafChunkFn(cs.t, cs.cs), newSetMetaSequenceChunkFn(cs.t, cs.cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
found := idx < len(leaf.data) && leaf.data[idx].Equals(v)
|
||||
return cur, found
|
||||
}
|
||||
|
||||
func (cs compoundSet) sequenceChunkerAtValue(v Value) (*sequenceChunker, bool) {
|
||||
cur, found := cs.sequenceCursorAtValue(v)
|
||||
seq := newSequenceChunker(cur, makeSetLeafChunkFn(cs.t, cs.cs), newSetMetaSequenceChunkFn(cs.t, cs.cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
return seq, found
|
||||
}
|
||||
|
||||
func (cs compoundSet) elemType() Type {
|
||||
return cs.t.Desc.(CompoundDesc).ElemTypes[0]
|
||||
}
|
||||
|
||||
func (cs compoundSet) sequenceCursorAtFirst() *sequenceCursor {
|
||||
// TODO: This can be done more efficiently - Bug 795
|
||||
v := cs.First()
|
||||
cur, found := cs.sequenceCursorAtValue(v)
|
||||
d.Chk.True(found)
|
||||
return cur
|
||||
}
|
||||
|
||||
func (cs compoundSet) Union(others ...Set) Set {
|
||||
panic("not implemented")
|
||||
return setUnion(cs, cs.cs, others)
|
||||
}
|
||||
|
||||
func (cs compoundSet) Subtract(others ...Set) Set {
|
||||
|
||||
@@ -282,3 +282,53 @@ func TestCompoundSetFilter(t *testing.T) {
|
||||
doTest(getTestRefToNativeOrderSet(2))
|
||||
doTest(getTestRefToValueOrderSet(2))
|
||||
}
|
||||
|
||||
func TestCompoundSetUnion(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
ms := chunks.NewMemoryStore()
|
||||
|
||||
doTest := func(ts testSet) {
|
||||
cs := ts.toCompoundSet(ms)
|
||||
cs2 := cs.Union()
|
||||
assert.True(cs.Equals(cs2))
|
||||
cs3 := cs.Union(cs2)
|
||||
assert.True(cs.Equals(cs3))
|
||||
cs4 := cs.Union(cs2, cs3)
|
||||
assert.True(cs.Equals(cs4))
|
||||
emptySet := NewTypedSet(ms, ts.tr)
|
||||
cs5 := cs.Union(emptySet)
|
||||
assert.True(cs.Equals(cs5))
|
||||
cs6 := emptySet.Union(cs)
|
||||
assert.True(cs.Equals(cs6))
|
||||
|
||||
r := rand.New(rand.NewSource(123))
|
||||
subsetValues1 := make([]Value, 0, len(ts.values))
|
||||
subsetValues2 := make([]Value, 0, len(ts.values))
|
||||
subsetValues3 := make([]Value, 0, len(ts.values))
|
||||
subsetValuesAll := make([]Value, 0, len(ts.values))
|
||||
for _, v := range ts.values {
|
||||
if r.Intn(3) == 0 {
|
||||
subsetValues1 = append(subsetValues1, v)
|
||||
subsetValuesAll = append(subsetValuesAll, v)
|
||||
} else if r.Intn(3) == 0 {
|
||||
subsetValues2 = append(subsetValues2, v)
|
||||
subsetValuesAll = append(subsetValuesAll, v)
|
||||
} else if r.Intn(3) == 0 {
|
||||
subsetValues3 = append(subsetValues3, v)
|
||||
subsetValuesAll = append(subsetValuesAll, v)
|
||||
}
|
||||
}
|
||||
|
||||
s1 := NewTypedSet(ms, ts.tr, subsetValues1...)
|
||||
s2 := NewTypedSet(ms, ts.tr, subsetValues2...)
|
||||
s3 := NewTypedSet(ms, ts.tr, subsetValues3...)
|
||||
sAll := NewTypedSet(ms, ts.tr, subsetValuesAll...)
|
||||
|
||||
assert.True(s1.Union(s2, s3).Equals(sAll))
|
||||
}
|
||||
|
||||
doTest(getTestNativeOrderSet(16))
|
||||
doTest(getTestRefValueOrderSet(2))
|
||||
doTest(getTestRefToNativeOrderSet(2))
|
||||
doTest(getTestRefToValueOrderSet(2))
|
||||
}
|
||||
|
||||
70
types/set.go
70
types/set.go
@@ -1,6 +1,9 @@
|
||||
package types
|
||||
|
||||
import "github.com/attic-labs/noms/chunks"
|
||||
import (
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/d"
|
||||
)
|
||||
|
||||
type Set interface {
|
||||
Value
|
||||
@@ -16,6 +19,8 @@ type Set interface {
|
||||
IterAll(cb setIterAllCallback)
|
||||
IterAllP(concurrency int, f setIterAllCallback)
|
||||
Filter(cb setFilterCallback) Set
|
||||
elemType() Type
|
||||
sequenceCursorAtFirst() *sequenceCursor
|
||||
}
|
||||
|
||||
// indexOfSetFn is the signature of a function that takes a setData and a
// Value and returns an int.
// NOTE(review): presumably the int is the position of v within m — confirm
// against the implementations.
type indexOfSetFn func(m setData, v Value) int
|
||||
@@ -42,3 +47,66 @@ func newTypedSet(cs chunks.ChunkStore, t Type, data ...Value) Set {
|
||||
|
||||
return seq.Done().(Set)
|
||||
}
|
||||
|
||||
func setUnion(set Set, cs chunks.ChunkStore, others []Set) Set {
|
||||
// TODO: This can be done more efficiently by realizing that if two sets have the same meta tuple we only have to traverse one of the subtrees. Bug 794
|
||||
if len(others) == 0 {
|
||||
return set
|
||||
}
|
||||
assertSetsSameType(set, others...)
|
||||
|
||||
tr := set.Type()
|
||||
seq := newEmptySequenceChunker(makeSetLeafChunkFn(tr, cs), newSetMetaSequenceChunkFn(tr, cs), newSetLeafBoundaryChecker(), newOrderedMetaSequenceBoundaryChecker)
|
||||
|
||||
var lessFunction func(a, b sequenceItem) bool
|
||||
if isSequenceOrderedByIndexedType(tr) {
|
||||
lessFunction = func(a, b sequenceItem) bool {
|
||||
return a.(OrderedValue).Less(b.(OrderedValue))
|
||||
}
|
||||
} else {
|
||||
lessFunction = func(a, b sequenceItem) bool {
|
||||
return a.(Value).Ref().Less(b.(Value).Ref())
|
||||
}
|
||||
}
|
||||
|
||||
smallest := func(cursors map[*sequenceCursor]bool) (smallestCursor *sequenceCursor, smallestItem sequenceItem) {
|
||||
for cursor, _ := range cursors {
|
||||
currentItem := cursor.current()
|
||||
if smallestCursor == nil || lessFunction(currentItem, smallestItem) {
|
||||
smallestCursor = cursor
|
||||
smallestItem = currentItem
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
cursors := make(map[*sequenceCursor]bool, len(others)+1)
|
||||
if !set.Empty() {
|
||||
cursor := set.sequenceCursorAtFirst()
|
||||
cursors[cursor] = true
|
||||
}
|
||||
for _, s := range others {
|
||||
if !s.Empty() {
|
||||
cursor := s.sequenceCursorAtFirst()
|
||||
cursors[cursor] = true
|
||||
}
|
||||
}
|
||||
|
||||
var last Value
|
||||
for len(cursors) > 0 {
|
||||
smallestCursor, smallestItem := smallest(cursors)
|
||||
d.Chk.NotNil(smallestCursor)
|
||||
|
||||
// Don't add same value twice
|
||||
if last == nil || !last.Equals(smallestItem.(Value)) {
|
||||
seq.Append(smallestItem)
|
||||
last = smallestItem.(Value)
|
||||
}
|
||||
|
||||
if !smallestCursor.advance() {
|
||||
delete(cursors, smallestCursor)
|
||||
}
|
||||
}
|
||||
|
||||
return seq.Done().(Set)
|
||||
}
|
||||
|
||||
@@ -57,15 +57,7 @@ func (s setLeaf) Remove(values ...Value) Set {
|
||||
}
|
||||
|
||||
func (s setLeaf) Union(others ...Set) Set {
|
||||
assertSetsSameType(s, others...)
|
||||
var result Set = s
|
||||
for _, other := range others {
|
||||
other.Iter(func(v Value) (stop bool) {
|
||||
result = result.Insert(v)
|
||||
return
|
||||
})
|
||||
}
|
||||
return result
|
||||
return setUnion(s, s.cs, others)
|
||||
}
|
||||
|
||||
func (s setLeaf) Subtract(others ...Set) Set {
|
||||
@@ -241,3 +233,18 @@ func makeSetLeafChunkFn(t Type, cs chunks.ChunkStore) makeChunkFn {
|
||||
return metaTuple{ref, indexValue}, setLeaf
|
||||
}
|
||||
}
|
||||
|
||||
func (s setLeaf) sequenceCursorAtFirst() *sequenceCursor {
|
||||
return &sequenceCursor{
|
||||
nil,
|
||||
s.data,
|
||||
0,
|
||||
len(s.data),
|
||||
func(parent sequenceItem, idx int) sequenceItem {
|
||||
return s.data[idx]
|
||||
},
|
||||
func(reference sequenceItem) (sequence sequenceItem, length int) {
|
||||
panic("unreachable")
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user