Files
dolt/types/set.go
Erik Arvidsson b8be6908f8 Implement Set Union
This is done by creating a cursor for each set. This is a cursor for
the actual values in the sets. We then pick the "smallest" value from
the cursors and advance that cursor. This continues until we have
exhausted all the cursors.

  setA.Union(set0, ... setN)

The time complexity is O(len(setA) + len(set0) + ... + len(setN))
2015-12-17 10:18:04 -05:00

113 lines
3.0 KiB
Go

package types
import (
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/d"
)
// Set is the interface for set values in this package. It embeds Value and
// adds size/membership accessors, operations that return a Set, and iteration
// helpers. Because it declares unexported methods (elemType,
// sequenceCursorAtFirst), it can only be implemented inside this package.
//
// NOTE(review): the per-method notes below are inferred from the signatures;
// the implementations are not in this part of the file, so confirm the
// details against them.
type Set interface {
Value
// First returns a single Value; presumably the first element in the set's
// iteration order (confirm behavior on an empty set).
First() Value
// Len returns a uint64; presumably the number of elements.
Len() uint64
// Empty reports a bool; setUnion uses it to skip sets that have no
// elements to put a cursor on.
Empty() bool
// Has takes a key and returns a bool; presumably a membership test.
Has(key Value) bool
// Insert takes any number of values and returns a Set.
Insert(values ...Value) Set
// Remove takes any number of values and returns a Set.
Remove(values ...Value) Set
// Union takes any number of other sets and returns a Set; presumably
// implemented in terms of setUnion in this file.
Union(others ...Set) Set
// Subtract takes any number of other sets and returns a Set.
Subtract(others ...Set) Set
// Iter invokes cb with elements of the set. The meaning of the callback's
// bool result is defined by the implementation (TODO confirm whether true
// stops or continues iteration).
Iter(cb setIterCallback)
// IterAll invokes cb with elements of the set; the callback has no result.
IterAll(cb setIterAllCallback)
// IterAllP is the variant of IterAll that also takes a concurrency value;
// presumably the callback may then run concurrently (TODO confirm).
IterAllP(concurrency int, f setIterAllCallback)
// Filter takes a callback that reports whether to keep a value and returns
// a Set.
Filter(cb setFilterCallback) Set
// elemType returns a Type; presumably the type of the set's elements.
elemType() Type
// sequenceCursorAtFirst returns a cursor over the set's values. setUnion
// only calls it on non-empty sets, then reads items with current() and
// steps with advance().
sequenceCursorAtFirst() *sequenceCursor
}
// indexOfSetFn maps a setData and a Value to an int; presumably the index of
// v within m (TODO confirm the result for a missing value).
type indexOfSetFn func(m setData, v Value) int

// setIterCallback is the callback type accepted by Set.Iter. The meaning of
// the bool result is defined by the Iter implementation (TODO confirm).
type setIterCallback func(v Value) bool

// setIterAllCallback is the callback type accepted by Set.IterAll and
// Set.IterAllP; it receives a value and returns nothing.
type setIterAllCallback func(v Value)

// setFilterCallback is the callback type accepted by Set.Filter; it reports
// whether the given value should be kept.
type setFilterCallback func(v Value) (keep bool)

// setType is the Type that NewSet passes to NewTypedSet: a compound type of
// kind SetKind whose single parameter is the primitive type of kind ValueKind.
var setType = MakeCompoundType(SetKind, MakePrimitiveType(ValueKind))
// NewSet creates a Set from the given values using the package-level setType.
// It is a convenience wrapper that forwards cs and v to NewTypedSet.
func NewSet(cs chunks.ChunkStore, v ...Value) Set {
return NewTypedSet(cs, setType, v...)
}
// NewTypedSet creates a Set of type t from the given values. The values are
// first run through buildSetData, starting from an empty setData, and the
// result is handed to newTypedSet, which builds the set against cs.
//
// NOTE(review): buildSetData is defined elsewhere; presumably it orders and
// de-duplicates the values so they can be appended sequentially. Confirm.
func NewTypedSet(cs chunks.ChunkStore, t Type, v ...Value) Set {
	prepared := buildSetData(setData{}, v, t)
	return newTypedSet(cs, t, prepared...)
}
// newTypedSet builds a Set of type t by appending data, in the order given,
// to a fresh sequence chunker configured with the set leaf/meta chunk
// functions and boundary checkers, and then finalizing the chunker.
//
// The values are appended as-is; nothing here reorders or de-duplicates them,
// so callers are presumably expected to pass data that is already prepared
// (NewTypedSet passes the output of buildSetData).
func newTypedSet(cs chunks.ChunkStore, t Type, data ...Value) Set {
	chunker := newEmptySequenceChunker(
		makeSetLeafChunkFn(t, cs),
		newSetMetaSequenceChunkFn(t, cs),
		newSetLeafBoundaryChecker(),
		newOrderedMetaSequenceBoundaryChecker,
	)
	for i := range data {
		chunker.Append(data[i])
	}
	return chunker.Done().(Set)
}
// setUnion returns the union of set and every set in others, built against cs.
//
// When others is empty, set itself is returned. Otherwise all sets are
// asserted to have the same type, a cursor is opened on every non-empty set,
// and the result is produced by a k-way merge: on each step the cursor whose
// current item is smallest is chosen, that item is appended to the output
// (unless it equals the item appended just before it), and the cursor is
// advanced. A cursor is dropped once it cannot advance any further; the merge
// ends when no cursors remain.
func setUnion(set Set, cs chunks.ChunkStore, others []Set) Set {
	// TODO: This can be done more efficiently by realizing that if two sets have the same meta tuple we only have to traverse one of the subtrees. Bug 794
	if len(others) == 0 {
		return set
	}

	assertSetsSameType(set, others...)

	t := set.Type()
	chunker := newEmptySequenceChunker(
		makeSetLeafChunkFn(t, cs),
		newSetMetaSequenceChunkFn(t, cs),
		newSetLeafBoundaryChecker(),
		newOrderedMetaSequenceBoundaryChecker,
	)

	// By default items are ordered by their refs; sequences ordered by an
	// indexed type compare the values themselves instead.
	less := func(a, b sequenceItem) bool {
		return a.(Value).Ref().Less(b.(Value).Ref())
	}
	if isSequenceOrderedByIndexedType(t) {
		less = func(a, b sequenceItem) bool {
			return a.(OrderedValue).Less(b.(OrderedValue))
		}
	}

	// One live cursor per non-empty input, opened on set first and then on
	// others in order.
	live := make(map[*sequenceCursor]bool, len(others)+1)
	for _, s := range append([]Set{set}, others...) {
		if s.Empty() {
			continue
		}
		live[s.sequenceCursorAtFirst()] = true
	}

	// pickMin scans the live cursors and returns the one whose current item
	// is smallest according to less, together with that item. It returns a
	// nil cursor when there are no live cursors.
	pickMin := func() (*sequenceCursor, sequenceItem) {
		var minCursor *sequenceCursor
		var minItem sequenceItem
		for c := range live {
			item := c.current()
			if minCursor != nil && !less(item, minItem) {
				continue
			}
			minCursor, minItem = c, item
		}
		return minCursor, minItem
	}

	var prev Value
	for len(live) > 0 {
		c, item := pickMin()
		d.Chk.NotNil(c)

		// Equal items surface consecutively in the merge, so comparing with
		// the previously appended value is enough to avoid duplicates.
		if prev == nil || !prev.Equals(item.(Value)) {
			chunker.Append(item)
			prev = item.(Value)
		}

		if !c.advance() {
			delete(live, c)
		}
	}

	return chunker.Done().(Set)
}