Files
dolt/newset/set_builder.go
Benjamin Kalman 326a202025 Implement Put for newset, also requiring a moderate refactor.
Previously we had flatSet vs chunkedSet, each implementing the Set
interface, and SetBuilder dealing with this Set interface.

Now we have a chunkedSet interface with setValueChunk and setIndexChunk
implementations, SetBuilder dealing with the chunkedSet interface, and a
Set class which implements Set primitives on top of a chunkedSet.

Of particular note is the introduction of the setChunkStore, an
in-memory cache of setChunks keyed by their refs. This is needed right
now so that we only have a single SetBuilder implementation which chunks
refs, as opposed to 2 SetBuilder implementations, one which chunks refs
and the other chunkedSet instances. Something like it will be needed in
the longer term to implement lazily loading set chunks, however there
will be issues like memory pressure to deal with as well.

Follow-ups to this patch include renaming files to reflect their class
names better, and doing a bit of function/member renaming to better
reflect semantics. I've tried to keep the diff as small as possible.
2015-10-28 17:42:15 -07:00

76 lines
2.1 KiB
Go

package newset
import (
"github.com/attic-labs/noms/ref"
)
// SetBuilder creates a chunked Set implementation. It builds up a sequence of nodes, chunking them if necessary in the process.
type SetBuilder struct {
// current is the node entries are being appended to; it is nil until the first entry is added and again after each chunk boundary.
current node
// chunks holds the nodes completed so far, in the order they were finished.
chunks []node
// chunker is consulted after every appended entry; when its Add reports true the current node is closed off as a chunk.
chunker Chunker
// store receives every node the builder produces, keyed by the node's ref.
store *nodeStore
// newChunker creates the chunker used by this builder; Build also uses it for the parent-level builder and hands it to the resulting Set.
newChunker chunkerFactory
// newNode creates the node that a new chunk starts from.
newNode nodeFactory
}
// nodeFactory creates the node a builder starts a new chunk from. It is handed the builder's nodeStore, which the factory may ignore (leaf nodes) or keep (internal nodes).
type nodeFactory func(st *nodeStore) node
// NewSetBuilder returns a SetBuilder for the leaf level of a set.
//
// store is the nodeStore the builder records its nodes in. newChunker is
// called once here to create the builder's chunker and is kept so that later
// levels can create their own. New chunks start from an empty leaf node.
func NewSetBuilder(store *nodeStore, newChunker chunkerFactory) SetBuilder {
	var builder SetBuilder
	builder.store = store
	builder.newChunker = newChunker
	builder.chunker = newChunker()
	// Leaves do not need the store, so the factory ignores its argument.
	builder.newNode = func(*nodeStore) node { return leaf{} }
	return builder
}
// AddItem adds the next item to the builder. Items must be added in sort order.
//
// An item is recorded as an entry whose first value and ref are both r.
func (builder *SetBuilder) AddItem(r ref.Ref) {
	first, entry := r, r
	builder.addEntry(first, entry)
}
// addEntry appends the entry (first, r) to the node under construction,
// starting from a fresh node created by newNode when none is in progress.
//
// first is handed to appendRef together with r; AddItem passes the item itself
// for both, while Build passes a chunk's start() and ref().
//
// After every append the resulting node is recorded in the store under its
// ref. r is then offered to the chunker: if it reports a boundary, the node is
// moved to the list of finished chunks and the next entry starts a new node.
func (builder *SetBuilder) addEntry(first, r ref.Ref) {
	base := builder.current
	if base == nil {
		base = builder.newNode(builder.store)
	}

	grown := base.appendRef(first, r)
	builder.current = grown
	builder.store.d[grown.ref()] = grown

	if !builder.chunker.Add(r) {
		// No boundary yet; keep appending to the same node.
		return
	}

	builder.chunks = append(builder.chunks, grown)
	builder.current = nil
}
// Build returns a Set with the canonical set structure of the added items.
//
// Entries still pending in the builder's current node are included as a final
// chunk. Build leaves the builder untouched: the chunk list is assembled in a
// local slice, so calling Build repeatedly yields the same structure rather
// than appending the pending node to builder.chunks again on every call.
//
// The result depends on the number of chunks:
//   - none: an empty Set with a nil root node;
//   - one: that node is the root;
//   - several: a parent level of internal nodes is built with one entry per
//     chunk (the chunk's start() and ref()), and Build is applied to that
//     level in turn.
//
// NOTE(review): the recursion ends only once a level fits in a single chunk,
// which presumably relies on the chunker not splitting after every entry —
// confirm against the Chunker implementations.
func (builder *SetBuilder) Build() Set {
	chunks := builder.chunks
	if builder.current != nil {
		// The full slice expression caps the capacity, so append copies instead
		// of writing into spare capacity that builder.chunks may still use.
		chunks = append(chunks[:len(chunks):len(chunks)], builder.current)
	}

	switch len(chunks) {
	case 0:
		// Nothing was added, this is an empty set.
		return Set{nil, builder.store, builder.newChunker}
	case 1:
		// No chunks were created, we're done.
		return Set{chunks[0], builder.store, builder.newChunker}
	}

	// The set components chunked into multiple components. Now we chunk those.
	internalBuilder := &SetBuilder{
		store:      builder.store,
		chunker:    builder.newChunker(),
		newChunker: builder.newChunker,
		newNode:    func(st *nodeStore) node { return internal{store: st} },
	}
	for _, c := range chunks {
		internalBuilder.addEntry(c.start(), c.ref())
	}
	return internalBuilder.Build()
}