mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-31 03:18:43 -06:00
Previously we had flatSet vs chunkedSet, each implementing the Set interface, and SetBuilder dealing with this Set interface. Now we have a chunkedSet interface with setValueChunk and setIndexChunk implementations, SetBuilder dealing with the chunkedSet interface, and a Set class which implements Set primitives on top of a chunkedSet. Of particular note is the introduction of the setChunkStore, an in-memory cache of setChunks keyed by their refs. This is needed right now so that we only have a single SetBuilder implementation which chunks refs, as opposed to two SetBuilder implementations: one which chunks refs and another which chunks chunkedSet instances. Something like it will be needed in the longer term to implement lazy loading of set chunks; however, there will be issues like memory pressure to deal with as well. Follow-ups to this patch include renaming files to better reflect their class names, and doing a bit of function/member renaming to better reflect semantics. I've tried to keep the diff as small as possible.
76 lines
2.1 KiB
Go
76 lines
2.1 KiB
Go
package newset
|
|
|
|
import (
|
|
"github.com/attic-labs/noms/ref"
|
|
)
|
|
|
|
// SetBuilder creates a chunked Set implementation. It builds up a sequence of node, chunking them if necessary in the process.
|
|
type SetBuilder struct {
|
|
current node
|
|
chunks []node
|
|
chunker Chunker
|
|
store *nodeStore
|
|
newChunker chunkerFactory
|
|
newNode nodeFactory
|
|
}
|
|
|
|
// nodeFactory creates an empty node to start a new chunk. The node store is
// passed in so that node kinds which need it can hold on to it.
type nodeFactory func(*nodeStore) node
|
|
|
|
func NewSetBuilder(store *nodeStore, newChunker chunkerFactory) SetBuilder {
|
|
return SetBuilder{
|
|
store: store,
|
|
chunker: newChunker(),
|
|
newChunker: newChunker,
|
|
newNode: func(st *nodeStore) node { return leaf{} },
|
|
}
|
|
}
|
|
|
|
// Adds the next item to the builder. Items must be added in sort order.
|
|
func (builder *SetBuilder) AddItem(r ref.Ref) {
|
|
builder.addEntry(r, r)
|
|
}
|
|
|
|
func (builder *SetBuilder) addEntry(first, r ref.Ref) {
|
|
var newCurrent node
|
|
if builder.current == nil {
|
|
newCurrent = builder.newNode(builder.store).appendRef(first, r)
|
|
} else {
|
|
newCurrent = builder.current.appendRef(first, r)
|
|
}
|
|
builder.current = newCurrent
|
|
builder.store.d[builder.current.ref()] = builder.current
|
|
if builder.chunker.Add(r) {
|
|
builder.chunks = append(builder.chunks, builder.current)
|
|
builder.current = nil
|
|
}
|
|
}
|
|
|
|
// Build returns the a Set with the canonical set structure of the added items.
|
|
func (builder *SetBuilder) Build() Set {
|
|
if builder.current != nil {
|
|
builder.chunks = append(builder.chunks, builder.current)
|
|
}
|
|
|
|
if len(builder.chunks) == 0 {
|
|
// Nothing was added, this is an empty set.
|
|
return Set{nil, builder.store, builder.newChunker}
|
|
}
|
|
|
|
if len(builder.chunks) == 1 {
|
|
// No chunks were created, we're done.
|
|
return Set{builder.chunks[0], builder.store, builder.newChunker}
|
|
}
|
|
|
|
// The set components chunked into multiple components. Now we chunk those.
|
|
internalBuilder := &SetBuilder{
|
|
store: builder.store,
|
|
chunker: builder.newChunker(),
|
|
newChunker: builder.newChunker,
|
|
newNode: func(st *nodeStore) node { return internal{store: st} },
|
|
}
|
|
for _, c := range builder.chunks {
|
|
internalBuilder.addEntry(c.start(), c.ref())
|
|
}
|
|
return internalBuilder.Build()
|
|
}
|