Files
dolt/go/store/prolly/tuple_mutable_map.go
Maximilian Hoffman 801a82a3af Statistics for multiple branches (#7558)
* starter

* prog

* flush out statsdb interface fork, and noms implementation

* more progress on tests

* StatsIO tests passing

* pretty close, thread canceller still broken

* check context dropped inside refresh loop

* tidy

* nil panic

* dolt harness setup imports setup statistics

* lot of test fixes

* bump

* fmt

* fix providers race

* don't load in create database hook

* bump

* bump

* fix integration tests

* fix more bugs

* fix drop

* more test fixes

* cleanup

* missing updateActive

* simplify bucket merging

* nick comments

* bump

* fix bad merge

* tidy and edits

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* bump test fixes

* bump

* fix bats

* bump

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* redo create db to account for variety of edge cases

* try path.join

---------

Co-authored-by: max-hoffman <max-hoffman@users.noreply.github.com>
2024-03-27 11:54:15 -07:00

311 lines
9.0 KiB
Go

// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package prolly
import (
"context"
"io"
"strconv"
"strings"
"github.com/dolthub/dolt/go/store/prolly/message"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/val"
)
// defaultMaxPending is the maxPending value given to every newly
// constructed MutableMap (64 Ki pending edits).
const defaultMaxPending = 64 << 10
// MutableMap is an ordered collection of val.Tuple backed by a Prolly Tree.
// Writes to the map are queued in a skip.List; Put flushes the queue into
// the tree once the number of pending writes exceeds maxPending.
type MutableMap struct {
	// tuples contains the primary Prolly Tree and the skip.List of
	// pending edits for this map.
	tuples tree.MutableMap[val.Tuple, val.Tuple, val.TupleDesc]

	// stash, if not nil, contains a previous checkpoint of this map.
	// A stash is created when the MutableMap has been check-pointed but
	// the number of in-memory pending writes exceeds maxPending. In that
	// case a copy of the tree.MutableMap holding the checkpoint is stashed,
	// the pending writes are flushed, and the map continues accumulating
	// new writes.
	stash *tree.MutableMap[val.Tuple, val.Tuple, val.TupleDesc]

	// keyDesc is the tuple descriptor for the map's keys.
	keyDesc val.TupleDesc
	// valDesc is the tuple descriptor for the map's values.
	valDesc val.TupleDesc

	// maxPending is the buffer size: the number of pending edits above
	// which Put flushes them into the tree.
	maxPending int
}
// newMutableMap returns a new MutableMap over |m| that keeps the key and
// value descriptors of |m| and uses the default pending-buffer size.
func newMutableMap(m Map) *MutableMap {
	return newMutableMapWithDescriptors(m, m.keyDesc, m.valDesc)
}
// newMutableMapWithDescriptors returns a new MutableMap over |m| whose key
// and value TupleDescriptors are overridden with |kd| and |vd|. This is
// useful when rewriting the data in a map to change its schema. The pending
// buffer size is set to defaultMaxPending.
func newMutableMapWithDescriptors(m Map, kd, vd val.TupleDesc) *MutableMap {
	mut := new(MutableMap)
	mut.tuples = m.tuples.Mutate()
	mut.keyDesc, mut.valDesc = kd, vd
	mut.maxPending = defaultMaxPending
	return mut
}
// Map materializes all pending and applied mutations in the MutableMap and
// returns the resulting Map. It builds a prolly-map serializer from the value
// descriptor and the node store's pool, then delegates to flushWithSerializer.
func (mut *MutableMap) Map(ctx context.Context) (Map, error) {
	pool := mut.NodeStore().Pool()
	serializer := message.NewProllyMapSerializer(mut.valDesc, pool)
	return mut.flushWithSerializer(ctx, serializer)
}
// flushWithSerializer applies the pending mutations of |mut| to its static
// tree, serializing nodes with |s|, and returns a Map rooted at the result.
// The returned Map reuses the NodeStore and Order of the current static map
// and the key/value descriptors of |mut|. The receiver's own fields are not
// reassigned here. On error an empty Map is returned with the error.
func (mut *MutableMap) flushWithSerializer(ctx context.Context, s message.Serializer) (Map, error) {
	static := mut.tuples.Static
	edits := mut.tuples.Mutations()

	root, err := tree.ApplyMutations[val.Tuple, val.TupleDesc, message.Serializer](
		ctx, static.NodeStore, static.Root, mut.keyDesc, s, edits,
	)
	if err != nil {
		return Map{}, err
	}

	flushed := Map{keyDesc: mut.keyDesc, valDesc: mut.valDesc}
	flushed.tuples = tree.StaticMap[val.Tuple, val.Tuple, val.TupleDesc]{
		Root:      root,
		NodeStore: static.NodeStore,
		Order:     static.Order,
	}
	return flushed, nil
}
// WithMaxPending returns a MutableMap with a new pending buffer size. The
// result is a by-value copy of the receiver's struct with maxPending set to
// |max|; the receiver itself is left unmodified.
func (mut *MutableMap) WithMaxPending(max int) *MutableMap {
	clone := new(MutableMap)
	*clone = *mut
	clone.maxPending = max
	return clone
}
// NodeStore returns the NodeStore of the map's static tree.
func (mut *MutableMap) NodeStore() tree.NodeStore {
	ns := mut.tuples.Static.NodeStore
	return ns
}
// Put adds the Tuple pair |key|, |value| to the MutableMap. After a
// successful write, the pending edits are flushed if their count has grown
// beyond maxPending.
func (mut *MutableMap) Put(ctx context.Context, key, value val.Tuple) error {
	err := mut.tuples.Put(ctx, key, value)
	if err != nil {
		return err
	}
	// still within the buffer budget: nothing more to do
	if mut.tuples.Edits.Count() <= mut.maxPending {
		return nil
	}
	return mut.flushPending(ctx)
}
// Delete deletes the pair keyed by |key| from the MutableMap.
// Unlike Put, Delete never triggers a flush of the pending edits.
func (mut *MutableMap) Delete(ctx context.Context, key val.Tuple) error {
	err := mut.tuples.Delete(ctx, key)
	return err
}
// Get looks up the Tuple pair keyed by |key| and hands it to |cb|.
// If |key| is not present in the MutableMap, |cb| receives a nil Tuple pair.
func (mut *MutableMap) Get(ctx context.Context, key val.Tuple, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) {
	err = mut.tuples.Get(ctx, key, cb)
	return err
}
// GetPrefix forwards a prefix lookup for |key|, described by |prefixDesc|,
// to the underlying tree.MutableMap, which reports the result through |cb|.
func (mut *MutableMap) GetPrefix(ctx context.Context, key val.Tuple, prefixDesc val.TupleDesc, cb tree.KeyValueFn[val.Tuple, val.Tuple]) (err error) {
	err = mut.tuples.GetPrefix(ctx, key, prefixDesc, cb)
	return err
}
// Has reports whether |key| is present in the MutableMap.
func (mut *MutableMap) Has(ctx context.Context, key val.Tuple) (ok bool, err error) {
	ok, err = mut.tuples.Has(ctx, key)
	return ok, err
}
// HasPrefix reports whether the MutableMap contains a key whose prefix,
// as described by |prefixDesc|, matches |key|.
func (mut *MutableMap) HasPrefix(ctx context.Context, key val.Tuple, prefixDesc val.TupleDesc) (ok bool, err error) {
	ok, err = mut.tuples.HasPrefix(ctx, key, prefixDesc)
	return ok, err
}
// Checkpoint records a checkpoint that can later be reverted to. Any stash
// left over from an earlier checkpoint is dropped. The returned error is
// always nil.
func (mut *MutableMap) Checkpoint(_ context.Context) error {
	// the new checkpoint supersedes whatever was stashed before
	mut.stash = nil
	mut.tuples.Edits.Checkpoint()
	return nil
}
// Revert discards writes made since the last checkpoint.
func (mut *MutableMap) Revert(_ context.Context) {
	if mut.stash == nil {
		// the checkpoint still lives in the in-memory edits
		mut.tuples.Edits.Revert()
		return
	}
	// A large number of writes accumulated after the checkpoint, so the
	// check-pointed state was stashed in a separate tree.MutableMap:
	// restore the map from that stash.
	mut.tuples = *mut.stash
}
// flushPending materializes the pending edits into the Prolly Tree and then
// empties the in-memory edit buffer. Put calls it once the number of pending
// edits exceeds maxPending.
//
// If the edit buffer contains a checkpoint, a copy of |mut.tuples| rolled back
// to that checkpoint is stashed first, so Revert can still restore it after the
// buffer has been truncated. The assignments to |mut.tuples.Static| and
// |mut.stash|, and the truncation of the edit buffer, happen only after the
// flush has succeeded; on a flush error the error is returned before them.
func (mut *MutableMap) flushPending(ctx context.Context) error {
// keep the current stash unless a newer checkpoint replaces it below
stash := mut.stash
// if our in-memory edit set contains a checkpoint, we
// must stash a copy of |mut.tuples| that we can revert to.
if mut.tuples.Edits.HasCheckpoint() {
cp := mut.tuples.Copy()
// roll the copy back so the stash holds the check-pointed state
// NOTE(review): presumably Copy gives |cp| its own edit list, so this
// Revert leaves |mut.tuples| untouched — confirm against tree.MutableMap.
cp.Edits.Revert()
stash = &cp
}
// materialize all pending edits into a new static map
sm, err := mut.Map(ctx)
if err != nil {
return err
}
// adopt the flushed tree as the new static base
mut.tuples.Static = sm.tuples
mut.tuples.Edits.Truncate() // reuse skip list
mut.stash = stash
return nil
}
// IterAll returns a MapIter that iterates over the entire MutableMap. It is
// IterRange applied to a Range with no Fields, ordered by the key descriptor.
func (mut *MutableMap) IterAll(ctx context.Context) (MapIter, error) {
	return mut.IterRange(ctx, Range{
		Fields: nil,
		Desc:   mut.keyDesc,
	})
}
// IterKeyRange iterates over a physical key range defined by |start| and
// |stop|. If |start| and/or |stop| is nil, the range will be open
// towards that end.
//
// The iteration is delegated to the static tree only. Unlike IterRange, it
// does not consult the in-memory edits, so pending edits are not visible
// through the returned iterator.
func (mut *MutableMap) IterKeyRange(ctx context.Context, start, stop val.Tuple) (MapIter, error) {
	iter, err := mut.tuples.Static.IterKeyRange(ctx, start, stop)
	return iter, err
}
// IterRange returns a MapIter that iterates over a Range. The iterator
// combines an iterator over the static tree with one over the in-memory
// edits, ordered by |rng.Desc|, and is wrapped in a filteredIter for |rng|.
func (mut *MutableMap) IterRange(ctx context.Context, rng Range) (MapIter, error) {
	static := mut.tuples.Static
	prollyIter, err := treeIterFromRange(ctx, static.Root, static.NodeStore, rng)
	if err != nil {
		return nil, err
	}

	merged := &mutableMapIter[val.Tuple, val.Tuple, val.TupleDesc]{
		memory: memIterFromRange(mut.tuples.Edits, rng),
		prolly: prollyIter,
		order:  rng.Desc,
	}
	return filteredIter{iter: merged, rng: rng}, nil
}
// HasEdits reports whether the in-memory edit buffer currently holds at
// least one Put or Delete. It does not indicate whether the materialized map
// would contain different values from the contained unedited map.
func (mut *MutableMap) HasEdits() bool {
	pending := mut.tuples.Edits.Count()
	return pending > 0
}
// Descriptors returns the key descriptor followed by the value descriptor
// of the map.
func (mut *MutableMap) Descriptors() (val.TupleDesc, val.TupleDesc) {
	kd, vd := mut.keyDesc, mut.valDesc
	return kd, vd
}
// tupleIter is a TupleIter over an in-memory slice of tuples. The slice is
// consumed two elements at a time by Next: a key followed by its value.
type tupleIter struct {
// tuples holds the remaining key/value tuples, flattened as
// key0, value0, key1, value1, ...
tuples []val.Tuple
}
// compile-time check that *tupleIter implements TupleIter
var _ TupleIter = &tupleIter{}
// Next returns the next key/value pair and advances past it. Once the slice
// is empty it returns the zero-valued pair.
//
// The slice is expected to hold an even number of tuples; a single trailing
// tuple makes the index of the value go out of range and panic.
func (s *tupleIter) Next(context.Context) (k, v val.Tuple) {
	if len(s.tuples) == 0 {
		// nothing left: hand back the zero-valued results
		return k, v
	}
	k, v = s.tuples[0], s.tuples[1]
	s.tuples = s.tuples[2:]
	return k, v
}
// mutationIter wraps a TupleIter as a tree.MutationIter.
type mutationIter struct {
// iter is the wrapped TupleIter that supplies the key/value pairs.
iter TupleIter
}
// compile-time check that mutationIter implements tree.MutationIter
var _ tree.MutationIter = mutationIter{}
// NextMutation pulls the next pair from the wrapped TupleIter and returns
// both tuples converted to tree.Item.
func (m mutationIter) NextMutation(ctx context.Context) (key, value tree.Item) {
	k, v := m.iter.Next(ctx)
	return tree.Item(k), tree.Item(v)
}
// Close is a no-op and always returns nil; the wrapped TupleIter is not
// touched.
func (mutationIter) Close() error {
	return nil
}
// debugFormat renders |m| as a multi-line, human-readable string made of two
// sections: the in-memory edits and the contents of the static tree. Each
// section is headed by its entry count and lists one "key: value," line per
// entry, formatted with the map's key and value descriptors.
//
// An error is returned if the tree iterator cannot be created, the tree
// cannot be counted, or iteration fails with anything other than io.EOF.
func debugFormat(ctx context.Context, m *MutableMap) (string, error) {
	keyDesc, valDesc := m.keyDesc, m.valDesc

	edits := m.tuples.Edits.IterAtStart()
	treeIter, err := m.tuples.Static.IterAll(ctx)
	if err != nil {
		return "", err
	}

	var out strings.Builder
	out.WriteString("Mutable Map {\n")

	// section 1: pending in-memory edits
	editCount := strconv.Itoa(m.tuples.Edits.Count())
	out.WriteString("\tedits (count: " + editCount + ") {\n")
	// a nil key from Current ends the walk over the edits
	for k, v := edits.Current(); k != nil; k, v = edits.Current() {
		out.WriteString("\t\t" + keyDesc.Format(k) + ": " + valDesc.Format(v) + ",\n")
		edits.Advance()
	}
	out.WriteString("\t},\n")

	// section 2: contents of the static tree
	n, err := m.tuples.Static.Count()
	if err != nil {
		return "", err
	}
	treeCount := strconv.Itoa(n)
	out.WriteString("\tTree (count: " + treeCount + ") {\n")
	for {
		k, v, nextErr := treeIter.Next(ctx)
		if nextErr != nil {
			if nextErr == io.EOF {
				// io.EOF marks the normal end of the iteration
				break
			}
			return "", nextErr
		}
		out.WriteString("\t\t" + keyDesc.Format(k) + ": " + valDesc.Format(v) + ",\n")
	}
	out.WriteString("\t}\n}\n")

	return out.String(), nil
}