From c15f16442386b27874bdcb5c5a39f3cc7eda5706 Mon Sep 17 00:00:00 2001 From: Brian Hendriks Date: Sun, 28 Feb 2021 13:42:51 -0800 Subject: [PATCH 1/4] panic on overflow fix --- go/store/types/codec.go | 15 ++++++++++++++- go/store/types/map_leaf_sequence.go | 5 +++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/go/store/types/codec.go b/go/store/types/codec.go index a0442d2177..6a6e14f483 100644 --- a/go/store/types/codec.go +++ b/go/store/types/codec.go @@ -356,9 +356,21 @@ type binaryNomsWriter struct { offset uint32 } +func newBinaryNomsWriterWithSizeHint(sizeHint uint64) binaryNomsWriter { + size := uint32(initialBufferSize) + if sizeHint >= math.MaxUint32 { + size = math.MaxUint32 + } else if sizeHint > uint64(size) { + size = uint32(sizeHint) + } + + return binaryNomsWriter{make([]byte, size), 0} +} + func newBinaryNomsWriter() binaryNomsWriter { return binaryNomsWriter{make([]byte, initialBufferSize), 0} } + func (b *binaryNomsWriter) data() []byte { return b.buff[0:b.offset] } @@ -374,7 +386,7 @@ const ( func (b *binaryNomsWriter) ensureCapacity(n uint32) { length := uint64(len(b.buff)) - minLength := uint64(b.offset + n) + minLength := uint64(b.offset) + uint64(n) if length >= minLength { return } @@ -389,6 +401,7 @@ func (b *binaryNomsWriter) ensureCapacity(n uint32) { length = length * 2 if length >= Gigs2 { + length = Gigs2 break } } diff --git a/go/store/types/map_leaf_sequence.go b/go/store/types/map_leaf_sequence.go index ad9aa55977..8f1ba7aaac 100644 --- a/go/store/types/map_leaf_sequence.go +++ b/go/store/types/map_leaf_sequence.go @@ -113,8 +113,9 @@ func (mes mapEntrySlice) Equals(other mapEntrySlice) bool { func newMapLeafSequence(vrw ValueReadWriter, data ...mapEntry) (orderedSequence, error) { d.PanicIfTrue(vrw == nil) - offsets := make([]uint32, len(data)+sequencePartValues+1) - w := newBinaryNomsWriter() + dataLen := len(data) + offsets := make([]uint32, dataLen+sequencePartValues+1) + w := newBinaryNomsWriterWithSizeHint(uint64(dataLen) * 16) offsets[sequencePartKind] = w.offset err := MapKind.writeTo(&w, vrw.Format()) From 3723fe468de9fc711da91bccfff31912f6ac43d7 Mon Sep 17 00:00:00 2001 From: Brian Hendriks Date: Mon, 1 Mar 2021 07:44:03 -0800 Subject: [PATCH 2/4] new sequence type that doesn't concatenate all the sequences into a single buffer --- go/store/types/map_leaf_sequence.go | 290 +++++++++++++++++++++++++--- go/store/types/meta_sequence.go | 5 +- go/store/types/ordered_sequences.go | 6 +- 3 files changed, 267 insertions(+), 34 deletions(-) diff --git a/go/store/types/map_leaf_sequence.go b/go/store/types/map_leaf_sequence.go index 8f1ba7aaac..193686a7af 100644 --- a/go/store/types/map_leaf_sequence.go +++ b/go/store/types/map_leaf_sequence.go @@ -22,9 +22,14 @@ package types import ( + "context" + "errors" "github.com/dolthub/dolt/go/store/d" + "github.com/dolthub/dolt/go/store/hash" ) +var ErrNotAMapOfTuples = errors.New("type error: not a map of tuples") + type mapLeafSequence struct { leafSequence } @@ -195,41 +200,51 @@ func (ml mapLeafSequence) entries() (mapEntrySlice, error) { } func (ml mapLeafSequence) getCompareFn(other sequence) compareFn { - dec1 := ml.decoder() - ml2 := other.(mapLeafSequence) - dec2 := ml2.decoder() - return func(idx, otherIdx int) (bool, error) { - dec1.offset = uint32(ml.getItemOffset(idx)) - dec2.offset = uint32(ml2.getItemOffset(otherIdx)) - k1, err := dec1.readValue(ml.format()) - - if err != nil { - return false, err + if mes, ok := other.(mapEntrySequence); ok { + // use mapEntrySequence comparison func rather than implementing the logic 2x + cmpFn := mes.getCompareFn(ml) + return func(idx, otherIdx int) (bool, error) { + // need to use otherIdx as first param and idx as second since we are using other's comparison func + return cmpFn(otherIdx, idx) } + } else { - k2, err := dec2.readValue(ml2.format()) + ml2 := other.(mapLeafSequence) + dec1 := ml.decoder() + dec2 := ml2.decoder() + return func(idx, otherIdx int) (bool, error) { + dec1.offset = uint32(ml.getItemOffset(idx)) + dec2.offset = uint32(ml2.getItemOffset(otherIdx)) + k1, err := dec1.readValue(ml.format()) - if err != nil { - return false, err + if err != nil { + return false, err + } + + k2, err := dec2.readValue(ml2.format()) + + if err != nil { + return false, err + } + + if !k1.Equals(k2) { + return false, nil + } + + v1, err := dec1.readValue(ml.format()) + + if err != nil { + return false, err + } + + v2, err := dec2.readValue(ml2.format()) + + if err != nil { + return false, err + } + + return v1.Equals(v2), nil } - - if !k1.Equals(k2) { - return false, nil - } - - v1, err := dec1.readValue(ml.format()) - - if err != nil { - return false, err - } - - v2, err := dec2.readValue(ml2.format()) - - if err != nil { - return false, err - } - - return v1.Equals(v2), nil } } @@ -351,3 +366,216 @@ func (ml mapLeafSequence) getValue(idx int) (Value, error) { return dec.readValue(ml.format()) } + +var _ sequence = (*mapEntrySequence)(nil) +var _ orderedSequence = (*mapEntrySequence)(nil) + +type mapEntrySequence struct { + nbf *NomsBinFormat + vrw ValueReadWriter + entries []mapEntry +} + +func newMapEntrySequence(vrw ValueReadWriter, data ...mapEntry) (sequence, error) { + return mapEntrySequence{nbf: vrw.Format(), vrw: vrw, entries: data}, nil +} + +func (mes mapEntrySequence) getKey(idx int) (orderedKey, error) { + return newOrderedKey(mes.entries[idx].key, mes.nbf) +} + +func (mes mapEntrySequence) getValue(idx int) (Value, error) { + return mes.entries[idx].value, nil +} + +func (mes mapEntrySequence) search(key orderedKey) (int, error) { + n, err := SearchWithErroringLess(len(mes.entries), func(i int) (bool, error) { + ordKey, err := mes.getKey(i) + + if err != nil { + return false, err + } + + isLess, err := ordKey.Less(mes.nbf, key) + + if err != nil { + return false, nil + } + + return !isLess, nil + }) + + return n, err +} + +func (mes mapEntrySequence) cumulativeNumberOfLeaves(idx int) (uint64, error) { + return uint64(idx) + 1, nil +} + +func (mes mapEntrySequence) Empty() bool { + return len(mes.entries) == 0 +} + +func (mes mapEntrySequence) format() *NomsBinFormat { + return mes.vrw.Format() +} + +func (mes mapEntrySequence) getChildSequence(ctx context.Context, idx int) (sequence, error) { + return nil, nil +} + +func (mes mapEntrySequence) getItem(idx int) (sequenceItem, error) { + return mes.entries[idx], nil +} + +func (mes mapEntrySequence) isLeaf() bool { + return true +} + +func (mes mapEntrySequence) Kind() NomsKind { + return MapKind +} + +func (mes mapEntrySequence) Len() uint64 { + return uint64(len(mes.entries)) +} + +func (mes mapEntrySequence) numLeaves() uint64 { + return uint64(len(mes.entries)) +} + +func (mes mapEntrySequence) seqLen() int { + return len(mes.entries) +} + +func (mes mapEntrySequence) treeLevel() uint64 { + return 0 +} + +func (mes mapEntrySequence) valueReadWriter() ValueReadWriter { + return mes.vrw +} + +func (mes mapEntrySequence) valuesSlice(from, to uint64) ([]Value, error) { + if l := mes.Len(); to > l { + to = l + } + + numPairs := (to - from) + numTuples := numPairs * 2 // 1 key, 1 value interleaved + dest := make([]Value, numTuples) + + dest = dest[:numTuples] + for i := uint64(0); i < numPairs; i++ { + entry := mes.entries[i] + dest[i*2] = entry.key + dest[i*2+1] = entry.value + } + return dest, nil +} + +func (mes mapEntrySequence) kvTuples(from, to uint64, dest []Tuple) ([]Tuple, error) { + if l := mes.Len(); to > l { + to = l + } + + numPairs := (to - from) + numTuples := numPairs * 2 // 1 key, 1 value interleaved + + if uint64(cap(dest)) < numTuples { + dest = make([]Tuple, numTuples) + } + + dest = dest[:numTuples] + for i := uint64(0); i < numPairs; i++ { + entry := mes.entries[i] + + keyTuple, ok := entry.key.(Tuple) + + if !ok { + return nil, ErrNotAMapOfTuples + } + + valTuple, ok := entry.value.(Tuple) + + if !ok { + return nil, ErrNotAMapOfTuples + } + + dest[i*2] = keyTuple + dest[i*2+1] = valTuple + } + return dest, nil +} + +func (mes mapEntrySequence) getCompareFn(other sequence) compareFn { + if otherCMLS, ok := other.(mapEntrySequence); ok { + return func(idx, otherIdx int) (bool, error) { + entry1 := mes.entries[idx] + entry2 := otherCMLS.entries[otherIdx] + + if !entry1.key.Equals(entry2.key) { + return false, nil + } + + return entry1.value.Equals(entry2.value), nil + } + } else if ml, ok := other.(mapLeafSequence); ok { + dec1 := ml.decoder() + return func(mesIdx, mlsIdx int) (bool, error) { + entry := mes.entries[mesIdx] + dec1.offset = uint32(ml.getItemOffset(mlsIdx)) + mlk, err := dec1.readValue(ml.format()) + + if err != nil { + return false, err + } + + if !entry.key.Equals(mlk) { + return false, nil + } + + mlv, err := dec1.readValue(ml.format()) + + if err != nil { + return false, err + } + + return entry.value.Equals(mlv), nil + } + } + + panic("unsupported") +} + +func (mes mapEntrySequence) typeOf() (*Type, error) { + panic("implement me") +} + +func (mes mapEntrySequence) WalkRefs(nbf *NomsBinFormat, cb RefCallback) error { + panic("implement me") +} + +func (mes mapEntrySequence) writeTo(writer nomsWriter, format *NomsBinFormat) error { + panic("implement me") +} + +func (mes mapEntrySequence) Less(nbf *NomsBinFormat, other LesserValuable) (bool, error) { + panic("implement me") +} + +func (mes mapEntrySequence) Hash(format *NomsBinFormat) (hash.Hash, error) { + panic("implement me") +} + +func (mes mapEntrySequence) Equals(other Value) bool { + panic("implement me") +} + +func (mes mapEntrySequence) asValueImpl() valueImpl { + panic("not implemented") +} + +func (mes mapEntrySequence) getCompositeChildSequence(ctx context.Context, start uint64, length uint64) (sequence, error) { + panic("getCompositeChildSequence called on a leaf sequence") +} diff --git a/go/store/types/meta_sequence.go b/go/store/types/meta_sequence.go index ddfdfeb12a..1163bd4a5f 100644 --- a/go/store/types/meta_sequence.go +++ b/go/store/types/meta_sequence.go @@ -24,7 +24,6 @@ package types import ( "context" "fmt" - "github.com/dolthub/dolt/go/libraries/utils/tracing" "github.com/dolthub/dolt/go/store/d" "github.com/dolthub/dolt/go/store/hash" @@ -496,7 +495,9 @@ func (ms metaSequence) getCompositeChildSequence(ctx context.Context, start uint valueItems = append(valueItems, entries.entries...) } - return newMapLeafSequence(ms.vrw, valueItems...) + + return newMapEntrySequence(ms.vrw, valueItems...) + case SetKind: var valueItems []Value for _, seq := range output { diff --git a/go/store/types/ordered_sequences.go b/go/store/types/ordered_sequences.go index 4a9c179e06..4e9a1c20c6 100644 --- a/go/store/types/ordered_sequences.go +++ b/go/store/types/ordered_sequences.go @@ -186,8 +186,12 @@ func getCurrentKey(cur *sequenceCursor) (orderedKey, error) { return seq.getKey(cur.idx) } +type mapSeqWithValues interface { + getValue(idx int) (Value, error) +} + func getMapValue(cur *sequenceCursor) (Value, error) { - if ml, ok := cur.seq.(mapLeafSequence); ok { + if ml, ok := cur.seq.(mapSeqWithValues); ok { return ml.getValue(cur.idx) } From 55cfcc524f7f255269acef224920faa42034c95a Mon Sep 17 00:00:00 2001 From: Brian Hendriks Date: Mon, 1 Mar 2021 07:52:16 -0800 Subject: [PATCH 3/4] formatting --- go/store/types/map_leaf_sequence.go | 1 + go/store/types/meta_sequence.go | 1 + 2 files changed, 2 insertions(+) diff --git a/go/store/types/map_leaf_sequence.go b/go/store/types/map_leaf_sequence.go index 193686a7af..0622c8e143 100644 --- a/go/store/types/map_leaf_sequence.go +++ b/go/store/types/map_leaf_sequence.go @@ -24,6 +24,7 @@ package types import ( "context" "errors" + "github.com/dolthub/dolt/go/store/d" "github.com/dolthub/dolt/go/store/hash" ) diff --git a/go/store/types/meta_sequence.go b/go/store/types/meta_sequence.go index 1163bd4a5f..e1f8f324e8 100644 --- a/go/store/types/meta_sequence.go +++ b/go/store/types/meta_sequence.go @@ -24,6 +24,7 @@ package types import ( "context" "fmt" + "github.com/dolthub/dolt/go/libraries/utils/tracing" "github.com/dolthub/dolt/go/store/d" "github.com/dolthub/dolt/go/store/hash" From a5dc80cd07205a911966f7664e730a5eaf0f1efc Mon Sep 17 00:00:00 2001 From: Brian Hendriks Date: Mon, 1 Mar 2021 09:00:10 -0800 Subject: [PATCH 4/4] pr feedback --- go/store/types/map_leaf_sequence.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go/store/types/map_leaf_sequence.go b/go/store/types/map_leaf_sequence.go index 0622c8e143..ba012c53bd 100644 --- a/go/store/types/map_leaf_sequence.go +++ b/go/store/types/map_leaf_sequence.go @@ -550,27 +550,27 @@ func (mes mapEntrySequence) getCompareFn(other sequence) compareFn { } func (mes mapEntrySequence) typeOf() (*Type, error) { - panic("implement me") + panic("not implemented") } func (mes mapEntrySequence) WalkRefs(nbf *NomsBinFormat, cb RefCallback) error { - panic("implement me") + panic("not implemented") } func (mes mapEntrySequence) writeTo(writer nomsWriter, format *NomsBinFormat) error { - panic("implement me") + panic("not implemented") } func (mes mapEntrySequence) Less(nbf *NomsBinFormat, other LesserValuable) (bool, error) { - panic("implement me") + panic("not implemented") } func (mes mapEntrySequence) Hash(format *NomsBinFormat) (hash.Hash, error) { - panic("implement me") + panic("not implemented") } func (mes mapEntrySequence) Equals(other Value) bool { - panic("implement me") + panic("not implemented") } func (mes mapEntrySequence) asValueImpl() valueImpl {