From 9f758c0bdcd00a6c9a1b6c20076fc824f5aa4707 Mon Sep 17 00:00:00 2001
From: Erik Arvidsson
Date: Tue, 19 Sep 2017 17:02:11 -0700
Subject: [PATCH] Make EncodeValue return bytes directly (#3725)

Also make the primitives allocate a more exact buffer instead of the
initialBufferSize.

Also, stack allocate binaryNomsWriter
---
 go/types/blob_leaf_sequence.go   |  2 +-
 go/types/bool.go                 | 35 +++++++++++++++++++-------------
 go/types/codec.go                | 26 +++++++++++++++++-------
 go/types/encoding_test.go        |  2 +-
 go/types/leaf_sequence.go        |  4 ++--
 go/types/map_leaf_sequence.go    |  4 ++--
 go/types/meta_sequence.go        |  4 ++--
 go/types/number.go               | 10 +++++++++
 go/types/ref.go                  |  8 ++++++--
 go/types/rolling_value_hasher.go |  4 ++--
 go/types/string.go               | 15 +++++++++++++-
 go/types/struct.go               | 17 ++++++++++------
 12 files changed, 91 insertions(+), 40 deletions(-)

diff --git a/go/types/blob_leaf_sequence.go b/go/types/blob_leaf_sequence.go
index 89ccc753e2..8eff8d395b 100644
--- a/go/types/blob_leaf_sequence.go
+++ b/go/types/blob_leaf_sequence.go
@@ -15,7 +15,7 @@ func newBlobLeafSequence(vrw ValueReadWriter, data []byte) sequence {
 	offsets := make([]uint32, sequencePartValues+1)
 	w := newBinaryNomsWriter()
 	offsets[sequencePartKind] = w.offset
-	BlobKind.writeTo(w)
+	BlobKind.writeTo(&w)
 	offsets[sequencePartLevel] = w.offset
 	w.writeCount(0) // level
 	offsets[sequencePartCount] = w.offset
diff --git a/go/types/bool.go b/go/types/bool.go
index 5ff6df029c..2c0c61c2bb 100644
--- a/go/types/bool.go
+++ b/go/types/bool.go
@@ -12,40 +12,40 @@ import (
 type Bool bool
 
 // Value interface
-func (v Bool) Value() Value {
-	return v
+func (b Bool) Value() Value {
+	return b
 }
 
-func (v Bool) Equals(other Value) bool {
-	return v == other
+func (b Bool) Equals(other Value) bool {
+	return b == other
 }
 
-func (v Bool) Less(other Value) bool {
-	if v2, ok := other.(Bool); ok {
-		return !bool(v) && bool(v2)
+func (b Bool) Less(other Value) bool {
+	if b2, ok := other.(Bool); ok {
+		return !bool(b) && bool(b2)
 	}
 	return true
 }
 
-func (v Bool) Hash() hash.Hash {
-	return getHash(v)
+func (b Bool) Hash() hash.Hash {
+	return getHash(b)
 }
 
-func (v Bool) WalkValues(cb ValueCallback) {
+func (b Bool) WalkValues(cb ValueCallback) {
 }
 
-func (v Bool) WalkRefs(cb RefCallback) {
+func (b Bool) WalkRefs(cb RefCallback) {
 }
 
-func (v Bool) typeOf() *Type {
+func (b Bool) typeOf() *Type {
 	return BoolType
 }
 
-func (v Bool) Kind() NomsKind {
+func (b Bool) Kind() NomsKind {
 	return BoolKind
 }
 
-func (v Bool) valueReadWriter() ValueReadWriter {
+func (b Bool) valueReadWriter() ValueReadWriter {
 	return nil
 }
 
@@ -53,3 +53,10 @@ func (b Bool) writeTo(w nomsWriter) {
 	BoolKind.writeTo(w)
 	w.writeBool(bool(b))
 }
+
+func (b Bool) valueBytes() []byte {
+	if bool(b) {
+		return []byte{byte(BoolKind), 1}
+	}
+	return []byte{byte(BoolKind), 0}
+}
diff --git a/go/types/codec.go b/go/types/codec.go
index 207f52cbf5..8314c6c1fb 100644
--- a/go/types/codec.go
+++ b/go/types/codec.go
@@ -14,11 +14,23 @@ import (
 
 const initialBufferSize = 2048
 
+type valueBytes interface {
+	valueBytes() []byte
+}
+
 func EncodeValue(v Value) chunks.Chunk {
-	// TODO: Once all Values are backed by []byte we might not need EncodeValue
-	w := newBinaryNomsWriter()
-	v.writeTo(w)
-	return chunks.NewChunk(w.data())
+	switch v := v.(type) {
+	case Collection:
+		return chunks.NewChunk(v.sequence().bytes())
+	case valueBytes:
+		return chunks.NewChunk(v.valueBytes())
+	case *Type:
+		w := newBinaryNomsWriter()
+		v.writeTo(&w)
+		return chunks.NewChunk(w.data())
+	}
+
+	panic("unreachable")
 }
 
 func DecodeFromBytes(data []byte, vrw ValueReadWriter) Value {
@@ -145,8 +157,8 @@ type binaryNomsWriter struct {
 	offset uint32
 }
 
-func newBinaryNomsWriter() *binaryNomsWriter {
-	return &binaryNomsWriter{make([]byte, initialBufferSize, initialBufferSize), 0}
+func newBinaryNomsWriter() binaryNomsWriter {
+	return binaryNomsWriter{make([]byte, initialBufferSize, initialBufferSize), 0}
 }
 
 func (b *binaryNomsWriter) data() []byte {
@@ -187,7 +199,7 @@ func (b *binaryNomsWriter) writeUint8(v uint8) {
 }
 
 func (b *binaryNomsWriter) writeCount(v uint64) {
-	b.ensureCapacity(binary.MaxVarintLen64 * 2)
+	b.ensureCapacity(binary.MaxVarintLen64)
 	count := binary.PutUvarint(b.buff[b.offset:], v)
 	b.offset += uint32(count)
 }
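
The codec.go hunk above is the heart of the change: EncodeValue now type-switches over an optional bytes-returning interface instead of always streaming through a writer. A minimal, self-contained sketch of that dispatch pattern, under illustrative names (rawBytes, fallbackEncoder, encodeValue and the toy values below are assumptions for the example, not the noms API):

```go
package main

import "fmt"

// rawBytes mirrors the optional-interface idea: values that already hold
// (or can cheaply produce) their serialized form expose it directly.
type rawBytes interface {
	rawBytes() []byte
}

// fallbackEncoder stands in for the writer-based path kept for types
// that cannot return bytes directly.
type fallbackEncoder interface {
	encode() []byte
}

type boolValue bool

func (b boolValue) rawBytes() []byte {
	if b {
		return []byte{1, 1}
	}
	return []byte{1, 0}
}

type typeValue struct{ name string }

func (t typeValue) encode() []byte {
	return append([]byte{2}, t.name...)
}

// encodeValue prefers the low-copy path and falls back to the writer-style
// path, panicking on anything it does not know how to encode.
func encodeValue(v interface{}) []byte {
	switch v := v.(type) {
	case rawBytes:
		return v.rawBytes()
	case fallbackEncoder:
		return v.encode()
	}
	panic("unreachable")
}

func main() {
	fmt.Println(encodeValue(boolValue(true)))         // [1 1]
	fmt.Println(encodeValue(typeValue{name: "Bool"})) // [2 66 111 111 108]
}
```

The fallback arm plays the role of the *Type case in the patch: anything that cannot hand back its bytes directly is still encoded through a writer.
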
diff --git a/go/types/encoding_test.go b/go/types/encoding_test.go
index 80cd2d008b..a4b52df09b 100644
--- a/go/types/encoding_test.go
+++ b/go/types/encoding_test.go
@@ -48,7 +48,7 @@ func assertEncoding(t *testing.T, expect []interface{}, v Value) {
 	expectedAsByteSlice := toBinaryNomsReaderData(expect)
 	vs := newTestValueStore()
 	w := newBinaryNomsWriter()
-	v.writeTo(w)
+	v.writeTo(&w)
 	assert.EqualValues(t, expectedAsByteSlice, w.data())
 
 	dec := newValueDecoder(expectedAsByteSlice, vs)
diff --git a/go/types/leaf_sequence.go b/go/types/leaf_sequence.go
index 0cbc908006..730071581a 100644
--- a/go/types/leaf_sequence.go
+++ b/go/types/leaf_sequence.go
@@ -22,14 +22,14 @@ func newLeafSequence(kind NomsKind, count uint64, vrw ValueReadWriter, vs ...Val
 	w := newBinaryNomsWriter()
 	offsets := make([]uint32, len(vs)+sequencePartValues+1)
 	offsets[sequencePartKind] = w.offset
-	kind.writeTo(w)
+	kind.writeTo(&w)
 	offsets[sequencePartLevel] = w.offset
 	w.writeCount(0) // level
 	offsets[sequencePartCount] = w.offset
 	w.writeCount(count)
 	offsets[sequencePartValues] = w.offset
 	for i, v := range vs {
-		v.writeTo(w)
+		v.writeTo(&w)
 		offsets[i+sequencePartValues+1] = w.offset
 	}
 	return leafSequence{vrw, w.data(), offsets}
diff --git a/go/types/map_leaf_sequence.go b/go/types/map_leaf_sequence.go
index c8fb27f91d..a50e9c74da 100644
--- a/go/types/map_leaf_sequence.go
+++ b/go/types/map_leaf_sequence.go
@@ -56,14 +56,14 @@ func newMapLeafSequence(vrw ValueReadWriter, data ...mapEntry) orderedSequence {
 	offsets := make([]uint32, len(data)+sequencePartValues+1)
 	w := newBinaryNomsWriter()
 	offsets[sequencePartKind] = w.offset
-	MapKind.writeTo(w)
+	MapKind.writeTo(&w)
 	offsets[sequencePartLevel] = w.offset
 	w.writeCount(0) // level
 	offsets[sequencePartCount] = w.offset
 	w.writeCount(uint64(len(data)))
 	offsets[sequencePartValues] = w.offset
 	for i, me := range data {
-		me.writeTo(w)
+		me.writeTo(&w)
 		offsets[i+sequencePartValues+1] = w.offset
 	}
 	return mapLeafSequence{leafSequence{vrw, w.data(), offsets}}
diff --git a/go/types/meta_sequence.go b/go/types/meta_sequence.go
index 6aaa901838..d3ff0ce1a1 100644
--- a/go/types/meta_sequence.go
+++ b/go/types/meta_sequence.go
@@ -169,14 +169,14 @@ func newMetaSequence(kind NomsKind, level uint64, tuples []metaTuple, vrw ValueR
 	w := newBinaryNomsWriter()
 	offsets := make([]uint32, len(tuples)+sequencePartValues+1)
 	offsets[sequencePartKind] = w.offset
-	kind.writeTo(w)
+	kind.writeTo(&w)
 	offsets[sequencePartLevel] = w.offset
 	w.writeCount(level)
 	offsets[sequencePartCount] = w.offset
 	w.writeCount(uint64(len(tuples)))
 	offsets[sequencePartValues] = w.offset
 	for i, mt := range tuples {
-		mt.writeTo(w)
+		mt.writeTo(&w)
 		offsets[i+sequencePartValues+1] = w.offset
 	}
 	return metaSequence{vrw, w.data(), offsets}
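
Most of the hunks above apply the same mechanical change: newBinaryNomsWriter now returns a value rather than a pointer, the caller keeps the writer on the stack, and &w is passed wherever a nomsWriter is needed. A small sketch of that pattern, assuming illustrative names (byteWriter is not the noms type):

```go
package main

import "fmt"

// byteWriter is a tiny append-only writer, kept deliberately close in shape
// to a struct holding a buffer and an offset.
type byteWriter struct {
	buff   []byte
	offset int
}

// newByteWriter returns the writer by value so callers can keep it on the
// stack; methods use a pointer receiver so writes remain visible.
func newByteWriter(size int) byteWriter {
	return byteWriter{buff: make([]byte, size)}
}

func (w *byteWriter) writeUint8(v uint8) {
	w.buff[w.offset] = v
	w.offset++
}

func (w *byteWriter) data() []byte {
	return w.buff[:w.offset]
}

func main() {
	w := newByteWriter(16) // stack-allocated value, no separate writer allocation
	w.writeUint8(42)       // method calls take &w implicitly
	fmt.Println(w.data())  // [42]
}
```

Returning the struct by value removes the separate heap allocation for the writer itself; the backing buffer is still allocated once up front.
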
diff --git a/go/types/number.go b/go/types/number.go
index 9c96d2266e..211b9b5871 100644
--- a/go/types/number.go
+++ b/go/types/number.go
@@ -5,6 +5,7 @@
 package types
 
 import (
+	"encoding/binary"
 	"math"
 
 	"github.com/attic-labs/noms/go/d"
@@ -60,3 +61,12 @@ func (v Number) writeTo(w nomsWriter) {
 	}
 	w.writeNumber(v)
 }
+
+func (v Number) valueBytes() []byte {
+	// We know the size of the buffer here so allocate it once.
+	// NumberKind, int (Varint), exp (Varint)
+	buff := make([]byte, 1+2*binary.MaxVarintLen64)
+	w := binaryNomsWriter{buff, 0}
+	v.writeTo(&w)
+	return buff[:w.offset]
+}
diff --git a/go/types/ref.go b/go/types/ref.go
index 143dfd4894..a83bffee06 100644
--- a/go/types/ref.go
+++ b/go/types/ref.go
@@ -43,11 +43,11 @@ func constructRef(targetHash hash.Hash, targetType *Type, height uint64) Ref {
 
 	var offsets refOffsets
 	offsets[refPartKind] = w.offset
-	RefKind.writeTo(w)
+	RefKind.writeTo(&w)
 	offsets[refPartTargetHash] = w.offset
 	w.writeHash(targetHash)
 	offsets[refPartTargetType] = w.offset
-	targetType.writeToAsType(w, map[string]*Type{})
+	targetType.writeToAsType(&w, map[string]*Type{})
 	offsets[refPartHeight] = w.offset
 	w.writeCount(height)
 
@@ -87,6 +87,10 @@ func (r Ref) writeTo(w nomsWriter) {
 	w.writeRaw(r.buff)
 }
 
+func (r Ref) valueBytes() []byte {
+	return r.buff
+}
+
 func maxChunkHeight(v Value) (max uint64) {
 	v.WalkRefs(func(r Ref) {
 		if height := r.Height(); height > max {
diff --git a/go/types/rolling_value_hasher.go b/go/types/rolling_value_hasher.go
index 9ec7eb68f8..5458cc29cc 100644
--- a/go/types/rolling_value_hasher.go
+++ b/go/types/rolling_value_hasher.go
@@ -47,7 +47,7 @@ func normalProductionChunks() {
 }
 
 type rollingValueHasher struct {
-	bw *binaryNomsWriter
+	bw binaryNomsWriter
 	bz *buzhash.BuzHash
 	crossedBoundary bool
 	pattern, window uint32
@@ -96,6 +96,6 @@ func (rv *rollingValueHasher) Reset() {
 }
 
 func (rv *rollingValueHasher) HashValue(v Value) {
-	v.writeTo(rv.bw)
+	v.writeTo(&rv.bw)
 	rv.sl.Update(rv.bw.data())
 }
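
The new Number.valueBytes above and String.valueBytes below both size their buffer from the worst-case varint length and then slice it down to what was actually written, instead of starting from initialBufferSize. A standard-library sketch of that sizing trick; the tag byte and layout are illustrative, not the exact Noms encoding:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// encodeTagged writes a one-byte tag followed by a uvarint payload into a
// buffer sized for the worst case, then returns only the bytes written.
func encodeTagged(tag byte, n uint64) []byte {
	// 1 byte for the tag, binary.MaxVarintLen64 bytes worst case for the uvarint.
	buff := make([]byte, 1+binary.MaxVarintLen64)
	buff[0] = tag
	written := 1 + binary.PutUvarint(buff[1:], n)
	return buff[:written]
}

func main() {
	fmt.Println(encodeTagged(7, 300)) // small value: 3 bytes used, not 11
}
```

binary.MaxVarintLen64 is 10, so over-allocating by the worst case and truncating afterwards avoids a second pass to compute the exact encoded size.
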
diff --git a/go/types/string.go b/go/types/string.go
index bd64944f8b..4729e3a4ce 100644
--- a/go/types/string.go
+++ b/go/types/string.go
@@ -4,7 +4,11 @@
 
 package types
 
-import "github.com/attic-labs/noms/go/hash"
+import (
+	"encoding/binary"
+
+	"github.com/attic-labs/noms/go/hash"
+)
 
 // String is a Noms Value wrapper around the primitive string type.
 type String string
@@ -51,3 +55,12 @@ func (s String) writeTo(w nomsWriter) {
 	StringKind.writeTo(w)
 	w.writeString(string(s))
 }
+
+func (s String) valueBytes() []byte {
+	// We know the size of the buffer here so allocate it once.
+	// StringKind, Length (UVarint), UTF-8 encoded string
+	buff := make([]byte, 1+binary.MaxVarintLen64+len(s))
+	w := binaryNomsWriter{buff, 0}
+	s.writeTo(&w)
+	return buff[:w.offset]
+}
diff --git a/go/types/struct.go b/go/types/struct.go
index ec09ff0c8d..7a02a937f3 100644
--- a/go/types/struct.go
+++ b/go/types/struct.go
@@ -47,10 +47,14 @@ func (s Struct) writeTo(enc nomsWriter) {
 	enc.writeRaw(s.buff)
 }
 
+func (s Struct) valueBytes() []byte {
+	return s.buff
+}
+
 func newStruct(name string, fieldNames []string, values []Value) Struct {
 	var vrw ValueReadWriter
 	w := newBinaryNomsWriter()
-	StructKind.writeTo(w)
+	StructKind.writeTo(&w)
 	w.writeString(name)
 	w.writeCount(uint64(len(fieldNames)))
 	for i := 0; i < len(fieldNames); i++ {
@@ -58,7 +62,7 @@ func newStruct(name string, fieldNames []string, values []Value) Struct {
 		if vrw == nil {
 			vrw = values[i].(valueReadWriter).valueReadWriter()
 		}
-		values[i].writeTo(w)
+		values[i].writeTo(&w)
 	}
 	return Struct{vrw, w.data()}
 }
@@ -263,7 +267,8 @@ func (s Struct) Get(n string) Value {
 // struct field a new struct type is created.
 func (s Struct) Set(n string, v Value) Struct {
 	verifyFieldName(n)
-	return s.set(newBinaryNomsWriter(), n, v, 0)
+	w := newBinaryNomsWriter()
+	return s.set(&w, n, v, 0)
 }
 
 func (s Struct) set(w *binaryNomsWriter, n string, v Value, addedCount int) Struct {
@@ -324,9 +329,9 @@ func (s Struct) IsZeroValue() bool {
 func (s Struct) Delete(n string) Struct {
 	dec := s.decoder()
 	w := newBinaryNomsWriter()
-	StructKind.writeTo(w)
+	StructKind.writeTo(&w)
 	dec.skipKind()
-	dec.copyString(w)
+	dec.copyString(&w)
 	count := dec.readCount()
 	w.writeCount(count - 1)
 	// If not found we just return s
@@ -339,7 +344,7 @@
 			found = true
 		} else {
 			w.writeString(name)
-			dec.copyValue(w)
+			dec.copyValue(&w)
 		}
 	}
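
Ref.valueBytes and Struct.valueBytes simply return the buffer the value is already backed by, which makes EncodeValue a copy-free handoff for those types. A minimal sketch of that trade-off, with illustrative names; the read-only caveat in the comment is an assumption about how such a shared buffer should be treated, not something spelled out in the patch:

```go
package main

import "fmt"

// chunk stands in for the chunk layer: it wraps whatever bytes it is handed.
type chunk struct {
	data []byte
}

// blob keeps its full serialization in buff, the way Ref and Struct are
// backed by the bytes they were constructed from.
type blob struct {
	buff []byte
}

// valueBytes hands out the backing buffer without copying. The caller has
// to treat the slice as read-only; mutating it would corrupt the value.
func (b blob) valueBytes() []byte {
	return b.buff
}

// encode is the copy-free path: the chunk shares the value's backing array.
func encode(b blob) chunk {
	return chunk{data: b.valueBytes()}
}

func main() {
	b := blob{buff: []byte{3, 1, 2, 3}}
	c := encode(b)
	fmt.Println(len(c.data), &c.data[0] == &b.buff[0]) // 4 true: same backing array
}
```
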