mirror of
https://github.com/dolthub/dolt.git
synced 2026-05-14 19:38:56 -05:00
Slight modification to compound blob encoding
The json serialization now only contains the length of each individual
blob child.
The go representation of this still uses offsets but the offsets are
for the end delimiter.
For "hi" "bye" we get
{"cb": [{"ref": "sha1-hi"}, 2, {"ref": "sha1-bye"}, 3]}
compoundBlob{[2, 5], [sha1-hi, sha1-bye]}
Keeping the per-child lengths in the serialization leads to smaller serializations.
Using end offsets in the Go representation leads to simpler binary search and allows us to
use the last entry as the total length.
Issue #17
This commit is contained in:
+10
-19
@@ -111,17 +111,13 @@ func toUint64(v interface{}) (uint64, error) {
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// [length,length0,{"ref":"sha1-0"}, ... lengthN, {"ref":"sha1-N"}]
|
||||
// [{"ref":"sha1-0"}, length0, ... {"ref":"sha1-N"},lengthN]
|
||||
func jsonDecodeCompoundBlob(input []interface{}) (interface{}, error) {
|
||||
if len(input)%2 != 0 || len(input) < 2 {
|
||||
return nil, errInvalidEncoding
|
||||
}
|
||||
|
||||
length, err := toUint64(input[len(input)-1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
offset := uint64(0)
|
||||
numBlobs := len(input) / 2
|
||||
offsets := make([]uint64, numBlobs)
|
||||
blobs := make([]ref.Ref, numBlobs)
|
||||
@@ -137,26 +133,21 @@ func jsonDecodeCompoundBlob(input []interface{}) (interface{}, error) {
|
||||
return v.(ref.Ref), nil
|
||||
}
|
||||
|
||||
for i := 0; i < len(input)-1; i++ {
|
||||
for i := 0; i < len(input); i += 2 {
|
||||
var err error
|
||||
var offset uint64
|
||||
if i == 0 {
|
||||
offset = uint64(0)
|
||||
} else {
|
||||
offset, err = toUint64(input[i])
|
||||
i++
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
offsets[i/2] = offset
|
||||
blobs[i/2], err = ensureRef(jsonDecodeValue(input[i]))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
length, err := toUint64(input[i+1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
offset += length
|
||||
offsets[i/2] = offset
|
||||
}
|
||||
|
||||
return CompoundBlob{length, offsets, blobs}, nil
|
||||
return CompoundBlob{offsets, blobs}, nil
|
||||
}
|
||||
|
||||
func jsonDecodeList(input []interface{}) ([]interface{}, error) {
|
||||
|
||||
@@ -80,15 +80,15 @@ func TestJSONDecode(t *testing.T) {
|
||||
// Blob (compound)
|
||||
// echo -n 'b Hello' | sha1sum
|
||||
blr := ref.MustParse("sha1-c35018551e725bd2ab45166b69d15fda00b161c1")
|
||||
cb := CompoundBlob{uint64(2), []uint64{0}, []ref.Ref{blr}}
|
||||
cb := CompoundBlob{[]uint64{2}, []ref.Ref{blr}}
|
||||
testDecode(`j {"cb":[{"ref":"sha1-c35018551e725bd2ab45166b69d15fda00b161c1"},2]}
|
||||
`, cb)
|
||||
// echo -n 'b ' | sha1sum
|
||||
blr2 := ref.MustParse("sha1-641283a12b475ed58ba510517c1224a912e934a6")
|
||||
// echo -n 'b World!' | sha1sum
|
||||
blr3 := ref.MustParse("sha1-8169c017ce2779f3f66bfe27ee2313d71f7698b9")
|
||||
cb2 := CompoundBlob{uint64(12), []uint64{0, 5, 6}, []ref.Ref{blr, blr2, blr3}}
|
||||
testDecode(`j {"cb":[{"ref":"sha1-c35018551e725bd2ab45166b69d15fda00b161c1"},5,{"ref":"sha1-641283a12b475ed58ba510517c1224a912e934a6"},6,{"ref":"sha1-8169c017ce2779f3f66bfe27ee2313d71f7698b9"},12]}
|
||||
cb2 := CompoundBlob{[]uint64{5, 6, 12}, []ref.Ref{blr, blr2, blr3}}
|
||||
testDecode(`j {"cb":[{"ref":"sha1-c35018551e725bd2ab45166b69d15fda00b161c1"},5,{"ref":"sha1-641283a12b475ed58ba510517c1224a912e934a6"},1,{"ref":"sha1-8169c017ce2779f3f66bfe27ee2313d71f7698b9"},6]}
|
||||
`, cb2)
|
||||
}
|
||||
|
||||
|
||||
+9
-7
@@ -15,11 +15,14 @@ var (
|
||||
|
||||
// CompoundBlob represents the info needed to encode/decode chunked blob metadata.
|
||||
type CompoundBlob struct {
|
||||
Length uint64
|
||||
Offsets []uint64
|
||||
Offsets []uint64 // The offsets of the end of the related blobs.
|
||||
Blobs []ref.Ref
|
||||
}
|
||||
|
||||
func (cb CompoundBlob) Len() uint64 {
|
||||
return cb.Offsets[len(cb.Offsets)-1]
|
||||
}
|
||||
|
||||
// MapFromItems takes an even-numbered list of items and converts them into a stably-ordered map-like value by treating the even-indexed items as keys and the odd-indexed items as values, e.g. {e[0]: e[1], e[2]: e[3], ...}. This does NOT enforce key uniqueness.
|
||||
func MapFromItems(e ...interface{}) Map {
|
||||
dbg.Chk.True(0 == len(e)%2, "Length on input array must be multiple of 2")
|
||||
@@ -121,19 +124,18 @@ func getJSONPrimitive(v interface{}) (interface{}, error) {
|
||||
func getJSONCompoundBlob(cb CompoundBlob) (interface{}, error) {
|
||||
// Perhaps tighten this up: BUG #170
|
||||
// {"cb":[{"ref":"sha1-x"},length]}
|
||||
// {"cb":[{"ref":"sha1-x"},offset,{"ref":"sha1-y"},length]}
|
||||
// {"cb":[{"ref":"sha1-x"},lengthX,{"ref":"sha1-y"},lengthY]}
|
||||
offset := uint64(0)
|
||||
l := make([]interface{}, 0, len(cb.Blobs)*2)
|
||||
for i, f := range cb.Blobs {
|
||||
if i != 0 {
|
||||
l = append(l, cb.Offsets[i])
|
||||
}
|
||||
c, err := getJSONPrimitive(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
l = append(l, c)
|
||||
l = append(l, cb.Offsets[i]-offset)
|
||||
offset = cb.Offsets[i]
|
||||
}
|
||||
l = append(l, cb.Length)
|
||||
|
||||
dbg.Chk.Equal(len(l), len(cb.Blobs)*2)
|
||||
|
||||
|
||||
@@ -87,5 +87,5 @@ func TestJsonEncode(t *testing.T) {
|
||||
|
||||
// Blob (compound)
|
||||
testEncode(fmt.Sprintf(`j {"cb":[{"ref":"%s"},2]}
|
||||
`, ref2), CompoundBlob{uint64(2), []uint64{0}, []ref.Ref{ref2}})
|
||||
`, ref2), CompoundBlob{[]uint64{2}, []ref.Ref{ref2}})
|
||||
}
|
||||
|
||||
+4
-4
@@ -42,10 +42,10 @@ func NewBlob(r io.Reader) (Blob, error) {
|
||||
break
|
||||
}
|
||||
|
||||
blob = newBlobLeaf(buf.Bytes())
|
||||
offsets = append(offsets, length)
|
||||
blobs = append(blobs, futureFromValue(blob))
|
||||
length += n
|
||||
offsets = append(offsets, length)
|
||||
blob = newBlobLeaf(buf.Bytes())
|
||||
blobs = append(blobs, futureFromValue(blob))
|
||||
}
|
||||
|
||||
if length == 0 {
|
||||
@@ -55,7 +55,7 @@ func NewBlob(r io.Reader) (Blob, error) {
|
||||
if len(blobs) == 1 {
|
||||
return blob, nil
|
||||
}
|
||||
return compoundBlob{length, offsets, blobs, &ref.Ref{}, nil}, nil
|
||||
return compoundBlob{offsets, blobs, &ref.Ref{}, nil}, nil
|
||||
}
|
||||
|
||||
func BlobFromVal(v Value) Blob {
|
||||
|
||||
+8
-11
@@ -12,8 +12,7 @@ import (
|
||||
// compoundBlob represents a list of Blobs.
|
||||
// It implements the Blob interface.
|
||||
type compoundBlob struct {
|
||||
length uint64
|
||||
offsets []uint64
|
||||
offsets []uint64 // The offsets of the end of the related blobs.
|
||||
blobs []Future
|
||||
ref *ref.Ref
|
||||
cs chunks.ChunkSource
|
||||
@@ -79,7 +78,10 @@ func (cbr *compoundBlobReader) Seek(offset int64, whence int) (int64, error) {
|
||||
}
|
||||
}
|
||||
if cbr.currentReader != nil {
|
||||
offset := abs - int64(cbr.cb.offsets[cbr.currentBlobIndex])
|
||||
offset := abs
|
||||
if cbr.currentBlobIndex > 0 {
|
||||
offset -= int64(cbr.cb.offsets[cbr.currentBlobIndex-1])
|
||||
}
|
||||
if _, err := cbr.currentReader.Seek(offset, 0); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@@ -89,13 +91,9 @@ func (cbr *compoundBlobReader) Seek(offset int64, whence int) (int64, error) {
|
||||
}
|
||||
|
||||
func (cbr *compoundBlobReader) findBlobOffset(abs uint64) int {
|
||||
// TODO(arv): The -1 at the end is bad. If the offsets was shifted one to the right things would be cleaner.
|
||||
if abs >= cbr.cb.Len() {
|
||||
return len(cbr.cb.blobs)
|
||||
}
|
||||
return sort.Search(len(cbr.cb.offsets), func(i int) bool {
|
||||
return cbr.cb.offsets[i] > abs
|
||||
}) - 1
|
||||
})
|
||||
}
|
||||
|
||||
func (cbr *compoundBlobReader) updateReader() error {
|
||||
@@ -113,7 +111,7 @@ func (cbr *compoundBlobReader) updateReader() error {
|
||||
|
||||
// Len implements the Blob interface
|
||||
func (cb compoundBlob) Len() uint64 {
|
||||
return cb.length
|
||||
return cb.offsets[len(cb.offsets)-1]
|
||||
}
|
||||
|
||||
func (cb compoundBlob) Ref() ref.Ref {
|
||||
@@ -123,9 +121,8 @@ func (cb compoundBlob) Ref() ref.Ref {
|
||||
func (cb compoundBlob) Equals(other Value) bool {
|
||||
if other == nil {
|
||||
return false
|
||||
} else {
|
||||
return cb.Ref() == other.Ref()
|
||||
}
|
||||
return cb.Ref() == other.Ref()
|
||||
}
|
||||
|
||||
func (cb compoundBlob) Chunks() (futures []Future) {
|
||||
|
||||
@@ -19,10 +19,10 @@ func getTestCompoundBlob(datas ...string) compoundBlob {
|
||||
for i, s := range datas {
|
||||
b, _ := NewBlob(bytes.NewBufferString(s))
|
||||
blobs[i] = futureFromValue(b)
|
||||
offsets[i] = length
|
||||
length += uint64(len(s))
|
||||
offsets[i] = length
|
||||
}
|
||||
return compoundBlob{length, offsets, blobs, &ref.Ref{}, nil}
|
||||
return compoundBlob{offsets, blobs, &ref.Ref{}, nil}
|
||||
}
|
||||
|
||||
func getAliceBlob(t *testing.T) compoundBlob {
|
||||
@@ -86,7 +86,7 @@ func TestCompoundBlobReaderLazy(t *testing.T) {
|
||||
b2 := newBlobLeaf([]byte("bye"))
|
||||
tb2 := &testBlob{b2, &readCount2}
|
||||
|
||||
cb := compoundBlob{uint64(5), []uint64{0, 2}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
|
||||
cb := compoundBlob{[]uint64{2, 5}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
|
||||
|
||||
r := cb.Reader()
|
||||
assert.Equal(0, readCount1)
|
||||
@@ -129,7 +129,7 @@ func TestCompoundBlobReaderLazySeek(t *testing.T) {
|
||||
b2 := newBlobLeaf([]byte("bye"))
|
||||
tb2 := &testBlob{b2, &readCount2}
|
||||
|
||||
cb := compoundBlob{uint64(5), []uint64{0, 2}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
|
||||
cb := compoundBlob{[]uint64{2, 5}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
|
||||
|
||||
r := cb.Reader()
|
||||
|
||||
@@ -232,7 +232,7 @@ func TestCompoundBlobChunks(t *testing.T) {
|
||||
bl1 := newBlobLeaf([]byte("hello"))
|
||||
blr1 := bl1.Ref()
|
||||
bl2 := newBlobLeaf([]byte("world"))
|
||||
cb = compoundBlob{uint64(10), []uint64{0, 5}, []Future{futureFromRef(blr1), futureFromValue(bl2)}, &ref.Ref{}, cs}
|
||||
cb = compoundBlob{[]uint64{5, 10}, []Future{futureFromRef(blr1), futureFromValue(bl2)}, &ref.Ref{}, cs}
|
||||
assert.Equal(1, len(cb.Chunks()))
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ func TestPrimitiveEquals(t *testing.T) {
|
||||
func() Value {
|
||||
b1, _ := NewBlob(bytes.NewBufferString("hi"))
|
||||
b2, _ := NewBlob(bytes.NewBufferString("bye"))
|
||||
return compoundBlob{uint64(5), []uint64{0, 2}, []Future{futureFromValue(b1), futureFromValue(b2)}, &ref.Ref{}, nil}
|
||||
return compoundBlob{[]uint64{2, 5}, []Future{futureFromValue(b1), futureFromValue(b2)}, &ref.Ref{}, nil}
|
||||
},
|
||||
func() Value { return NewList() },
|
||||
func() Value { return NewList(NewString("foo")) },
|
||||
|
||||
@@ -42,7 +42,7 @@ func TestEnsureRef(t *testing.T) {
|
||||
}()
|
||||
|
||||
bl := newBlobLeaf([]byte("hi"))
|
||||
cb := compoundBlob{uint64(2), []uint64{0}, []Future{futureFromValue(bl)}, &ref.Ref{}, cs}
|
||||
cb := compoundBlob{[]uint64{2}, []Future{futureFromValue(bl)}, &ref.Ref{}, cs}
|
||||
|
||||
values := []Value{
|
||||
newBlobLeaf([]byte{}),
|
||||
|
||||
+1
-1
@@ -86,7 +86,7 @@ func fromEncodeable(i interface{}, cs chunks.ChunkSource) (Future, error) {
|
||||
}
|
||||
blobs[idx] = f
|
||||
}
|
||||
cb := compoundBlob{i.Length, i.Offsets, blobs, &ref.Ref{}, cs}
|
||||
cb := compoundBlob{i.Offsets, blobs, &ref.Ref{}, cs}
|
||||
return futureFromValue(cb), nil
|
||||
default:
|
||||
dbg.Chk.Fail("Unknown encodeable", "%+v", i)
|
||||
|
||||
@@ -76,7 +76,7 @@ func encCompoundBlobFromCompoundBlob(cb compoundBlob, cs chunks.ChunkSink) (inte
|
||||
// All children of compoundBlob must be Blobs, which get encoded and reffed by processChild.
|
||||
refs[idx] = i.(ref.Ref)
|
||||
}
|
||||
return enc.CompoundBlob{Length: cb.length, Offsets: cb.offsets, Blobs: refs}, nil
|
||||
return enc.CompoundBlob{Offsets: cb.offsets, Blobs: refs}, nil
|
||||
}
|
||||
|
||||
func makeListEncodeable(l List, cs chunks.ChunkSink) (interface{}, error) {
|
||||
|
||||
Reference in New Issue
Block a user