Slight modification to compound blob encoding

The json serialization now only contains the length of each individual
blob child.

The go representation of this still uses offsets but the offsets are
for the end delimiter.

For "hi" "bye" we get

{"cb": [{"ref": "sha1-hi"}, 2, {"ref": "sha1-bye"}, 3]}

compoundBlob{[2, 5], [sha1-hi, sha1-bye]}

Keeping the length in the serialization leads to smaller serializations

Using the end offset leads to simpler binary search and allows us to
use the last entry as the length.

Issue #17
This commit is contained in:
Erik Arvidsson
2015-08-07 10:14:45 -04:00
parent eedb385fe2
commit ddebdcaefd
11 changed files with 44 additions and 54 deletions
+10 -19
View File
@@ -111,17 +111,13 @@ func toUint64(v interface{}) (uint64, error) {
return i, nil
}
// [length,length0,{"ref":"sha1-0"}, ... lengthN, {"ref":"sha1-N"}]
// [{"ref":"sha1-0"}, length0, ... {"ref":"sha1-N"},lengthN]
func jsonDecodeCompoundBlob(input []interface{}) (interface{}, error) {
if len(input)%2 != 0 || len(input) < 2 {
return nil, errInvalidEncoding
}
length, err := toUint64(input[len(input)-1])
if err != nil {
return nil, err
}
offset := uint64(0)
numBlobs := len(input) / 2
offsets := make([]uint64, numBlobs)
blobs := make([]ref.Ref, numBlobs)
@@ -137,26 +133,21 @@ func jsonDecodeCompoundBlob(input []interface{}) (interface{}, error) {
return v.(ref.Ref), nil
}
for i := 0; i < len(input)-1; i++ {
for i := 0; i < len(input); i += 2 {
var err error
var offset uint64
if i == 0 {
offset = uint64(0)
} else {
offset, err = toUint64(input[i])
i++
if err != nil {
return nil, err
}
}
offsets[i/2] = offset
blobs[i/2], err = ensureRef(jsonDecodeValue(input[i]))
if err != nil {
return nil, err
}
length, err := toUint64(input[i+1])
if err != nil {
return nil, err
}
offset += length
offsets[i/2] = offset
}
return CompoundBlob{length, offsets, blobs}, nil
return CompoundBlob{offsets, blobs}, nil
}
func jsonDecodeList(input []interface{}) ([]interface{}, error) {
+3 -3
View File
@@ -80,15 +80,15 @@ func TestJSONDecode(t *testing.T) {
// Blob (compound)
// echo -n 'b Hello' | sha1sum
blr := ref.MustParse("sha1-c35018551e725bd2ab45166b69d15fda00b161c1")
cb := CompoundBlob{uint64(2), []uint64{0}, []ref.Ref{blr}}
cb := CompoundBlob{[]uint64{2}, []ref.Ref{blr}}
testDecode(`j {"cb":[{"ref":"sha1-c35018551e725bd2ab45166b69d15fda00b161c1"},2]}
`, cb)
// echo -n 'b ' | sha1sum
blr2 := ref.MustParse("sha1-641283a12b475ed58ba510517c1224a912e934a6")
// echo -n 'b World!' | sha1sum
blr3 := ref.MustParse("sha1-8169c017ce2779f3f66bfe27ee2313d71f7698b9")
cb2 := CompoundBlob{uint64(12), []uint64{0, 5, 6}, []ref.Ref{blr, blr2, blr3}}
testDecode(`j {"cb":[{"ref":"sha1-c35018551e725bd2ab45166b69d15fda00b161c1"},5,{"ref":"sha1-641283a12b475ed58ba510517c1224a912e934a6"},6,{"ref":"sha1-8169c017ce2779f3f66bfe27ee2313d71f7698b9"},12]}
cb2 := CompoundBlob{[]uint64{5, 6, 12}, []ref.Ref{blr, blr2, blr3}}
testDecode(`j {"cb":[{"ref":"sha1-c35018551e725bd2ab45166b69d15fda00b161c1"},5,{"ref":"sha1-641283a12b475ed58ba510517c1224a912e934a6"},1,{"ref":"sha1-8169c017ce2779f3f66bfe27ee2313d71f7698b9"},6]}
`, cb2)
}
+9 -7
View File
@@ -15,11 +15,14 @@ var (
// CompoundBlob represents the info needed to encode/decode chunked blob metadata.
type CompoundBlob struct {
Length uint64
Offsets []uint64
Offsets []uint64 // The offsets of the end of the related blobs.
Blobs []ref.Ref
}
func (cb CompoundBlob) Len() uint64 {
return cb.Offsets[len(cb.Offsets)-1]
}
// MapFromItems takes an even-numbered list of items and converts them into a stably-ordered map-like value by treating the even-indexed items as keys and the odd-indexed items as values, e.g. {e[0]: e[1], e[2]: e[3], ...}. This does NOT enforce key uniqueness.
func MapFromItems(e ...interface{}) Map {
dbg.Chk.True(0 == len(e)%2, "Length on input array must be multiple of 2")
@@ -121,19 +124,18 @@ func getJSONPrimitive(v interface{}) (interface{}, error) {
func getJSONCompoundBlob(cb CompoundBlob) (interface{}, error) {
// Perhaps tighten this up: BUG #170
// {"cb":[{"ref":"sha1-x"},length]}
// {"cb":[{"ref":"sha1-x"},offset,{"ref":"sha1-y"},length]}
// {"cb":[{"ref":"sha1-x"},lengthX,{"ref":"sha1-y"},lengthY]}
offset := uint64(0)
l := make([]interface{}, 0, len(cb.Blobs)*2)
for i, f := range cb.Blobs {
if i != 0 {
l = append(l, cb.Offsets[i])
}
c, err := getJSONPrimitive(f)
if err != nil {
return nil, err
}
l = append(l, c)
l = append(l, cb.Offsets[i]-offset)
offset = cb.Offsets[i]
}
l = append(l, cb.Length)
dbg.Chk.Equal(len(l), len(cb.Blobs)*2)
+1 -1
View File
@@ -87,5 +87,5 @@ func TestJsonEncode(t *testing.T) {
// Blob (compound)
testEncode(fmt.Sprintf(`j {"cb":[{"ref":"%s"},2]}
`, ref2), CompoundBlob{uint64(2), []uint64{0}, []ref.Ref{ref2}})
`, ref2), CompoundBlob{[]uint64{2}, []ref.Ref{ref2}})
}
+4 -4
View File
@@ -42,10 +42,10 @@ func NewBlob(r io.Reader) (Blob, error) {
break
}
blob = newBlobLeaf(buf.Bytes())
offsets = append(offsets, length)
blobs = append(blobs, futureFromValue(blob))
length += n
offsets = append(offsets, length)
blob = newBlobLeaf(buf.Bytes())
blobs = append(blobs, futureFromValue(blob))
}
if length == 0 {
@@ -55,7 +55,7 @@ func NewBlob(r io.Reader) (Blob, error) {
if len(blobs) == 1 {
return blob, nil
}
return compoundBlob{length, offsets, blobs, &ref.Ref{}, nil}, nil
return compoundBlob{offsets, blobs, &ref.Ref{}, nil}, nil
}
func BlobFromVal(v Value) Blob {
+8 -11
View File
@@ -12,8 +12,7 @@ import (
// compoundBlob represents a list of Blobs.
// It implements the Blob interface.
type compoundBlob struct {
length uint64
offsets []uint64
offsets []uint64 // The offsets of the end of the related blobs.
blobs []Future
ref *ref.Ref
cs chunks.ChunkSource
@@ -79,7 +78,10 @@ func (cbr *compoundBlobReader) Seek(offset int64, whence int) (int64, error) {
}
}
if cbr.currentReader != nil {
offset := abs - int64(cbr.cb.offsets[cbr.currentBlobIndex])
offset := abs
if cbr.currentBlobIndex > 0 {
offset -= int64(cbr.cb.offsets[cbr.currentBlobIndex-1])
}
if _, err := cbr.currentReader.Seek(offset, 0); err != nil {
return 0, err
}
@@ -89,13 +91,9 @@ func (cbr *compoundBlobReader) Seek(offset int64, whence int) (int64, error) {
}
func (cbr *compoundBlobReader) findBlobOffset(abs uint64) int {
// TODO(arv): The -1 at the end is bad. If the offsets was shifted one to the right things would be cleaner.
if abs >= cbr.cb.Len() {
return len(cbr.cb.blobs)
}
return sort.Search(len(cbr.cb.offsets), func(i int) bool {
return cbr.cb.offsets[i] > abs
}) - 1
})
}
func (cbr *compoundBlobReader) updateReader() error {
@@ -113,7 +111,7 @@ func (cbr *compoundBlobReader) updateReader() error {
// Len implements the Blob interface
func (cb compoundBlob) Len() uint64 {
return cb.length
return cb.offsets[len(cb.offsets)-1]
}
func (cb compoundBlob) Ref() ref.Ref {
@@ -123,9 +121,8 @@ func (cb compoundBlob) Ref() ref.Ref {
func (cb compoundBlob) Equals(other Value) bool {
if other == nil {
return false
} else {
return cb.Ref() == other.Ref()
}
return cb.Ref() == other.Ref()
}
func (cb compoundBlob) Chunks() (futures []Future) {
+5 -5
View File
@@ -19,10 +19,10 @@ func getTestCompoundBlob(datas ...string) compoundBlob {
for i, s := range datas {
b, _ := NewBlob(bytes.NewBufferString(s))
blobs[i] = futureFromValue(b)
offsets[i] = length
length += uint64(len(s))
offsets[i] = length
}
return compoundBlob{length, offsets, blobs, &ref.Ref{}, nil}
return compoundBlob{offsets, blobs, &ref.Ref{}, nil}
}
func getAliceBlob(t *testing.T) compoundBlob {
@@ -86,7 +86,7 @@ func TestCompoundBlobReaderLazy(t *testing.T) {
b2 := newBlobLeaf([]byte("bye"))
tb2 := &testBlob{b2, &readCount2}
cb := compoundBlob{uint64(5), []uint64{0, 2}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
cb := compoundBlob{[]uint64{2, 5}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
r := cb.Reader()
assert.Equal(0, readCount1)
@@ -129,7 +129,7 @@ func TestCompoundBlobReaderLazySeek(t *testing.T) {
b2 := newBlobLeaf([]byte("bye"))
tb2 := &testBlob{b2, &readCount2}
cb := compoundBlob{uint64(5), []uint64{0, 2}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
cb := compoundBlob{[]uint64{2, 5}, []Future{futureFromValue(tb1), futureFromValue(tb2)}, &ref.Ref{}, nil}
r := cb.Reader()
@@ -232,7 +232,7 @@ func TestCompoundBlobChunks(t *testing.T) {
bl1 := newBlobLeaf([]byte("hello"))
blr1 := bl1.Ref()
bl2 := newBlobLeaf([]byte("world"))
cb = compoundBlob{uint64(10), []uint64{0, 5}, []Future{futureFromRef(blr1), futureFromValue(bl2)}, &ref.Ref{}, cs}
cb = compoundBlob{[]uint64{5, 10}, []Future{futureFromRef(blr1), futureFromValue(bl2)}, &ref.Ref{}, cs}
assert.Equal(1, len(cb.Chunks()))
}
+1 -1
View File
@@ -54,7 +54,7 @@ func TestPrimitiveEquals(t *testing.T) {
func() Value {
b1, _ := NewBlob(bytes.NewBufferString("hi"))
b2, _ := NewBlob(bytes.NewBufferString("bye"))
return compoundBlob{uint64(5), []uint64{0, 2}, []Future{futureFromValue(b1), futureFromValue(b2)}, &ref.Ref{}, nil}
return compoundBlob{[]uint64{2, 5}, []Future{futureFromValue(b1), futureFromValue(b2)}, &ref.Ref{}, nil}
},
func() Value { return NewList() },
func() Value { return NewList(NewString("foo")) },
+1 -1
View File
@@ -42,7 +42,7 @@ func TestEnsureRef(t *testing.T) {
}()
bl := newBlobLeaf([]byte("hi"))
cb := compoundBlob{uint64(2), []uint64{0}, []Future{futureFromValue(bl)}, &ref.Ref{}, cs}
cb := compoundBlob{[]uint64{2}, []Future{futureFromValue(bl)}, &ref.Ref{}, cs}
values := []Value{
newBlobLeaf([]byte{}),
+1 -1
View File
@@ -86,7 +86,7 @@ func fromEncodeable(i interface{}, cs chunks.ChunkSource) (Future, error) {
}
blobs[idx] = f
}
cb := compoundBlob{i.Length, i.Offsets, blobs, &ref.Ref{}, cs}
cb := compoundBlob{i.Offsets, blobs, &ref.Ref{}, cs}
return futureFromValue(cb), nil
default:
dbg.Chk.Fail("Unknown encodeable", "%+v", i)
+1 -1
View File
@@ -76,7 +76,7 @@ func encCompoundBlobFromCompoundBlob(cb compoundBlob, cs chunks.ChunkSink) (inte
// All children of compoundBlob must be Blobs, which get encoded and reffed by processChild.
refs[idx] = i.(ref.Ref)
}
return enc.CompoundBlob{Length: cb.length, Offsets: cb.offsets, Blobs: refs}, nil
return enc.CompoundBlob{Offsets: cb.offsets, Blobs: refs}, nil
}
func makeListEncodeable(l List, cs chunks.ChunkSink) (interface{}, error) {