Merge pull request #716 from kalman/buz-window-size

Correctly distinguish between chunking window size and buzhash window size
This commit is contained in:
Ben Kalman
2015-12-03 15:26:56 -08:00
5 changed files with 43 additions and 40 deletions

View File

@@ -3,7 +3,6 @@ package types
import (
"io"
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/d"
)
@@ -36,9 +35,8 @@ func NewMemoryBlob(r io.Reader) Blob {
}
// newBlobLeafBoundaryChecker builds the boundaryChecker used for blob leaves,
// where every sequence item is a single byte.
// NOTE(review): this hunk is rendered without +/- markers, so two versions of
// the body are interleaved below; they are labelled inline.
func newBlobLeafBoundaryChecker() boundaryChecker {
// Earlier version (two-argument constructor): the callback hashes the byte
// itself with HashByte and tests the result against blobPattern.
return newBuzHashBoundaryChecker(blobWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool {
b := item.(byte)
return h.HashByte(b)&blobPattern == blobPattern
// Later version (four-argument constructor): valueSize is 1 and the callback
// only supplies the item's single byte; hashing and the pattern test are left
// to the shared checker.
return newBuzHashBoundaryChecker(blobWindowSize, 1, blobPattern, func(item sequenceItem) []byte {
return []byte{item.(byte)}
})
}

View File

@@ -0,0 +1,31 @@
package types
import (
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
"github.com/attic-labs/noms/d"
)
// buzHashBoundaryChecker is a boundaryChecker that feeds a fixed number of
// bytes per sequence item into a buzhash and reports a boundary whenever the
// hash sum has every bit of pattern set.
type buzHashBoundaryChecker struct {
	// h is the hash; it is constructed with windowSize*valueSize.
	h *buzhash.BuzHash
	// windowSize is the chunking window measured in items, as returned by
	// WindowSize.
	windowSize int
	// valueSize is the exact number of bytes getBytes must yield per item.
	valueSize int
	// pattern is the bit mask the hash sum is compared against in Write.
	pattern uint32
	// getBytes converts an item into the bytes written to h.
	getBytes getBytesFn
}

// getBytesFn extracts the bytes that represent item for hashing purposes.
type getBytesFn func(item sequenceItem) []byte
// newBuzHashBoundaryChecker returns a boundaryChecker backed by a buzhash.
//
// windowSize is the chunking window in items, valueSize is the number of bytes
// each item contributes, pattern is the mask tested against the hash sum, and
// getBytes turns an item into those bytes.
func newBuzHashBoundaryChecker(windowSize, valueSize int, pattern uint32, getBytes getBytesFn) boundaryChecker {
	// The hash is sized as items * bytes-per-item, keeping the item-level
	// window distinct from the window handed to buzhash (presumably a byte
	// count; confirm against the buzhash package).
	hashWindow := uint32(windowSize * valueSize)

	return &buzHashBoundaryChecker{
		h:          buzhash.NewBuzHash(hashWindow),
		windowSize: windowSize,
		valueSize:  valueSize,
		pattern:    pattern,
		getBytes:   getBytes,
	}
}
// Write feeds item's bytes into the hash and reports whether the resulting
// sum has every bit of the configured pattern set.
//
// It asserts, via d.Chk, that getBytes produced exactly valueSize bytes and
// that writing to the hash did not fail.
func (b *buzHashBoundaryChecker) Write(item sequenceItem) bool {
	data := b.getBytes(item)
	d.Chk.Equal(b.valueSize, len(data))

	_, err := b.h.Write(data)
	d.Chk.NoError(err)

	sum := b.h.Sum32()
	return sum&b.pattern == b.pattern
}
// WindowSize returns the windowSize the checker was constructed with. This is
// the item-level value, not the windowSize*valueSize product passed to the
// hash.
func (b *buzHashBoundaryChecker) WindowSize() int {
return b.windowSize
}

View File

@@ -1,7 +1,8 @@
package types
import (
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
"crypto/sha1"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/d"
"github.com/attic-labs/noms/ref"
@@ -161,11 +162,9 @@ func (cl compoundList) IterAll(f listIterAllFunc) {
}
// newListLeafBoundaryChecker builds the boundaryChecker used for list leaves,
// whose items are Values identified by their ref digest.
// NOTE(review): this hunk is rendered without +/- markers, so two versions of
// the body are interleaved below; they are labelled inline.
func newListLeafBoundaryChecker() boundaryChecker {
// Earlier version (two-argument constructor): only the first byte of the
// digest is hashed, via HashByte, and tested against listPattern.
return newBuzHashBoundaryChecker(listWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool {
v := item.(Value)
digest := v.Ref().Digest()
b := digest[0]
return h.HashByte(b)&listPattern == listPattern
// Later version (four-argument constructor): the whole digest is supplied,
// with valueSize declared as sha1.Size.
return newBuzHashBoundaryChecker(listWindowSize, sha1.Size, listPattern, func(item sequenceItem) []byte {
digest := item.(Value).Ref().Digest()
return digest[:]
})
}

View File

@@ -15,7 +15,7 @@ func (tsl testSimpleList) Get(idx uint64) Value {
}
// getTestSimpleListLen returns the length used for the test simple list,
// expressed as a multiple of listPattern.
// NOTE(review): the hunk has no +/- markers; the two return lines are
// presumably the earlier (x16) and later (x50) values, in that order.
func getTestSimpleListLen() int {
return int(listPattern * 16)
return int(listPattern * 50)
}
func getTestSimpleList() testSimpleList {

View File

@@ -3,14 +3,12 @@ package types
import (
"crypto/sha1"
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
"github.com/attic-labs/noms/chunks"
"github.com/attic-labs/noms/d"
"github.com/attic-labs/noms/ref"
)
// Chunking parameters passed to newBuzHashBoundaryChecker by
// newMetaSequenceBoundaryChecker.
// NOTE(review): the hunk has no +/- markers; the two objectWindowSize lines
// are presumably the earlier and later definitions, in that order.
const (
// Earlier value: the sha1.Size factor is folded into the window size.
objectWindowSize = 8 * sha1.Size
// Later value: a plain count of 8; sha1.Size is passed separately as the
// checker's valueSize.
objectWindowSize = 8
objectPattern = uint32(1<<6 - 1) // Average size of 64 elements
)
@@ -100,33 +98,10 @@ func newMetaSequenceFromData(tuples metaSequenceData, t Type, cs chunks.ChunkSto
panic("not reachable")
}
// checkHashFn is the per-item callback of the two-argument checker: it is
// handed the hash and the item, and its bool result becomes the result of
// Write.
// NOTE(review): these definitions appear to be the removed side of the hunk;
// a buzHashBoundaryChecker with a getBytes callback is defined in the newly
// added file earlier in this diff.
type checkHashFn func(h *buzhash.BuzHash, item sequenceItem) bool
// buzHashBoundaryChecker (callback-based form) holds the hash, the window size
// and the callback that does all hashing and pattern testing.
type buzHashBoundaryChecker struct {
h *buzhash.BuzHash
windowSize int
checkHash checkHashFn
}
// newBuzHashBoundaryChecker (callback-based form) creates a checker whose hash
// is constructed with windowSize directly, so the same number serves as both
// the value returned by WindowSize and the argument to buzhash.NewBuzHash.
func newBuzHashBoundaryChecker(windowSize int, checkHash checkHashFn) boundaryChecker {
return &buzHashBoundaryChecker{buzhash.NewBuzHash(uint32(windowSize)), windowSize, checkHash}
}
// Write (callback-based form) delegates entirely to checkHash, passing it the
// hash and the item, and returns its result.
func (b *buzHashBoundaryChecker) Write(item sequenceItem) bool {
return b.checkHash(b.h, item)
}
// WindowSize returns the windowSize the checker was constructed with.
func (b *buzHashBoundaryChecker) WindowSize() int {
return b.windowSize
}
// newMetaSequenceBoundaryChecker builds the boundaryChecker used for meta
// sequences, whose items are metaTuples hashed by their ref digest.
// NOTE(review): this hunk is rendered without +/- markers, so two versions of
// the body are interleaved below; they are labelled inline.
func newMetaSequenceBoundaryChecker() boundaryChecker {
// Earlier version (two-argument constructor): the callback writes the full
// digest into the hash itself and tests Sum32 against objectPattern.
return newBuzHashBoundaryChecker(objectWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool {
mt := item.(metaTuple)
digest := mt.ref.Digest()
_, err := h.Write(digest[:])
d.Chk.NoError(err)
return h.Sum32()&objectPattern == objectPattern
// Later version (four-argument constructor): the callback only returns the
// digest bytes, with valueSize declared as sha1.Size; writing and the pattern
// test happen in the shared checker's Write.
return newBuzHashBoundaryChecker(objectWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
digest := item.(metaTuple).ref.Digest()
return digest[:]
})
}