diff --git a/types/blob.go b/types/blob.go index 515e9c25ca..e859947bcc 100644 --- a/types/blob.go +++ b/types/blob.go @@ -3,7 +3,6 @@ package types import ( "io" - "github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash" "github.com/attic-labs/noms/chunks" "github.com/attic-labs/noms/d" ) @@ -36,9 +35,8 @@ func NewMemoryBlob(r io.Reader) Blob { } func newBlobLeafBoundaryChecker() boundaryChecker { - return newBuzHashBoundaryChecker(blobWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool { - b := item.(byte) - return h.HashByte(b)&blobPattern == blobPattern + return newBuzHashBoundaryChecker(blobWindowSize, 1, blobPattern, func(item sequenceItem) []byte { + return []byte{item.(byte)} }) } diff --git a/types/buz_hash_boundary_checker.go b/types/buz_hash_boundary_checker.go new file mode 100644 index 0000000000..0053faa656 --- /dev/null +++ b/types/buz_hash_boundary_checker.go @@ -0,0 +1,31 @@ +package types + +import ( + "github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash" + "github.com/attic-labs/noms/d" +) + +type buzHashBoundaryChecker struct { + h *buzhash.BuzHash + windowSize, valueSize int + pattern uint32 + getBytes getBytesFn +} + +type getBytesFn func(item sequenceItem) []byte + +func newBuzHashBoundaryChecker(windowSize, valueSize int, pattern uint32, getBytes getBytesFn) boundaryChecker { + return &buzHashBoundaryChecker{buzhash.NewBuzHash(uint32(windowSize * valueSize)), windowSize, valueSize, pattern, getBytes} +} + +func (b *buzHashBoundaryChecker) Write(item sequenceItem) bool { + bytes := b.getBytes(item) + d.Chk.Equal(b.valueSize, len(bytes)) + _, err := b.h.Write(bytes) + d.Chk.NoError(err) + return b.h.Sum32()&b.pattern == b.pattern +} + +func (b *buzHashBoundaryChecker) WindowSize() int { + return b.windowSize +} diff --git a/types/compound_list.go b/types/compound_list.go index b5cc3829e3..57b5f8ed57 100644 --- a/types/compound_list.go +++ b/types/compound_list.go @@ -1,7 +1,8 @@ package types import ( - "github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash" + "crypto/sha1" + "github.com/attic-labs/noms/chunks" "github.com/attic-labs/noms/d" "github.com/attic-labs/noms/ref" @@ -161,11 +162,9 @@ func (cl compoundList) IterAll(f listIterAllFunc) { } func newListLeafBoundaryChecker() boundaryChecker { - return newBuzHashBoundaryChecker(listWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool { - v := item.(Value) - digest := v.Ref().Digest() - b := digest[0] - return h.HashByte(b)&listPattern == listPattern + return newBuzHashBoundaryChecker(listWindowSize, sha1.Size, listPattern, func(item sequenceItem) []byte { + digest := item.(Value).Ref().Digest() + return digest[:] }) } diff --git a/types/compound_list_test.go b/types/compound_list_test.go index f4ec89d648..0cb0809f3d 100644 --- a/types/compound_list_test.go +++ b/types/compound_list_test.go @@ -15,7 +15,7 @@ func (tsl testSimpleList) Get(idx uint64) Value { } func getTestSimpleListLen() int { - return int(listPattern * 16) + return int(listPattern * 50) } func getTestSimpleList() testSimpleList { diff --git a/types/meta_sequence.go b/types/meta_sequence.go index f3fff3ac9d..c6e1e73795 100644 --- a/types/meta_sequence.go +++ b/types/meta_sequence.go @@ -3,14 +3,12 @@ package types import ( "crypto/sha1" - "github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash" "github.com/attic-labs/noms/chunks" - "github.com/attic-labs/noms/d" "github.com/attic-labs/noms/ref" ) const ( - objectWindowSize = 8 * sha1.Size + objectWindowSize = 8 objectPattern = uint32(1<<6 - 1) // Average size of 64 elements ) @@ -100,33 +98,10 @@ func newMetaSequenceFromData(tuples metaSequenceData, t Type, cs chunks.ChunkSto panic("not reachable") } -type checkHashFn func(h *buzhash.BuzHash, item sequenceItem) bool - -type buzHashBoundaryChecker struct { - h *buzhash.BuzHash - windowSize int - checkHash checkHashFn -} - -func newBuzHashBoundaryChecker(windowSize int, checkHash checkHashFn) boundaryChecker { - return &buzHashBoundaryChecker{buzhash.NewBuzHash(uint32(windowSize)), windowSize, checkHash} -} - -func (b *buzHashBoundaryChecker) Write(item sequenceItem) bool { - return b.checkHash(b.h, item) -} - -func (b *buzHashBoundaryChecker) WindowSize() int { - return b.windowSize -} - func newMetaSequenceBoundaryChecker() boundaryChecker { - return newBuzHashBoundaryChecker(objectWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool { - mt := item.(metaTuple) - digest := mt.ref.Digest() - _, err := h.Write(digest[:]) - d.Chk.NoError(err) - return h.Sum32()&objectPattern == objectPattern + return newBuzHashBoundaryChecker(objectWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte { + digest := item.(metaTuple).ref.Digest() + return digest[:] }) }