mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-24 10:30:48 -06:00
Merge pull request #716 from kalman/buz-window-size
Correctly distinguish between chunking window size and buzhash window…
This commit is contained in:
@@ -3,7 +3,6 @@ package types
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/d"
|
||||
)
|
||||
@@ -36,9 +35,8 @@ func NewMemoryBlob(r io.Reader) Blob {
|
||||
}
|
||||
|
||||
func newBlobLeafBoundaryChecker() boundaryChecker {
|
||||
return newBuzHashBoundaryChecker(blobWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool {
|
||||
b := item.(byte)
|
||||
return h.HashByte(b)&blobPattern == blobPattern
|
||||
return newBuzHashBoundaryChecker(blobWindowSize, 1, blobPattern, func(item sequenceItem) []byte {
|
||||
return []byte{item.(byte)}
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
31
types/buz_hash_boundary_checker.go
Normal file
31
types/buz_hash_boundary_checker.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package types
|
||||
|
||||
import (
|
||||
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
|
||||
"github.com/attic-labs/noms/d"
|
||||
)
|
||||
|
||||
type buzHashBoundaryChecker struct {
|
||||
h *buzhash.BuzHash
|
||||
windowSize, valueSize int
|
||||
pattern uint32
|
||||
getBytes getBytesFn
|
||||
}
|
||||
|
||||
// getBytesFn returns the bytes of item that are fed to the hash. Write asserts
// that the returned slice is exactly valueSize bytes long.
type getBytesFn func(item sequenceItem) []byte
|
||||
|
||||
func newBuzHashBoundaryChecker(windowSize, valueSize int, pattern uint32, getBytes getBytesFn) boundaryChecker {
|
||||
return &buzHashBoundaryChecker{buzhash.NewBuzHash(uint32(windowSize * valueSize)), windowSize, valueSize, pattern, getBytes}
|
||||
}
|
||||
|
||||
func (b *buzHashBoundaryChecker) Write(item sequenceItem) bool {
|
||||
bytes := b.getBytes(item)
|
||||
d.Chk.Equal(b.valueSize, len(bytes))
|
||||
_, err := b.h.Write(bytes)
|
||||
d.Chk.NoError(err)
|
||||
return b.h.Sum32()&b.pattern == b.pattern
|
||||
}
|
||||
|
||||
func (b *buzHashBoundaryChecker) WindowSize() int {
|
||||
return b.windowSize
|
||||
}
|
||||
@@ -1,7 +1,8 @@
|
||||
package types
|
||||
|
||||
import (
|
||||
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
|
||||
"crypto/sha1"
|
||||
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/d"
|
||||
"github.com/attic-labs/noms/ref"
|
||||
@@ -161,11 +162,9 @@ func (cl compoundList) IterAll(f listIterAllFunc) {
|
||||
}
|
||||
|
||||
func newListLeafBoundaryChecker() boundaryChecker {
|
||||
return newBuzHashBoundaryChecker(listWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool {
|
||||
v := item.(Value)
|
||||
digest := v.Ref().Digest()
|
||||
b := digest[0]
|
||||
return h.HashByte(b)&listPattern == listPattern
|
||||
return newBuzHashBoundaryChecker(listWindowSize, sha1.Size, listPattern, func(item sequenceItem) []byte {
|
||||
digest := item.(Value).Ref().Digest()
|
||||
return digest[:]
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ func (tsl testSimpleList) Get(idx uint64) Value {
|
||||
}
|
||||
|
||||
func getTestSimpleListLen() int {
|
||||
return int(listPattern * 16)
|
||||
return int(listPattern * 50)
|
||||
}
|
||||
|
||||
func getTestSimpleList() testSimpleList {
|
||||
|
||||
@@ -3,14 +3,12 @@ package types
|
||||
import (
|
||||
"crypto/sha1"
|
||||
|
||||
"github.com/attic-labs/noms/Godeps/_workspace/src/github.com/attic-labs/buzhash"
|
||||
"github.com/attic-labs/noms/chunks"
|
||||
"github.com/attic-labs/noms/d"
|
||||
"github.com/attic-labs/noms/ref"
|
||||
)
|
||||
|
||||
const (
|
||||
objectWindowSize = 8 * sha1.Size
|
||||
objectWindowSize = 8
|
||||
objectPattern = uint32(1<<6 - 1) // Average size of 64 elements
|
||||
)
|
||||
|
||||
@@ -100,33 +98,10 @@ func newMetaSequenceFromData(tuples metaSequenceData, t Type, cs chunks.ChunkSto
|
||||
panic("not reachable")
|
||||
}
|
||||
|
||||
type checkHashFn func(h *buzhash.BuzHash, item sequenceItem) bool
|
||||
|
||||
type buzHashBoundaryChecker struct {
|
||||
h *buzhash.BuzHash
|
||||
windowSize int
|
||||
checkHash checkHashFn
|
||||
}
|
||||
|
||||
func newBuzHashBoundaryChecker(windowSize int, checkHash checkHashFn) boundaryChecker {
|
||||
return &buzHashBoundaryChecker{buzhash.NewBuzHash(uint32(windowSize)), windowSize, checkHash}
|
||||
}
|
||||
|
||||
func (b *buzHashBoundaryChecker) Write(item sequenceItem) bool {
|
||||
return b.checkHash(b.h, item)
|
||||
}
|
||||
|
||||
func (b *buzHashBoundaryChecker) WindowSize() int {
|
||||
return b.windowSize
|
||||
}
|
||||
|
||||
func newMetaSequenceBoundaryChecker() boundaryChecker {
|
||||
return newBuzHashBoundaryChecker(objectWindowSize, func(h *buzhash.BuzHash, item sequenceItem) bool {
|
||||
mt := item.(metaTuple)
|
||||
digest := mt.ref.Digest()
|
||||
_, err := h.Write(digest[:])
|
||||
d.Chk.NoError(err)
|
||||
return h.Sum32()&objectPattern == objectPattern
|
||||
return newBuzHashBoundaryChecker(objectWindowSize, sha1.Size, objectPattern, func(item sequenceItem) []byte {
|
||||
digest := item.(metaTuple).ref.Digest()
|
||||
return digest[:]
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user