Argh, submodules incompatible with CodePipeline. Vendor manually instead.

This commit is contained in:
Aaron Boodman
2016-02-07 21:16:31 -08:00
parent 3240576448
commit c0c00b5259
9 changed files with 326 additions and 7 deletions
-3
View File
@@ -1,3 +0,0 @@
[submodule "vendor/github.com/attic-labs/buzhash"]
path = vendor/github.com/attic-labs/buzhash
url = ../buzhash.git
-3
View File
@@ -2,9 +2,6 @@ language: go
go:
- 1.5
before_install:
- echo "machine github.com\n login atticlabs-ci\n password $CI_USER_PASSWORD"
- echo -e "machine github.com\n login atticlabs-ci\n password $CI_USER_PASSWORD"
- echo -e "machine github.com\n login atticlabs-ci\n password $CI_USER_PASSWORD" >> ~/.netrc
- npm version
- nvm install stable
- nvm use stable
Executable
+39
View File
@@ -0,0 +1,39 @@
#!/usr/bin/python
import os, os.path, subprocess, sys, shutil
def main():
if len(sys.argv) != 2:
print 'Usage: %s github.com/foo/bar' % sys.argv[0]
sys.exit(1)
dep = sys.argv[1]
if dep.startswith('http'):
print 'Argument should not start with "http" or "https" (https is implied)'
sys.exit(1)
if not os.path.isdir('.git'):
print '%s must be run from the root of a repository' % sys.argv[0]
sys.exit(1)
depdir = os.path.join('vendor', dep)
shutil.rmtree(depdir, True)
parent = os.path.dirname(depdir)
if not os.path.isdir(parent):
os.makedirs(parent)
os.chdir(parent)
subprocess.check_call(['git', 'clone', '--depth=1', 'https://%s' % dep])
head = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
os.chdir(os.path.basename(depdir))
f = open('.version', 'w')
f.write(head)
f.close()
shutil.rmtree('.git')
if __name__ == '__main__':
main()
+1
View File
@@ -0,0 +1 @@
658a73880b699b2e9468a4154da226cd101d4b4d
+4
View File
@@ -0,0 +1,4 @@
DO WHATEVER THE FUCK YOU WANT, PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHATEVER THE FUCK YOU WANT.
+24
View File
@@ -0,0 +1,24 @@
# buzhash
Package buzhash implements a buzhash algorithm using this defintion <http://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial>.
## Rolling hash
Buzhash is a rolling hash function, that means that the current hash sum is the sum of the last n consumed bytes.
Example:
* Message 1: <code>This is a stupid example text to demo<strong>nstrate buzhash.</strong></code>
* Message 2: <code>Another text to demo<strong>nstrate buzhash.</strong></code>
When hashing both messages with a buzhasher with n=16, both messages will have the same hash sum, since the last 16 characters (`nstrate buzhash.`) are equal.
This can be useful, when searching for a data fragment in large files, without knowing the fragment (only its hash). This is used in binary diff tools, such as `rdiff` (although they use a different rolling hash function).
## Installation
`go get github.com/kch42/buzhash`
## Documentation
Either install the package and use a local godoc server or use [godoc.org](http://godoc.org/github.com/kch42/buzhash)
+143
View File
@@ -0,0 +1,143 @@
// Package buzhash implements a buzhash algorithm using this defintion:
// http://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
//
// BuzHash is a rolling hash function, that means that the current sum will
// always be the sum of the last n input bytes.
package buzhash
import (
"bytes"
"encoding/binary"
)
// 256 random uint32's to hash a single byte
var bytehash = [256]uint32{
0x12bd9527, 0xf4140cea, 0x987bd6e1, 0x79079850, 0xafbfd539, 0xd350ce0a,
0x82973931, 0x9fc32b9c, 0x28003b88, 0xc30c13aa, 0x6b678c34, 0x5844ef1d,
0xaa552c18, 0x4a77d3e8, 0xd1f62ea0, 0x6599417c, 0xfbe30e7a, 0xf9e2d5ee,
0xa1fca42e, 0x41548969, 0x116d5b59, 0xaeda1e1a, 0xc5191c17, 0x54b9a3cb,
0x727e492a, 0x5c432f91, 0x31a50bce, 0xc2696af6, 0x217c8020, 0x1262aefc,
0xace75924, 0x9876a04f, 0xaf300bc2, 0x3ffce3f6, 0xd6680fb5, 0xd0b1ced8,
0x6651f842, 0x736fadef, 0xbc2d3429, 0xb03d2904, 0x7e634ba4, 0xdfd87d8c,
0x7988d63a, 0x4be4d933, 0x6a8d0382, 0x9e132d62, 0x3ee9c95f, 0xfec05b97,
0x6907ad34, 0x8616cfcc, 0xa6aabf24, 0x8ad1c92e, 0x4f2affc0, 0xb87519db,
0x6576eaf6, 0x15dbe00a, 0x63e1dd82, 0xa36b6a81, 0xeead99b3, 0xbc6a4309,
0x3478d1a7, 0x2182bcc0, 0xdd50cfce, 0x7cb25580, 0x73075483, 0x503b7f42,
0x4cd50d63, 0x3f4d94c9, 0x385fcbb7, 0x90daf16c, 0xece10b8e, 0x11c1cb04,
0x816a899b, 0x69a29d06, 0xfb090b37, 0xf98ef13c, 0x07653435, 0x9f15dc42,
0x3b43abdf, 0x1334283f, 0x93f3d9af, 0x0cbdfe71, 0xa788a614, 0x4f54d2f0,
0xd4374fc7, 0x70557ce7, 0xf741fce8, 0xe4b6f661, 0xc630cb98, 0x387a6366,
0x72f428fd, 0x539009db, 0xc53e3810, 0x1e1a52e5, 0x7d6816b0, 0x040f9b81,
0x9c99c9fb, 0x9f3af3d2, 0x774d1061, 0xd5c840ea, 0x8e1480fe, 0x6ee4023c,
0x2fbda535, 0xd88eff7a, 0xd8632a2a, 0x43c4e024, 0x3ef27971, 0xc72866fd,
0xe35cc630, 0x46d96220, 0x437a8384, 0xe92caf0c, 0x6290a47e, 0xa7bb9238,
0x0e1000f9, 0x49e76bdc, 0x3acfb4b8, 0x03582b8e, 0x6ea2de4e, 0x2ec1008d,
0xfcc8df69, 0x91c2fe0a, 0xb471c7d9, 0x778be812, 0x70d29ad1, 0x76411cbf,
0xc302e81c, 0x4e445194, 0x22e3aa72, 0xb65762e9, 0xa280db05, 0x827aa70e,
0x4c531a9d, 0x7a60bf4a, 0x8fd95a44, 0x2289aef0, 0xcd50ddc4, 0x639aae69,
0x5fe85ed6, 0x4ed724ff, 0x00f04f7d, 0x95a5fcb0, 0x88255d15, 0xa603d2c9,
0xf6956a5b, 0x53ea7f3e, 0xb570f225, 0x2b3be203, 0xa181e40e, 0xc413cdce,
0xa7cb1ebb, 0xcf258b1f, 0x516eb016, 0xca204586, 0xd1e69894, 0xe85a73d3,
0x7db2d382, 0xae73b463, 0x3598d643, 0x5087c864, 0xd91f30b6, 0xe1d4d1e7,
0x73b3b337, 0xceac1233, 0x8edf7845, 0xa69c45c9, 0xdb5db3ab, 0x28cfade8,
0xebfa49e7, 0xcbc2a659, 0x59cce971, 0x959a01af, 0x8ee9aae7, 0xfb2f01c6,
0x5a752836, 0x9ed12981, 0x618d05b6, 0x93ec12b3, 0x4590c779, 0xed1317a2,
0x03fe5835, 0x7ad3c6f7, 0xd4aad5b5, 0x1a995ed7, 0x247bfaa4, 0x69c2c799,
0x745fa405, 0xc5b9f239, 0xc3d9aebc, 0xa6f60e0b, 0xdf1e91d7, 0xab8e041c,
0xee3188c6, 0x37377a9e, 0xc0e1a3bf, 0x19a5a9e4, 0x56cb9556, 0xc4d33d3f,
0xfb1eb03e, 0xf9557057, 0x1be31d37, 0xd1fa65f1, 0xf518d714, 0x570ac722,
0xf26cf66a, 0x24794d47, 0x8ba2e402, 0x3f5137e6, 0x35be1453, 0x43350478,
0x9f05ee88, 0x364cf9cf, 0x39a23ee7, 0xa4db8d49, 0xc2ebb3d2, 0xc6fb99d5,
0xe014dfb0, 0x7156d425, 0xe090a87a, 0x4cc12f78, 0x1b30f503, 0x06694a7a,
0x68198cd1, 0x2f8345bd, 0x9d79198e, 0xd871943f, 0x22ef6cf4, 0xe81b1c15,
0x067b61d8, 0xfc4ea4f5, 0xfe6dab57, 0x1bf744ba, 0xa70b6a25, 0xafe6e412,
0xc6c1a05c, 0x8ffbe3ce, 0xc4270af1, 0xf3f36373, 0xc4507dd8, 0x5e6fd1e2,
0x58cd9739, 0x47d3c5b5, 0xe1d5a343, 0x3d4dea4a, 0x893d91ae, 0xbb2a5e2a,
0x0d57b800, 0x652a7cc9, 0x6a68ccfd, 0x62529f0b, 0xec5f36d6, 0x766cceda,
0x96ca63ef, 0xa0499838, 0xd9030f59, 0x8185f4d2}
// BuzHash implements the hash.Hash32 interface and also has a function to write a single byte.
type BuzHash struct {
state uint32
buf []byte
n uint32
bshiftn uint
bshiftm uint
bufpos uint32
overflow bool
}
// NewBuzHash generates and initializes a new BuzHash object with block size n
func NewBuzHash(n uint32) *BuzHash {
rv := new(BuzHash)
rv.n = n
rv.bshiftn = uint(n % 32)
rv.bshiftm = 32 - rv.bshiftn
rv.buf = make([]byte, n)
rv.Reset()
return rv
}
// HashByte updates the hash with a single byte and returns the resulting sum
func (bh *BuzHash) HashByte(b byte) uint32 {
if bh.bufpos == bh.n {
bh.overflow = true
bh.bufpos = 0
}
state := bh.state
state = (state << 1) | (state >> 31)
if bh.overflow {
toshift := bytehash[bh.buf[bh.bufpos]]
state ^= (toshift << bh.bshiftn) | (toshift >> bh.bshiftm)
}
bh.buf[bh.bufpos] = b
bh.bufpos++
state ^= bytehash[b]
bh.state = state
return state
}
// Write updates the hash with the bytes from slice p
func (bh *BuzHash) Write(p []byte) (int, error) {
for _, b := range p {
bh.HashByte(b)
}
return len(p), nil
}
// Sum appends the (little endian) checksum to b
func (bh *BuzHash) Sum(b []byte) []byte {
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, bh.state)
hash := buf.Bytes()
for _, hb := range hash {
b = append(b, hb)
}
return b
}
func (bh *BuzHash) Reset() {
bh.state = 0
bh.bufpos = 0
bh.overflow = false
}
func (bh *BuzHash) Size() int {
return 4
}
func (bh *BuzHash) BlockSize() int {
return int(bh.n)
}
func (bh *BuzHash) Sum32() uint32 {
return bh.state
}
+115
View File
@@ -0,0 +1,115 @@
package buzhash
import (
"bytes"
"encoding/binary"
"fmt"
"testing"
)
var loremipsum1 = `Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et
magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis,
ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis
enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In
enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis
eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum
semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu,
consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra
quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet.
Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur
ullamcorper ultricies nisi. Nam eget dui.
Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper
libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel,
luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt
tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam
sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit
amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum
sodales, augue velit cursus nunc, quis gravida magna mi a libero. Fusce
vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed,
nonummy id, metus. Nullam accumsan lorem in dui. Cras ultricies mi eu turpis
hendrerit fringilla. Vestibulum ante ipsum primis in faucibus orci luctus et
ultrices posuere cubilia Curae; In ac dui quis mi consectetuer lacinia.`
var loremipsum2 = `Nam pretium turpis et arcu. Duis arcu tortor, suscipit eget,
imperdiet nec, imperdiet iaculis, ipsum. Sed aliquam ultrices mauris. Integer
ante arcu, accumsan a, consectetuer eget, posuere ut, mauris. Praesent
adipiscing. Phasellus ullamcorper ipsum rutrum nunc. Nunc nonummy metus.
Vestibulum volutpat pretium libero. Cras id dui. Aenean ut eros et nisl sagittis
vestibulum. Nullam nulla eros, ultricies sit amet, nonummy id, imperdiet
feugiat, pede. Sed lectus. Donec mollis hendrerit risus. Phasellus nec sem in
justo pellentesque facilisis. Etiam imperdiet imperdiet orci. Nunc nec neque.
Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi.
Curabitur ligula sapien, tincidunt non, euismod vitae, posuere imperdiet, leo.
Maecenas malesuada. Praesent congue erat at massa. Sed cursus turpis vitae
tortor. Donec posuere vulputate arcu. Phasellus accumsan cursus velit.
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia
Curae; Sed aliquam, nisi quis porttitor congue, elit erat euismod orci, ac
placerat dolor lectus quis orci. Phasellus consectetuer vestibulum elit. Aenean
tellus metus, bibendum sed, posuere ac, mattis non, nunc. Vestibulum fringilla
pede sit amet augue. In turpis. Pellentesque posuere. Praesent turpis.`
// Test the rolling hash property of the buzhash
func TestRollingHash(t *testing.T) {
phrase1 := "Aenean massa. Cum sociis natoque"
phrase2 := "Phasellus leo dolor, tempus non, auctor et, hendrerit quis, nisi"
hasher1 := NewBuzHash(32)
fmt.Fprint(hasher1, phrase1)
p1sum := hasher1.Sum32()
hasher1.Reset()
found := false
for idx, b := range []byte(loremipsum1) {
ssum := hasher1.HashByte(b)
if (ssum == p1sum) && (idx-32 == 91) {
found = true
break
}
}
if !found {
t.Errorf("Could not find '%s' by its checksum %08x.", phrase1, p1sum)
}
hasher2 := NewBuzHash(64)
fmt.Fprint(hasher2, phrase2)
p2sum := hasher2.Sum32()
hasher2.Reset()
found = false
for idx, b := range []byte(loremipsum2) {
ssum := hasher2.HashByte(b)
if (ssum == p2sum) && (idx-64 == 592) {
found = true
break
}
}
if !found {
t.Errorf("Could not find '%s' by its checksum %08x.", phrase2, p2sum)
}
}
func TestSum(t *testing.T) {
h := NewBuzHash(16)
fmt.Fprint(h, loremipsum1)
sum32 := h.Sum32()
sumBytes := h.Sum(nil)
if l := len(sumBytes); l != 4 {
t.Fatalf("h.Sum() returned slice of len %d, expected 4", l)
}
var sumBytesAsNum uint32
if err := binary.Read(bytes.NewBuffer(sumBytes), binary.LittleEndian, &sumBytesAsNum); err != nil {
t.Fatalf("Could not read binary number? %s", err)
}
if sum32 != sumBytesAsNum {
t.Errorf("Sum32 (%08x) and Sum (%08x) returned different sums!", sum32, sumBytesAsNum)
}
}