NBS table names now just hash of suffix block (#3421)

Previously, an NBS table was named by hashing the hashes
of every chunk present in the table, in hash order. That meant
that to generate the name of a table you'd need to iterate
the prefix map and load every associated suffix, which would
be expensive when, e.g., compacting multiple tables. Hashing
just the suffix block instead is way cheaper and only slightly
more likely to wind up with a name collision.

Toward #3411
This commit is contained in:
cmasone-attic
2017-04-24 14:45:54 -07:00
committed by GitHub
parent 98e408a0d0
commit 1d52617eb5
5 changed files with 3 additions and 4 deletions

View File

@@ -30,7 +30,7 @@ import (
const (
// StorageVersion is the version of the on-disk Noms Chunks Store data format.
StorageVersion = "3"
StorageVersion = "4"
defaultMemTableSize uint64 = (1 << 20) * 128 // 128MB
defaultAWSReadLimit = 1024

View File

@@ -128,8 +128,6 @@ func (tw *tableWriter) writeIndex() {
suffixesOffset := lengthsOffset + numRecords*lengthSize // skip size for each record
for _, pi := range tw.prefixes {
binary.BigEndian.PutUint64(pfxScratch[:], pi.prefix)
tw.blockHash.Write(pfxScratch[:])
tw.blockHash.Write(pi.suffix)
// hash prefix
n := uint64(copy(tw.buff[tw.pos:], pfxScratch[:]))
@@ -149,6 +147,7 @@ func (tw *tableWriter) writeIndex() {
n = uint64(copy(tw.buff[offset:], pi.suffix))
d.Chk.True(n == addrSuffixSize)
}
tw.blockHash.Write(tw.buff[suffixesOffset : suffixesOffset+numRecords*addrSuffixSize])
tw.pos = suffixesOffset + numRecords*addrSuffixSize
}

View File

@@ -1 +1 @@
3:7.9:2i6mkbcajmnkkguethlmo929rif79r8r:c1uoqa08f12o0abqgv2lvavmppuc3kg4:m6j2e6jd69tbfk7d4hqf05ke2so64df3:2:e9v26bl5mov3mtp2vdvpb7q926oqn2dn:2
4:7.9:nh54p8hlk0c6c5q9mf8gb33pt9r9poc0:c1uoqa08f12o0abqgv2lvavmppuc3kg4:7s84n5m4b7i2n1r0vr0ksaemr9qjnhdl:2:ullneu8fijlfnhhmq82dtco4n60gupc2:2