NBS: Serialize Commit() calls within a process (#3594)

This patch uses process-wide per-store locking to ensure that only one
NomsBlockStore instance is ever trying to update the upstream NBS
manifest at a time. It also locks out attempts to fetch the manifest
contents during that window.

Conjoining is now much simpler. Since only one instance can ever be in
the critical path of Commit at a time, and conjoining is triggered on
that critical path, we now simply perform the conjoin while excluding
all other in-process NBS instances. Hopefully, locking out instances
that want to fetch the manifest contents during a conjoin won't cripple
performance.

Fixes issue #3583
Author: cmasone-attic (committed by GitHub)
Date: 2017-07-20 14:04:43 -07:00
Parent: 44941ee44c
Commit: ccdf08c4f8
9 changed files with 279 additions and 293 deletions
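
The heart of the change is a small, process-wide table of named locks, keyed by store name and built on sync.Cond, which the Commit path takes per store (see the manifestLocks additions to manifest.go below). The following is a minimal, self-contained sketch of that pattern; the namedLocks type and its method names are illustrative stand-ins, not the real manifestLocks, which additionally keeps separate "updating" and "fetching" lock sets.

package main

import (
	"fmt"
	"sync"
	"time"
)

// namedLocks grants one holder at a time per store name; waiters park on a sync.Cond.
type namedLocks struct {
	inProgress map[string]struct{}
	cond       *sync.Cond
}

func newNamedLocks() *namedLocks {
	return &namedLocks{inProgress: map[string]struct{}{}, cond: sync.NewCond(&sync.Mutex{})}
}

func (nl *namedLocks) lock(db string) {
	nl.cond.L.Lock()
	defer nl.cond.L.Unlock()
	for {
		if _, busy := nl.inProgress[db]; !busy {
			nl.inProgress[db] = struct{}{}
			return
		}
		nl.cond.Wait() // another goroutine holds this store's lock; wait for Broadcast
	}
}

func (nl *namedLocks) unlock(db string) {
	nl.cond.L.Lock()
	defer nl.cond.L.Unlock()
	delete(nl.inProgress, db)
	nl.cond.Broadcast() // wake all waiters; each re-checks its own store name
}

func main() {
	nl := newNamedLocks()
	wg := sync.WaitGroup{}
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			nl.lock("store-a") // serializes the three goroutines on this one store
			defer nl.unlock("store-a")
			fmt.Println("goroutine", i, "is committing")
			time.Sleep(10 * time.Millisecond)
		}(i)
	}
	wg.Wait()
}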


@@ -293,10 +293,14 @@ func TestBlockStoreConjoinOnCommit(t *testing.T) {
}
}
makeManifestManager := func(m manifest) manifestManager {
return manifestManager{m, newManifestCache(0), newManifestLocks()}
}
newChunk := chunks.NewChunk([]byte("gnu"))
t.Run("NoConjoin", func(t *testing.T) {
mm := cachingManifest{&fakeManifest{}, newManifestCache(0)}
mm := makeManifestManager(&fakeManifest{})
p := newFakeTablePersister()
c := &fakeConjoiner{}
@@ -329,7 +333,7 @@ func TestBlockStoreConjoinOnCommit(t *testing.T) {
[]cannedConjoin{makeCanned(upstream[:2], upstream[2:], p)},
}
smallTableStore := newNomsBlockStore(cachingManifest{fm, newManifestCache(0)}, p, c, testMemTableSize)
smallTableStore := newNomsBlockStore(makeManifestManager(fm), p, c, testMemTableSize)
root := smallTableStore.Root()
smallTableStore.Put(newChunk)
@@ -352,7 +356,7 @@ func TestBlockStoreConjoinOnCommit(t *testing.T) {
},
}
smallTableStore := newNomsBlockStore(cachingManifest{fm, newManifestCache(0)}, p, c, testMemTableSize)
smallTableStore := newNomsBlockStore(makeManifestManager(fm), p, c, testMemTableSize)
root := smallTableStore.Root()
smallTableStore.Put(newChunk)
@@ -378,20 +382,20 @@ func (fc *fakeConjoiner) ConjoinRequired(ts tableSet) bool {
return fc.canned[0].should
}
func (fc *fakeConjoiner) Conjoin(mm manifest, p tablePersister, novelCount int, stats *Stats) {
func (fc *fakeConjoiner) Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
d.PanicIfTrue(len(fc.canned) == 0)
canned := fc.canned[0]
fc.canned = fc.canned[1:]
_, contents := mm.ParseIfExists(stats, nil)
newContents := manifestContents{
vers: constants.NomsVersion,
root: contents.root,
root: upstream.root,
specs: canned.specs,
lock: generateLockHash(contents.root, canned.specs),
lock: generateLockHash(upstream.root, canned.specs),
}
contents = mm.Update(contents.lock, newContents, stats, nil)
d.PanicIfFalse(contents.lock == newContents.lock)
upstream = mm.Update(upstream.lock, newContents, stats, nil)
d.PanicIfFalse(upstream.lock == newContents.lock)
return upstream
}
func assertInputInStore(input []byte, h hash.Hash, s chunks.ChunkStore, assert *assert.Assertions) {


@@ -16,96 +16,32 @@ import (
type conjoiner interface {
// ConjoinRequired tells the caller whether or not it's time to request a
// Conjoin, based upon the contents of |ts| and the conjoiner
// implementation's policy. Implementations must be goroutine-safe.
// implementation's policy.
ConjoinRequired(ts tableSet) bool
// Conjoin attempts to use |p| to conjoin some number of tables referenced
// by |mm|, allowing it to update |mm| with a new, smaller, set of tables
// by |upstream|, allowing it to update |mm| with a new, smaller, set of tables
// that references precisely the same set of chunks. Conjoin() may not
// actually conjoin any upstream tables, usually because some out-of-
// process actor has already landed a conjoin of its own. Callers must
// handle this, likely by rebasing against upstream and re-evaluating the
// situation.
// Before performing the conjoin, implementations should verify that a
// conjoin is actually currently needed; callers may be working with an
// out-of-date notion of upstream state. |novelCount|, the number of new
// tables the caller is trying to land, may be used in this determination.
// Implementations must be goroutine-safe.
Conjoin(mm manifest, p tablePersister, novelCount int, stats *Stats)
Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents
}
func newAsyncConjoiner(maxTables int) *asyncConjoiner {
return &asyncConjoiner{
waiters: map[string]chan struct{}{},
maxTables: maxTables,
}
}
type asyncConjoiner struct {
mu sync.RWMutex
waiters map[string]chan struct{}
type inlineConjoiner struct {
maxTables int
}
func (c *asyncConjoiner) ConjoinRequired(ts tableSet) bool {
func (c inlineConjoiner) ConjoinRequired(ts tableSet) bool {
return ts.Size() > c.maxTables
}
// Conjoin checks to see if there's already a conjoin underway for the store
// described by |mm|. If so, it blocks until that conjoin completes. If not,
// it starts one and blocks until it completes. Conjoin can be called
// concurrently from many goroutines.
func (c *asyncConjoiner) Conjoin(mm manifest, p tablePersister, novelCount int, stats *Stats) {
needsConjoin := func(upstreamCount int) bool {
return upstreamCount+novelCount > c.maxTables
}
c.await(mm.Name(), func() { conjoin(mm, p, needsConjoin, stats) }, nil)
return
func (c inlineConjoiner) Conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
return conjoin(upstream, mm, p, stats)
}
// await checks to see if there's already something running for |id| and, if
// so, waits for it to complete. If not, it runs f() and waits for it to
// complete. While f() is running, other callers to await that pass in |id|
// will block until f() completes.
func (c *asyncConjoiner) await(id string, f func(), testWg *sync.WaitGroup) {
wait := func() <-chan struct{} {
c.mu.Lock()
defer c.mu.Unlock()
if ch, present := c.waiters[id]; present {
return ch
}
c.waiters[id] = make(chan struct{})
go c.runAndNotify(id, f)
return c.waiters[id]
}()
if testWg != nil {
testWg.Done()
}
<-wait
}
// runAndNotify runs f() and, upon completion, signals everyone who called
// await(id).
func (c *asyncConjoiner) runAndNotify(id string, f func()) {
f()
c.mu.Lock()
defer c.mu.Unlock()
ch, present := c.waiters[id]
d.PanicIfFalse(present)
close(ch)
delete(c.waiters, id)
}
func conjoin(mm manifest, p tablePersister, needsConjoin func(int) bool, stats *Stats) {
exists, upstream := mm.ParseIfExists(stats, nil)
d.PanicIfFalse(exists)
// This conjoin may have been requested by someone with an out-of-date notion of what's upstream. Verify that we actually still believe a conjoin is needed and, if not, return early
if !needsConjoin(len(upstream.specs)) {
return
}
func conjoin(upstream manifestContents, mm manifestUpdater, p tablePersister, stats *Stats) manifestContents {
var conjoined tableSpec
var conjoinees, keepers []tableSpec
@@ -126,7 +62,7 @@ func conjoin(mm manifest, p tablePersister, needsConjoin func(int) bool, stats *
upstream = mm.Update(upstream.lock, newContents, stats, nil)
if newContents.lock == upstream.lock {
return
return upstream // Success!
}
// Optimistic lock failure. Someone else moved the root, the set of tables, or both out from under us.
// If we can re-use the conjoin we already performed, we want to try again. Currently, we will only do so if ALL conjoinees are still present upstream. If we can't re-use...then someone else almost certainly landed a conjoin upstream. In this case, bail and let clients ask again if they think they still can't proceed.
@@ -137,7 +73,7 @@ func conjoin(mm manifest, p tablePersister, needsConjoin func(int) bool, stats *
}
for _, c := range conjoinees {
if _, present := upstreamNames[c.name]; !present {
return // Bail!
return upstream // Bail!
}
conjoineeSet[c.name] = struct{}{}
}
@@ -150,6 +86,7 @@ func conjoin(mm manifest, p tablePersister, needsConjoin func(int) bool, stats *
}
}
}
panic("Not Reached")
}
func conjoinTables(p tablePersister, upstream []tableSpec, stats *Stats) (conjoined tableSpec, conjoinees, keepers []tableSpec) {
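
The new Conjoin contract above shifts responsibility to the caller: Conjoin hands back whatever should now be treated as upstream, and the caller rebases onto it and re-evaluates rather than assuming a conjoin landed. A minimal, self-contained sketch of that contract follows; fakeContents and conjoinOnce are hypothetical stand-ins, and the real caller (NomsBlockStore.updateManifest, later in this commit) re-evaluates by returning errOptimisticLockFailedTables and retrying Commit instead of looping in place.

package main

import "fmt"

// fakeContents stands in for manifestContents: a generation counter plus table specs.
type fakeContents struct {
	gen   int
	specs []string
}

// conjoinOnce stands in for conjoiner.Conjoin: it returns the contents the caller
// should now treat as upstream, which may be unchanged if someone else already
// landed a conjoin of their own.
func conjoinOnce(upstream fakeContents) fakeContents {
	if len(upstream.specs) <= 1 {
		return upstream // nothing left to conjoin; bail and let the caller re-evaluate
	}
	return fakeContents{gen: upstream.gen + 1, specs: []string{"conjoined"}}
}

func main() {
	upstream := fakeContents{specs: []string{"t1", "t2", "t3"}}
	maxTables := 1

	// Caller contract: rebase onto the returned contents and check again.
	for len(upstream.specs) > maxTables {
		upstream = conjoinOnce(upstream)
		fmt.Printf("rebased onto gen %d with %d table(s)\n", upstream.gen, len(upstream.specs))
	}
}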


@@ -8,7 +8,6 @@ import (
"bytes"
"encoding/binary"
"sort"
"sync"
"testing"
"github.com/attic-labs/noms/go/constants"
@@ -16,50 +15,6 @@ import (
"github.com/attic-labs/testify/assert"
)
func TestAsyncConjoinerAwait(t *testing.T) {
runTest := func(t *testing.T, names ...string) {
c := newAsyncConjoiner(defaultMaxTables)
// mu protects |conjoins|
mu := sync.Mutex{}
conjoins := map[string]int{}
// |trigger| ensures the goroutines will all await() concurrently
trigger := &sync.WaitGroup{}
trigger.Add(len(names))
// |wg| allows the test to wait for all the goroutines started below
wg := sync.WaitGroup{}
for _, n := range names {
wg.Add(1)
go func(db string) {
defer wg.Done()
c.await(db, func() {
trigger.Wait()
mu.Lock()
defer mu.Unlock()
cnt := conjoins[db]
cnt++
conjoins[db] = cnt
}, trigger)
}(n)
}
wg.Wait()
for _, n := range names {
assert.EqualValues(t, 1, conjoins[n], "Wrong num conjoins for %s", n)
}
}
t.Run("AllDifferent", func(t *testing.T) {
runTest(t, "foo", "bar", "baz")
})
t.Run("Concurrent", func(t *testing.T) {
runTest(t, "foo", "foo", "bar", "foo", "baz")
})
}
type tableSpecsByAscendingCount []tableSpec
func (ts tableSpecsByAscendingCount) Len() int { return len(ts) }
@@ -128,11 +83,11 @@ func TestConjoin(t *testing.T) {
}
}
setup := func(lock addr, root hash.Hash, sizes []uint32) (fm *fakeManifest, p tablePersister, upstream []tableSpec) {
setup := func(lock addr, root hash.Hash, sizes []uint32) (fm *fakeManifest, p tablePersister, upstream manifestContents) {
p = newFakeTablePersister()
upstream = makeTestTableSpecs(sizes, p)
fm = &fakeManifest{}
fm.set(constants.NomsVersion, lock, root, upstream)
fm.set(constants.NomsVersion, lock, root, makeTestTableSpecs(sizes, p))
_, upstream = fm.ParseIfExists(nil, nil)
return
}
@@ -150,7 +105,6 @@ func TestConjoin(t *testing.T) {
}
stats := &Stats{}
alwaysConjoin := func(int) bool { return true }
startLock, startRoot := computeAddr([]byte("lock")), hash.Of([]byte("root"))
t.Run("Success", func(t *testing.T) {
// Compact some tables, no one interrupts
@@ -158,11 +112,11 @@ func TestConjoin(t *testing.T) {
t.Run(c.name, func(t *testing.T) {
fm, p, upstream := setup(startLock, startRoot, c.precompact)
conjoin(fm, p, alwaysConjoin, stats)
conjoin(upstream, fm, p, stats)
exists, newUpstream := fm.ParseIfExists(stats, nil)
assert.True(t, exists)
assert.Equal(t, c.postcompact, getSortedSizes(newUpstream.specs))
assertContainAll(t, p, upstream, newUpstream.specs)
assertContainAll(t, p, upstream.specs, newUpstream.specs)
})
}
})
@@ -182,13 +136,14 @@ func TestConjoin(t *testing.T) {
newTable := makeExtra(p)
u := updatePreemptManifest{fm, func() {
fm.set(constants.NomsVersion, computeAddr([]byte("lock2")), startRoot, append(upstream, newTable))
specs := append([]tableSpec{}, upstream.specs...)
fm.set(constants.NomsVersion, computeAddr([]byte("lock2")), startRoot, append(specs, newTable))
}}
conjoin(u, p, alwaysConjoin, stats)
conjoin(upstream, u, p, stats)
exists, newUpstream := fm.ParseIfExists(stats, nil)
assert.True(t, exists)
assert.Equal(t, append([]uint32{1}, c.postcompact...), getSortedSizes(newUpstream.specs))
assertContainAll(t, p, append(upstream, newTable), newUpstream.specs)
assertContainAll(t, p, append(upstream.specs, newTable), newUpstream.specs)
})
}
})
@@ -200,31 +155,15 @@ func TestConjoin(t *testing.T) {
fm, p, upstream := setup(startLock, startRoot, c.precompact)
u := updatePreemptManifest{fm, func() {
fm.set(constants.NomsVersion, computeAddr([]byte("lock2")), startRoot, upstream[1:])
fm.set(constants.NomsVersion, computeAddr([]byte("lock2")), startRoot, upstream.specs[1:])
}}
conjoin(u, p, alwaysConjoin, stats)
conjoin(upstream, u, p, stats)
exists, newUpstream := fm.ParseIfExists(stats, nil)
assert.True(t, exists)
assert.Equal(t, c.precompact[1:], getSortedSizes(newUpstream.specs))
})
}
})
neverConjoin := func(int) bool { return false }
t.Run("ExitEarly", func(t *testing.T) {
// conjoin called with out-of-date manifest; no longer needs a conjoin
for _, c := range tc {
t.Run(c.name, func(t *testing.T) {
fm, p, upstream := setup(startLock, startRoot, c.precompact)
conjoin(fm, p, neverConjoin, stats)
exists, newUpstream := fm.ParseIfExists(stats, nil)
assert.True(t, exists)
assert.Equal(t, c.precompact, getSortedSizes(newUpstream.specs))
assertContainAll(t, p, upstream, newUpstream.specs)
})
}
})
}
type updatePreemptManifest struct {


@@ -8,8 +8,6 @@ import (
"fmt"
"os"
"path"
"sync"
"time"
"github.com/attic-labs/noms/go/chunks"
"github.com/attic-labs/noms/go/d"
@@ -30,6 +28,7 @@ type AWSStoreFactory struct {
table string
conjoiner conjoiner
manifestLocks *manifestLocks
manifestCache *manifestCache
}
@@ -59,22 +58,21 @@ func NewAWSStoreFactory(sess *session.Session, table, bucket string, maxOpenFile
tc,
},
table: table,
conjoiner: newAsyncConjoiner(awsMaxTables),
conjoiner: inlineConjoiner{awsMaxTables},
manifestLocks: newManifestLocks(),
manifestCache: newManifestCache(defaultManifestCacheSize),
}
}
func (asf *AWSStoreFactory) CreateStore(ns string) chunks.ChunkStore {
mm := cachingManifest{newDynamoManifest(asf.table, ns, asf.ddb), asf.manifestCache}
mm := manifestManager{newDynamoManifest(asf.table, ns, asf.ddb), asf.manifestCache, asf.manifestLocks}
return newNomsBlockStore(mm, asf.persister, asf.conjoiner, defaultMemTableSize)
}
func (asf *AWSStoreFactory) CreateStoreFromCache(ns string) chunks.ChunkStore {
mm := cachingManifest{newDynamoManifest(asf.table, ns, asf.ddb), asf.manifestCache}
mm := manifestManager{newDynamoManifest(asf.table, ns, asf.ddb), asf.manifestCache, asf.manifestLocks}
contents, _, present := func() (manifestContents, time.Time, bool) {
return asf.manifestCache.Get(mm.Name())
}()
contents, _, present := asf.manifestCache.Get(mm.Name())
if present {
return newNomsBlockStoreWithContents(mm, contents, asf.persister, asf.conjoiner, defaultMemTableSize)
}
@@ -90,8 +88,8 @@ type LocalStoreFactory struct {
indexCache *indexCache
conjoiner conjoiner
manifestCacheMu sync.Mutex
manifestCache *manifestCache
manifestLocks *manifestLocks
manifestCache *manifestCache
}
func checkDir(dir string) error {
@@ -117,8 +115,9 @@ func NewLocalStoreFactory(dir string, indexCacheSize uint64, maxOpenFiles int) c
dir: dir,
fc: newFDCache(maxOpenFiles),
indexCache: indexCache,
conjoiner: inlineConjoiner{defaultMaxTables},
manifestLocks: newManifestLocks(),
manifestCache: newManifestCache(defaultManifestCacheSize),
conjoiner: newAsyncConjoiner(defaultMaxTables),
}
}
@@ -126,20 +125,16 @@ func (lsf *LocalStoreFactory) CreateStore(ns string) chunks.ChunkStore {
path := path.Join(lsf.dir, ns)
d.PanicIfError(os.MkdirAll(path, 0777))
mm := cachingManifest{fileManifest{path}, lsf.manifestCache}
mm := manifestManager{fileManifest{path}, lsf.manifestCache, lsf.manifestLocks}
p := newFSTablePersister(path, lsf.fc, lsf.indexCache)
return newNomsBlockStore(mm, p, lsf.conjoiner, defaultMemTableSize)
}
func (lsf *LocalStoreFactory) CreateStoreFromCache(ns string) chunks.ChunkStore {
path := path.Join(lsf.dir, ns)
mm := cachingManifest{fileManifest{path}, lsf.manifestCache}
mm := manifestManager{fileManifest{path}, lsf.manifestCache, lsf.manifestLocks}
contents, _, present := func() (manifestContents, time.Time, bool) {
lsf.manifestCacheMu.Lock()
defer lsf.manifestCacheMu.Unlock()
return lsf.manifestCache.Get(mm.Name())
}()
contents, _, present := lsf.manifestCache.Get(mm.Name())
if present {
_, err := os.Stat(path)
d.PanicIfTrue(os.IsNotExist(err))


@@ -7,6 +7,7 @@ package nbs
import (
"crypto/sha512"
"strconv"
"sync"
"time"
"github.com/attic-labs/noms/go/d"
@@ -14,6 +15,9 @@ import (
)
type manifest interface {
// Name returns a stable, unique identifier for the store this manifest describes.
Name() string
// ParseIfExists extracts and returns values from a NomsBlockStore
// manifest, if one exists. Concrete implementations are responsible for
// defining how to find and parse the desired manifest, e.g. a
@@ -29,6 +33,10 @@ type manifest interface {
// implementation is guaranteeing exclusive access to the manifest.
ParseIfExists(stats *Stats, readHook func()) (exists bool, contents manifestContents)
manifestUpdater
}
type manifestUpdater interface {
// Update optimistically tries to write a new manifest containing
// |newContents|. If |lastLock| matches the lock hash in the currently
// persisted manifest (logically, the lock that would be returned by
@@ -45,9 +53,6 @@ type manifest interface {
// guaranteeing exclusive access to the manifest. This allows for testing
// of race conditions.
Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents
// Name returns a stable, unique identifier for the store this manifest describes.
Name() string
}
type manifestContents struct {
@@ -65,13 +70,80 @@ func (mc manifestContents) size() (size uint64) {
return
}
type cachingManifest struct {
mm manifest
cache *manifestCache
func newManifestLocks() *manifestLocks {
return &manifestLocks{map[string]struct{}{}, map[string]struct{}{}, sync.NewCond(&sync.Mutex{})}
}
func (cm cachingManifest) updateWillFail(lastLock addr) (cached manifestContents, doomed bool) {
if upstream, _, hit := cm.cache.Get(cm.Name()); hit {
type manifestLocks struct {
updating map[string]struct{}
fetching map[string]struct{}
cond *sync.Cond
}
func (ml *manifestLocks) lockForFetch(db string) {
lockByName(db, ml.cond, ml.fetching)
}
func (ml *manifestLocks) unlockForFetch(db string) {
unlockByName(db, ml.cond, ml.fetching)
}
func (ml *manifestLocks) lockForUpdate(db string) {
lockByName(db, ml.cond, ml.updating)
}
func (ml *manifestLocks) unlockForUpdate(db string) {
unlockByName(db, ml.cond, ml.updating)
}
func lockByName(db string, c *sync.Cond, locks map[string]struct{}) {
c.L.Lock()
defer c.L.Unlock()
for {
if _, inProgress := locks[db]; !inProgress {
locks[db] = struct{}{}
break
}
c.Wait()
}
}
func unlockByName(db string, c *sync.Cond, locks map[string]struct{}) {
c.L.Lock()
defer c.L.Unlock()
_, ok := locks[db]
d.PanicIfFalse(ok)
delete(locks, db)
c.Broadcast()
}
type manifestManager struct {
m manifest
cache *manifestCache
locks *manifestLocks
}
func (mm manifestManager) LockOutFetch() {
mm.locks.lockForFetch(mm.Name())
}
func (mm manifestManager) AllowFetch() {
mm.locks.unlockForFetch(mm.Name())
}
func (mm manifestManager) LockForUpdate() {
mm.locks.lockForUpdate(mm.Name())
}
func (mm manifestManager) UnlockForUpdate() {
mm.locks.unlockForUpdate(mm.Name())
}
func (mm manifestManager) updateWillFail(lastLock addr) (cached manifestContents, doomed bool) {
if upstream, _, hit := mm.cache.Get(mm.Name()); hit {
if lastLock != upstream.lock {
doomed, cached = true, upstream
}
@@ -79,13 +151,13 @@ func (cm cachingManifest) updateWillFail(lastLock addr) (cached manifestContents
return
}
func (cm cachingManifest) ParseIfExists(stats *Stats, readHook func()) (exists bool, contents manifestContents) {
func (mm manifestManager) Fetch(stats *Stats) (exists bool, contents manifestContents) {
entryTime := time.Now()
cm.cache.Lock(cm.Name())
defer cm.cache.Unlock(cm.Name())
mm.LockOutFetch()
defer mm.AllowFetch()
cached, t, hit := cm.cache.Get(cm.Name())
cached, t, hit := mm.cache.Get(mm.Name())
if hit && t.After(entryTime) {
// Cache contains a manifest which is newer than entry time.
@@ -93,27 +165,28 @@ func (cm cachingManifest) ParseIfExists(stats *Stats, readHook func()) (exists b
}
t = time.Now()
exists, contents = cm.mm.ParseIfExists(stats, readHook)
cm.cache.Put(cm.Name(), contents, t)
exists, contents = mm.m.ParseIfExists(stats, nil)
mm.cache.Put(mm.Name(), contents, t)
return
}
func (cm cachingManifest) Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents {
cm.cache.Lock(cm.Name())
defer cm.cache.Unlock(cm.Name())
if upstream, _, hit := cm.cache.Get(cm.Name()); hit {
// Callers MUST protect uses of Update with Lock/UnlockForUpdate.
// Update does not call Lock/UnlockForUpdate() on its own because it is
// intended to be used in a larger critical section along with updateWillFail.
func (mm manifestManager) Update(lastLock addr, newContents manifestContents, stats *Stats, writeHook func()) manifestContents {
if upstream, _, hit := mm.cache.Get(mm.Name()); hit {
if lastLock != upstream.lock {
return upstream
}
}
t := time.Now()
contents := cm.mm.Update(lastLock, newContents, stats, writeHook)
cm.cache.Put(cm.Name(), contents, t)
contents := mm.m.Update(lastLock, newContents, stats, writeHook)
mm.cache.Put(mm.Name(), contents, t)
return contents
}
func (cm cachingManifest) Name() string {
return cm.mm.Name()
func (mm manifestManager) Name() string {
return mm.m.Name()
}
type tableSpec struct {
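
The comment added to Update above states a calling convention without enforcing it: updateWillFail and Update are meant to run inside one LockForUpdate/UnlockForUpdate window. Below is a small, self-contained sketch of that discipline, with hypothetical stub types in place of manifestManager and manifestContents; a plain sync.Mutex stands in for the per-store named lock, and the real sequence lives in NomsBlockStore.updateManifest further down in this commit.

package main

import (
	"fmt"
	"sync"
)

type fakeContents struct {
	lock int // stands in for the manifest lock hash
	root string
}

// fakeManager stands in for manifestManager; it carries only what the sketch needs.
type fakeManager struct {
	mu       sync.Mutex // stands in for lockForUpdate/unlockForUpdate on this store's name
	upstream fakeContents
}

func (m *fakeManager) LockForUpdate()   { m.mu.Lock() }
func (m *fakeManager) UnlockForUpdate() { m.mu.Unlock() }

func (m *fakeManager) updateWillFail(lastLock int) (fakeContents, bool) {
	if lastLock != m.upstream.lock {
		return m.upstream, true // someone in-process already moved upstream
	}
	return fakeContents{}, false
}

func (m *fakeManager) Update(lastLock int, newContents fakeContents) fakeContents {
	if lastLock != m.upstream.lock {
		return m.upstream // optimistic lock failure; caller must rebase
	}
	m.upstream = newContents
	return newContents
}

func main() {
	m := &fakeManager{}
	lastLock := m.upstream.lock

	// Per the comment on Update: the doomed-check and the write form one critical section.
	m.LockForUpdate()
	defer m.UnlockForUpdate()

	if cached, doomed := m.updateWillFail(lastLock); doomed {
		fmt.Println("pre-emptive lock failure; rebase onto", cached.root)
		return
	}
	landed := m.Update(lastLock, fakeContents{lock: lastLock + 1, root: "new-root"})
	fmt.Println("update landed:", landed.lock == lastLock+1)
}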


@@ -17,8 +17,7 @@ func newManifestCache(maxSize uint64) *manifestCache {
return &manifestCache{
maxSize: maxSize,
cache: map[string]manifestCacheEntry{},
locked: map[string]struct{}{},
cond: sync.NewCond(&sync.Mutex{}),
mu: &sync.Mutex{},
}
}
@@ -31,18 +30,17 @@ type manifestCacheEntry struct {
type manifestCache struct {
totalSize uint64
maxSize uint64
mu *sync.Mutex
lru list.List
cache map[string]manifestCacheEntry
locked map[string]struct{}
cond *sync.Cond
}
// Get() searches the cache for an entry. If it exists, it moves its
// lru entry to the back of the queue and returns (value, true). Otherwise, it
// returns (nil, false).
func (mc *manifestCache) Get(db string) (contents manifestContents, t time.Time, present bool) {
mc.cond.L.Lock()
defer mc.cond.L.Unlock()
mc.mu.Lock()
defer mc.mu.Unlock()
if entry, ok := mc.entry(db); ok {
contents, t, present = entry.contents, entry.t, true
@@ -50,32 +48,6 @@ func (mc *manifestCache) Get(db string) (contents manifestContents, t time.Time,
return
}
func (mc *manifestCache) Lock(db string) {
mc.cond.L.Lock()
defer mc.cond.L.Unlock()
for {
_, ok := mc.locked[db]
if ok {
mc.cond.Wait()
} else {
mc.locked[db] = struct{}{}
break
}
}
}
func (mc *manifestCache) Unlock(db string) {
mc.cond.L.Lock()
defer mc.cond.L.Unlock()
_, ok := mc.locked[db]
d.PanicIfFalse(ok)
delete(mc.locked, db)
mc.cond.Broadcast()
}
// entry() checks if the value is in the cache. If not in the cache, it returns an
// empty manifestCacheEntry and false. If it is in the cache, it moves it
// to the back of lru and returns the entry and true.
@@ -96,8 +68,8 @@ func (mc *manifestCache) entry(key string) (manifestCacheEntry, bool) {
// keep the total cache size below maxSize. |t| must be *prior* to initiating
// the call which read/wrote |contents|.
func (mc *manifestCache) Put(db string, contents manifestContents, t time.Time) {
mc.cond.L.Lock()
defer mc.cond.L.Unlock()
mc.mu.Lock()
defer mc.mu.Unlock()
if entry, ok := mc.entry(db); ok {
mc.totalSize -= entry.contents.size()


@@ -104,13 +104,13 @@ func TestChunkStoreManifestAppearsAfterConstruction(t *testing.T) {
func TestChunkStoreManifestFirstWriteByOtherProcess(t *testing.T) {
assert := assert.New(t)
fm := &fakeManifest{}
mm := cachingManifest{fm, newManifestCache(0)}
mm := manifestManager{fm, newManifestCache(0), newManifestLocks()}
p := newFakeTablePersister()
// Simulate another process writing a manifest behind store's back.
newRoot, chunks := interloperWrite(fm, p, []byte("new root"), []byte("hello2"), []byte("goodbye2"), []byte("badbye2"))
store := newNomsBlockStore(mm, p, newAsyncConjoiner(defaultMaxTables), defaultMemTableSize)
store := newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, defaultMemTableSize)
defer store.Close()
assert.Equal(newRoot, store.Root())
@@ -135,9 +135,9 @@ func TestChunkStoreCommitOptimisticLockFail(t *testing.T) {
func TestChunkStoreManifestPreemptiveOptimisticLockFail(t *testing.T) {
assert := assert.New(t)
fm := &fakeManifest{}
mm := cachingManifest{fm, newManifestCache(defaultManifestCacheSize)}
mm := manifestManager{fm, newManifestCache(defaultManifestCacheSize), newManifestLocks()}
p := newFakeTablePersister()
c := newAsyncConjoiner(defaultMaxTables)
c := inlineConjoiner{defaultMaxTables}
store := newNomsBlockStore(mm, p, c, defaultMemTableSize)
defer store.Close()
@@ -163,11 +163,87 @@ func TestChunkStoreManifestPreemptiveOptimisticLockFail(t *testing.T) {
assert.Equal(constants.NomsVersion, store.Version())
}
func TestChunkStoreCommitLocksOutFetch(t *testing.T) {
assert := assert.New(t)
fm := &fakeManifest{name: "foo"}
upm := &updatePreemptManifest{manifest: fm}
mm := manifestManager{upm, newManifestCache(defaultManifestCacheSize), newManifestLocks()}
p := newFakeTablePersister()
c := inlineConjoiner{defaultMaxTables}
store := newNomsBlockStore(mm, p, c, defaultMemTableSize)
defer store.Close()
// store.Commit() should lock out calls to mm.Fetch()
wg := sync.WaitGroup{}
fetched := manifestContents{}
upm.preUpdate = func() {
wg.Add(1)
go func() {
defer wg.Done()
_, fetched = mm.Fetch(nil)
}()
}
rootChunk := chunks.NewChunk([]byte("new root"))
store.Put(rootChunk)
assert.True(store.Commit(rootChunk.Hash(), store.Root()))
wg.Wait()
assert.Equal(store.Root(), fetched.root)
}
func TestChunkStoreSerializeCommits(t *testing.T) {
assert := assert.New(t)
fm := &fakeManifest{name: "foo"}
upm := &updatePreemptManifest{manifest: fm}
mc := newManifestCache(defaultManifestCacheSize)
l := newManifestLocks()
p := newFakeTablePersister()
c := inlineConjoiner{defaultMaxTables}
store := newNomsBlockStore(manifestManager{upm, mc, l}, p, c, defaultMemTableSize)
defer store.Close()
storeChunk := chunks.NewChunk([]byte("store"))
interloperChunk := chunks.NewChunk([]byte("interloper"))
updateCount := 0
interloper := newNomsBlockStore(
manifestManager{
updatePreemptManifest{fm, func() { updateCount++ }}, mc, l,
},
p,
c,
defaultMemTableSize)
defer interloper.Close()
wg := sync.WaitGroup{}
upm.preUpdate = func() {
wg.Add(1)
go func() {
defer wg.Done()
interloper.Put(interloperChunk)
assert.True(interloper.Commit(interloper.Root(), interloper.Root()))
}()
updateCount++
}
store.Put(storeChunk)
assert.True(store.Commit(store.Root(), store.Root()))
wg.Wait()
assert.Equal(2, updateCount)
assert.True(interloper.Has(storeChunk.Hash()))
assert.True(interloper.Has(interloperChunk.Hash()))
}
func makeStoreWithFakes(t *testing.T) (fm *fakeManifest, p tablePersister, store *NomsBlockStore) {
fm = &fakeManifest{}
mm := cachingManifest{fm, newManifestCache(0)}
mm := manifestManager{fm, newManifestCache(0), newManifestLocks()}
p = newFakeTablePersister()
store = newNomsBlockStore(mm, p, newAsyncConjoiner(defaultMaxTables), 0)
store = newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, 0)
return
}


@@ -87,7 +87,7 @@ func TestStats(t *testing.T) {
assert.Equal(uint64(60), stats(store).FileBytesPerRead.Sum())
// Force a conjoin
store.c = newAsyncConjoiner(2)
store.c = inlineConjoiner{2}
store.Put(c4)
store.Commit(store.Root(), store.Root())
store.Put(c5)


@@ -34,31 +34,28 @@ const (
var (
cacheOnce = sync.Once{}
globalIndexCache *indexCache
makeCachingManifest func(manifest) cachingManifest
makeManifestManager func(manifest) manifestManager
globalFDCache *fdCache
globalConjoiner conjoiner
)
func makeGlobalCaches() {
globalIndexCache = newIndexCache(defaultIndexCacheSize)
globalFDCache = newFDCache(defaultMaxTables)
globalConjoiner = newAsyncConjoiner(defaultMaxTables)
manifestCache := newManifestCache(defaultManifestCacheSize)
makeCachingManifest = func(mm manifest) cachingManifest { return cachingManifest{mm, manifestCache} }
manifestLocks := newManifestLocks()
makeManifestManager = func(m manifest) manifestManager { return manifestManager{m, manifestCache, manifestLocks} }
}
type NomsBlockStore struct {
mm cachingManifest
p tablePersister
c conjoiner
manifestLock addr
nomsVersion string
mm manifestManager
p tablePersister
c conjoiner
mu sync.RWMutex // protects the following state
mt *memTable
tables tableSet
root hash.Hash
mu sync.RWMutex // protects the following state
mt *memTable
tables tableSet
upstream manifestContents
mtSize uint64
putCount uint64
@@ -78,47 +75,45 @@ func NewAWSStore(table, ns, bucket string, s3 s3svc, ddb ddbsvc, memTableSize ui
make(chan struct{}, 32),
nil,
}
mm := makeCachingManifest(newDynamoManifest(table, ns, ddb))
return newNomsBlockStore(mm, p, globalConjoiner, memTableSize)
mm := makeManifestManager(newDynamoManifest(table, ns, ddb))
return newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
}
func NewLocalStore(dir string, memTableSize uint64) *NomsBlockStore {
cacheOnce.Do(makeGlobalCaches)
d.PanicIfError(checkDir(dir))
mm := makeCachingManifest(fileManifest{dir})
mm := makeManifestManager(fileManifest{dir})
p := newFSTablePersister(dir, globalFDCache, globalIndexCache)
return newNomsBlockStore(mm, p, globalConjoiner, memTableSize)
return newNomsBlockStore(mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
}
func newNomsBlockStore(mm cachingManifest, p tablePersister, c conjoiner, memTableSize uint64) *NomsBlockStore {
func newNomsBlockStore(mm manifestManager, p tablePersister, c conjoiner, memTableSize uint64) *NomsBlockStore {
if memTableSize == 0 {
memTableSize = defaultMemTableSize
}
nbs := &NomsBlockStore{
mm: mm,
p: p,
c: c,
tables: newTableSet(p),
nomsVersion: constants.NomsVersion,
mtSize: memTableSize,
stats: NewStats(),
mm: mm,
p: p,
c: c,
tables: newTableSet(p),
upstream: manifestContents{vers: constants.NomsVersion},
mtSize: memTableSize,
stats: NewStats(),
}
t1 := time.Now()
defer func() {
nbs.stats.OpenLatency.SampleTimeSince(t1)
}()
defer nbs.stats.OpenLatency.SampleTimeSince(t1)
if exists, contents := nbs.mm.ParseIfExists(nbs.stats, nil); exists {
nbs.nomsVersion, nbs.manifestLock, nbs.root = contents.vers, contents.lock, contents.root
if exists, contents := nbs.mm.Fetch(nbs.stats); exists {
nbs.upstream = contents
nbs.tables = nbs.tables.Rebase(contents.specs)
}
return nbs
}
func newNomsBlockStoreWithContents(mm cachingManifest, mc manifestContents, p tablePersister, c conjoiner, memTableSize uint64) *NomsBlockStore {
func newNomsBlockStoreWithContents(mm manifestManager, mc manifestContents, p tablePersister, c conjoiner, memTableSize uint64) *NomsBlockStore {
if memTableSize == 0 {
memTableSize = defaultMemTableSize
}
@@ -129,10 +124,8 @@ func newNomsBlockStoreWithContents(mm cachingManifest, mc manifestContents, p ta
mtSize: memTableSize,
stats: NewStats(),
nomsVersion: mc.vers,
manifestLock: mc.lock,
root: mc.root,
tables: newTableSet(p).Rebase(mc.specs),
upstream: mc,
tables: newTableSet(p).Rebase(mc.specs),
}
}
@@ -353,8 +346,8 @@ func toHasRecords(hashes hash.HashSet) []hasRecord {
func (nbs *NomsBlockStore) Rebase() {
nbs.mu.Lock()
defer nbs.mu.Unlock()
if exists, contents := nbs.mm.ParseIfExists(nbs.stats, nil); exists {
nbs.nomsVersion, nbs.manifestLock, nbs.root = contents.vers, contents.lock, contents.root
if exists, contents := nbs.mm.Fetch(nbs.stats); exists {
nbs.upstream = contents
nbs.tables = nbs.tables.Rebase(contents.specs)
}
}
@@ -362,19 +355,17 @@ func (nbs *NomsBlockStore) Rebase() {
func (nbs *NomsBlockStore) Root() hash.Hash {
nbs.mu.RLock()
defer nbs.mu.RUnlock()
return nbs.root
return nbs.upstream.root
}
func (nbs *NomsBlockStore) Commit(current, last hash.Hash) bool {
t1 := time.Now()
defer func() {
nbs.stats.CommitLatency.SampleTimeSince(t1)
}()
defer nbs.stats.CommitLatency.SampleTimeSince(t1)
anyPossiblyNovelChunks := func() bool {
nbs.mu.Lock()
defer nbs.mu.Unlock()
return nbs.mt != nil || len(nbs.tables.novel) > 0
return nbs.mt != nil || nbs.tables.Novel() > 0
}
if !anyPossiblyNovelChunks() && current == last {
@@ -382,6 +373,8 @@ func (nbs *NomsBlockStore) Commit(current, last hash.Hash) bool {
return true
}
nbs.mm.LockForUpdate()
defer nbs.mm.UnlockForUpdate()
for {
if err := nbs.updateManifest(current, last); err == nil {
return true
@@ -400,13 +393,15 @@ var (
func (nbs *NomsBlockStore) updateManifest(current, last hash.Hash) error {
nbs.mu.Lock()
defer nbs.mu.Unlock()
if nbs.root != last {
if nbs.upstream.root != last {
return errLastRootMismatch
}
nbs.mm.LockOutFetch()
defer nbs.mm.AllowFetch()
handleOptimisticLockFailure := func(upstream manifestContents) error {
nbs.manifestLock = upstream.lock
nbs.root = upstream.root
nbs.upstream = upstream
nbs.tables = nbs.tables.Rebase(upstream.specs)
if last != upstream.root {
@@ -415,9 +410,9 @@ func (nbs *NomsBlockStore) updateManifest(current, last hash.Hash) error {
return errOptimisticLockFailedTables
}
if upstream, doomed := nbs.mm.updateWillFail(nbs.manifestLock); doomed {
if cached, doomed := nbs.mm.updateWillFail(nbs.upstream.lock); doomed {
// Pre-emptive optimistic lock failure. Someone else in-process moved the root, the set of tables, or both out from under us.
return handleOptimisticLockFailure(upstream)
return handleOptimisticLockFailure(cached)
}
if nbs.mt != nil && nbs.mt.count() > 0 {
@@ -426,13 +421,8 @@ func (nbs *NomsBlockStore) updateManifest(current, last hash.Hash) error {
}
if nbs.c.ConjoinRequired(nbs.tables) {
nbs.c.Conjoin(nbs.mm, nbs.p, nbs.tables.Novel(), nbs.stats)
exists, upstream := nbs.mm.ParseIfExists(nbs.stats, nil)
d.PanicIfFalse(exists)
nbs.manifestLock = upstream.lock
nbs.root = upstream.root
nbs.tables = nbs.tables.Rebase(upstream.specs)
nbs.upstream = nbs.c.Conjoin(nbs.upstream, nbs.mm, nbs.p, nbs.stats)
nbs.tables = nbs.tables.Rebase(nbs.upstream.specs)
return errOptimisticLockFailedTables
}
@@ -443,19 +433,19 @@ func (nbs *NomsBlockStore) updateManifest(current, last hash.Hash) error {
lock: generateLockHash(current, specs),
specs: specs,
}
upstream := nbs.mm.Update(nbs.manifestLock, newContents, nbs.stats, nil)
upstream := nbs.mm.Update(nbs.upstream.lock, newContents, nbs.stats, nil)
if newContents.lock != upstream.lock {
// Optimistic lock failure. Someone else moved the root, the set of tables, or both out from under us.
return handleOptimisticLockFailure(upstream)
}
nbs.upstream = newContents
nbs.tables = nbs.tables.Flatten()
nbs.nomsVersion, nbs.manifestLock, nbs.root = constants.NomsVersion, newContents.lock, current
return nil
}
func (nbs *NomsBlockStore) Version() string {
return nbs.nomsVersion
return nbs.upstream.vers
}
func (nbs *NomsBlockStore) Close() (err error) {