NBS: Fragmentation tool using new estimate of locality (#3658)

The new version of this tool now estimates the locality of a DB written using the "grandchild" strategy implemented by types.ValueStore. It does do by dividing each level of the graph up into groups that are roughly the size of the branching factor of that level, and then calculating how many physical reads are needed to read each group. In the case of perfect locality, each group could be read in a single physical read, so that's what the tool uses as its estimate of the optimal case. Toward #2968
2026-01-26 18:59:08 -06:00 · 2017-09-11 15:34:17 -07:00
parent a18bd984d1
commit 14e95379af
3 changed files with 83 additions and 69 deletions
--- a/go/nbs/frag/main.go
+++ b/go/nbs/frag/main.go
@@ -10,7 +10,6 @@ import (
 	"os"
 	"sync"

-	"github.com/attic-labs/noms/go/d"
 	"github.com/attic-labs/noms/go/datas"
 	"github.com/attic-labs/noms/go/hash"
 	"github.com/attic-labs/noms/go/nbs"
@@ -63,72 +62,85 @@ func main() {

 	defer profile.MaybeStartProfile().Stop()

-	type record struct {
-		count, calc int
-		split       bool
-	}
+	height := types.NewRef(db.Datasets()).Height()
+	fmt.Println("Store is of height", height)
+	fmt.Println("| Height |   Nodes | Children | Branching | Groups | Reads | Pruned |")
+	fmt.Println("+--------+---------+----------+-----------+--------+-------+--------+")
+	chartFmt := "| %6d | %7d | %8d | %9d | %6d | %5d | %6d |\n"

-	concurrency := 32
-	refs := make(chan types.Ref, concurrency)
-	numbers := make(chan record, concurrency)
-	wg := sync.WaitGroup{}
-	mu := sync.Mutex{}
-	visitedRefs := map[hash.Hash]bool{}
+	var optimal, sum int
+	visited := map[hash.Hash]bool{}

-	for i := 0; i < concurrency; i++ {
-		go func() {
-			for ref := range refs {
-				mu.Lock()
-				visited := visitedRefs[ref.TargetHash()]
-				visitedRefs[ref.TargetHash()] = true
-				mu.Unlock()
-
-				if !visited {
-					v := ref.TargetValue(db)
-					d.Chk.NotNil(v)
-
-					children := types.RefSlice{}
-					hashes := hash.HashSlice{}
-					v.WalkRefs(func(r types.Ref) {
-						hashes = append(hashes, r.TargetHash())
-						if r.Height() > 1 { // leaves are uninteresting, so skip them.
-							children = append(children, r)
-						}
-					})
-
-					reads, split := store.CalcReads(hashes.HashSet(), 0)
-					numbers <- record{count: 1, calc: reads, split: split}
-
-					wg.Add(len(children))
-					go func() {
-						for _, r := range children {
-							refs <- r
-						}
-					}()
-				}
-				wg.Done()
-			}
-		}()
-	}
-
-	wg.Add(1)
-	refs <- types.NewRef(db.Datasets())
-	go func() {
-		wg.Wait()
-		close(refs)
-		close(numbers)
-	}()
-
-	count, calc, splits := 0, 0, 0
-	for rec := range numbers {
-		count += rec.count
-		calc += rec.calc
-		if rec.split {
-			splits++
+	current := hash.HashSlice{store.Root()}
+	for numNodes := 1; numNodes > 0; numNodes = len(current) {
+		// Start by reading the values of the current level of the graph
+		currentValues := make(map[hash.Hash]types.Value, len(current))
+		found := make(chan types.Value)
+		go func() { defer close(found); db.ReadManyValues(current.HashSet(), found) }()
+		for v := range found {
+			h := v.Hash()
+			currentValues[h] = v
+			visited[h] = true
 		}
+
+		// Iterate all the Values at the current level of the graph IN ORDER (as specified in |current|) and gather up their embedded refs. We'll build two different lists of hash.Hashes during this process:
+		// 1) An ordered list of ALL the children of the current level.
+		// 2) An ordered list of the child nodes that contain refs to chunks we haven't yet visited. This *excludes* already-visted nodes and nodes without children.
+		// We'll use 1) to get an estimate of how good the locality is among the children of the current level, and then 2) to descend to the next level of the graph.
+		orderedChildren := hash.HashSlice{}
+		nextLevel := hash.HashSlice{}
+		for _, h := range current {
+			currentValues[h].WalkRefs(func(r types.Ref) {
+				target := r.TargetHash()
+				orderedChildren = append(orderedChildren, target)
+				if !visited[target] && r.Height() > 1 {
+					nextLevel = append(nextLevel, target)
+				}
+			})
+		}
+
+		// Estimate locality among the members of |orderedChildren| by splitting into groups that are roughly |branchFactor| in size and calling CalcReads on each group. With perfect locality, we'd expect that each group could be read in a single physical read.
+		numChildren := len(orderedChildren)
+		branchFactor := numChildren / numNodes
+		numGroups := numNodes
+		if numChildren%numNodes != 0 {
+			numGroups++
+		}
+		wg := &sync.WaitGroup{}
+		reads := make([]int, numGroups)
+		for i := 0; i < numGroups; i++ {
+			wg.Add(1)
+			if i+1 == numGroups { // last group
+				go func(i int) {
+					defer wg.Done()
+					reads[i], _ = store.CalcReads(orderedChildren[i*branchFactor:].HashSet(), 0)
+				}(i)
+				continue
+			}
+			go func(i int) {
+				defer wg.Done()
+				reads[i], _ = store.CalcReads(orderedChildren[i*branchFactor:(i+1)*branchFactor].HashSet(), 0)
+			}(i)
+		}
+
+		wg.Wait()
+
+		sumOfReads := sumInts(reads)
+		fmt.Printf(chartFmt, height, numNodes, numChildren, branchFactor, numGroups, sumOfReads, numChildren-len(nextLevel))
+
+		sum += sumOfReads
+		optimal += numGroups
+		height--
+		current = nextLevel
 	}

-	fmt.Println("calculated optimal Reads", count)
-	fmt.Printf("calculated actual Reads %d, including %d splits across tables\n", calc, splits)
-	fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(calc)/float64(count))
+	fmt.Printf("\nVisited %d chunk groups\n", optimal)
+	fmt.Printf("Reading DB %s requires %.01fx optimal number of reads\n", *dbName, float64(sum)/float64(optimal))
+}
+
+func sumInts(nums []int) (sum int) {
+	for _, n := range nums {
+		sum += n
+	}
+	return
 }
--- a/go/nbs/table_reader.go
+++ b/go/nbs/table_reader.go
@@ -30,6 +30,7 @@ type tableReaderAt interface {
 }

 // tableReader implements get & has queries against a single nbs table. goroutine safe.
+// |blockSize| refers to the block-size of the underlying storage. We assume that, each time we read data, we actually have to read in blocks of this size. So, we're willing to tolerate up to |blockSize| overhead each time we read a chunk, if it helps us group more chunks together into a single read request to backing storage.
 type tableReader struct {
 	tableIndex
 	r         tableReaderAt
--- a/go/nbs/table_set.go
+++ b/go/nbs/table_set.go
@@ -86,16 +86,17 @@ func (ts tableSet) getMany(reqs []getRecord, foundChunks chan *chunks.Chunk, wg
 }

 func (ts tableSet) calcReads(reqs []getRecord, blockSize uint64) (reads int, split, remaining bool) {
-	f := func(css chunkSources) (reads int, split, remaining bool) {
+	f := func(css chunkSources) (int, bool, bool) {
+		reads, split := 0, false
 		for _, haver := range css {
-			rds, remaining := haver.calcReads(reqs, blockSize)
+			rds, rmn := haver.calcReads(reqs, blockSize)
 			reads += rds
-			if !remaining {
-				return reads, split, remaining
+			if !rmn {
+				return reads, split, false
 			}
 			split = true
 		}
-		return reads, split, remaining
+		return reads, split, true
 	}
 	reads, split, remaining = f(ts.novel)
 	if remaining {