4.2 KiB
Dataset pulling algorithm
The approach is to explore the chunk graph of both sink and source in order of decreasing ref-height. As the code walks, it uses the knowledge gained about which chunks are present in the sink to both prune the source-graph-walk and build up a set of hints that can be sent to a remote Database to aid in chunk validation.
Basic algorithm
- let `sink` be the sink database
- let `source` be the source database
- let `snkQ` and `srcQ` be priority queues of `Ref` prioritized by highest `Ref.height`
- let `hints` be a map of `hash => hash`
- let `reachableChunks` be a set of hashes
- let `snkHdRef` be the ref (of `Commit`) of the head of the sink dataset
- let `srcHdRef` be the ref of the source `Commit`, which must descend from the `Commit` indicated by `snkHdRef`
- let `traverseSource(srcRef, srcQ, sink, source, reachableChunks)` be
  - pop `srcRef` from `srcQ`
  - if `!sink.has(srcRef)`
    - let `c` = `source.batchStore().Get(srcRef.targetHash)`
    - let `v` = `types.DecodeValue(c, source)`
    - insert all child refs, `cr`, from `v` into `srcQ` and into `reachableChunks`
    - `sink.batchStore().Put(c, srcRef.height, no hints)`
      - (hints will all be gathered and handed to `sink.batchStore` at the end)
- let `traverseSink(snkRef, snkQ, sink, hints)` be
  - pop `snkRef` from `snkQ`
  - if `snkRef.height > 1`
    - let `v` = `sink.readValue(snkRef.targetHash)`
    - insert all child refs, `cr`, from `v` into `snkQ` and set `hints[cr] = snkRef`
- let `traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)` be
  - pop `comRef` from both `snkQ` and `srcQ`
  - if `comRef.height > 1`
    - if `comRef` is a `Ref` of `Commit`
      - let `v` = `sink.readValue(comRef.targetHash)`
      - if `comRef == snkHdRef`
        - ignore all parent refs
        - insert each other child ref `cr` from `v` into `snkQ` only, set `hints[cr] = comRef`
      - else
        - insert each child ref `cr` from `v` into both `snkQ` and `srcQ`, set `hints[cr] = comRef`
- let `pull(source, sink, srcHdRef, snkHdRef)` be
  - insert `snkHdRef` into `snkQ` and `srcHdRef` into `srcQ`
  - create empty `hints` and `reachableChunks`
  - while `srcQ` is non-empty
    - let `srcHt` and `snkHt` be the respective heights of the top `Ref` in each of `srcQ` and `snkQ`
    - if `srcHt > snkHt`, for every `ref` in `srcQ` which is of greater height than `snkHt`
      - `traverseSource(ref, srcQ, sink, source, reachableChunks)`
    - else if `snkHt > srcHt`, for every `ref` in `snkQ` which is of greater height than `srcHt`
      - `traverseSink(ref, snkQ, sink, hints)`
    - else
      - for every `comRef` that is common to `snkQ` and `srcQ` and is of height `srcHt` (and `snkHt`)
        - `traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)`
      - for every `ref` in `srcQ` which is of height `srcHt`
        - `traverseSource(ref, srcQ, sink, source, reachableChunks)`
      - for every `ref` in `snkQ` which is of height `snkHt`
        - `traverseSink(ref, snkQ, sink, hints)`
  - for all `hash` in `reachableChunks`
    - `sink.batchStore().addHint(hints[hash])`
Isomorphic, but less clear, algorithm
- let all identifiers be as above
- let `traverseSource`, `traverseSink`, and `traverseCommon` be as above
- let `higherThan(refA, refB)` be
  - if `refA.height == refB.height`
    - return `refA.targetHash < refB.targetHash`
  - return `refA.height > refB.height`
- let `pull(source, sink, srcHdRef, snkHdRef)` be
  - insert `snkHdRef` into `snkQ` and `srcHdRef` into `srcQ`
  - create empty `hints` and `reachableChunks`
  - while `srcQ` is non-empty
    - if `snkQ` is empty
      - pop `ref` from `srcQ`
      - `traverseSource(ref, srcQ, sink, source, reachableChunks)`
    - else if `higherThan(head of srcQ, head of snkQ)`
      - pop `ref` from `srcQ`
      - `traverseSource(ref, srcQ, sink, source, reachableChunks)`
    - else if `higherThan(head of snkQ, head of srcQ)`
      - pop `ref` from `snkQ`
      - `traverseSink(ref, snkQ, sink, hints)`
    - else, heads of both queues are the same
      - pop `comRef` from `snkQ` and `srcQ`
      - `traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)`
  - for all `hash` in `reachableChunks`
    - `sink.batchStore().addHint(hints[hash])`