use left right diff instead of top down due to bugs in top down (#1457)

This commit is contained in:
Brian Hendriks
2021-03-18 10:53:33 -07:00
committed by GitHub
parent 05e316a8fa
commit 3c463827f4
5 changed files with 4 additions and 168 deletions
+2 -2
View File
@@ -305,7 +305,7 @@ func (d differ) diffMapsInRange(ctx context.Context, p types.Path, v1, v2 types.
panic("not implemented")
}
return v2.DiffHybrid(ctx, v1, cc)
return v2.Diff(ctx, v1, cc)
}
},
func(k types.Value) (types.Value, error) {
@@ -364,7 +364,7 @@ func (d differ) diffSets(ctx context.Context, p types.Path, v1, v2 types.Set) er
if d.leftRight {
return v2.DiffLeftRight(ctx, v1, cc)
} else {
return v2.DiffHybrid(ctx, v1, cc)
return v2.Diff(ctx, v1, cc)
}
},
func(k types.Value) (types.Value, error) { return k, nil },
+1 -10
View File
@@ -191,16 +191,7 @@ func (m Map) Diff(ctx context.Context, last Map, changes chan<- ValueChanged) er
if m.Equals(last) {
return nil
}
return orderedSequenceDiffTopDown(ctx, last.orderedSequence, m.orderedSequence, changes)
}
// DiffHybrid computes the diff from |last| to |m| using a hybrid algorithm
// which balances returning results early vs completing quickly, if possible.
func (m Map) DiffHybrid(ctx context.Context, last Map, changes chan<- ValueChanged) error {
if m.Equals(last) {
return nil
}
return orderedSequenceDiffBest(ctx, last.orderedSequence, m.orderedSequence, changes)
return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, m.orderedSequence, changes)
}
// DiffLeftRight computes the diff from |last| to |m| using a left-to-right
-140
View File
@@ -24,11 +24,7 @@ package types
import (
"context"
"golang.org/x/sync/errgroup"
"github.com/dolthub/dolt/go/libraries/utils/async"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/util/functions"
)
type DiffChangeType uint8
@@ -53,142 +49,6 @@ func sendChange(ctx context.Context, changes chan<- ValueChanged, change ValueCh
}
}
// Streams the diff from |last| to |current| into |changes|, using both left-right and top-down approach in parallel.
// The left-right diff is expected to return results earlier, whereas the top-down approach is faster overall. This "best" algorithm runs both:
// - early results from left-right are sent to |changes|.
// - if/when top-down catches up, left-right is stopped and the rest of the changes are streamed from top-down.
func orderedSequenceDiffBest(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
lrChanges := make(chan ValueChanged)
tdChanges := make(chan ValueChanged)
eg, ctx := errgroup.WithContext(ctx)
lrCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
defer close(lrChanges)
return orderedSequenceDiffLeftRight(ctx, last, current, lrChanges)
})
tdCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
defer close(tdChanges)
return orderedSequenceDiffTopDown(ctx, last, current, tdChanges)
})
eg.Go(func() error {
defer lrCancel()
defer tdCancel()
// Stream left-right changes while the top-down diff algorithm catches up.
var lrChangeCount, tdChangeCount int
for multiplexing := true; multiplexing; {
select {
case c, ok := <-lrChanges:
if !ok {
// Left-to-right diff completed.
return nil
}
lrChangeCount++
if err := sendChange(ctx, changes, c); err != nil {
return err
}
case c, ok := <-tdChanges:
if !ok {
// Top-down diff completed.
return nil
}
tdChangeCount++
if tdChangeCount > lrChangeCount {
// Top-down diff has overtaken left-right diff.
if err := sendChange(ctx, changes, c); err != nil {
return err
}
lrCancel()
multiplexing = false
}
}
}
for c := range tdChanges {
if err := sendChange(ctx, changes, c); err != nil {
return err
}
}
return nil
})
return eg.Wait()
}
// Streams the diff from |last| to |current| into |changes|, using a top-down approach.
// Top-down is parallel and efficiently returns the complete diff, but compared to left-right it's slow to start streaming changes.
func orderedSequenceDiffTopDown(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
return orderedSequenceDiffInternalNodes(ctx, last, current, changes)
}
// TODO - something other than the literal edit-distance, which is way too much cpu work for this case - https://github.com/attic-labs/noms/issues/2027
func orderedSequenceDiffInternalNodes(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
if ctx.Err() != nil {
return ctx.Err()
}
if last.treeLevel() > current.treeLevel() {
lastChild, err := last.getCompositeChildSequence(ctx, 0, uint64(last.seqLen()))
if err != nil {
return err
}
return orderedSequenceDiffInternalNodes(ctx, lastChild.(orderedSequence), current, changes)
}
if current.treeLevel() > last.treeLevel() {
currentChild, err := current.getCompositeChildSequence(ctx, 0, uint64(current.seqLen()))
if err != nil {
return err
}
return orderedSequenceDiffInternalNodes(ctx, last, currentChild.(orderedSequence), changes)
}
if last.isLeaf() && current.isLeaf() {
return orderedSequenceDiffLeftRight(ctx, last, current, changes)
}
compareFn := last.getCompareFn(current)
initialSplices, err := calcSplices(uint64(last.seqLen()), uint64(current.seqLen()), DEFAULT_MAX_SPLICE_MATRIX_SIZE,
func(i uint64, j uint64) (bool, error) { return compareFn(int(i), int(j)) })
if err != nil {
return err
}
for _, splice := range initialSplices {
var lastChild, currentChild orderedSequence
err := functions.All(
func() error {
seq, err := last.getCompositeChildSequence(ctx, splice.SpAt, splice.SpRemoved)
if err != nil {
return err
}
lastChild = seq.(orderedSequence)
return nil
},
func() error {
seq, err := current.getCompositeChildSequence(ctx, splice.SpFrom, splice.SpAdded)
if err != nil {
return err
}
currentChild = seq.(orderedSequence)
return nil
},
)
if err != nil {
return err
}
if err := orderedSequenceDiffInternalNodes(ctx, lastChild, currentChild, changes); err != nil {
return err
}
}
return nil
}
// Streams the diff from |last| to |current| into |changes|, using a left-right approach.
// Left-right immediately descends to the first change and starts streaming changes, but compared to top-down it's serial and much slower to calculate the full diff.
func orderedSequenceDiffLeftRight(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
@@ -111,9 +111,7 @@ func (suite *diffTestSuite) TestDiff() {
}
runTest := func(name string, vf valFn, cf colFn) {
//runTestDf(name, vf, cf, orderedSequenceDiffTopDown)
runTestDf(name, vf, cf, orderedSequenceDiffLeftRight)
//runTestDf(name, vf, cf, orderedSequenceDiffBest)
}
newSetAsCol := func(vals []Value) (Collection, error) { return NewSet(context.Background(), vs, vals...) }
@@ -246,9 +244,7 @@ func TestOrderedSequencesDiffCloseWithoutReading(t *testing.T) {
assert.Equal(t, err, context.Canceled)
}
t.Run("Best", func(t *testing.T) { runTest(t, orderedSequenceDiffBest) })
t.Run("LeftRight", func(t *testing.T) { runTest(t, orderedSequenceDiffLeftRight) })
t.Run("TopDown", func(t *testing.T) { runTest(t, orderedSequenceDiffTopDown) })
}
func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
@@ -320,7 +316,5 @@ func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
require.NoError(t, err)
}
runTest(orderedSequenceDiffBest)
runTest(orderedSequenceDiffLeftRight)
runTest(orderedSequenceDiffTopDown)
}
+1 -10
View File
@@ -127,16 +127,7 @@ func (s Set) Diff(ctx context.Context, last Set, changes chan<- ValueChanged) er
if s.Equals(last) {
return nil
}
return orderedSequenceDiffTopDown(ctx, last.orderedSequence, s.orderedSequence, changes)
}
// DiffHybrid computes the diff from |last| to |s| using a hybrid algorithm
// which balances returning results early vs completing quickly, if possible.
func (s Set) DiffHybrid(ctx context.Context, last Set, changes chan<- ValueChanged) error {
if s.Equals(last) {
return nil
}
return orderedSequenceDiffBest(ctx, last.orderedSequence, s.orderedSequence, changes)
return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, s.orderedSequence, changes)
}
// DiffLeftRight computes the diff from |last| to |s| using a left-to-right