use left right diff instead of top down due to bugs in top down (#1457)

2026-04-22 02:50:04 -05:00 · 2021-03-18 10:53:33 -07:00
parent 05e316a8fa
commit 3c463827f4
5 changed files with 4 additions and 168 deletions
@@ -305,7 +305,7 @@ func (d differ) diffMapsInRange(ctx context.Context, p types.Path, v1, v2 types.
 					panic("not implemented")
 				}

-				return v2.DiffHybrid(ctx, v1, cc)
+				return v2.Diff(ctx, v1, cc)
 			}
 		},
 		func(k types.Value) (types.Value, error) {
@@ -364,7 +364,7 @@ func (d differ) diffSets(ctx context.Context, p types.Path, v1, v2 types.Set) er
 			if d.leftRight {
 				return v2.DiffLeftRight(ctx, v1, cc)
 			} else {
-				return v2.DiffHybrid(ctx, v1, cc)
+				return v2.Diff(ctx, v1, cc)
 			}
 		},
 		func(k types.Value) (types.Value, error) { return k, nil },
@@ -191,16 +191,7 @@ func (m Map) Diff(ctx context.Context, last Map, changes chan<- ValueChanged) er
 	if m.Equals(last) {
 		return nil
 	}
-	return orderedSequenceDiffTopDown(ctx, last.orderedSequence, m.orderedSequence, changes)
-}
-
-// DiffHybrid computes the diff from |last| to |m| using a hybrid algorithm
-// which balances returning results early vs completing quickly, if possible.
-func (m Map) DiffHybrid(ctx context.Context, last Map, changes chan<- ValueChanged) error {
-	if m.Equals(last) {
-		return nil
-	}
-	return orderedSequenceDiffBest(ctx, last.orderedSequence, m.orderedSequence, changes)
+	return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, m.orderedSequence, changes)
 }

 // DiffLeftRight computes the diff from |last| to |m| using a left-to-right
@@ -24,11 +24,7 @@ package types
 import (
 	"context"

-	"golang.org/x/sync/errgroup"
-
-	"github.com/dolthub/dolt/go/libraries/utils/async"
 	"github.com/dolthub/dolt/go/store/d"
-	"github.com/dolthub/dolt/go/store/util/functions"
 )

 type DiffChangeType uint8
@@ -53,142 +49,6 @@ func sendChange(ctx context.Context, changes chan<- ValueChanged, change ValueCh
 	}
 }

-// Streams the diff from |last| to |current| into |changes|, using both left-right and top-down approach in parallel.
-// The left-right diff is expected to return results earlier, whereas the top-down approach is faster overall. This "best" algorithm runs both:
-// - early results from left-right are sent to |changes|.
-// - if/when top-down catches up, left-right is stopped and the rest of the changes are streamed from top-down.
-func orderedSequenceDiffBest(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
-	lrChanges := make(chan ValueChanged)
-	tdChanges := make(chan ValueChanged)
-
-	eg, ctx := errgroup.WithContext(ctx)
-
-	lrCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
-		defer close(lrChanges)
-		return orderedSequenceDiffLeftRight(ctx, last, current, lrChanges)
-	})
-	tdCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
-		defer close(tdChanges)
-		return orderedSequenceDiffTopDown(ctx, last, current, tdChanges)
-	})
-
-	eg.Go(func() error {
-		defer lrCancel()
-		defer tdCancel()
-
-		// Stream left-right changes while the top-down diff algorithm catches up.
-		var lrChangeCount, tdChangeCount int
-		for multiplexing := true; multiplexing; {
-			select {
-			case c, ok := <-lrChanges:
-				if !ok {
-					// Left-to-right diff completed.
-					return nil
-				}
-				lrChangeCount++
-				if err := sendChange(ctx, changes, c); err != nil {
-					return err
-				}
-			case c, ok := <-tdChanges:
-				if !ok {
-					// Top-down diff completed.
-					return nil
-				}
-				tdChangeCount++
-				if tdChangeCount > lrChangeCount {
-					// Top-down diff has overtaken left-right diff.
-					if err := sendChange(ctx, changes, c); err != nil {
-						return err
-					}
-					lrCancel()
-					multiplexing = false
-				}
-			}
-		}
-
-		for c := range tdChanges {
-			if err := sendChange(ctx, changes, c); err != nil {
-				return err
-			}
-		}
-
-		return nil
-	})
-
-	return eg.Wait()
-}
-
-// Streams the diff from |last| to |current| into |changes|, using a top-down approach.
-// Top-down is parallel and efficiently returns the complete diff, but compared to left-right it's slow to start streaming changes.
-func orderedSequenceDiffTopDown(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
-	return orderedSequenceDiffInternalNodes(ctx, last, current, changes)
-}
-
-// TODO - something other than the literal edit-distance, which is way too much cpu work for this case - https://github.com/attic-labs/noms/issues/2027
-func orderedSequenceDiffInternalNodes(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
-	if ctx.Err() != nil {
-		return ctx.Err()
-	}
-
-	if last.treeLevel() > current.treeLevel() {
-		lastChild, err := last.getCompositeChildSequence(ctx, 0, uint64(last.seqLen()))
-		if err != nil {
-			return err
-		}
-		return orderedSequenceDiffInternalNodes(ctx, lastChild.(orderedSequence), current, changes)
-	}
-
-	if current.treeLevel() > last.treeLevel() {
-		currentChild, err := current.getCompositeChildSequence(ctx, 0, uint64(current.seqLen()))
-		if err != nil {
-			return err
-		}
-		return orderedSequenceDiffInternalNodes(ctx, last, currentChild.(orderedSequence), changes)
-	}
-
-	if last.isLeaf() && current.isLeaf() {
-		return orderedSequenceDiffLeftRight(ctx, last, current, changes)
-	}
-
-	compareFn := last.getCompareFn(current)
-	initialSplices, err := calcSplices(uint64(last.seqLen()), uint64(current.seqLen()), DEFAULT_MAX_SPLICE_MATRIX_SIZE,
-		func(i uint64, j uint64) (bool, error) { return compareFn(int(i), int(j)) })
-	if err != nil {
-		return err
-	}
-
-	for _, splice := range initialSplices {
-		var lastChild, currentChild orderedSequence
-		err := functions.All(
-			func() error {
-				seq, err := last.getCompositeChildSequence(ctx, splice.SpAt, splice.SpRemoved)
-				if err != nil {
-					return err
-				}
-				lastChild = seq.(orderedSequence)
-				return nil
-			},
-			func() error {
-				seq, err := current.getCompositeChildSequence(ctx, splice.SpFrom, splice.SpAdded)
-				if err != nil {
-					return err
-				}
-				currentChild = seq.(orderedSequence)
-				return nil
-			},
-		)
-		if err != nil {
-			return err
-		}
-
-		if err := orderedSequenceDiffInternalNodes(ctx, lastChild, currentChild, changes); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
 // Streams the diff from |last| to |current| into |changes|, using a left-right approach.
 // Left-right immediately descends to the first change and starts streaming changes, but compared to top-down it's serial and much slower to calculate the full diff.
 func orderedSequenceDiffLeftRight(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
@@ -111,9 +111,7 @@ func (suite *diffTestSuite) TestDiff() {
 	}

 	runTest := func(name string, vf valFn, cf colFn) {
-		//runTestDf(name, vf, cf, orderedSequenceDiffTopDown)
 		runTestDf(name, vf, cf, orderedSequenceDiffLeftRight)
-		//runTestDf(name, vf, cf, orderedSequenceDiffBest)
 	}

 	newSetAsCol := func(vals []Value) (Collection, error) { return NewSet(context.Background(), vs, vals...) }
@@ -246,9 +244,7 @@ func TestOrderedSequencesDiffCloseWithoutReading(t *testing.T) {
 		assert.Equal(t, err, context.Canceled)
 	}

-	t.Run("Best", func(t *testing.T) { runTest(t, orderedSequenceDiffBest) })
 	t.Run("LeftRight", func(t *testing.T) { runTest(t, orderedSequenceDiffLeftRight) })
-	t.Run("TopDown", func(t *testing.T) { runTest(t, orderedSequenceDiffTopDown) })
 }

 func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
@@ -320,7 +316,5 @@ func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
 		require.NoError(t, err)
 	}

-	runTest(orderedSequenceDiffBest)
 	runTest(orderedSequenceDiffLeftRight)
-	runTest(orderedSequenceDiffTopDown)
 }
@@ -127,16 +127,7 @@ func (s Set) Diff(ctx context.Context, last Set, changes chan<- ValueChanged) er
 	if s.Equals(last) {
 		return nil
 	}
-	return orderedSequenceDiffTopDown(ctx, last.orderedSequence, s.orderedSequence, changes)
-}
-
-// DiffHybrid computes the diff from |last| to |s| using a hybrid algorithm
-// which balances returning results early vs completing quickly, if possible.
-func (s Set) DiffHybrid(ctx context.Context, last Set, changes chan<- ValueChanged) error {
-	if s.Equals(last) {
-		return nil
-	}
-	return orderedSequenceDiffBest(ctx, last.orderedSequence, s.orderedSequence, changes)
+	return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, s.orderedSequence, changes)
 }

 // DiffLeftRight computes the diff from |last| to |s| using a left-to-right