mirror of
https://github.com/dolthub/dolt.git
synced 2026-04-22 02:50:04 -05:00
use left right diff instead of top down due to bugs in top down (#1457)
This commit is contained in:
@@ -305,7 +305,7 @@ func (d differ) diffMapsInRange(ctx context.Context, p types.Path, v1, v2 types.
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
return v2.DiffHybrid(ctx, v1, cc)
|
||||
return v2.Diff(ctx, v1, cc)
|
||||
}
|
||||
},
|
||||
func(k types.Value) (types.Value, error) {
|
||||
@@ -364,7 +364,7 @@ func (d differ) diffSets(ctx context.Context, p types.Path, v1, v2 types.Set) er
|
||||
if d.leftRight {
|
||||
return v2.DiffLeftRight(ctx, v1, cc)
|
||||
} else {
|
||||
return v2.DiffHybrid(ctx, v1, cc)
|
||||
return v2.Diff(ctx, v1, cc)
|
||||
}
|
||||
},
|
||||
func(k types.Value) (types.Value, error) { return k, nil },
|
||||
|
||||
+1
-10
@@ -191,16 +191,7 @@ func (m Map) Diff(ctx context.Context, last Map, changes chan<- ValueChanged) er
|
||||
if m.Equals(last) {
|
||||
return nil
|
||||
}
|
||||
return orderedSequenceDiffTopDown(ctx, last.orderedSequence, m.orderedSequence, changes)
|
||||
}
|
||||
|
||||
// DiffHybrid computes the diff from |last| to |m| using a hybrid algorithm
|
||||
// which balances returning results early vs completing quickly, if possible.
|
||||
func (m Map) DiffHybrid(ctx context.Context, last Map, changes chan<- ValueChanged) error {
|
||||
if m.Equals(last) {
|
||||
return nil
|
||||
}
|
||||
return orderedSequenceDiffBest(ctx, last.orderedSequence, m.orderedSequence, changes)
|
||||
return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, m.orderedSequence, changes)
|
||||
}
|
||||
|
||||
// DiffLeftRight computes the diff from |last| to |m| using a left-to-right
|
||||
|
||||
@@ -24,11 +24,7 @@ package types
|
||||
import (
|
||||
"context"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/utils/async"
|
||||
"github.com/dolthub/dolt/go/store/d"
|
||||
"github.com/dolthub/dolt/go/store/util/functions"
|
||||
)
|
||||
|
||||
type DiffChangeType uint8
|
||||
@@ -53,142 +49,6 @@ func sendChange(ctx context.Context, changes chan<- ValueChanged, change ValueCh
|
||||
}
|
||||
}
|
||||
|
||||
// Streams the diff from |last| to |current| into |changes|, using both left-right and top-down approach in parallel.
|
||||
// The left-right diff is expected to return results earlier, whereas the top-down approach is faster overall. This "best" algorithm runs both:
|
||||
// - early results from left-right are sent to |changes|.
|
||||
// - if/when top-down catches up, left-right is stopped and the rest of the changes are streamed from top-down.
|
||||
func orderedSequenceDiffBest(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
|
||||
lrChanges := make(chan ValueChanged)
|
||||
tdChanges := make(chan ValueChanged)
|
||||
|
||||
eg, ctx := errgroup.WithContext(ctx)
|
||||
|
||||
lrCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
|
||||
defer close(lrChanges)
|
||||
return orderedSequenceDiffLeftRight(ctx, last, current, lrChanges)
|
||||
})
|
||||
tdCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
|
||||
defer close(tdChanges)
|
||||
return orderedSequenceDiffTopDown(ctx, last, current, tdChanges)
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
defer lrCancel()
|
||||
defer tdCancel()
|
||||
|
||||
// Stream left-right changes while the top-down diff algorithm catches up.
|
||||
var lrChangeCount, tdChangeCount int
|
||||
for multiplexing := true; multiplexing; {
|
||||
select {
|
||||
case c, ok := <-lrChanges:
|
||||
if !ok {
|
||||
// Left-to-right diff completed.
|
||||
return nil
|
||||
}
|
||||
lrChangeCount++
|
||||
if err := sendChange(ctx, changes, c); err != nil {
|
||||
return err
|
||||
}
|
||||
case c, ok := <-tdChanges:
|
||||
if !ok {
|
||||
// Top-down diff completed.
|
||||
return nil
|
||||
}
|
||||
tdChangeCount++
|
||||
if tdChangeCount > lrChangeCount {
|
||||
// Top-down diff has overtaken left-right diff.
|
||||
if err := sendChange(ctx, changes, c); err != nil {
|
||||
return err
|
||||
}
|
||||
lrCancel()
|
||||
multiplexing = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for c := range tdChanges {
|
||||
if err := sendChange(ctx, changes, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
// Streams the diff from |last| to |current| into |changes|, using a top-down approach.
|
||||
// Top-down is parallel and efficiently returns the complete diff, but compared to left-right it's slow to start streaming changes.
|
||||
func orderedSequenceDiffTopDown(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
|
||||
return orderedSequenceDiffInternalNodes(ctx, last, current, changes)
|
||||
}
|
||||
|
||||
// TODO - something other than the literal edit-distance, which is way too much cpu work for this case - https://github.com/attic-labs/noms/issues/2027
|
||||
func orderedSequenceDiffInternalNodes(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
if last.treeLevel() > current.treeLevel() {
|
||||
lastChild, err := last.getCompositeChildSequence(ctx, 0, uint64(last.seqLen()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return orderedSequenceDiffInternalNodes(ctx, lastChild.(orderedSequence), current, changes)
|
||||
}
|
||||
|
||||
if current.treeLevel() > last.treeLevel() {
|
||||
currentChild, err := current.getCompositeChildSequence(ctx, 0, uint64(current.seqLen()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return orderedSequenceDiffInternalNodes(ctx, last, currentChild.(orderedSequence), changes)
|
||||
}
|
||||
|
||||
if last.isLeaf() && current.isLeaf() {
|
||||
return orderedSequenceDiffLeftRight(ctx, last, current, changes)
|
||||
}
|
||||
|
||||
compareFn := last.getCompareFn(current)
|
||||
initialSplices, err := calcSplices(uint64(last.seqLen()), uint64(current.seqLen()), DEFAULT_MAX_SPLICE_MATRIX_SIZE,
|
||||
func(i uint64, j uint64) (bool, error) { return compareFn(int(i), int(j)) })
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, splice := range initialSplices {
|
||||
var lastChild, currentChild orderedSequence
|
||||
err := functions.All(
|
||||
func() error {
|
||||
seq, err := last.getCompositeChildSequence(ctx, splice.SpAt, splice.SpRemoved)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
lastChild = seq.(orderedSequence)
|
||||
return nil
|
||||
},
|
||||
func() error {
|
||||
seq, err := current.getCompositeChildSequence(ctx, splice.SpFrom, splice.SpAdded)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
currentChild = seq.(orderedSequence)
|
||||
return nil
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := orderedSequenceDiffInternalNodes(ctx, lastChild, currentChild, changes); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Streams the diff from |last| to |current| into |changes|, using a left-right approach.
|
||||
// Left-right immediately descends to the first change and starts streaming changes, but compared to top-down it's serial and much slower to calculate the full diff.
|
||||
func orderedSequenceDiffLeftRight(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
|
||||
|
||||
@@ -111,9 +111,7 @@ func (suite *diffTestSuite) TestDiff() {
|
||||
}
|
||||
|
||||
runTest := func(name string, vf valFn, cf colFn) {
|
||||
//runTestDf(name, vf, cf, orderedSequenceDiffTopDown)
|
||||
runTestDf(name, vf, cf, orderedSequenceDiffLeftRight)
|
||||
//runTestDf(name, vf, cf, orderedSequenceDiffBest)
|
||||
}
|
||||
|
||||
newSetAsCol := func(vals []Value) (Collection, error) { return NewSet(context.Background(), vs, vals...) }
|
||||
@@ -246,9 +244,7 @@ func TestOrderedSequencesDiffCloseWithoutReading(t *testing.T) {
|
||||
assert.Equal(t, err, context.Canceled)
|
||||
}
|
||||
|
||||
t.Run("Best", func(t *testing.T) { runTest(t, orderedSequenceDiffBest) })
|
||||
t.Run("LeftRight", func(t *testing.T) { runTest(t, orderedSequenceDiffLeftRight) })
|
||||
t.Run("TopDown", func(t *testing.T) { runTest(t, orderedSequenceDiffTopDown) })
|
||||
}
|
||||
|
||||
func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
|
||||
@@ -320,7 +316,5 @@ func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
runTest(orderedSequenceDiffBest)
|
||||
runTest(orderedSequenceDiffLeftRight)
|
||||
runTest(orderedSequenceDiffTopDown)
|
||||
}
|
||||
|
||||
+1
-10
@@ -127,16 +127,7 @@ func (s Set) Diff(ctx context.Context, last Set, changes chan<- ValueChanged) er
|
||||
if s.Equals(last) {
|
||||
return nil
|
||||
}
|
||||
return orderedSequenceDiffTopDown(ctx, last.orderedSequence, s.orderedSequence, changes)
|
||||
}
|
||||
|
||||
// DiffHybrid computes the diff from |last| to |s| using a hybrid algorithm
|
||||
// which balances returning results early vs completing quickly, if possible.
|
||||
func (s Set) DiffHybrid(ctx context.Context, last Set, changes chan<- ValueChanged) error {
|
||||
if s.Equals(last) {
|
||||
return nil
|
||||
}
|
||||
return orderedSequenceDiffBest(ctx, last.orderedSequence, s.orderedSequence, changes)
|
||||
return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, s.orderedSequence, changes)
|
||||
}
|
||||
|
||||
// DiffLeftRight computes the diff from |last| to |s| using a left-to-right
|
||||
|
||||
Reference in New Issue
Block a user