Add considerAllRowsModified option to Differ.

This is useful for three way merges: in the presence of a schema change, we want every key in a diff to be considered modified, even if the tuple value doesn't change.
This commit is contained in:
Nick Tobey
2023-11-21 14:30:57 -08:00
parent b3943e5397
commit a69ac26d7c
4 changed files with 27 additions and 14 deletions

View File

@@ -36,10 +36,15 @@ type Diff struct {
type DiffFn func(context.Context, Diff) error
// Differ computes the diff between two prolly trees.
// If `considerAllRowsModified` is true, it will consider every leaf to be modified and generate a diff for every leaf. (This
// is useful in cases where the schema has changed and we want to consider a leaf changed even if the byte representation
// of the leaf is the same.
type Differ[K ~[]byte, O Ordering[K]] struct {
from, to *cursor
fromStop, toStop *cursor
order O
from, to *cursor
fromStop, toStop *cursor
order O
considerAllRowsModified bool
}
func DifferFromRoots[K ~[]byte, O Ordering[K]](
@@ -47,6 +52,7 @@ func DifferFromRoots[K ~[]byte, O Ordering[K]](
fromNs NodeStore, toNs NodeStore,
from, to Node,
order O,
considerAllRowsModified bool,
) (Differ[K, O], error) {
var fc, tc *cursor
var err error
@@ -80,11 +86,12 @@ func DifferFromRoots[K ~[]byte, O Ordering[K]](
}
return Differ[K, O]{
from: fc,
to: tc,
fromStop: fs,
toStop: ts,
order: order,
from: fc,
to: tc,
fromStop: fs,
toStop: ts,
order: order,
considerAllRowsModified: considerAllRowsModified,
}, nil
}
@@ -135,7 +142,9 @@ func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) {
return sendAdded(ctx, td.to)
case cmp == 0:
if !equalcursorValues(td.from, td.to) {
// If the cursor schema has changed, then all rows should be considered modified.
// If the cursor schema hasn't changed, rows are modified iff their bytes have changed.
if td.considerAllRowsModified || !equalcursorValues(td.from, td.to) {
return sendModified(ctx, td.from, td.to)
}

View File

@@ -42,7 +42,8 @@ func DiffOrderedTrees[K, V ~[]byte, O Ordering[K]](
from, to StaticMap[K, V, O],
cb DiffFn,
) error {
differ, err := DifferFromRoots[K](ctx, from.NodeStore, to.NodeStore, from.Root, to.Root, from.Order)
// TODO: If the schema has changed, do we want every row to be considered changed?
differ, err := DifferFromRoots[K](ctx, from.NodeStore, to.NodeStore, from.Root, to.Root, from.Order, false)
if err != nil {
return err
}

View File

@@ -51,12 +51,14 @@ func ThreeWayMerge[K ~[]byte, O Ordering[K], S message.Serializer](
serializer S,
) (final Node, stats MergeStats, err error) {
ld, err := DifferFromRoots[K](ctx, ns, ns, base, left, order)
// TODO: This function is only ever called to merge artifacts. Is is possible for the tuple desc to change here?
// If it does, do we want every row to appear in the diff?
ld, err := DifferFromRoots[K](ctx, ns, ns, base, left, order, false)
if err != nil {
return Node{}, MergeStats{}, err
}
rd, err := DifferFromRoots[K](ctx, ns, ns, base, right, order)
rd, err := DifferFromRoots[K](ctx, ns, ns, base, right, order, false)
if err != nil {
return Node{}, MergeStats{}, err
}

View File

@@ -48,12 +48,13 @@ func NewThreeWayDiffer[K, V ~[]byte, O Ordering[K]](
keyless bool,
order O,
) (*ThreeWayDiffer[K, O], error) {
ld, err := DifferFromRoots[K](ctx, ns, ns, base.Root, left.Root, order)
// probably compute each of these separately
ld, err := DifferFromRoots[K](ctx, ns, ns, base.Root, left.Root, order, false)
if err != nil {
return nil, err
}
rd, err := DifferFromRoots[K](ctx, ns, ns, base.Root, right.Root, order)
rd, err := DifferFromRoots[K](ctx, ns, ns, base.Root, right.Root, order, false)
if err != nil {
return nil, err
}