Allows diff to be performed across primary key renames (#4156)

* allow diff across pk renames

* pr comments
This commit is contained in:
Dhruv Sringari
2022-08-22 14:58:55 -07:00
committed by GitHub
parent 755c04b0af
commit 9758e697c8
9 changed files with 182 additions and 67 deletions

View File

@@ -592,7 +592,7 @@ func diffRows(ctx context.Context, se *engine.SqlEngine, td diff.TableDelta, dAr
if dArgs.diffOutput == SQLDiffOutput &&
(td.ToSch == nil ||
(td.FromSch != nil && !schema.SchemasAreEqual(td.FromSch, td.ToSch))) {
_, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff")
_, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff\n")
return nil
}

View File

@@ -88,7 +88,7 @@ func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td T
}
func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless bool, from, to durable.Index, fromSch, toSch schema.Schema) error {
_, vMapping, err := MapSchemaBasedOnName(fromSch, toSch)
_, vMapping, err := schema.MapSchemaBasedOnTagAndName(fromSch, toSch)
if err != nil {
return err
}
@@ -308,46 +308,3 @@ func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, ch c
return ctx.Err()
}
}
// MapSchemaBasedOnName can be used to map column values from one schema to
// another schema. A column in |inSch| is mapped to |outSch| if they share the
// same name and primary key membership status. It returns ordinal mappings that
// can be use to map key, value val.Tuple's of schema |inSch| to |outSch|. The
// first ordinal map is for keys, and the second is for values. If a column of
// |inSch| is missing in |outSch| then that column's index in the ordinal map
// holds -1.
// TODO (dhruv): Unit tests
func MapSchemaBasedOnName(inSch, outSch schema.Schema) (val.OrdinalMapping, val.OrdinalMapping, error) {
keyMapping := make(val.OrdinalMapping, inSch.GetPKCols().Size())
valMapping := make(val.OrdinalMapping, inSch.GetNonPKCols().Size())
err := inSch.GetPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
i := inSch.GetPKCols().TagToIdx[tag]
if col, ok := outSch.GetPKCols().GetByName(col.Name); ok {
j := outSch.GetPKCols().TagToIdx[col.Tag]
keyMapping[i] = j
} else {
return true, fmt.Errorf("could not map primary key column %s", col.Name)
}
return false, nil
})
if err != nil {
return nil, nil, err
}
err = inSch.GetNonPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
i := inSch.GetNonPKCols().TagToIdx[col.Tag]
if col, ok := outSch.GetNonPKCols().GetByName(col.Name); ok {
j := outSch.GetNonPKCols().TagToIdx[col.Tag]
valMapping[i] = j
} else {
valMapping[i] = -1
}
return false, nil
})
if err != nil {
return nil, nil, err
}
return keyMapping, valMapping, nil
}

View File

@@ -261,29 +261,36 @@ func TagMappingWithNameFallback(srcSch, destSch schema.Schema) (*FieldMapping, e
return NewFieldMapping(srcSch, destSch, srcToDest)
}
// TagMappingByName takes a source schema and a destination schema and maps
// columns by matching names.
func TagMappingByName(srcSch, destSch schema.Schema) (*FieldMapping, error) {
successes := 0
srcCols := srcSch.GetAllCols()
destCols := destSch.GetAllCols()
// TagMappingByTagAndName takes a source schema and a destination schema and maps
// pks by tag and non-pks by name.
func TagMappingByTagAndName(srcSch, destSch schema.Schema) (*FieldMapping, error) {
srcToDest := make(map[uint64]uint64, destSch.GetAllCols().Size())
srcToDest := make(map[uint64]uint64, destCols.Size())
err := destCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
srcCol, ok := srcCols.GetByName(col.Name)
if !ok {
return false, nil
}
srcToDest[srcCol.Tag] = col.Tag
successes++
return false, nil
})
keyMap, valMap, err := schema.MapSchemaBasedOnTagAndName(srcSch, destSch)
if err != nil {
return nil, err
}
var successes int
for i, j := range keyMap {
if j == -1 {
continue
}
srcTag := srcSch.GetPKCols().GetAtIndex(i).Tag
dstTag := destSch.GetPKCols().GetAtIndex(j).Tag
srcToDest[srcTag] = dstTag
successes++
}
for i, j := range valMap {
if j == -1 {
continue
}
srcTag := srcSch.GetNonPKCols().GetAtIndex(i).Tag
dstTag := destSch.GetNonPKCols().GetAtIndex(j).Tag
srcToDest[srcTag] = dstTag
successes++
}
if successes == 0 {
return nil, ErrEmptyMapping
}

View File

@@ -15,6 +15,7 @@
package schema
import (
"fmt"
"strings"
"github.com/dolthub/vitess/go/vt/proto/query"
@@ -214,6 +215,48 @@ func ArePrimaryKeySetsDiffable(format *types.NomsBinFormat, fromSch, toSch Schem
return true
}
// MapSchemaBasedOnTagAndName can be used to map column values from one schema
// to another schema. A primary key column in |inSch| is mapped to |outSch| if
// they share the same tag. A non-primary key column in |inSch| is mapped to
// |outSch| purely based on the name. It returns ordinal mappings that can be
// use to map key, value val.Tuple's of schema |inSch| to |outSch|. The first
// ordinal map is for keys, and the second is for values. If a column of |inSch|
// is missing in |outSch| then that column's index in the ordinal map holds -1.
func MapSchemaBasedOnTagAndName(inSch, outSch Schema) ([]int, []int, error) {
keyMapping := make([]int, inSch.GetPKCols().Size())
valMapping := make([]int, inSch.GetNonPKCols().Size())
err := inSch.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
i := inSch.GetPKCols().TagToIdx[tag]
if col, ok := outSch.GetPKCols().GetByTag(tag); ok {
j := outSch.GetPKCols().TagToIdx[col.Tag]
keyMapping[i] = j
} else {
return true, fmt.Errorf("could not map primary key column %s", col.Name)
}
return false, nil
})
if err != nil {
return nil, nil, err
}
err = inSch.GetNonPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
i := inSch.GetNonPKCols().TagToIdx[col.Tag]
if col, ok := outSch.GetNonPKCols().GetByName(col.Name); ok {
j := outSch.GetNonPKCols().TagToIdx[col.Tag]
valMapping[i] = j
} else {
valMapping[i] = -1
}
return false, nil
})
if err != nil {
return nil, nil, err
}
return keyMapping, valMapping, nil
}
var ErrUsingSpatialKey = errors.NewKind("can't use Spatial Types as Primary Key for table %s")
// IsColSpatialType returns whether a column's type is a spatial type

View File

@@ -209,6 +209,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
From Schema
To Schema
Diffable bool
KeyMap []int
}{
{
Name: "Basic",
@@ -217,14 +218,16 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
To: MustSchemaFromCols(NewColCollection(
NewColumn("pk", 0, types.IntKind, true))),
Diffable: true,
KeyMap: []int{0},
},
{
Name: "Column renames",
Name: "PK-Column renames",
From: MustSchemaFromCols(NewColCollection(
NewColumn("pk", 1, types.IntKind, true))),
To: MustSchemaFromCols(NewColCollection(
NewColumn("pk2", 1, types.IntKind, true))),
Diffable: true,
KeyMap: []int{0},
},
{
Name: "Only pk ordering should matter for diffability",
@@ -234,6 +237,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
To: MustSchemaFromCols(NewColCollection(
NewColumn("pk", 1, types.IntKind, true))),
Diffable: true,
KeyMap: []int{0},
},
{
Name: "Only pk ordering should matter for diffability - inverse",
@@ -243,6 +247,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
NewColumn("col1", 2, types.IntKind, false),
NewColumn("pk", 1, types.IntKind, true))),
Diffable: true,
KeyMap: []int{0},
},
{
Name: "Only pk ordering should matter for diffability - compound",
@@ -254,6 +259,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
NewColumn("pk1", 0, types.IntKind, true),
NewColumn("pk2", 2, types.IntKind, true))),
Diffable: true,
KeyMap: []int{0, 1},
},
{
Name: "Tag mismatches",
@@ -279,6 +285,13 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
t.Run(test.Name, func(t *testing.T) {
d := ArePrimaryKeySetsDiffable(types.Format_Default, test.From, test.To)
require.Equal(t, test.Diffable, d)
// If they are diffable then we should be able to map their schemas from one to another.
if d {
keyMap, _, err := MapSchemaBasedOnTagAndName(test.From, test.To)
require.NoError(t, err)
require.Equal(t, test.KeyMap, keyMap)
}
})
}
}

View File

@@ -508,7 +508,7 @@ func (dp DiffPartition) rowConvForSchema(ctx context.Context, vrw types.ValueRea
return rowconv.IdentityConverter, nil
}
fm, err := rowconv.TagMappingByName(srcSch, targetSch)
fm, err := rowconv.TagMappingByTagAndName(srcSch, targetSch)
if err != nil {
return nil, err
}

View File

@@ -19,7 +19,6 @@ import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/dolt/go/libraries/doltcore/diff"
"github.com/dolthub/dolt/go/libraries/doltcore/rowconv"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
@@ -43,7 +42,7 @@ type ProllyRowConverter struct {
}
func NewProllyRowConverter(inSch, outSch schema.Schema, warnFn rowconv.WarnFunction, ns tree.NodeStore) (ProllyRowConverter, error) {
keyProj, valProj, err := diff.MapSchemaBasedOnName(inSch, outSch)
keyProj, valProj, err := schema.MapSchemaBasedOnTagAndName(inSch, outSch)
if err != nil {
return ProllyRowConverter{}, err
}

View File

@@ -3787,6 +3787,31 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{
},
},
},
{
Name: "Diff table shows diffs across primary key renames",
SetUpScript: []string{
"CREATE TABLE t (pk1 int PRIMARY KEY);",
"INSERT INTO t values (1);",
"CREATE table t2 (pk1a int, pk1b int, PRIMARY KEY (pk1a, pk1b));",
"INSERT INTO t2 values (2, 2);",
"CALL DOLT_COMMIT('-am', 'initial');",
"ALTER TABLE t RENAME COLUMN pk1 to pk2",
"ALTER TABLE t2 RENAME COLUMN pk1a to pk2a",
"ALTER TABLE t2 RENAME COLUMN pk1b to pk2b",
"CALL DOLT_COMMIT('-am', 'rename primary key')",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT from_pk2, to_pk2, diff_type from dolt_diff_t;",
Expected: []sql.Row{{nil, 1, "added"}},
},
{
Query: "SELECT from_pk2a, from_pk2b, to_pk2a, to_pk2b, diff_type from dolt_diff_t2;",
Expected: []sql.Row{{nil, nil, 2, 2, "added"}},
},
},
},
}
var Dolt1DiffSystemTableScripts = []queries.ScriptTest{
@@ -4301,6 +4326,33 @@ var DiffTableFunctionScriptTests = []queries.ScriptTest{
},
},
},
{
Name: "Renaming a primary key column shows PK values in both the to and from columns",
SetUpScript: []string{
"CREATE TABLE t1 (pk int PRIMARY KEY, col1 int);",
"INSERT INTO t1 VALUES (1, 1);",
"CREATE TABLE t2 (pk1a int, pk1b int, col1 int, PRIMARY KEY (pk1a, pk1b));",
"INSERT INTO t2 VALUES (1, 1, 1);",
"CALL DOLT_COMMIT('-am', 'initial');",
"ALTER TABLE t1 RENAME COLUMN pk to pk2;",
"UPDATE t1 set col1 = 100;",
"ALTER TABLE t2 RENAME COLUMN pk1a to pk2a;",
"ALTER TABLE t2 RENAME COLUMN pk1b to pk2b;",
"UPDATE t2 set col1 = 100;",
"CALL DOLT_COMMIT('-am', 'edit');",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "select to_pk2, to_col1, from_pk, from_col1, diff_type from dolt_diff('t1', 'HEAD~', 'HEAD')",
Expected: []sql.Row{{1, 100, 1, 1, "modified"}},
},
{
Query: "select to_pk2a, to_pk2b, to_col1, from_pk1a, from_pk1b, from_col1, diff_type from dolt_diff('t2', 'HEAD~', 'HEAD');",
Expected: []sql.Row{{1, 1, 100, 1, 1, 1, "modified"}},
},
},
},
}
var LargeJsonObjectScriptTests = []queries.ScriptTest{

View File

@@ -991,3 +991,47 @@ EOF
run dolt diff --limit
[ "$status" -ne 0 ]
}
@test "diff: allowed across primary key renames" {
dolt sql <<SQL
CREATE TABLE t1 (pk int PRIMARY KEY, col1 int);
INSERT INTO t1 VALUES (1, 1);
CREATE TABLE t2 (pk1a int, pk1b int, col1 int, PRIMARY KEY (pk1a, pk1b));
INSERT INTO t2 VALUES (1, 1, 1);
SQL
dolt commit -am "initial"
dolt sql <<SQL
ALTER TABLE t1 RENAME COLUMN pk to pk2;
UPDATE t1 set col1 = 100;
ALTER TABLE t2 RENAME COLUMN pk1a to pk2a;
ALTER TABLE t2 RENAME COLUMN pk1b to pk2b;
UPDATE t2 set col1 = 100;
SQL
dolt commit -am 'rename primary key'
run dolt diff HEAD~1 HEAD
[ $status -eq 0 ]
EXPECTED_TABLE=$(cat <<'EOF'
+---+------+------+------+
| | pk | col1 | pk2 |
+---+------+------+------+
| < | 1 | 1 | NULL |
| > | NULL | 100 | 1 |
+---+------+------+------+
EOF
)
[[ "$output" =~ "$EXPECTED_TABLE" ]]
EXPECTED_TABLE=$(cat <<'EOF'
+---+------+------+------+------+------+
| | pk1a | pk1b | col1 | pk2a | pk2b |
+---+------+------+------+------+------+
| < | 1 | 1 | 1 | NULL | NULL |
| > | NULL | NULL | 100 | 1 | 1 |
+---+------+------+------+------+------+
EOF
)
[[ "$output" =~ "$EXPECTED_TABLE" ]]
}