mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-25 18:49:36 -06:00
Allows diff to be performed across primary key renames (#4156)
* allow diff across pk renames * pr comments
This commit is contained in:
@@ -592,7 +592,7 @@ func diffRows(ctx context.Context, se *engine.SqlEngine, td diff.TableDelta, dAr
|
||||
if dArgs.diffOutput == SQLDiffOutput &&
|
||||
(td.ToSch == nil ||
|
||||
(td.FromSch != nil && !schema.SchemasAreEqual(td.FromSch, td.ToSch))) {
|
||||
_, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff")
|
||||
_, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td T
|
||||
}
|
||||
|
||||
func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless bool, from, to durable.Index, fromSch, toSch schema.Schema) error {
|
||||
_, vMapping, err := MapSchemaBasedOnName(fromSch, toSch)
|
||||
_, vMapping, err := schema.MapSchemaBasedOnTagAndName(fromSch, toSch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -308,46 +308,3 @@ func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, ch c
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
// MapSchemaBasedOnName can be used to map column values from one schema to
|
||||
// another schema. A column in |inSch| is mapped to |outSch| if they share the
|
||||
// same name and primary key membership status. It returns ordinal mappings that
|
||||
// can be use to map key, value val.Tuple's of schema |inSch| to |outSch|. The
|
||||
// first ordinal map is for keys, and the second is for values. If a column of
|
||||
// |inSch| is missing in |outSch| then that column's index in the ordinal map
|
||||
// holds -1.
|
||||
// TODO (dhruv): Unit tests
|
||||
func MapSchemaBasedOnName(inSch, outSch schema.Schema) (val.OrdinalMapping, val.OrdinalMapping, error) {
|
||||
keyMapping := make(val.OrdinalMapping, inSch.GetPKCols().Size())
|
||||
valMapping := make(val.OrdinalMapping, inSch.GetNonPKCols().Size())
|
||||
|
||||
err := inSch.GetPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
|
||||
i := inSch.GetPKCols().TagToIdx[tag]
|
||||
if col, ok := outSch.GetPKCols().GetByName(col.Name); ok {
|
||||
j := outSch.GetPKCols().TagToIdx[col.Tag]
|
||||
keyMapping[i] = j
|
||||
} else {
|
||||
return true, fmt.Errorf("could not map primary key column %s", col.Name)
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
err = inSch.GetNonPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
|
||||
i := inSch.GetNonPKCols().TagToIdx[col.Tag]
|
||||
if col, ok := outSch.GetNonPKCols().GetByName(col.Name); ok {
|
||||
j := outSch.GetNonPKCols().TagToIdx[col.Tag]
|
||||
valMapping[i] = j
|
||||
} else {
|
||||
valMapping[i] = -1
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return keyMapping, valMapping, nil
|
||||
}
|
||||
|
||||
@@ -261,29 +261,36 @@ func TagMappingWithNameFallback(srcSch, destSch schema.Schema) (*FieldMapping, e
|
||||
return NewFieldMapping(srcSch, destSch, srcToDest)
|
||||
}
|
||||
|
||||
// TagMappingByName takes a source schema and a destination schema and maps
|
||||
// columns by matching names.
|
||||
func TagMappingByName(srcSch, destSch schema.Schema) (*FieldMapping, error) {
|
||||
successes := 0
|
||||
srcCols := srcSch.GetAllCols()
|
||||
destCols := destSch.GetAllCols()
|
||||
// TagMappingByTagAndName takes a source schema and a destination schema and maps
|
||||
// pks by tag and non-pks by name.
|
||||
func TagMappingByTagAndName(srcSch, destSch schema.Schema) (*FieldMapping, error) {
|
||||
srcToDest := make(map[uint64]uint64, destSch.GetAllCols().Size())
|
||||
|
||||
srcToDest := make(map[uint64]uint64, destCols.Size())
|
||||
err := destCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
|
||||
srcCol, ok := srcCols.GetByName(col.Name)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
srcToDest[srcCol.Tag] = col.Tag
|
||||
successes++
|
||||
|
||||
return false, nil
|
||||
})
|
||||
keyMap, valMap, err := schema.MapSchemaBasedOnTagAndName(srcSch, destSch)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var successes int
|
||||
for i, j := range keyMap {
|
||||
if j == -1 {
|
||||
continue
|
||||
}
|
||||
srcTag := srcSch.GetPKCols().GetAtIndex(i).Tag
|
||||
dstTag := destSch.GetPKCols().GetAtIndex(j).Tag
|
||||
srcToDest[srcTag] = dstTag
|
||||
successes++
|
||||
}
|
||||
for i, j := range valMap {
|
||||
if j == -1 {
|
||||
continue
|
||||
}
|
||||
srcTag := srcSch.GetNonPKCols().GetAtIndex(i).Tag
|
||||
dstTag := destSch.GetNonPKCols().GetAtIndex(j).Tag
|
||||
srcToDest[srcTag] = dstTag
|
||||
successes++
|
||||
}
|
||||
|
||||
if successes == 0 {
|
||||
return nil, ErrEmptyMapping
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/dolthub/vitess/go/vt/proto/query"
|
||||
@@ -214,6 +215,48 @@ func ArePrimaryKeySetsDiffable(format *types.NomsBinFormat, fromSch, toSch Schem
|
||||
return true
|
||||
}
|
||||
|
||||
// MapSchemaBasedOnTagAndName can be used to map column values from one schema
|
||||
// to another schema. A primary key column in |inSch| is mapped to |outSch| if
|
||||
// they share the same tag. A non-primary key column in |inSch| is mapped to
|
||||
// |outSch| purely based on the name. It returns ordinal mappings that can be
|
||||
// use to map key, value val.Tuple's of schema |inSch| to |outSch|. The first
|
||||
// ordinal map is for keys, and the second is for values. If a column of |inSch|
|
||||
// is missing in |outSch| then that column's index in the ordinal map holds -1.
|
||||
func MapSchemaBasedOnTagAndName(inSch, outSch Schema) ([]int, []int, error) {
|
||||
keyMapping := make([]int, inSch.GetPKCols().Size())
|
||||
valMapping := make([]int, inSch.GetNonPKCols().Size())
|
||||
|
||||
err := inSch.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
|
||||
i := inSch.GetPKCols().TagToIdx[tag]
|
||||
if col, ok := outSch.GetPKCols().GetByTag(tag); ok {
|
||||
j := outSch.GetPKCols().TagToIdx[col.Tag]
|
||||
keyMapping[i] = j
|
||||
} else {
|
||||
return true, fmt.Errorf("could not map primary key column %s", col.Name)
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
err = inSch.GetNonPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
|
||||
i := inSch.GetNonPKCols().TagToIdx[col.Tag]
|
||||
if col, ok := outSch.GetNonPKCols().GetByName(col.Name); ok {
|
||||
j := outSch.GetNonPKCols().TagToIdx[col.Tag]
|
||||
valMapping[i] = j
|
||||
} else {
|
||||
valMapping[i] = -1
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return keyMapping, valMapping, nil
|
||||
}
|
||||
|
||||
var ErrUsingSpatialKey = errors.NewKind("can't use Spatial Types as Primary Key for table %s")
|
||||
|
||||
// IsColSpatialType returns whether a column's type is a spatial type
|
||||
|
||||
@@ -209,6 +209,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
|
||||
From Schema
|
||||
To Schema
|
||||
Diffable bool
|
||||
KeyMap []int
|
||||
}{
|
||||
{
|
||||
Name: "Basic",
|
||||
@@ -217,14 +218,16 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
|
||||
To: MustSchemaFromCols(NewColCollection(
|
||||
NewColumn("pk", 0, types.IntKind, true))),
|
||||
Diffable: true,
|
||||
KeyMap: []int{0},
|
||||
},
|
||||
{
|
||||
Name: "Column renames",
|
||||
Name: "PK-Column renames",
|
||||
From: MustSchemaFromCols(NewColCollection(
|
||||
NewColumn("pk", 1, types.IntKind, true))),
|
||||
To: MustSchemaFromCols(NewColCollection(
|
||||
NewColumn("pk2", 1, types.IntKind, true))),
|
||||
Diffable: true,
|
||||
KeyMap: []int{0},
|
||||
},
|
||||
{
|
||||
Name: "Only pk ordering should matter for diffability",
|
||||
@@ -234,6 +237,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
|
||||
To: MustSchemaFromCols(NewColCollection(
|
||||
NewColumn("pk", 1, types.IntKind, true))),
|
||||
Diffable: true,
|
||||
KeyMap: []int{0},
|
||||
},
|
||||
{
|
||||
Name: "Only pk ordering should matter for diffability - inverse",
|
||||
@@ -243,6 +247,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
|
||||
NewColumn("col1", 2, types.IntKind, false),
|
||||
NewColumn("pk", 1, types.IntKind, true))),
|
||||
Diffable: true,
|
||||
KeyMap: []int{0},
|
||||
},
|
||||
{
|
||||
Name: "Only pk ordering should matter for diffability - compound",
|
||||
@@ -254,6 +259,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
|
||||
NewColumn("pk1", 0, types.IntKind, true),
|
||||
NewColumn("pk2", 2, types.IntKind, true))),
|
||||
Diffable: true,
|
||||
KeyMap: []int{0, 1},
|
||||
},
|
||||
{
|
||||
Name: "Tag mismatches",
|
||||
@@ -279,6 +285,13 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) {
|
||||
t.Run(test.Name, func(t *testing.T) {
|
||||
d := ArePrimaryKeySetsDiffable(types.Format_Default, test.From, test.To)
|
||||
require.Equal(t, test.Diffable, d)
|
||||
|
||||
// If they are diffable then we should be able to map their schemas from one to another.
|
||||
if d {
|
||||
keyMap, _, err := MapSchemaBasedOnTagAndName(test.From, test.To)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, test.KeyMap, keyMap)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -508,7 +508,7 @@ func (dp DiffPartition) rowConvForSchema(ctx context.Context, vrw types.ValueRea
|
||||
return rowconv.IdentityConverter, nil
|
||||
}
|
||||
|
||||
fm, err := rowconv.TagMappingByName(srcSch, targetSch)
|
||||
fm, err := rowconv.TagMappingByTagAndName(srcSch, targetSch)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ import (
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/diff"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/rowconv"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
|
||||
@@ -43,7 +42,7 @@ type ProllyRowConverter struct {
|
||||
}
|
||||
|
||||
func NewProllyRowConverter(inSch, outSch schema.Schema, warnFn rowconv.WarnFunction, ns tree.NodeStore) (ProllyRowConverter, error) {
|
||||
keyProj, valProj, err := diff.MapSchemaBasedOnName(inSch, outSch)
|
||||
keyProj, valProj, err := schema.MapSchemaBasedOnTagAndName(inSch, outSch)
|
||||
if err != nil {
|
||||
return ProllyRowConverter{}, err
|
||||
}
|
||||
|
||||
@@ -3787,6 +3787,31 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Diff table shows diffs across primary key renames",
|
||||
SetUpScript: []string{
|
||||
"CREATE TABLE t (pk1 int PRIMARY KEY);",
|
||||
"INSERT INTO t values (1);",
|
||||
"CREATE table t2 (pk1a int, pk1b int, PRIMARY KEY (pk1a, pk1b));",
|
||||
"INSERT INTO t2 values (2, 2);",
|
||||
"CALL DOLT_COMMIT('-am', 'initial');",
|
||||
|
||||
"ALTER TABLE t RENAME COLUMN pk1 to pk2",
|
||||
"ALTER TABLE t2 RENAME COLUMN pk1a to pk2a",
|
||||
"ALTER TABLE t2 RENAME COLUMN pk1b to pk2b",
|
||||
"CALL DOLT_COMMIT('-am', 'rename primary key')",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "SELECT from_pk2, to_pk2, diff_type from dolt_diff_t;",
|
||||
Expected: []sql.Row{{nil, 1, "added"}},
|
||||
},
|
||||
{
|
||||
Query: "SELECT from_pk2a, from_pk2b, to_pk2a, to_pk2b, diff_type from dolt_diff_t2;",
|
||||
Expected: []sql.Row{{nil, nil, 2, 2, "added"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var Dolt1DiffSystemTableScripts = []queries.ScriptTest{
|
||||
@@ -4301,6 +4326,33 @@ var DiffTableFunctionScriptTests = []queries.ScriptTest{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Renaming a primary key column shows PK values in both the to and from columns",
|
||||
SetUpScript: []string{
|
||||
"CREATE TABLE t1 (pk int PRIMARY KEY, col1 int);",
|
||||
"INSERT INTO t1 VALUES (1, 1);",
|
||||
"CREATE TABLE t2 (pk1a int, pk1b int, col1 int, PRIMARY KEY (pk1a, pk1b));",
|
||||
"INSERT INTO t2 VALUES (1, 1, 1);",
|
||||
"CALL DOLT_COMMIT('-am', 'initial');",
|
||||
|
||||
"ALTER TABLE t1 RENAME COLUMN pk to pk2;",
|
||||
"UPDATE t1 set col1 = 100;",
|
||||
"ALTER TABLE t2 RENAME COLUMN pk1a to pk2a;",
|
||||
"ALTER TABLE t2 RENAME COLUMN pk1b to pk2b;",
|
||||
"UPDATE t2 set col1 = 100;",
|
||||
"CALL DOLT_COMMIT('-am', 'edit');",
|
||||
},
|
||||
Assertions: []queries.ScriptTestAssertion{
|
||||
{
|
||||
Query: "select to_pk2, to_col1, from_pk, from_col1, diff_type from dolt_diff('t1', 'HEAD~', 'HEAD')",
|
||||
Expected: []sql.Row{{1, 100, 1, 1, "modified"}},
|
||||
},
|
||||
{
|
||||
Query: "select to_pk2a, to_pk2b, to_col1, from_pk1a, from_pk1b, from_col1, diff_type from dolt_diff('t2', 'HEAD~', 'HEAD');",
|
||||
Expected: []sql.Row{{1, 1, 100, 1, 1, 1, "modified"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var LargeJsonObjectScriptTests = []queries.ScriptTest{
|
||||
|
||||
@@ -991,3 +991,47 @@ EOF
|
||||
run dolt diff --limit
|
||||
[ "$status" -ne 0 ]
|
||||
}
|
||||
|
||||
@test "diff: allowed across primary key renames" {
|
||||
dolt sql <<SQL
|
||||
CREATE TABLE t1 (pk int PRIMARY KEY, col1 int);
|
||||
INSERT INTO t1 VALUES (1, 1);
|
||||
CREATE TABLE t2 (pk1a int, pk1b int, col1 int, PRIMARY KEY (pk1a, pk1b));
|
||||
INSERT INTO t2 VALUES (1, 1, 1);
|
||||
SQL
|
||||
dolt commit -am "initial"
|
||||
|
||||
dolt sql <<SQL
|
||||
ALTER TABLE t1 RENAME COLUMN pk to pk2;
|
||||
UPDATE t1 set col1 = 100;
|
||||
ALTER TABLE t2 RENAME COLUMN pk1a to pk2a;
|
||||
ALTER TABLE t2 RENAME COLUMN pk1b to pk2b;
|
||||
UPDATE t2 set col1 = 100;
|
||||
SQL
|
||||
dolt commit -am 'rename primary key'
|
||||
|
||||
run dolt diff HEAD~1 HEAD
|
||||
[ $status -eq 0 ]
|
||||
|
||||
EXPECTED_TABLE=$(cat <<'EOF'
|
||||
+---+------+------+------+
|
||||
| | pk | col1 | pk2 |
|
||||
+---+------+------+------+
|
||||
| < | 1 | 1 | NULL |
|
||||
| > | NULL | 100 | 1 |
|
||||
+---+------+------+------+
|
||||
EOF
|
||||
)
|
||||
[[ "$output" =~ "$EXPECTED_TABLE" ]]
|
||||
|
||||
EXPECTED_TABLE=$(cat <<'EOF'
|
||||
+---+------+------+------+------+------+
|
||||
| | pk1a | pk1b | col1 | pk2a | pk2b |
|
||||
+---+------+------+------+------+------+
|
||||
| < | 1 | 1 | 1 | NULL | NULL |
|
||||
| > | NULL | NULL | 100 | 1 | 1 |
|
||||
+---+------+------+------+------+------+
|
||||
EOF
|
||||
)
|
||||
[[ "$output" =~ "$EXPECTED_TABLE" ]]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user