From 9758e697c87a5e50910b06453b4299e7154ddd38 Mon Sep 17 00:00:00 2001 From: Dhruv Sringari Date: Mon, 22 Aug 2022 14:58:55 -0700 Subject: [PATCH] Allows diff to be performed across primary key renames (#4156) * allow diff across pk renames * pr comments --- go/cmd/dolt/commands/diff.go | 2 +- go/libraries/doltcore/diff/diff_summary.go | 45 +--------------- .../doltcore/rowconv/field_mapping.go | 43 ++++++++------- go/libraries/doltcore/schema/schema.go | 43 +++++++++++++++ go/libraries/doltcore/schema/schema_test.go | 15 +++++- .../doltcore/sqle/dtables/diff_table.go | 2 +- .../doltcore/sqle/dtables/prolly_row_conv.go | 3 +- .../doltcore/sqle/enginetest/dolt_queries.go | 52 +++++++++++++++++++ integration-tests/bats/diff.bats | 44 ++++++++++++++++ 9 files changed, 182 insertions(+), 67 deletions(-) diff --git a/go/cmd/dolt/commands/diff.go b/go/cmd/dolt/commands/diff.go index 32055331d8..4f3ea60489 100644 --- a/go/cmd/dolt/commands/diff.go +++ b/go/cmd/dolt/commands/diff.go @@ -592,7 +592,7 @@ func diffRows(ctx context.Context, se *engine.SqlEngine, td diff.TableDelta, dAr if dArgs.diffOutput == SQLDiffOutput && (td.ToSch == nil || (td.FromSch != nil && !schema.SchemasAreEqual(td.FromSch, td.ToSch))) { - _, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff") + _, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff\n") return nil } diff --git a/go/libraries/doltcore/diff/diff_summary.go b/go/libraries/doltcore/diff/diff_summary.go index 6709f3d816..6da329d311 100644 --- a/go/libraries/doltcore/diff/diff_summary.go +++ b/go/libraries/doltcore/diff/diff_summary.go @@ -88,7 +88,7 @@ func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td T } func diffProllyTrees(ctx context.Context, ch chan DiffSummaryProgress, keyless bool, from, to durable.Index, fromSch, toSch schema.Schema) error { - _, vMapping, err := MapSchemaBasedOnName(fromSch, toSch) + _, vMapping, err := schema.MapSchemaBasedOnTagAndName(fromSch, toSch) if err != nil { return err } @@ -308,46 +308,3 @@ func reportNomsKeylessChanges(ctx context.Context, change *diff.Difference, ch c return ctx.Err() } } - -// MapSchemaBasedOnName can be used to map column values from one schema to -// another schema. A column in |inSch| is mapped to |outSch| if they share the -// same name and primary key membership status. It returns ordinal mappings that -// can be use to map key, value val.Tuple's of schema |inSch| to |outSch|. The -// first ordinal map is for keys, and the second is for values. If a column of -// |inSch| is missing in |outSch| then that column's index in the ordinal map -// holds -1. -// TODO (dhruv): Unit tests -func MapSchemaBasedOnName(inSch, outSch schema.Schema) (val.OrdinalMapping, val.OrdinalMapping, error) { - keyMapping := make(val.OrdinalMapping, inSch.GetPKCols().Size()) - valMapping := make(val.OrdinalMapping, inSch.GetNonPKCols().Size()) - - err := inSch.GetPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { - i := inSch.GetPKCols().TagToIdx[tag] - if col, ok := outSch.GetPKCols().GetByName(col.Name); ok { - j := outSch.GetPKCols().TagToIdx[col.Tag] - keyMapping[i] = j - } else { - return true, fmt.Errorf("could not map primary key column %s", col.Name) - } - return false, nil - }) - if err != nil { - return nil, nil, err - } - - err = inSch.GetNonPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { - i := inSch.GetNonPKCols().TagToIdx[col.Tag] - if col, ok := outSch.GetNonPKCols().GetByName(col.Name); ok { - j := outSch.GetNonPKCols().TagToIdx[col.Tag] - valMapping[i] = j - } else { - valMapping[i] = -1 - } - return false, nil - }) - if err != nil { - return nil, nil, err - } - - return keyMapping, valMapping, nil -} diff --git a/go/libraries/doltcore/rowconv/field_mapping.go b/go/libraries/doltcore/rowconv/field_mapping.go index 132f42ef3e..6a12a403da 100644 --- a/go/libraries/doltcore/rowconv/field_mapping.go +++ b/go/libraries/doltcore/rowconv/field_mapping.go @@ -261,29 +261,36 @@ func TagMappingWithNameFallback(srcSch, destSch schema.Schema) (*FieldMapping, e return NewFieldMapping(srcSch, destSch, srcToDest) } -// TagMappingByName takes a source schema and a destination schema and maps -// columns by matching names. -func TagMappingByName(srcSch, destSch schema.Schema) (*FieldMapping, error) { - successes := 0 - srcCols := srcSch.GetAllCols() - destCols := destSch.GetAllCols() +// TagMappingByTagAndName takes a source schema and a destination schema and maps +// pks by tag and non-pks by name. +func TagMappingByTagAndName(srcSch, destSch schema.Schema) (*FieldMapping, error) { + srcToDest := make(map[uint64]uint64, destSch.GetAllCols().Size()) - srcToDest := make(map[uint64]uint64, destCols.Size()) - err := destCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) { - srcCol, ok := srcCols.GetByName(col.Name) - if !ok { - return false, nil - } - - srcToDest[srcCol.Tag] = col.Tag - successes++ - - return false, nil - }) + keyMap, valMap, err := schema.MapSchemaBasedOnTagAndName(srcSch, destSch) if err != nil { return nil, err } + var successes int + for i, j := range keyMap { + if j == -1 { + continue + } + srcTag := srcSch.GetPKCols().GetAtIndex(i).Tag + dstTag := destSch.GetPKCols().GetAtIndex(j).Tag + srcToDest[srcTag] = dstTag + successes++ + } + for i, j := range valMap { + if j == -1 { + continue + } + srcTag := srcSch.GetNonPKCols().GetAtIndex(i).Tag + dstTag := destSch.GetNonPKCols().GetAtIndex(j).Tag + srcToDest[srcTag] = dstTag + successes++ + } + if successes == 0 { return nil, ErrEmptyMapping } diff --git a/go/libraries/doltcore/schema/schema.go b/go/libraries/doltcore/schema/schema.go index 2635bdce38..c9a306660d 100644 --- a/go/libraries/doltcore/schema/schema.go +++ b/go/libraries/doltcore/schema/schema.go @@ -15,6 +15,7 @@ package schema import ( + "fmt" "strings" "github.com/dolthub/vitess/go/vt/proto/query" @@ -214,6 +215,48 @@ func ArePrimaryKeySetsDiffable(format *types.NomsBinFormat, fromSch, toSch Schem return true } +// MapSchemaBasedOnTagAndName can be used to map column values from one schema +// to another schema. A primary key column in |inSch| is mapped to |outSch| if +// they share the same tag. A non-primary key column in |inSch| is mapped to +// |outSch| purely based on the name. It returns ordinal mappings that can be +// use to map key, value val.Tuple's of schema |inSch| to |outSch|. The first +// ordinal map is for keys, and the second is for values. If a column of |inSch| +// is missing in |outSch| then that column's index in the ordinal map holds -1. +func MapSchemaBasedOnTagAndName(inSch, outSch Schema) ([]int, []int, error) { + keyMapping := make([]int, inSch.GetPKCols().Size()) + valMapping := make([]int, inSch.GetNonPKCols().Size()) + + err := inSch.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) { + i := inSch.GetPKCols().TagToIdx[tag] + if col, ok := outSch.GetPKCols().GetByTag(tag); ok { + j := outSch.GetPKCols().TagToIdx[col.Tag] + keyMapping[i] = j + } else { + return true, fmt.Errorf("could not map primary key column %s", col.Name) + } + return false, nil + }) + if err != nil { + return nil, nil, err + } + + err = inSch.GetNonPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) { + i := inSch.GetNonPKCols().TagToIdx[col.Tag] + if col, ok := outSch.GetNonPKCols().GetByName(col.Name); ok { + j := outSch.GetNonPKCols().TagToIdx[col.Tag] + valMapping[i] = j + } else { + valMapping[i] = -1 + } + return false, nil + }) + if err != nil { + return nil, nil, err + } + + return keyMapping, valMapping, nil +} + var ErrUsingSpatialKey = errors.NewKind("can't use Spatial Types as Primary Key for table %s") // IsColSpatialType returns whether a column's type is a spatial type diff --git a/go/libraries/doltcore/schema/schema_test.go b/go/libraries/doltcore/schema/schema_test.go index bc0e96be15..d6d8814a10 100644 --- a/go/libraries/doltcore/schema/schema_test.go +++ b/go/libraries/doltcore/schema/schema_test.go @@ -209,6 +209,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) { From Schema To Schema Diffable bool + KeyMap []int }{ { Name: "Basic", @@ -217,14 +218,16 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) { To: MustSchemaFromCols(NewColCollection( NewColumn("pk", 0, types.IntKind, true))), Diffable: true, + KeyMap: []int{0}, }, { - Name: "Column renames", + Name: "PK-Column renames", From: MustSchemaFromCols(NewColCollection( NewColumn("pk", 1, types.IntKind, true))), To: MustSchemaFromCols(NewColCollection( NewColumn("pk2", 1, types.IntKind, true))), Diffable: true, + KeyMap: []int{0}, }, { Name: "Only pk ordering should matter for diffability", @@ -234,6 +237,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) { To: MustSchemaFromCols(NewColCollection( NewColumn("pk", 1, types.IntKind, true))), Diffable: true, + KeyMap: []int{0}, }, { Name: "Only pk ordering should matter for diffability - inverse", @@ -243,6 +247,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) { NewColumn("col1", 2, types.IntKind, false), NewColumn("pk", 1, types.IntKind, true))), Diffable: true, + KeyMap: []int{0}, }, { Name: "Only pk ordering should matter for diffability - compound", @@ -254,6 +259,7 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) { NewColumn("pk1", 0, types.IntKind, true), NewColumn("pk2", 2, types.IntKind, true))), Diffable: true, + KeyMap: []int{0, 1}, }, { Name: "Tag mismatches", @@ -279,6 +285,13 @@ func TestArePrimaryKeySetsDiffable(t *testing.T) { t.Run(test.Name, func(t *testing.T) { d := ArePrimaryKeySetsDiffable(types.Format_Default, test.From, test.To) require.Equal(t, test.Diffable, d) + + // If they are diffable then we should be able to map their schemas from one to another. + if d { + keyMap, _, err := MapSchemaBasedOnTagAndName(test.From, test.To) + require.NoError(t, err) + require.Equal(t, test.KeyMap, keyMap) + } }) } } diff --git a/go/libraries/doltcore/sqle/dtables/diff_table.go b/go/libraries/doltcore/sqle/dtables/diff_table.go index 1bc7e28412..7cd00dee7e 100644 --- a/go/libraries/doltcore/sqle/dtables/diff_table.go +++ b/go/libraries/doltcore/sqle/dtables/diff_table.go @@ -508,7 +508,7 @@ func (dp DiffPartition) rowConvForSchema(ctx context.Context, vrw types.ValueRea return rowconv.IdentityConverter, nil } - fm, err := rowconv.TagMappingByName(srcSch, targetSch) + fm, err := rowconv.TagMappingByTagAndName(srcSch, targetSch) if err != nil { return nil, err } diff --git a/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go b/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go index ee33ac4a84..a70d9adf8f 100644 --- a/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go +++ b/go/libraries/doltcore/sqle/dtables/prolly_row_conv.go @@ -19,7 +19,6 @@ import ( "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/dolt/go/libraries/doltcore/diff" "github.com/dolthub/dolt/go/libraries/doltcore/rowconv" "github.com/dolthub/dolt/go/libraries/doltcore/schema" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" @@ -43,7 +42,7 @@ type ProllyRowConverter struct { } func NewProllyRowConverter(inSch, outSch schema.Schema, warnFn rowconv.WarnFunction, ns tree.NodeStore) (ProllyRowConverter, error) { - keyProj, valProj, err := diff.MapSchemaBasedOnName(inSch, outSch) + keyProj, valProj, err := schema.MapSchemaBasedOnTagAndName(inSch, outSch) if err != nil { return ProllyRowConverter{}, err } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go index 5222eaab93..a7e1381c1f 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go @@ -3787,6 +3787,31 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{ }, }, }, + { + Name: "Diff table shows diffs across primary key renames", + SetUpScript: []string{ + "CREATE TABLE t (pk1 int PRIMARY KEY);", + "INSERT INTO t values (1);", + "CREATE table t2 (pk1a int, pk1b int, PRIMARY KEY (pk1a, pk1b));", + "INSERT INTO t2 values (2, 2);", + "CALL DOLT_COMMIT('-am', 'initial');", + + "ALTER TABLE t RENAME COLUMN pk1 to pk2", + "ALTER TABLE t2 RENAME COLUMN pk1a to pk2a", + "ALTER TABLE t2 RENAME COLUMN pk1b to pk2b", + "CALL DOLT_COMMIT('-am', 'rename primary key')", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "SELECT from_pk2, to_pk2, diff_type from dolt_diff_t;", + Expected: []sql.Row{{nil, 1, "added"}}, + }, + { + Query: "SELECT from_pk2a, from_pk2b, to_pk2a, to_pk2b, diff_type from dolt_diff_t2;", + Expected: []sql.Row{{nil, nil, 2, 2, "added"}}, + }, + }, + }, } var Dolt1DiffSystemTableScripts = []queries.ScriptTest{ @@ -4301,6 +4326,33 @@ var DiffTableFunctionScriptTests = []queries.ScriptTest{ }, }, }, + { + Name: "Renaming a primary key column shows PK values in both the to and from columns", + SetUpScript: []string{ + "CREATE TABLE t1 (pk int PRIMARY KEY, col1 int);", + "INSERT INTO t1 VALUES (1, 1);", + "CREATE TABLE t2 (pk1a int, pk1b int, col1 int, PRIMARY KEY (pk1a, pk1b));", + "INSERT INTO t2 VALUES (1, 1, 1);", + "CALL DOLT_COMMIT('-am', 'initial');", + + "ALTER TABLE t1 RENAME COLUMN pk to pk2;", + "UPDATE t1 set col1 = 100;", + "ALTER TABLE t2 RENAME COLUMN pk1a to pk2a;", + "ALTER TABLE t2 RENAME COLUMN pk1b to pk2b;", + "UPDATE t2 set col1 = 100;", + "CALL DOLT_COMMIT('-am', 'edit');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "select to_pk2, to_col1, from_pk, from_col1, diff_type from dolt_diff('t1', 'HEAD~', 'HEAD')", + Expected: []sql.Row{{1, 100, 1, 1, "modified"}}, + }, + { + Query: "select to_pk2a, to_pk2b, to_col1, from_pk1a, from_pk1b, from_col1, diff_type from dolt_diff('t2', 'HEAD~', 'HEAD');", + Expected: []sql.Row{{1, 1, 100, 1, 1, 1, "modified"}}, + }, + }, + }, } var LargeJsonObjectScriptTests = []queries.ScriptTest{ diff --git a/integration-tests/bats/diff.bats b/integration-tests/bats/diff.bats index 3a448115b0..9cab858053 100644 --- a/integration-tests/bats/diff.bats +++ b/integration-tests/bats/diff.bats @@ -991,3 +991,47 @@ EOF run dolt diff --limit [ "$status" -ne 0 ] } + +@test "diff: allowed across primary key renames" { + dolt sql < | NULL | 100 | 1 | ++---+------+------+------+ +EOF +) + [[ "$output" =~ "$EXPECTED_TABLE" ]] + + EXPECTED_TABLE=$(cat <<'EOF' ++---+------+------+------+------+------+ +| | pk1a | pk1b | col1 | pk2a | pk2b | ++---+------+------+------+------+------+ +| < | 1 | 1 | 1 | NULL | NULL | +| > | NULL | NULL | 100 | 1 | 1 | ++---+------+------+------+------+------+ +EOF +) + [[ "$output" =~ "$EXPECTED_TABLE" ]] +}