Merge pull request #8631 from dolthub/macneale4/alter-pk-diff

Give a little more information in dolt_diff_* when there is a pk change
This commit is contained in:
Neil Macneale IV
2024-12-17 13:55:25 -08:00
committed by GitHub
5 changed files with 120 additions and 43 deletions

View File

@@ -216,7 +216,7 @@ func (dt *CommitDiffTable) LookupPartitions(ctx *sql.Context, i sql.IndexLookup)
fromSch: dt.targetSchema,
}
isDiffable, err := dp.isDiffablePartition(ctx)
isDiffable, _, err := dp.isDiffablePartition(ctx)
if err != nil {
return nil, err
}

View File

@@ -684,31 +684,42 @@ func (dp DiffPartition) GetRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner
// isDiffablePartition checks if the commit pair for this partition is "diffable".
// If the primary key sets changed between the two commits, it may not be
// possible to diff them.
func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (bool, error) {
// possible to diff them. We return two bools: simpleDiff is true if the primary key sets are close enough that we
// can confidently merge the diff (using schema.ArePrimaryKeySetsDiffable). fuzzyDiff is true if the primary key
// sets are not close enough to merge the diff, but we can still make an approximate comparison (using schema.MapSchemaBasedOnTagAndName).
// The two bools are never both true; both are false when the partition cannot be diffed at all.
func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (simpleDiff bool, fuzzyDiff bool, err error) {
// dp.to is nil when a table has been deleted previously. In this case, we return
// false, to stop processing diffs, since that previously deleted table is considered
// a logically different table and we don't want to mix the diffs together.
if dp.to == nil {
return false, nil
return false, false, nil
}
// dp.from is nil when the to commit created a new table
if dp.from == nil {
return true, nil
return true, false, nil
}
fromSch, err := dp.from.GetSchema(ctx)
if err != nil {
return false, err
return false, false, err
}
toSch, err := dp.to.GetSchema(ctx)
if err != nil {
return false, err
return false, false, err
}
return schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch), nil
easyDiff := schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch)
if easyDiff {
return true, false, nil
}
_, _, err = schema.MapSchemaBasedOnTagAndName(fromSch, toSch)
if err == nil {
return false, true, nil
}
return false, false, nil
}
type partitionSelectFunc func(*sql.Context, DiffPartition) (bool, error)
@@ -762,6 +773,7 @@ type DiffPartitions struct {
selectFunc partitionSelectFunc
toSch schema.Schema
fromSch schema.Schema
stopNext bool
}
// processCommit is called in a commit iteration loop. Adds partitions when it finds a commit and its parent that have
@@ -821,6 +833,10 @@ func (dps *DiffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm
}
func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) {
if dps.stopNext {
return nil, io.EOF
}
for {
cmHash, optCmt, err := dps.cmItr.Next(ctx)
if err != nil {
@@ -852,16 +868,21 @@ func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) {
if next != nil {
// If we can't diff this commit with its parent, don't traverse any lower
canDiff, err := next.isDiffablePartition(ctx)
simpleDiff, fuzzyDiff, err := next.isDiffablePartition(ctx)
if err != nil {
return nil, err
}
if !canDiff {
if !simpleDiff && !fuzzyDiff {
ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName))
return nil, io.EOF
}
if !simpleDiff && fuzzyDiff {
ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName))
dps.stopNext = true
}
return *next, nil
}
}

View File

@@ -115,10 +115,14 @@ func (c ProllyRowConverter) putFields(ctx context.Context, tup val.Tuple, proj v
virtualOffset := 0
for i, j := range proj {
if j == -1 {
// Skip over virtual columns in non-pk cols as they are not stored
if !isPk && c.inSchema.GetNonPKCols().GetByIndex(i).Virtual {
virtualOffset++
nonPkCols := c.inSchema.GetNonPKCols()
if len(nonPkCols.GetColumns()) > i {
// Skip over virtual columns in non-pk cols as they are not stored
if !isPk && nonPkCols.GetByIndex(i).Virtual {
virtualOffset++
}
}
continue
}

View File

@@ -528,7 +528,7 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{
},
{
Query: "SELECT COUNT(*) FROM DOLT_DIFF_t;",
Expected: []sql.Row{{1}},
Expected: []sql.Row{{7}},
},
{
Query: "SELECT to_pk, to_c1, from_pk, from_c1, diff_type FROM DOLT_DIFF_t where to_commit=@Commit4;",
@@ -536,6 +536,36 @@ var DiffSystemTableScriptTests = []queries.ScriptTest{
},
},
},
{
// Similar to previous test, but with one row to avoid ordering issues.
Name: "altered keyless table add pk", // https://github.com/dolthub/dolt/issues/8625
SetUpScript: []string{
"create table tbl (i int, j int);",
"insert into tbl values (42, 23);",
"call dolt_commit('-Am', 'commit1');",
"alter table tbl add primary key(i);",
"call dolt_commit('-am', 'commit2');",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT to_i,to_j,from_i,from_j,diff_type FROM dolt_diff_tbl;",
// Output in this situation is admittedly wonky. Updating the PK leaves us in a place where we can't really render
// the diff, but we want to show something. In this case, the 'pk' column tag changes, so in the last two rows
// of the output you see we add "nil,23" and remove "nil,23" when in fact those columns were "42" with a different
// tag.
//
// In the past we just returned an empty set in this case. The
// warning is kind of essential to understand what is happening.
Expected: []sql.Row{
{42, 23, nil, nil, "added"},
{nil, nil, nil, 23, "removed"},
},
ExpectedWarningsCount: 1,
ExpectedWarning: 1105,
ExpectedWarningMessageSubstring: "due to primary key set change",
},
},
},
{
Name: "table with commit column should maintain its data in diff",
SetUpScript: []string{
@@ -713,8 +743,10 @@ var Dolt1DiffSystemTableScripts = []queries.ScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;",
Expected: []sql.Row{{"2", "2", nil, nil, "added"}},
Query: "SELECT to_pk1, to_pk2, from_pk1, from_pk2, diff_type from dolt_diff_t;",
Expected: []sql.Row{
{"2", "2", nil, nil, "added"},
},
},
},
},
@@ -5298,6 +5330,7 @@ var CommitDiffSystemTableScriptTests = []queries.ScriptTest{
},
},
},
{
Name: "added and dropped table",
SetUpScript: []string{

View File

@@ -591,7 +591,10 @@ SQL
done
}
@test "sql-diff: supports multiple primary keys" {
run_2pk5col_ints() {
local query_name=$1
# Initial setup
dolt checkout -b firstbranch
dolt sql <<SQL
CREATE TABLE test (
@@ -605,41 +608,57 @@ CREATE TABLE test (
PRIMARY KEY (pk1,pk2)
);
SQL
dolt table import -u test `batshelper 2pk5col-ints.csv`
dolt table import -u test $(batshelper 2pk5col-ints.csv)
dolt add .
dolt commit -m "create/init table test"
# for each query file in helper/queries/2pk5col-ints/
# run query on db, create sql diff patch, confirm they're equivalent
dolt branch newbranch
for query in delete add update single_pk_update all_pk_update create_table
do
dolt checkout newbranch
dolt sql < $BATS_TEST_DIRNAME/helper/queries/2pk5col-ints/$query.sql
dolt add .
dolt diff -r sql
dolt commit -m "applied $query query "
# confirm a difference exists
# Apply the query
dolt checkout newbranch
dolt sql < "$BATS_TEST_DIRNAME/helper/queries/2pk5col-ints/${query_name}.sql"
dolt add .
dolt diff -r sql
dolt commit -m "applied ${query_name} query"
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ ! "$output" = "" ]
# Confirm a difference exists
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ ! "$output" = "" ]
dolt diff -r sql firstbranch > patch.sql newbranch
dolt checkout firstbranch
dolt sql < patch.sql
rm patch.sql
dolt add .
dolt commit -m "Reconciled with newbranch"
# Generate patch, apply on firstbranch, and verify no differences
dolt diff -r sql firstbranch > patch.sql newbranch
dolt checkout firstbranch
dolt sql < patch.sql
rm patch.sql
dolt add .
dolt commit -m "Reconciled with newbranch"
# confirm that both branches have the same content
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ "$output" = "" ]
done
# Confirm branches are identical
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ "$output" = "" ]
}
@test "sql-diff: supports multiple primary keys (delete)" {
run_2pk5col_ints "delete"
}
@test "sql-diff: supports multiple primary keys (add)" {
run_2pk5col_ints "add"
}
@test "sql-diff: supports multiple primary keys (update)" {
run_2pk5col_ints "update"
}
@test "sql-diff: supports multiple primary keys (single_pk_update)" {
run_2pk5col_ints "single_pk_update"
}
@test "sql-diff: supports multiple primary keys (all_pk_update)" {
run_2pk5col_ints "all_pk_update"
}
@test "sql-diff: supports multiple primary keys (create_table)" {
run_2pk5col_ints "create_table"
}
@test "sql-diff: escapes values for MySQL string literals" {
# https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
dolt sql <<SQL