Merge pull request #11002 from dolthub/nicktobey/sqldiff

When `dolt diff -r sql` encounters a schema change, it should either print the full diff or return an error.
This commit is contained in:
Nick Tobey
2026-05-08 13:17:36 -07:00
committed by GitHub
4 changed files with 177 additions and 70 deletions
+86 -56
View File
@@ -35,6 +35,7 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/schema/typecompatibility"
"github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil"
"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/tabular"
@@ -927,6 +928,7 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs)
}
doltSchemasChanged := false
var hasdiffErrors bool
for _, delta := range deltas {
if doltdb.IsFullTextTable(delta.TableName.Name) {
continue
@@ -968,7 +970,8 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs)
} else {
verr := diffUserTable(queryist, sqlCtx, delta, dArgs, dw)
if verr != nil {
return verr
cli.PrintErrln(verr.Verbose())
hasdiffErrors = true
}
}
}
@@ -985,6 +988,11 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs)
return errhand.VerboseErrorFromError(err)
}
if hasdiffErrors {
errorBuilder := errhand.BuildDError("error: encountered errors during diff")
return errorBuilder.Build()
}
return nil
}
@@ -1503,45 +1511,53 @@ func diffDatabase(
return nil
}
// arePrimaryKeySetsDiffable checks if two schemas are diffable. Assumes the
// passed in schema are from the same table between commits.
func arePrimaryKeySetsDiffable(fromTableInfo, toTableInfo *diff.TableInfo) bool {
var fromSch schema.Schema = nil
var toSch schema.Schema = nil
if fromTableInfo != nil {
fromSch = fromTableInfo.Sch
}
if toTableInfo != nil {
toSch = toTableInfo.Sch
}
if fromSch == nil && toSch == nil {
return false
// Empty case
} else if fromSch == nil || fromSch.GetAllCols().Size() == 0 ||
// areValueSetsDiffable checks if it's possible to meaningfully diff the tuples stored in a table
// between two commits. This is possible for some schema changes but not others.
func areValueSetsDiffable(fromSch, toSch schema.Schema) bool {
if fromSch == nil || fromSch.GetAllCols().Size() == 0 ||
toSch == nil || toSch.GetAllCols().Size() == 0 {
// If one side doesn't have a schema, the diff is trivial.
return true
}
// Keyless case for comparing
if schema.IsKeyless(fromSch) && schema.IsKeyless(toSch) {
return true
fromCols := fromSch.GetNonPKCols()
toCols := toSch.GetNonPKCols()
var sharedCols int
if fromCols.Size() < toCols.Size() {
sharedCols = fromCols.Size()
for i := fromCols.Size(); i < toCols.Size(); i++ {
if toCols.GetColumns()[i].Default != "" {
// If a column with a default value was added, it is not possible to generate a data diff
// without running the schema change in a SQL engine.
return false
}
}
} else {
sharedCols = toCols.Size()
}
cc1 := fromSch.GetPKCols()
cc2 := toSch.GetPKCols()
compatChecker := typecompatibility.NewTypeCompatabilityChecker()
for i := 0; i < sharedCols; i++ {
fromCol := fromCols.GetByIndex(i)
toCol := toCols.GetByIndex(i)
// TODO: We might get more accurate results from calling MapSchemaBasedOnTagAndName
if fromCol.Name != toCol.Name && fromCol.Tag != toCol.Tag {
// If fromCol.Tag == toCol.Tag, this will display a rename.
// We ought to be able to generate a data diff here, but we currently panic if a row has changed a value in this column.
// TODO: Generate SQL data diffs for renamed columns.
if cc1.Size() != cc2.Size() {
return false
}
for i := 0; i < cc1.Size(); i++ {
c1 := cc1.GetByIndex(i)
c2 := cc2.GetByIndex(i)
if c1.IsPartOfPK != c2.IsPartOfPK {
// If fromCol.Tag == toCol.Tag, this will display a drop and an add.
// In this case, we would need to generate an update statement for every row where this new column is not NULL.
// We cannot currently do this for rows whose bytes have not changed.
return false
}
if !c1.TypeInfo.ToSqlType().Equals(c2.TypeInfo.ToSqlType()) {
typeChangeInfo := compatChecker.IsTypeChangeCompatible(fromCol.TypeInfo, toCol.TypeInfo)
binaryCompatible := typeChangeInfo.Compatible && !typeChangeInfo.RewriteRows
if !binaryCompatible {
// It is not possible to semantically compare the values of this column at these two commits.
// This means that it's possible for the column to have the same bytes at both commits, while not
// representing the same value. We do not have enough context to produce a correct data diff.
return false
}
}
@@ -1557,30 +1573,40 @@ func diffRows(
dArgs *diffArgs,
dw diffWriter,
) errhand.VerboseError {
diffable := arePrimaryKeySetsDiffable(fromTableInfo, toTableInfo)
canSqlDiff := !(toTableInfo == nil || (fromTableInfo != nil && !schema.SchemasAreEqual(fromTableInfo.Sch, toTableInfo.Sch)))
var toSch, fromSch sql.Schema
var fromSch schema.Schema = nil
var toSch schema.Schema = nil
if fromTableInfo != nil {
pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), fromTableInfo.Name, fromTableInfo.Sch)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
fromSch = pkSch.Schema
fromSch = fromTableInfo.Sch
}
if toTableInfo != nil {
pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), toTableInfo.Name, toTableInfo.Sch)
toSch = toTableInfo.Sch
}
var toSqlSch, fromSqlSch sql.Schema
if fromTableInfo != nil {
pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), fromTableInfo.Name, fromSch)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
toSch = pkSch.Schema
fromSqlSch = pkSch.Schema
}
if toTableInfo != nil {
pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), toTableInfo.Name, toSch)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
toSqlSch = pkSch.Schema
}
var unionSch sql.Schema
if fromSch.Equals(toSch) {
unionSch = fromSch
if dArgs.diffOutput == SQLDiffOutput {
// When generating SQL output, it doesn't make sense to generate a data diff for columns that no longer exist.
// Thus, we always use the schema on the target commit.
unionSch = toSqlSch
} else if fromSqlSch.Equals(toSqlSch) {
unionSch = fromSqlSch
} else {
unionSch = unionSchemas(fromSch, toSch)
unionSch = unionSchemas(fromSqlSch, toSqlSch)
}
// We always instantiate a RowWriter in case the diffWriter needs it to close off any work from schema output
@@ -1606,23 +1632,26 @@ func diffRows(
rowWriter = realWriter
}
// can't diff
if !diffable {
// TODO: this messes up some structured output if the user didn't redirect it
cli.PrintErrf("Primary key sets differ between revisions for table '%s', skipping data diff\n", tableSummary.ToTableName)
err = rowWriter.Close(sqlCtx)
if !schema.ArePrimaryKeySetsDiffable(fromSch, toSch) {
err := rowWriter.Close(sqlCtx)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
err = fmt.Errorf("Primary key sets differ between revisions for table '%s', skipping data diff", tableSummary.ToTableName)
// When outputting SQL, it's an error if we can't generate a data diff. Otherwise we write to stderr and continue.
if dArgs.diffOutput == SQLDiffOutput {
return errhand.VerboseErrorFromError(err)
}
cli.PrintErrln(err)
return nil
} else if dArgs.diffOutput == SQLDiffOutput && !canSqlDiff {
// TODO: this is overly broad, we can absolutely do better
_, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff for table '%s'\n", tableSummary.ToTableName)
err = rowWriter.Close(sqlCtx)
}
if dArgs.diffOutput == SQLDiffOutput && !areValueSetsDiffable(fromSch, toSch) {
err := rowWriter.Close(sqlCtx)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
return nil
return errhand.VerboseErrorFromError(fmt.Errorf("Incompatible schema change, skipping data diff for table '%s'", tableSummary.ToTableName))
}
// no data diff requested
@@ -1649,6 +1678,8 @@ func diffRows(
return nil
}
defer rowWriter.Close(sqlCtx)
columnNames, format := getColumnNames(fromTableInfo, toTableInfo)
query := fmt.Sprintf("select %s ? from dolt_diff(?, ?, ?)", format)
var params []interface{}
@@ -1680,7 +1711,6 @@ func diffRows(
}
defer rowIter.Close(sqlCtx)
defer rowWriter.Close(sqlCtx)
var modifiedColNames map[string]bool
if dArgs.skinny {
+82 -11
View File
@@ -1173,17 +1173,19 @@ SQL
dolt commit -m "hello"
run dolt diff main another-branch
echo $output
[ $status -eq 0 ]
! [[ "$output" =~ "panic" ]] || false
[[ "$output" =~ "pv1" ]] || false
[[ "$output" =~ "cv1" ]] || false
[ $status -eq 0 ]
[[ "$output" =~ "Primary key sets differ between revisions for table 'a', skipping data diff" ]] || false
run dolt diff main..another-branch
echo $output
[ $status -eq 0 ]
! [[ "$output" =~ "panic" ]] || false
[[ "$output" =~ "pv1" ]] || false
[[ "$output" =~ "cv1" ]] || false
[ $status -eq 0 ]
[[ "$output" =~ "Primary key sets differ between revisions for table 'a', skipping data diff" ]] || false
}
@test "diff: sql update queries only show changed columns" {
@@ -1361,9 +1363,8 @@ SQL
dolt commit -am "cm5"
dolt sql -q "alter table t add primary key (pk)"
dolt diff -r sql
run dolt diff -r sql
[ $status -eq 0 ]
[ $status -eq 1 ]
[ "${lines[0]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ]
[ "${lines[1]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk);' ]
[ "${lines[2]}" = "Primary key sets differ between revisions for table 't', skipping data diff" ]
@@ -1374,7 +1375,7 @@ SQL
dolt sql -q "alter table t drop primary key"
dolt sql -q "alter table t add primary key (pk, val)"
run dolt diff -r sql
[ $status -eq 0 ]
[ $status -eq 1 ]
[ "${lines[0]}" = 'ALTER TABLE `t` ADD `pk2` int;' ]
[ "${lines[1]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ]
[ "${lines[2]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk,val);' ]
@@ -1392,12 +1393,11 @@ SQL
dolt sql -q "alter table t add primary key (pk)"
run dolt diff -r sql
[ $status -eq 0 ]
[ $status -eq 1 ]
[ "${lines[0]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ]
[ "${lines[1]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk);' ]
[ "${lines[2]}" = "Primary key sets differ between revisions for table 't', skipping data diff" ]
dolt diff
run dolt diff
[ $status -eq 0 ]
[[ "$output" =~ '+ PRIMARY KEY (`pk`)' ]] || false
@@ -1408,13 +1408,11 @@ SQL
dolt sql -q "alter table t drop primary key"
dolt diff -r sql
run dolt diff -r sql
[ $status -eq 0 ]
[ $status -eq 1 ]
[ "${lines[0]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ]
[[ "$output" =~ "Primary key sets differ between revisions for table 't', skipping data diff" ]] || false
dolt diff
run dolt diff
[ $status -eq 0 ]
[[ "$output" =~ '- PRIMARY KEY (`pk`)' ]] || false
@@ -2350,8 +2348,9 @@ EOF
[[ $output = '' ]] || false
run dolt diff HEAD~1 -r sql --filter=modified
[ $status -eq 0 ]
[ $status -eq 1 ]
[ "${lines[0]}" = 'ALTER TABLE `t` MODIFY COLUMN `val2` varchar(255);' ]
[[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false
# Test filter with schema changes - rename column
dolt sql -q "ALTER TABLE t RENAME COLUMN val2 TO val3"
@@ -2465,3 +2464,75 @@ EOF
[ $status -eq 0 ]
[[ $output = '' ]] || false
}
@test "diff: -r sql returns error when data statements cannot be generated" {
dolt sql -q "create table t(pk int primary key, val1 int, val2 varchar(10))"
dolt add .
dolt sql -q "INSERT INTO t VALUES (1, 1, '1')"
dolt commit -am "cm1"
dolt branch old
dolt sql -q "UPDATE t SET val2 = '2'"
dolt add .
dolt commit -am "cm2"
dolt branch start
dolt checkout -b widen
dolt sql -q "alter table t modify column val2 varchar(20)"
dolt add .
dolt commit -am "widen"
run dolt diff -r sql old
[ $status -eq 0 ]
[[ "$output" =~ 'UPDATE `t` SET `val2`='"'2'"' WHERE `pk`=1;' ]] || false
dolt checkout start
dolt checkout -b narrow
dolt sql -q "alter table t modify column val2 varchar(5)"
dolt add .
dolt commit -am "narrow"
run dolt diff -r sql old
[ $status -eq 1 ]
[[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false
dolt checkout start
dolt checkout -b typechange
dolt sql -q "alter table t modify column val2 text"
dolt add .
dolt commit -am "typechange"
run dolt diff -r sql old
[ $status -eq 1 ]
[[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false
dolt checkout start
dolt checkout -b rename
dolt sql -q "alter table t rename column val2 to val3"
dolt add .
dolt commit -am "rename"
run dolt diff -r sql old
[ $status -eq 0 ]
[[ "$output" =~ 'UPDATE `t` SET `val3`='"'2'"' WHERE `pk`=1;' ]] || false
dolt checkout start
dolt checkout -b add
# A naive data diff would not know to emit the update statement.
dolt sql -q "alter table t add column val3 varchar(10) default '3'"
dolt sql -q "update t set val3 = NULL"
dolt add .
dolt commit -am "add"
run dolt diff -r sql old
[ $status -eq 1 ]
[[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false
dolt checkout start
# Dropping a column that isn't the last column leads to tuples that can't be diffed.
dolt checkout -b drop
dolt sql -q "alter table t drop column val1"
dolt add .
dolt commit -am "drop"
run dolt diff -r sql old
[ $status -eq 1 ]
[[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false
dolt checkout start
}
+1
View File
@@ -428,6 +428,7 @@ SQL
dolt diff -r json
run no_stderr dolt diff -r json
[ $status -eq 0 ]
echo "output"
[ "$output" = '{"tables":[{"name":"t","schema_diff":["ALTER TABLE `t` DROP PRIMARY KEY;","ALTER TABLE `t` ADD PRIMARY KEY (pk);"],"data_diff":[]}]}' ]
run no_stdout dolt diff -r json
+8 -3
View File
@@ -2,6 +2,8 @@
load $BATS_TEST_DIRNAME/helper/common.bash
load $BATS_TEST_DIRNAME/helper/sql-diff.bash
bats_require_minimum_version 1.5.0
setup() {
setup_common
}
@@ -479,10 +481,13 @@ SQL
# confirm a difference exists
run dolt diff -r sql firstbranch newbranch
[ "$status" -eq 0 ]
[ ! "$output" = "" ]
[ "$status" -eq 1 ]
[[ "$output" =~ "Incompatible schema change, skipping data diff for table 'newname'" ]] || false
dolt diff -r sql firstbranch newbranch > query
# capture only stdout and redirect to a file
run --separate-stderr dolt diff -r sql firstbranch newbranch
echo "$output" > query
cat query
grep 'RENAME' query
dolt checkout firstbranch
cat query