diff --git a/go/cmd/dolt/commands/diff.go b/go/cmd/dolt/commands/diff.go index 9ee51cea64..7038c078a9 100644 --- a/go/cmd/dolt/commands/diff.go +++ b/go/cmd/dolt/commands/diff.go @@ -35,6 +35,7 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" "github.com/dolthub/dolt/go/libraries/doltcore/env" "github.com/dolthub/dolt/go/libraries/doltcore/schema" + "github.com/dolthub/dolt/go/libraries/doltcore/schema/typecompatibility" "github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo" "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil" "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/tabular" @@ -927,6 +928,7 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs) } doltSchemasChanged := false + var hasdiffErrors bool for _, delta := range deltas { if doltdb.IsFullTextTable(delta.TableName.Name) { continue @@ -968,7 +970,8 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs) } else { verr := diffUserTable(queryist, sqlCtx, delta, dArgs, dw) if verr != nil { - return verr + cli.PrintErrln(verr.Verbose()) + hasdiffErrors = true } } } @@ -985,6 +988,11 @@ func diffUserTables(queryist cli.Queryist, sqlCtx *sql.Context, dArgs *diffArgs) return errhand.VerboseErrorFromError(err) } + if hasdiffErrors { + errorBuilder := errhand.BuildDError("error: encountered errors during diff") + return errorBuilder.Build() + } + return nil } @@ -1503,45 +1511,53 @@ func diffDatabase( return nil } -// arePrimaryKeySetsDiffable checks if two schemas are diffable. Assumes the -// passed in schema are from the same table between commits. -func arePrimaryKeySetsDiffable(fromTableInfo, toTableInfo *diff.TableInfo) bool { - var fromSch schema.Schema = nil - var toSch schema.Schema = nil - if fromTableInfo != nil { - fromSch = fromTableInfo.Sch - } - if toTableInfo != nil { - toSch = toTableInfo.Sch - } - - if fromSch == nil && toSch == nil { - return false - // Empty case - } else if fromSch == nil || fromSch.GetAllCols().Size() == 0 || +// areValueSetsDiffable checks if it's possible to meaningfully diff the tuples stored in a table +// between two commits. This is possible for some schema changes but not others. +func areValueSetsDiffable(fromSch, toSch schema.Schema) bool { + if fromSch == nil || fromSch.GetAllCols().Size() == 0 || toSch == nil || toSch.GetAllCols().Size() == 0 { + // If one side doesn't have a schema, the diff is trivial. return true } - // Keyless case for comparing - if schema.IsKeyless(fromSch) && schema.IsKeyless(toSch) { - return true + fromCols := fromSch.GetNonPKCols() + toCols := toSch.GetNonPKCols() + + var sharedCols int + if fromCols.Size() < toCols.Size() { + sharedCols = fromCols.Size() + for i := fromCols.Size(); i < toCols.Size(); i++ { + if toCols.GetColumns()[i].Default != "" { + // If a column with a default value was added, it is not possible to generate a data diff + // without running the schema change in a SQL engine. + return false + } + } + } else { + sharedCols = toCols.Size() } - cc1 := fromSch.GetPKCols() - cc2 := toSch.GetPKCols() + compatChecker := typecompatibility.NewTypeCompatabilityChecker() + for i := 0; i < sharedCols; i++ { + fromCol := fromCols.GetByIndex(i) + toCol := toCols.GetByIndex(i) + // TODO: We might get more accurate results from calling MapSchemaBasedOnTagAndName + if fromCol.Name != toCol.Name && fromCol.Tag != toCol.Tag { + // If fromCol.Tag == toCol.Tag, this will display a rename. + // We ought to be able to generate a data diff here, but we currently panic if a row has changed a value in this column. + // TODO: Generate SQL data diffs for renamed columns. - if cc1.Size() != cc2.Size() { - return false - } - - for i := 0; i < cc1.Size(); i++ { - c1 := cc1.GetByIndex(i) - c2 := cc2.GetByIndex(i) - if c1.IsPartOfPK != c2.IsPartOfPK { + // If fromCol.Tag == toCol.Tag, this will display a drop and an add. + // In this case, we would need to generate an update statement for every row where this new column is not NULL. + // We cannot currently do this for rows whose bytes have not changed. return false } - if !c1.TypeInfo.ToSqlType().Equals(c2.TypeInfo.ToSqlType()) { + typeChangeInfo := compatChecker.IsTypeChangeCompatible(fromCol.TypeInfo, toCol.TypeInfo) + binaryCompatible := typeChangeInfo.Compatible && !typeChangeInfo.RewriteRows + if !binaryCompatible { + // It is not possible to semantically compare the values of this column at these two commits. + // This means that it's possible for the column to have the same bytes at both commits, while not + // representing the same value. We do not have enough context to produce a correct data diff. return false } } @@ -1557,30 +1573,40 @@ func diffRows( dArgs *diffArgs, dw diffWriter, ) errhand.VerboseError { - diffable := arePrimaryKeySetsDiffable(fromTableInfo, toTableInfo) - canSqlDiff := !(toTableInfo == nil || (fromTableInfo != nil && !schema.SchemasAreEqual(fromTableInfo.Sch, toTableInfo.Sch))) - - var toSch, fromSch sql.Schema + var fromSch schema.Schema = nil + var toSch schema.Schema = nil if fromTableInfo != nil { - pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), fromTableInfo.Name, fromTableInfo.Sch) - if err != nil { - return errhand.VerboseErrorFromError(err) - } - fromSch = pkSch.Schema + fromSch = fromTableInfo.Sch } if toTableInfo != nil { - pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), toTableInfo.Name, toTableInfo.Sch) + toSch = toTableInfo.Sch + } + + var toSqlSch, fromSqlSch sql.Schema + if fromTableInfo != nil { + pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), fromTableInfo.Name, fromSch) if err != nil { return errhand.VerboseErrorFromError(err) } - toSch = pkSch.Schema + fromSqlSch = pkSch.Schema + } + if toTableInfo != nil { + pkSch, err := sqlutil.FromDoltSchema(sqlCtx, sqlCtx.GetCurrentDatabase(), toTableInfo.Name, toSch) + if err != nil { + return errhand.VerboseErrorFromError(err) + } + toSqlSch = pkSch.Schema } var unionSch sql.Schema - if fromSch.Equals(toSch) { - unionSch = fromSch + if dArgs.diffOutput == SQLDiffOutput { + // When generating SQL output, it doesn't make sense to generate a data diff for columns that no longer exist. + // Thus, we always use the schema on the target commit. + unionSch = toSqlSch + } else if fromSqlSch.Equals(toSqlSch) { + unionSch = fromSqlSch } else { - unionSch = unionSchemas(fromSch, toSch) + unionSch = unionSchemas(fromSqlSch, toSqlSch) } // We always instantiate a RowWriter in case the diffWriter needs it to close off any work from schema output @@ -1606,23 +1632,26 @@ func diffRows( rowWriter = realWriter } - // can't diff - if !diffable { - // TODO: this messes up some structured output if the user didn't redirect it - cli.PrintErrf("Primary key sets differ between revisions for table '%s', skipping data diff\n", tableSummary.ToTableName) - err = rowWriter.Close(sqlCtx) + if !schema.ArePrimaryKeySetsDiffable(fromSch, toSch) { + err := rowWriter.Close(sqlCtx) if err != nil { return errhand.VerboseErrorFromError(err) } + err = fmt.Errorf("Primary key sets differ between revisions for table '%s', skipping data diff", tableSummary.ToTableName) + // When outputting SQL, it's an error if we can't generate a data diff. Otherwise we write to stderr and continue. + if dArgs.diffOutput == SQLDiffOutput { + return errhand.VerboseErrorFromError(err) + } + cli.PrintErrln(err) return nil - } else if dArgs.diffOutput == SQLDiffOutput && !canSqlDiff { - // TODO: this is overly broad, we can absolutely do better - _, _ = fmt.Fprintf(cli.CliErr, "Incompatible schema change, skipping data diff for table '%s'\n", tableSummary.ToTableName) - err = rowWriter.Close(sqlCtx) + } + + if dArgs.diffOutput == SQLDiffOutput && !areValueSetsDiffable(fromSch, toSch) { + err := rowWriter.Close(sqlCtx) if err != nil { return errhand.VerboseErrorFromError(err) } - return nil + return errhand.VerboseErrorFromError(fmt.Errorf("Incompatible schema change, skipping data diff for table '%s'", tableSummary.ToTableName)) } // no data diff requested @@ -1649,6 +1678,8 @@ func diffRows( return nil } + defer rowWriter.Close(sqlCtx) + columnNames, format := getColumnNames(fromTableInfo, toTableInfo) query := fmt.Sprintf("select %s ? from dolt_diff(?, ?, ?)", format) var params []interface{} @@ -1680,7 +1711,6 @@ func diffRows( } defer rowIter.Close(sqlCtx) - defer rowWriter.Close(sqlCtx) var modifiedColNames map[string]bool if dArgs.skinny { diff --git a/integration-tests/bats/diff.bats b/integration-tests/bats/diff.bats index e920fd918b..5fa9008c45 100644 --- a/integration-tests/bats/diff.bats +++ b/integration-tests/bats/diff.bats @@ -1173,17 +1173,19 @@ SQL dolt commit -m "hello" run dolt diff main another-branch echo $output + [ $status -eq 0 ] ! [[ "$output" =~ "panic" ]] || false [[ "$output" =~ "pv1" ]] || false [[ "$output" =~ "cv1" ]] || false - [ $status -eq 0 ] + [[ "$output" =~ "Primary key sets differ between revisions for table 'a', skipping data diff" ]] || false run dolt diff main..another-branch echo $output + [ $status -eq 0 ] ! [[ "$output" =~ "panic" ]] || false [[ "$output" =~ "pv1" ]] || false [[ "$output" =~ "cv1" ]] || false - [ $status -eq 0 ] + [[ "$output" =~ "Primary key sets differ between revisions for table 'a', skipping data diff" ]] || false } @test "diff: sql update queries only show changed columns" { @@ -1361,9 +1363,8 @@ SQL dolt commit -am "cm5" dolt sql -q "alter table t add primary key (pk)" - dolt diff -r sql run dolt diff -r sql - [ $status -eq 0 ] + [ $status -eq 1 ] [ "${lines[0]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ] [ "${lines[1]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk);' ] [ "${lines[2]}" = "Primary key sets differ between revisions for table 't', skipping data diff" ] @@ -1374,7 +1375,7 @@ SQL dolt sql -q "alter table t drop primary key" dolt sql -q "alter table t add primary key (pk, val)" run dolt diff -r sql - [ $status -eq 0 ] + [ $status -eq 1 ] [ "${lines[0]}" = 'ALTER TABLE `t` ADD `pk2` int;' ] [ "${lines[1]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ] [ "${lines[2]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk,val);' ] @@ -1392,12 +1393,11 @@ SQL dolt sql -q "alter table t add primary key (pk)" run dolt diff -r sql - [ $status -eq 0 ] + [ $status -eq 1 ] [ "${lines[0]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ] [ "${lines[1]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk);' ] [ "${lines[2]}" = "Primary key sets differ between revisions for table 't', skipping data diff" ] - dolt diff run dolt diff [ $status -eq 0 ] [[ "$output" =~ '+ PRIMARY KEY (`pk`)' ]] || false @@ -1408,13 +1408,11 @@ SQL dolt sql -q "alter table t drop primary key" - dolt diff -r sql run dolt diff -r sql - [ $status -eq 0 ] + [ $status -eq 1 ] [ "${lines[0]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ] [[ "$output" =~ "Primary key sets differ between revisions for table 't', skipping data diff" ]] || false - dolt diff run dolt diff [ $status -eq 0 ] [[ "$output" =~ '- PRIMARY KEY (`pk`)' ]] || false @@ -2350,8 +2348,9 @@ EOF [[ $output = '' ]] || false run dolt diff HEAD~1 -r sql --filter=modified - [ $status -eq 0 ] + [ $status -eq 1 ] [ "${lines[0]}" = 'ALTER TABLE `t` MODIFY COLUMN `val2` varchar(255);' ] + [[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false # Test filter with schema changes - rename column dolt sql -q "ALTER TABLE t RENAME COLUMN val2 TO val3" @@ -2465,3 +2464,75 @@ EOF [ $status -eq 0 ] [[ $output = '' ]] || false } + +@test "diff: -r sql returns error when data statements cannot be generated" { + dolt sql -q "create table t(pk int primary key, val1 int, val2 varchar(10))" + dolt add . + dolt sql -q "INSERT INTO t VALUES (1, 1, '1')" + + dolt commit -am "cm1" + dolt branch old + + dolt sql -q "UPDATE t SET val2 = '2'" + dolt add . + dolt commit -am "cm2" + + dolt branch start + dolt checkout -b widen + dolt sql -q "alter table t modify column val2 varchar(20)" + dolt add . + dolt commit -am "widen" + run dolt diff -r sql old + [ $status -eq 0 ] + [[ "$output" =~ 'UPDATE `t` SET `val2`='"'2'"' WHERE `pk`=1;' ]] || false + dolt checkout start + + dolt checkout -b narrow + dolt sql -q "alter table t modify column val2 varchar(5)" + dolt add . + dolt commit -am "narrow" + run dolt diff -r sql old + [ $status -eq 1 ] + [[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false + dolt checkout start + + dolt checkout -b typechange + dolt sql -q "alter table t modify column val2 text" + dolt add . + dolt commit -am "typechange" + run dolt diff -r sql old + [ $status -eq 1 ] + [[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false + dolt checkout start + + dolt checkout -b rename + dolt sql -q "alter table t rename column val2 to val3" + dolt add . + dolt commit -am "rename" + run dolt diff -r sql old + [ $status -eq 0 ] + [[ "$output" =~ 'UPDATE `t` SET `val3`='"'2'"' WHERE `pk`=1;' ]] || false + dolt checkout start + + dolt checkout -b add + # A naive data diff would not know to emit the update statement. + dolt sql -q "alter table t add column val3 varchar(10) default '3'" + dolt sql -q "update t set val3 = NULL" + dolt add . + dolt commit -am "add" + run dolt diff -r sql old + [ $status -eq 1 ] + [[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false + dolt checkout start + + # Dropping a column that isn't the last column leads to tuples that can't be diffed. + dolt checkout -b drop + dolt sql -q "alter table t drop column val1" + dolt add . + dolt commit -am "drop" + run dolt diff -r sql old + [ $status -eq 1 ] + [[ "$output" =~ "Incompatible schema change, skipping data diff for table 't'" ]] || false + dolt checkout start + +} \ No newline at end of file diff --git a/integration-tests/bats/json-diff.bats b/integration-tests/bats/json-diff.bats index 59bac16e92..6f8b1a706f 100644 --- a/integration-tests/bats/json-diff.bats +++ b/integration-tests/bats/json-diff.bats @@ -428,6 +428,7 @@ SQL dolt diff -r json run no_stderr dolt diff -r json [ $status -eq 0 ] + echo "output" [ "$output" = '{"tables":[{"name":"t","schema_diff":["ALTER TABLE `t` DROP PRIMARY KEY;","ALTER TABLE `t` ADD PRIMARY KEY (pk);"],"data_diff":[]}]}' ] run no_stdout dolt diff -r json diff --git a/integration-tests/bats/sql-diff.bats b/integration-tests/bats/sql-diff.bats index ede7a1a5b9..4ae3309ebb 100644 --- a/integration-tests/bats/sql-diff.bats +++ b/integration-tests/bats/sql-diff.bats @@ -2,6 +2,8 @@ load $BATS_TEST_DIRNAME/helper/common.bash load $BATS_TEST_DIRNAME/helper/sql-diff.bash +bats_require_minimum_version 1.5.0 + setup() { setup_common } @@ -479,10 +481,13 @@ SQL # confirm a difference exists run dolt diff -r sql firstbranch newbranch - [ "$status" -eq 0 ] - [ ! "$output" = "" ] + [ "$status" -eq 1 ] + [[ "$output" =~ "Incompatible schema change, skipping data diff for table 'newname'" ]] || false - dolt diff -r sql firstbranch newbranch > query + # capture only stdout and redirect to a file + run --separate-stderr dolt diff -r sql firstbranch newbranch + echo "$output" > query + cat query grep 'RENAME' query dolt checkout firstbranch cat query