Unblock Keyless SQL Diffs and Minimize SQL Diffs (#2205)

1. Unblock the ability to get SQL diffs on keyless tables
2. Minimizes the update statements in a SQL diff
This commit is contained in:
Vinai Rachakonda
2021-10-06 09:44:47 -07:00
committed by GitHub
parent 14e70eb2a2
commit 08eb50be97
6 changed files with 170 additions and 19 deletions

View File

@@ -390,17 +390,6 @@ func diffUserTables(ctx context.Context, fromRoot, toRoot *doltdb.RootValue, dAr
return strings.Compare(tableDeltas[i].ToName, tableDeltas[j].ToName) < 0
})
for _, td := range tableDeltas {
if dArgs.diffOutput == SQLDiffOutput {
ok, err := td.IsKeyless(ctx)
if err != nil {
return errhand.VerboseErrorFromError(err)
}
if ok {
// todo: implement keyless SQL diff
continue
}
}
if !dArgs.tableSet.Contains(td.FromName) && !dArgs.tableSet.Contains(td.ToName) {
continue
}
@@ -613,10 +602,23 @@ func sqlSchemaDiff(ctx context.Context, td diff.TableDelta, toSchemas map[string
case diff.SchDiffRemoved:
cli.Print(sqlfmt.AlterTableDropColStmt(td.ToName, cd.Old.Name))
case diff.SchDiffModified:
// Ignore any primary key set changes here
if cd.Old.IsPartOfPK != cd.New.IsPartOfPK {
continue
}
cli.Print(sqlfmt.AlterTableRenameColStmt(td.ToName, cd.Old.Name, cd.New.Name))
}
}
// Print changes between a primary key set change. It contains an ALTER TABLE DROP and an ALTER TABLE ADD
if !schema.ColCollsAreEqual(fromSch.GetPKCols(), toSch.GetPKCols()) {
cli.Println(sqlfmt.AlterTableDropPks(td.ToName))
if toSch.GetPKCols().Size() > 0 {
cli.Println(sqlfmt.AlterTableAddPrimaryKeys(td.ToName, toSch.GetPKCols()))
}
}
for _, idxDiff := range diff.DiffSchIndexes(fromSch, toSch) {
switch idxDiff.DiffType {
case diff.SchDiffNone:

View File

@@ -18,6 +18,8 @@ import (
"errors"
"io"
"github.com/dolthub/dolt/go/libraries/utils/set"
"github.com/dolthub/dolt/go/libraries/doltcore/row"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlfmt"
@@ -95,8 +97,15 @@ func (sds *SQLDiffSink) ProcRowWithProps(r row.Row, props pipeline.ReadableMap)
case DiffModifiedOld:
return nil
case DiffModifiedNew:
// TODO: minimize update statement to modified rows
stmt, err := sqlfmt.RowAsUpdateStmt(r, sds.tableName, sds.sch)
// Pass in the update as a setStr
keys := make([]string, len(colDiffs))
i := 0
for k := range colDiffs {
keys[i] = k
i++
}
stmt, err := sqlfmt.RowAsUpdateStmt(r, sds.tableName, sds.sch, set.NewStrSet(keys))
if err != nil {
return err

View File

@@ -24,6 +24,7 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/row"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo"
"github.com/dolthub/dolt/go/libraries/utils/set"
"github.com/dolthub/dolt/go/store/types"
)
@@ -94,9 +95,10 @@ func RowAsDeleteStmt(r row.Row, tableName string, tableSch schema.Schema) (strin
b.WriteString(" WHERE (")
seenOne := false
isKeyless := tableSch.GetPKCols().Size() == 0
_, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) {
col, _ := tableSch.GetAllCols().GetByTag(tag)
if col.IsPartOfPK {
if col.IsPartOfPK || isKeyless {
if seenOne {
b.WriteString(" AND ")
}
@@ -120,7 +122,7 @@ func RowAsDeleteStmt(r row.Row, tableName string, tableSch schema.Schema) (strin
return b.String(), nil
}
func RowAsUpdateStmt(r row.Row, tableName string, tableSch schema.Schema) (string, error) {
func RowAsUpdateStmt(r row.Row, tableName string, tableSch schema.Schema, colDiffs *set.StrSet) (string, error) {
var b strings.Builder
b.WriteString("UPDATE ")
b.WriteString(QuoteIdentifier(tableName))
@@ -130,7 +132,8 @@ func RowAsUpdateStmt(r row.Row, tableName string, tableSch schema.Schema) (strin
seenOne := false
_, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) {
col, _ := tableSch.GetAllCols().GetByTag(tag)
if !col.IsPartOfPK {
exists := colDiffs.Contains(col.Name)
if !col.IsPartOfPK && exists {
if seenOne {
b.WriteRune(',')
}

View File

@@ -25,6 +25,7 @@ import (
"github.com/dolthub/dolt/go/libraries/doltcore/row"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
"github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo"
"github.com/dolthub/dolt/go/libraries/utils/set"
"github.com/dolthub/dolt/go/store/types"
)
@@ -42,6 +43,14 @@ type test struct {
expectedOutput string
}
type updateTest struct {
name string
row row.Row
sch schema.Schema
expectedOutput string
collDiff *set.StrSet
}
func TestTableDropStmt(t *testing.T) {
stmt := DropTableStmt("table_name")
@@ -154,24 +163,34 @@ func TestRowAsUpdateStmt(t *testing.T) {
id := uuid.MustParse("00000000-0000-0000-0000-000000000000")
tableName := "people"
tests := []test{
tests := []updateTest{
{
name: "simple row",
row: dtestutils.NewTypedRow(id, "some guy", 100, false, strPointer("normie")),
sch: dtestutils.TypedSchema,
expectedOutput: "UPDATE `people` SET `name`='some guy',`age`=100,`is_married`=FALSE,`title`='normie' WHERE (`id`='00000000-0000-0000-0000-000000000000');",
collDiff: set.NewStrSet([]string{"name", "age", "is_married", "title"}),
},
{
name: "embedded quotes",
row: dtestutils.NewTypedRow(id, `It's "Mister Perfect" to you`, 100, false, strPointer("normie")),
sch: dtestutils.TypedSchema,
expectedOutput: "UPDATE `people` SET `name`='It\\'s \\\"Mister Perfect\\\" to you',`age`=100,`is_married`=FALSE,`title`='normie' WHERE (`id`='00000000-0000-0000-0000-000000000000');",
collDiff: set.NewStrSet([]string{"name", "age", "is_married", "title"}),
},
{
name: "null values",
row: dtestutils.NewTypedRow(id, "some guy", 100, false, nil),
sch: dtestutils.TypedSchema,
expectedOutput: "UPDATE `people` SET `name`='some guy',`age`=100,`is_married`=FALSE,`title`=NULL WHERE (`id`='00000000-0000-0000-0000-000000000000');",
collDiff: set.NewStrSet([]string{"name", "age", "is_married", "title"}),
},
{
name: "partial update",
row: dtestutils.NewTypedRow(id, "some guy", 100, false, nil),
sch: dtestutils.TypedSchema,
expectedOutput: "UPDATE `people` SET `name`='some guy' WHERE (`id`='00000000-0000-0000-0000-000000000000');",
collDiff: set.NewStrSet([]string{"name"}),
},
}
@@ -180,16 +199,17 @@ func TestRowAsUpdateStmt(t *testing.T) {
schema.NewColumn("anotherColumn", 1, types.IntKind, true),
)
tests = append(tests, test{
tests = append(tests, updateTest{
name: "negative values and columns with spaces",
row: dtestutils.NewRow(trickySch, types.Float(-3.14), types.Int(-42)),
sch: trickySch,
expectedOutput: "UPDATE `people` SET `a name with spaces`=-3.14 WHERE (`anotherColumn`=-42);",
collDiff: set.NewStrSet([]string{"a name with spaces"}),
})
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
stmt, err := RowAsUpdateStmt(tt.row, tableName, tt.sch)
stmt, err := RowAsUpdateStmt(tt.row, tableName, tt.sch, tt.collDiff)
assert.NoError(t, err)
assert.Equal(t, tt.expectedOutput, stmt)
})

View File

@@ -188,6 +188,33 @@ func AlterTableRenameColStmt(tableName string, oldColName string, newColName str
return b.String()
}
func AlterTableDropPks(tableName string) string {
var b strings.Builder
b.WriteString("ALTER TABLE ")
b.WriteString(QuoteIdentifier(tableName))
b.WriteString(" DROP PRIMARY KEY")
b.WriteRune(';')
return b.String()
}
func AlterTableAddPrimaryKeys(tableName string, pks *schema.ColCollection) string {
var b strings.Builder
b.WriteString("ALTER TABLE ")
b.WriteString(QuoteIdentifier(tableName))
b.WriteString(" ADD PRIMARY KEY (")
for i := 0; i < pks.Size(); i++ {
if i == 0 {
b.WriteString(pks.GetAtIndex(i).Name)
} else {
b.WriteString("," + pks.GetAtIndex(i).Name)
}
}
b.WriteRune(')')
b.WriteRune(';')
return b.String()
}
func RenameTableStmt(fromName string, toName string) string {
var b strings.Builder
b.WriteString("RENAME TABLE ")

View File

@@ -489,3 +489,93 @@ SQL
[[ "$output" =~ "cv1" ]] || false
[ $status -eq 0 ]
}
@test "diff: sql update queries only show changed columns" {
dolt sql -q "create table t(pk int primary key, val1 int, val2 int)"
dolt sql -q "INSERT INTO t VALUES (1, 1, 1)"
dolt commit -am "cm1"
dolt sql -q "UPDATE t SET val1=2 where pk=1"
run dolt diff -r sql
[ $status -eq 0 ]
[[ "$output" = 'UPDATE `t` SET `val1`=2 WHERE (`pk`=1);' ]] || false
dolt commit -am "cm2"
dolt sql -q "UPDATE t SET val1=3, val2=4 where pk = 1"
run dolt diff -r sql
[ $status -eq 0 ]
[[ "$output" = 'UPDATE `t` SET `val1`=3,`val2`=4 WHERE (`pk`=1);' ]] || false
dolt commit -am "cm3"
dolt sql -q "UPDATE t SET val1=3 where (pk=1);"
run dolt diff -r sql
[ $status -eq 0 ]
[[ "$output" = '' ]] || false
dolt sql -q "alter table t add val3 int"
dolt commit -am "cm4"
dolt sql -q "update t set val1=30,val3=4 where pk=1"
run dolt diff -r sql
[ $status -eq 0 ]
[[ "$output" = 'UPDATE `t` SET `val1`=30,`val3`=4 WHERE (`pk`=1);' ]] || false
}
@test "diff: run through some keyless sql diffs" {
dolt sql -q "create table t(pk int, val int)"
dolt commit -am "cm1"
dolt sql -q "INSERT INTO t values (1, 1)"
run dolt diff -r sql
[ $status -eq 0 ]
[[ "$output" = 'INSERT INTO `t` (`pk`,`val`) VALUES (1,1);' ]] || false
dolt commit -am "cm2"
dolt sql -q "INSERT INTO t values (1, 1)"
run dolt diff -r sql
[ $status -eq 0 ]
[[ "$output" = 'INSERT INTO `t` (`pk`,`val`) VALUES (1,1);' ]] || false
dolt commit -am "cm3"
dolt sql -q "UPDATE t SET val = 2 where pk = 1"
run dolt diff -r sql
[ $status -eq 0 ]
[ "${lines[0]}" = 'DELETE FROM `t` WHERE (`pk`=1 AND `val`=1);' ]
[ "${lines[1]}" = 'DELETE FROM `t` WHERE (`pk`=1 AND `val`=1);' ]
[ "${lines[2]}" = 'INSERT INTO `t` (`pk`,`val`) VALUES (1,2);' ]
[ "${lines[3]}" = 'INSERT INTO `t` (`pk`,`val`) VALUES (1,2);' ]
dolt commit -am "cm4"
dolt sql -q "DELETE FROM t WHERE val < 3"
run dolt diff -r sql
[ $status -eq 0 ]
[ "${lines[0]}" = 'DELETE FROM `t` WHERE (`pk`=1 AND `val`=2);' ]
[ "${lines[1]}" = 'DELETE FROM `t` WHERE (`pk`=1 AND `val`=2);' ]
dolt commit -am "cm5"
dolt sql -q "alter table t add primary key (pk)"
run dolt diff -r sql
[ $status -eq 0 ]
[ "${lines[0]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ]
[ "${lines[1]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk);' ]
[ "${lines[2]}" = 'warning: skipping data diff due to primary key set change' ]
dolt commit -am "cm6"
dolt sql -q "alter table t add column pk2 int"
dolt sql -q "alter table t drop primary key"
dolt sql -q "alter table t add primary key (pk, val)"
run dolt diff -r sql
[ $status -eq 0 ]
[ "${lines[0]}" = 'ALTER TABLE `t` ADD `pk2` INT;' ]
[ "${lines[1]}" = 'ALTER TABLE `t` DROP PRIMARY KEY;' ]
[ "${lines[2]}" = 'ALTER TABLE `t` ADD PRIMARY KEY (pk,val);' ]
[ "${lines[3]}" = 'warning: skipping data diff due to primary key set change' ]
}