Block merges that corrupt indexes

Today, many merges can leave indexes in a corrupted state. This PR blocks those merges as a short-term fix.
In the long term we will make these merges automatic.
This commit is contained in:
Dhruv Sringari
2022-12-21 12:06:07 -08:00
parent d0c6b85b90
commit befb373612
6 changed files with 218 additions and 187 deletions
+82
View File
@@ -30,6 +30,7 @@ import (
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly/tree"
"github.com/dolthub/dolt/go/store/types"
"github.com/dolthub/go-mysql-server/sql"
)
type MergeOpts struct {
@@ -129,6 +130,13 @@ func (rm *RootMerger) MergeTable(ctx context.Context, tblName string, opts edito
return nil, nil, fmt.Errorf("%w.\n%s", ErrSchemaConflict, schConflicts.AsError().Error())
}
if types.IsFormat_DOLT(tm.vrw.Format()) {
err = rm.maybeAbortDueToUnmergeableIndexes(tm.name, tm.leftSch, tm.rightSch, mergeSch)
if err != nil {
return nil, nil, err
}
}
mergeTbl, err := tm.leftTbl.UpdateSchema(ctx, mergeSch)
if err != nil {
return nil, nil, err
@@ -353,6 +361,80 @@ func (rm *RootMerger) maybeShortCircuit(ctx context.Context, tm TableMerger, opt
return nil, nil, nil
}
func (rm *RootMerger) maybeAbortDueToUnmergeableIndexes(tableName string, leftSchema, rightSchema, targetSchema schema.Schema) error {
leftOk, err := validateTupleFields(leftSchema, targetSchema)
if err != nil {
return err
}
rightOk, err := validateTupleFields(rightSchema, targetSchema)
if err != nil {
return err
}
if !leftOk || !rightOk {
return fmt.Errorf("table %s can't be automatically merged.\nTo merge this table, make the schema on the source and target branch equal.", tableName)
}
return nil
}
func validateTupleFields(existingSch schema.Schema, targetSch schema.Schema) (bool, error) {
existingVD := existingSch.GetValueDescriptor()
targetVD := targetSch.GetValueDescriptor()
_, valMapping, err := schema.MapSchemaBasedOnTagAndName(existingSch, targetSch)
if err != nil {
return false, err
}
for i, j := range valMapping {
// If the field positions have changed between existing and target, bail.
if i != j {
return false, nil
}
// If the field types have changed between existing and target, bail.
if existingVD.Types[i].Enc != targetVD.Types[j].Enc {
return false, nil
}
// If a not null constraint was added, bail.
if existingVD.Types[j].Nullable && !targetVD.Types[j].Nullable {
return false, nil
}
// If the collation was changed, bail.
// Different collations will affect the ordering of any secondary indexes using this column.
existingStr, ok1 := existingSch.GetNonPKCols().GetByIndex(i).TypeInfo.ToSqlType().(sql.StringType)
targetStr, ok2 := targetSch.GetNonPKCols().GetByIndex(i).TypeInfo.ToSqlType().(sql.StringType)
if ok1 && ok2 && !existingStr.Collation().Equals(targetStr.Collation()) {
return false, nil
}
}
_, valMapping, err = schema.MapSchemaBasedOnTagAndName(targetSch, existingSch)
if err != nil {
return false, err
}
for i, j := range valMapping {
if i == j {
continue
}
// If we haven't bailed so far, then these fields were added at the end.
// If they are not-null bail.
if !targetVD.Types[i].Nullable {
return false, nil
}
}
return true, nil
}
func setConflicts(ctx context.Context, cons durable.ConflictIndex, tbl, mergeTbl, ancTbl, tableToUpdate *doltdb.Table) (*doltdb.Table, error) {
ancSch, err := ancTbl.GetSchema(ctx)
if err != nil {
@@ -68,6 +68,7 @@ type mergeSchemaTest struct {
name string
setup []testCommand
sch schema.Schema
skip bool
}
type mergeSchemaConflictTest struct {
@@ -131,6 +132,7 @@ var mergeSchemaTests = []mergeSchemaTest{
newColTypeInfo("c9", uint64(4508), typeinfo.Int32Type, false)),
schema.NewIndex("c1_idx", []uint64{8201}, []uint64{8201, 3228}, nil, schema.IndexProperties{IsUserDefined: true}),
),
skip: true,
},
{
name: "add constraint, drop constraint, merge",
@@ -152,6 +154,7 @@ var mergeSchemaTests = []mergeSchemaTest{
newColTypeInfo("c3", uint64(4696), typeinfo.Int32Type, false)),
schema.NewIndex("c1_idx", []uint64{8201}, []uint64{8201, 3228}, nil, schema.IndexProperties{IsUserDefined: true}),
),
skip: true,
},
{
name: "add index, drop index, merge",
@@ -186,6 +189,7 @@ var mergeSchemaTests = []mergeSchemaTest{
{commands.CommitCmd{}, []string{"-m", "modified branch other"}},
{commands.CheckoutCmd{}, []string{env.DefaultInitBranch}},
},
// hmmm, we created new columns with a rename?
sch: schemaFromColsAndIdxs(
colCollection(
newColTypeInfo("pk", uint64(3228), typeinfo.Int32Type, true, schema.NotNullConstraint{}),
@@ -194,6 +198,7 @@ var mergeSchemaTests = []mergeSchemaTest{
newColTypeInfo("c33", uint64(4696), typeinfo.Int32Type, false)),
schema.NewIndex("c1_idx", []uint64{8201}, []uint64{8201, 3228}, nil, schema.IndexProperties{IsUserDefined: true}),
),
skip: true,
},
{
name: "rename indexes",
@@ -546,6 +551,11 @@ func fkCollection(fks ...doltdb.ForeignKey) *doltdb.ForeignKeyCollection {
}
func testMergeSchemas(t *testing.T, test mergeSchemaTest) {
if test.skip {
t.Skip()
return
}
dEnv := dtestutils.CreateTestEnv()
ctx := context.Background()
@@ -15,6 +15,8 @@
package enginetest
import (
"fmt"
"github.com/dolthub/go-mysql-server/enginetest/queries"
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/plan"
@@ -3049,6 +3051,8 @@ var DoltVerifyConstraintsTestScripts = []queries.ScriptTest{
},
}
var errTmplNoAutomaticMerge = "table %s can't be automatically merged.\nTo merge this table, make the schema on the source and target branch equal."
var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
{
Name: "dropping columns",
@@ -3063,15 +3067,68 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
LeftSetUpScript: []string{
"insert into t values (5, 50, 500), (6, 60, 600);",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
// desired behavior
{
Query: "select pk, col2 from t;",
Expected: []sql.Row{{1, 100}, {2, 200}, {3, 300}, {4, 400}, {5, 500}, {6, 600}},
Skip: true,
},
},
},
{
Name: "adding nullable columns to one side",
AncSetUpScript: []string{
"create table t (pk int primary key, col1 int);",
"insert into t values (1, 1);",
},
RightSetUpScript: []string{
"alter table t add column col2 int;",
"alter table t add column col3 int;",
"insert into t values (2, 2, 2, 2);",
},
LeftSetUpScript: []string{
"insert into t values (3, 3);",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
Expected: []sql.Row{{0, 0}},
},
{
Query: "select pk, col2 from t;",
Expected: []sql.Row{{1, 100}, {2, 200}, {3, 300}, {4, 400}, {5, 500}, {6, 600}},
Skip: true, // outputs 5,50 and 6,50
Query: "select * from t;",
Expected: []sql.Row{{1, 1, nil, nil}, {2, 2, 2, 2}, {3, 3, nil, nil}},
},
},
},
{
Name: "adding a non-null column to one side",
AncSetUpScript: []string{
"create table t (pk int primary key, col1 int);",
"insert into t values (1, 1);",
},
RightSetUpScript: []string{
"alter table t add column col2 int not null default 0",
"alter table t add column col3 int;",
"update t set col2 = 1 where pk = 1;",
"insert into t values (2, 2, 2, null);",
},
LeftSetUpScript: []string{
"insert into t values (3, 3);",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
{
Query: "select * from t;",
Expected: []sql.Row{{1, 1, 1, nil}, {2, 2, 2, nil}, {3, 3, 0, nil}},
Skip: true,
},
},
},
@@ -3091,8 +3148,8 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
Expected: []sql.Row{{0, 0}},
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
{
Query: "select pk, col1, col2 from t;",
@@ -3124,9 +3181,10 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
Expected: []sql.Row{{0, 0}},
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
// desired behavior
{
Query: "select pk, col1, col2 form t;",
Expected: []sql.Row{
@@ -3156,8 +3214,8 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
Expected: []sql.Row{{0, 0}},
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
{
Query: "select pk, col1 from t;",
@@ -3189,9 +3247,8 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
Expected: []sql.Row{{0, 0}},
Skip: true,
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
{
Query: "select pk, col1 from t order by col1;",
@@ -3223,8 +3280,8 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "call dolt_merge('right');",
Expected: []sql.Row{{0, 0}},
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
{
Query: "select pk, col1 from t;",
@@ -3255,10 +3312,8 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
},
Assertions: []queries.ScriptTestAssertion{
{
// this merge should probably error. It's not clear how this merge should fix PK 3.
Query: "call dolt_merge('right');",
ExpectedErrStr: "some merge error",
Skip: true,
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
},
},
@@ -3445,9 +3500,8 @@ var ThreeWayMergeWithSchemaChangeTestScripts = []MergeScriptTest{
Assertions: []queries.ScriptTestAssertion{
// TODO: Fails secondary index validation. Changing the ordinal ordering of secondary indexes definitely breaks merge
{
Query: "call dolt_merge('right');",
Expected: []sql.Row{{0, 0}},
Skip: true,
Query: "call dolt_merge('right');",
ExpectedErrStr: fmt.Sprintf(errTmplNoAutomaticMerge, "t"),
},
},
},
@@ -0,0 +1,43 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
setup() {
setup_common
skip_nbf_not_dolt
}
teardown() {
teardown_common
}
@test "merge-3way-schema-changes: blocked merge can be fixed by making the schema identical" {
dolt sql -q "create table t (pk int primary key);"
dolt commit -Am "ancestor"
dolt checkout -b right
dolt sql -q "insert into t values (1);"
dolt sql -q "alter table t add column col1 int not null default 0;"
dolt commit -am "right"
dolt checkout main
dolt sql -q "insert into t values (2);"
dolt commit -am "left"
run dolt merge right
[ $status -ne 0 ]
[[ $output =~ "table t can't be automatically merged." ]]
run dolt diff main right --schema -r sql
[ $status -eq 0 ]
[[ $output =~ 'ALTER TABLE `t` ADD `col1` int NOT NULL DEFAULT 0;' ]]
dolt sql -q 'ALTER TABLE `t` ADD `col1` int NOT NULL DEFAULT 0;'
dolt commit -am "fix merge"
dolt merge right
run dolt sql -r csv -q "select * from t;"
[[ $output =~ "pk,col1" ]]
[[ $output =~ "1,0" ]]
[[ $output =~ "2,0" ]]
}
-165
View File
@@ -1,165 +0,0 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
# These tests describe how merge is broken today. New format only.
setup() {
setup_common
skip_nbf_not_dolt
dolt sql <<SQL
CREATE TABLE t (
pk int PRIMARY KEY
);
SQL
dolt commit -Am "added table t"
}
teardown() {
assert_feature_version
teardown_common
}
@test "merge-broken: dropping columns" {
dolt sql -q "alter table t add column col1 int;"
dolt sql -q "alter table t add column col2 int;"
dolt sql -q "insert into t values (1, 10, 100), (2, 20, 200);"
dolt commit -am "added column with data"
dolt checkout -b other
dolt sql -q "alter table t drop column col1;"
dolt sql -q "insert into t (pk, col2) values (3, 300), (4, 400);"
dolt commit -am "added more data"
dolt checkout main
dolt sql -q "insert into t (pk, col1, col2) values (5, 50, 500), (6, 60, 600);"
dolt commit -am "dropped column and added data"
dolt merge other
run dolt sql -r csv -q "select pk, col2 from t;"
[ $status -eq 0 ]
[[ $output =~ "1,100" ]] || false
[[ $output =~ "2,200" ]] || false
[[ $output =~ "3,300" ]] || false
[[ $output =~ "4,400" ]] || false
skip "outputs 5,50 and 6,60"
[[ $output =~ "5,500" ]] || false
[[ $output =~ "6,600" ]] || false
}
@test "merge-broken: adding different columns to both sides" {
dolt sql -q "insert into t values (1), (2);"
dolt commit -Am "added data"
dolt checkout -b other
dolt sql -q "alter table t add column col2 int;"
dolt sql -q "insert into t values (3, 300), (4, 400);"
dolt commit -Am "added column with data on right"
dolt checkout main
dolt sql -q "alter table t add column col1 int;"
dolt sql -q "insert into t values (5, 50), (6, 60);"
dolt commit -Am "added column with data on left"
dolt merge other
run dolt sql -r csv -q "select pk, col1, col2 from t;"
[ $status -eq 0 ]
[[ $output =~ "1,," ]] || false
[[ $output =~ "2,," ]] || false
[[ $output =~ "5,50," ]] || false
[[ $output =~ "6,60," ]] || false
skip "output incorrect: 300 is in the wrong column"
[[ $output =~ "3,,300" ]] || false
[[ $output =~ "4,,400" ]] || false
}
@test "merge-broken: re-ordering columns" {
dolt sql -q "alter table t add column col1 int;"
dolt sql -q "alter table t add column col2 int;"
dolt sql -q "insert into t values (1, 10, 100), (2, 20, 200);"
dolt commit -am "added columns with data"
dolt checkout -b other
dolt sql -q "alter table t drop column col1;"
dolt sql -q "alter table t add column col1 int;"
dolt sql -q "insert into t (pk, col2, col1) values (3, 300, 30), (4, 400, 40);"
dolt commit -am "move column one to end"
dolt checkout main
dolt sql -q "insert into t (pk, col1, col2) values (5, 50, 500), (6, 60, 600);"
dolt commit -am "reordered columns and added data"
dolt merge other
run dolt sql -r csv -q "select pk, col1, col2 from t;"
[ $status -eq 0 ]
[[ $output =~ "1,,100" ]] || false
[[ $output =~ "2,,200" ]] || false
[[ $output =~ "3,30,300" ]] || false
[[ $output =~ "4,40,400" ]] || false
skip "outputs 5,500,50 and 6,600,60"
[[ $output =~ "5,50,500" ]]
[[ $output =~ "6,60,600" ]]
}
@test "merge-broken: changing the type of a column" {
dolt sql -q "alter table t add column col1 int;"
dolt sql -q "insert into t values (1, 10), (2, 20);"
dolt commit -am "initial"
dolt checkout -b other
dolt sql -q "alter table t modify column col1 varchar(100);"
dolt sql -q "insert into t values (3, 'thirty'), (4, 'forty');"
dolt commit -am "changed type"
dolt checkout main
dolt sql -q "insert into t values (5, 50), (6, 60);"
dolt commit -am "left"
dolt merge other
run dolt sql -r csv -q "select pk, col1 from t;"
[ $status -eq 0 ]
[[ $output =~ "1,10" ]] || false
[[ $output =~ "2,20" ]] || false
[[ $output =~ "3,thirty" ]] || false
[[ $output =~ "4,forty" ]] || false
skip "outputs garbled text"
[[ $output =~ "5,50" ]] || false
[[ $output =~ "6,60" ]] || false
}
@test "merge-broken: adding a not-null constraint with default to a column" {
dolt sql -q "alter table t add column col1 int;"
dolt sql -q "insert into t values (1, null), (2, null);"
dolt commit -am "initial"
dolt checkout -b other
dolt sql -q "update t set col1 = 9999 where col1 is null;"
dolt sql -q "alter table t modify column col1 int not null default 9999;"
dolt sql -q "insert into t values (3, 30), (4, 40);"
dolt commit -am "added not-null constraint with default"
dolt checkout main
dolt sql -q "insert into t values (5, null), (6, null);"
dolt commit -am "added data"
dolt merge other
run dolt sql -r csv -q "select pk, col1 from t;"
[ $status -eq 0 ]
[[ $output =~ "1,9999" ]] || false
[[ $output =~ "2,9999" ]] || false
[[ $output =~ "3,30" ]] || false
[[ $output =~ "4,40" ]] || false
skip "garbled"
[[ $output =~ "5,9999" ]] || false
[[ $output =~ "6,9999" ]] || false
}
@@ -187,8 +187,15 @@ EOF
}
@test "dolt merge other into $DEFAULT_BRANCH" {
# throws a conflict
dolt merge other
run dolt version
if [[ $output =~ "__DOLT__" ]]; then
run dolt merge other
[ $status -ne 0 ]
[[ $output =~ "table abc can't be automatically merged" ]] || false
else
# throws a conflict
dolt merge other
fi
}
@test "dolt table import" {