From 75c0b505e5d5a79cf5774cabb9ff6b3054b36479 Mon Sep 17 00:00:00 2001 From: Andy Arthur Date: Tue, 9 May 2023 15:40:07 -0700 Subject: [PATCH] go/{merge, sqle, prolly}: Added NOT NULL constraints to constraint violations table --- .../doltcore/doltdb/durable/artifact_index.go | 6 +- go/libraries/doltcore/doltdb/table.go | 2 +- .../doltcore/merge/merge_prolly_rows.go | 100 +++++++++++++++--- go/libraries/doltcore/merge/merge_rows.go | 8 +- .../doltcore/merge/schema_merge_test.go | 27 +++++ go/libraries/doltcore/merge/violations_fk.go | 1 + .../merge/violations_unique_prolly.go | 31 ++++++ .../dtables/constraint_violations_prolly.go | 17 +++ .../sqle/enginetest/dolt_queries_merge.go | 46 ++++++++ go/store/prolly/artifact_map.go | 17 +-- go/store/val/tuple_descriptor.go | 5 + 11 files changed, 235 insertions(+), 25 deletions(-) diff --git a/go/libraries/doltcore/doltdb/durable/artifact_index.go b/go/libraries/doltcore/doltdb/durable/artifact_index.go index a078584c38..07da0dd342 100644 --- a/go/libraries/doltcore/doltdb/durable/artifact_index.go +++ b/go/libraries/doltcore/doltdb/durable/artifact_index.go @@ -137,7 +137,11 @@ func (i prollyArtifactIndex) ConflictCount(ctx context.Context) (uint64, error) } func (i prollyArtifactIndex) ConstraintViolationCount(ctx context.Context) (uint64, error) { - return i.index.CountOfTypes(ctx, prolly.ArtifactTypeForeignKeyViol, prolly.ArtifactTypeUniqueKeyViol, prolly.ArtifactTypeChkConsViol) + return i.index.CountOfTypes(ctx, + prolly.ArtifactTypeForeignKeyViol, + prolly.ArtifactTypeUniqueKeyViol, + prolly.ArtifactTypeChkConsViol, + prolly.ArtifactTypeNullViol) } func (i prollyArtifactIndex) ClearConflicts(ctx context.Context) (ArtifactIndex, error) { diff --git a/go/libraries/doltcore/doltdb/table.go b/go/libraries/doltcore/doltdb/table.go index cc889b13dc..95cb79fb20 100644 --- a/go/libraries/doltcore/doltdb/table.go +++ b/go/libraries/doltcore/doltdb/table.go @@ -355,7 +355,7 @@ func (t *Table) GetConstraintViolationsSchema(ctx context.Context) (schema.Schem } typeType, err := typeinfo.FromSqlType( - gmstypes.MustCreateEnumType([]string{"foreign key", "unique index", "check constraint"}, sql.Collation_Default)) + gmstypes.MustCreateEnumType([]string{"foreign key", "unique index", "check constraint", "not null"}, sql.Collation_Default)) if err != nil { return nil, err } diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index e462b186cf..9971a80750 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -156,12 +156,17 @@ func mergeProllyTableData(ctx *sql.Context, tm *TableMerger, finalSch schema.Sch return nil, nil, err } - // validator shares editor with conflict merge + // validator shares artifact editor with conflict merge uniq, err := newUniqValidator(ctx, finalSch, tm, valueMerger, ae) if err != nil { return nil, nil, err } + nullChk, err := newNullValidator(ctx, finalSch, tm, valueMerger, ae) + if err != nil { + return nil, nil, err + } + s := &MergeStats{ Operation: TableModified, } @@ -177,7 +182,16 @@ func mergeProllyTableData(ctx *sql.Context, tm *TableMerger, finalSch schema.Sch if err != nil { return nil, nil, err } - s.DataConflicts += cnt + s.ConstraintViolations += cnt + + cnt, err = nullChk.validateDiff(ctx, diff) + if err != nil { + return nil, nil, err + } + s.ConstraintViolations += cnt + if cnt > 0 { + continue + } switch diff.Op { case tree.DiffOpDivergentModifyConflict, tree.DiffOpDivergentDeleteConflict: @@ -379,7 +393,7 @@ func newUniqValidator(ctx context.Context, sch schema.Schema, tm *TableMerger, v return uv, nil } -func (uv uniqValidator) validateDiff(ctx context.Context, diff tree.ThreeWayDiff) (conflicts int, err error) { +func (uv uniqValidator) validateDiff(ctx context.Context, diff tree.ThreeWayDiff) (violations int, err error) { var value val.Tuple switch diff.Op { case tree.DiffOpRightAdd, tree.DiffOpRightModify: @@ -399,7 +413,7 @@ func (uv uniqValidator) validateDiff(ctx context.Context, diff tree.ThreeWayDiff for _, idx := range uv.indexes { err = idx.findCollisions(ctx, diff.Key, value, func(k, v val.Tuple) error { - conflicts++ + violations++ return uv.insertArtifact(ctx, k, v, idx.meta) }) if err != nil { @@ -491,6 +505,68 @@ func (idx uniqIndex) findCollisions(ctx context.Context, key, value val.Tuple, c return cb(key, value) } +// nullValidator enforces NOT NULL constraints on merge +type nullValidator struct { + // final is the merge result schema + final schema.Schema + // rightMap maps right-side value tuples to |final| + rightMap val.OrdinalMapping + // edits is the artifacts maps editor + edits *prolly.ArtifactsEditor + // theirRootish is the hash.Hash of the right-side + // rootish being merged + theirRootish hash.Hash +} + +func newNullValidator(ctx context.Context, final schema.Schema, tm *TableMerger, vm *valueMerger, edits *prolly.ArtifactsEditor) (nullValidator, error) { + theirRootish, err := tm.rightSrc.HashOf() + if err != nil { + return nullValidator{}, err + } + return nullValidator{ + final: final, + rightMap: vm.rightMapping, + edits: edits, + theirRootish: theirRootish, + }, nil +} + +func (nv nullValidator) validateDiff(ctx context.Context, diff tree.ThreeWayDiff) (count int, err error) { + var violations []string + switch diff.Op { + case tree.DiffOpRightAdd, tree.DiffOpRightModify: + for to, from := range nv.rightMap { + col := nv.final.GetNonPKCols().GetByIndex(to) + if col.IsNullable() { + continue + } + if from < 0 { + // non-nullable column in |nv.final| does not exist + // on the right side of the merge, check if it will + // be populated with a default value + if col.Default == "" { + violations = append(violations, col.Name) + } + } else { + if diff.Right.FieldIsNull(from) { + violations = append(violations, col.Name) + } + } + } + } + if len(violations) > 0 { + var meta prolly.ConstraintViolationMeta + if meta, err = newNotNullViolationMeta(violations, diff.Right); err != nil { + return 0, err + } + err = nv.edits.ReplaceConstraintViolation(ctx, diff.Key, nv.theirRootish, prolly.ArtifactTypeNullViol, meta) + if err != nil { + return 0, err + } + } + return len(violations), nil +} + // conflictMerger processing primary key diffs // with conflict types into artifact table writes. type conflictMerger struct { @@ -596,12 +672,12 @@ func (m *primaryMerger) merge(ctx *sql.Context, diff tree.ThreeWayDiff, sourceSc return fmt.Errorf("cannot merge keyless tables with reordered columns") } } else { - tempTupleValue, err := remapTupleWithColumnDefaults(ctx, &diff.Right, sourceSch.GetValueDescriptor(), + tempTupleValue, err := remapTupleWithColumnDefaults(ctx, diff.Right, sourceSch.GetValueDescriptor(), m.valueMerger.rightMapping, m.tableMerger, m.finalSch, m.valueMerger.syncPool) if err != nil { return err } - newTupleValue = *tempTupleValue + newTupleValue = tempTupleValue } return m.mut.Put(ctx, diff.Key, newTupleValue) case tree.DiffOpRightDelete: @@ -732,7 +808,7 @@ func remapTuple(tuple val.Tuple, desc val.TupleDesc, mapping val.OrdinalMapping) // currently being merged and associated node store. |mergedSch| is the new schema of the table and is used to look up // column default values to apply to any existing rows when a new column is added as part of a merge. |pool| is used to // to allocate memory for the new tuple. A pointer to the new tuple data is returned, along with any error encountered. -func remapTupleWithColumnDefaults(ctx *sql.Context, tuple *val.Tuple, tupleDesc val.TupleDesc, mapping val.OrdinalMapping, tm *TableMerger, mergedSch schema.Schema, pool pool.BuffPool) (*val.Tuple, error) { +func remapTupleWithColumnDefaults(ctx *sql.Context, tuple val.Tuple, tupleDesc val.TupleDesc, mapping val.OrdinalMapping, tm *TableMerger, mergedSch schema.Schema, pool pool.BuffPool) (val.Tuple, error) { tb := val.NewTupleBuilder(mergedSch.GetValueDescriptor()) for to, from := range mapping { @@ -768,12 +844,10 @@ func remapTupleWithColumnDefaults(ctx *sql.Context, tuple *val.Tuple, tupleDesc } } } else { - tb.PutRaw(to, tupleDesc.GetField(from, *tuple)) + tb.PutRaw(to, tupleDesc.GetField(from, tuple)) } } - - newTuple := tb.Build(pool) - return &newTuple, nil + return tb.Build(pool), nil } func mergeTableArtifacts(ctx context.Context, tm *TableMerger, mergeTbl *doltdb.Table) (*doltdb.Table, error) { @@ -912,12 +986,12 @@ func migrateDataToMergedSchema(ctx *sql.Context, tm *TableMerger, vm *valueMerge return err } - newValueTuple, err := remapTupleWithColumnDefaults(ctx, &valueTuple, valueDescriptor, vm.leftMapping, tm, mergedSch, vm.syncPool) + newValueTuple, err := remapTupleWithColumnDefaults(ctx, valueTuple, valueDescriptor, vm.leftMapping, tm, mergedSch, vm.syncPool) if err != nil { return err } - err = mut.Put(ctx, keyTuple, *newValueTuple) + err = mut.Put(ctx, keyTuple, newValueTuple) if err != nil { return err } diff --git a/go/libraries/doltcore/merge/merge_rows.go b/go/libraries/doltcore/merge/merge_rows.go index cc2fac0174..271d514e66 100644 --- a/go/libraries/doltcore/merge/merge_rows.go +++ b/go/libraries/doltcore/merge/merge_rows.go @@ -318,10 +318,10 @@ func validateTupleFields(existingSch schema.Schema, targetSch schema.Schema) (bo return false, nil } - // If a not-null constraint was added, bail. - if existingVD.Types[existingIndex].Nullable && !targetVD.Types[targetIndex].Nullable { - return false, nil - } + //// If a not-null constraint was added, bail. + //if existingVD.Types[existingIndex].Nullable && !targetVD.Types[targetIndex].Nullable { + // return false, nil + //} // If the collation was changed, bail. // Different collations will affect the ordering of any secondary indexes using this column. diff --git a/go/libraries/doltcore/merge/schema_merge_test.go b/go/libraries/doltcore/merge/schema_merge_test.go index df50f4d452..f127dfffaa 100644 --- a/go/libraries/doltcore/merge/schema_merge_test.go +++ b/go/libraries/doltcore/merge/schema_merge_test.go @@ -66,6 +66,9 @@ func TestSchemaMerge(t *testing.T) { t.Run("column default tests", func(t *testing.T) { testSchemaMerge(t, columnDefaultTests) }) + t.Run("nullability tests", func(t *testing.T) { + testSchemaMerge(t, nullabilityTests) + }) t.Run("column type change tests", func(t *testing.T) { testSchemaMerge(t, typeChangeTests) }) @@ -286,6 +289,30 @@ var columnDefaultTests = []schemaMergeTest{ }, } +var nullabilityTests = []schemaMergeTest{ + { + name: "add not null column to empty table", + ancestor: tbl(sch("CREATE TABLE t (id int PRIMARY KEY) ")), + left: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int NOT NULL)")), + right: tbl(sch("CREATE TABLE t (id int PRIMARY KEY) ")), + merged: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int NOT NULL)")), + }, + { + name: "add not null constraint to existing column", + ancestor: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int) "), row(1, 1)), + left: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int NOT NULL)"), row(1, 1)), + right: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int) "), row(1, 1)), + merged: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int NOT NULL)"), row(1, 1)), + }, + { + name: "add not null column to non-empty table", + ancestor: tbl(sch("CREATE TABLE t (id int PRIMARY KEY) "), row(1)), + left: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int NOT NULL DEFAULT '19')"), row(1, 1)), + right: tbl(sch("CREATE TABLE t (id int PRIMARY KEY) "), row(1), row(2)), + merged: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int NOT NULL DEFAULT '19')"), row(1, 1), row(2, 19)), + }, +} + var columnReorderingTests = []schemaMergeTest{} var typeChangeTests = []schemaMergeTest{ diff --git a/go/libraries/doltcore/merge/violations_fk.go b/go/libraries/doltcore/merge/violations_fk.go index 8cbbd4fc42..ecc3a5bfcc 100644 --- a/go/libraries/doltcore/merge/violations_fk.go +++ b/go/libraries/doltcore/merge/violations_fk.go @@ -57,6 +57,7 @@ const ( CvType_ForeignKey CvType = iota + 1 CvType_UniqueIndex CvType_CheckConstraint + CvType_NotNull ) type FKViolationReceiver interface { diff --git a/go/libraries/doltcore/merge/violations_unique_prolly.go b/go/libraries/doltcore/merge/violations_unique_prolly.go index aa2e03c11d..cb8b413160 100644 --- a/go/libraries/doltcore/merge/violations_unique_prolly.go +++ b/go/libraries/doltcore/merge/violations_unique_prolly.go @@ -16,6 +16,7 @@ package merge import ( "context" + "encoding/json" "fmt" "strings" @@ -149,3 +150,33 @@ func ordinalMappingFromIndex(def schema.Index) (m val.OrdinalMapping) { } return } + +type NullViolationMeta struct { + Columns []string `json:"Columns"` +} + +func newNotNullViolationMeta(violations []string, value val.Tuple) (prolly.ConstraintViolationMeta, error) { + info, err := json.Marshal(NullViolationMeta{Columns: violations}) + if err != nil { + return prolly.ConstraintViolationMeta{}, err + } + return prolly.ConstraintViolationMeta{ + VInfo: info, + Value: value, + }, nil +} + +func (m NullViolationMeta) Unmarshall(ctx *sql.Context) (val types.JSONDocument, err error) { + return types.JSONDocument{Val: m}, nil +} + +func (m NullViolationMeta) Compare(ctx *sql.Context, v types.JSONValue) (cmp int, err error) { + ours := types.JSONDocument{Val: m} + return ours.Compare(ctx, v) +} + +func (m NullViolationMeta) ToString(ctx *sql.Context) (string, error) { + return fmt.Sprintf("{Columns: [%s]}", strings.Join(m.Columns, ",")), nil +} + +var _ types.JSONValue = FkCVMeta{} diff --git a/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go b/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go index 3bbe6444a3..cbc593555a 100644 --- a/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go +++ b/go/libraries/doltcore/sqle/dtables/constraint_violations_prolly.go @@ -117,6 +117,12 @@ func (cvt *prollyConstraintViolationsTable) PartitionRows(ctx *sql.Context, part return nil, err } kd, vd := sch.GetMapDescriptors() + + // value tuples encoded in ConstraintViolationMeta may + // violate the not null constraints assumed by fixed access + kd = kd.WithoutFixedAccess() + vd = vd.WithoutFixedAccess() + return prollyCVIter{ itr: itr, sch: sch, @@ -206,6 +212,13 @@ func (itr prollyCVIter) Next(ctx *sql.Context) (sql.Row, error) { return nil, err } r[o] = m + case prolly.ArtifactTypeNullViol: + var m merge.NullViolationMeta + err = json.Unmarshal(meta.VInfo, &m) + if err != nil { + return nil, err + } + r[o] = m default: panic("json not implemented for artifact type") } @@ -295,6 +308,8 @@ func mapCVType(artifactType prolly.ArtifactType) (outType uint64) { outType = uint64(merge.CvType_UniqueIndex) case prolly.ArtifactTypeChkConsViol: outType = uint64(merge.CvType_CheckConstraint) + case prolly.ArtifactTypeNullViol: + outType = uint64(merge.CvType_NotNull) default: panic("unhandled cv type") } @@ -309,6 +324,8 @@ func unmapCVType(in merge.CvType) (out prolly.ArtifactType) { out = prolly.ArtifactTypeUniqueKeyViol case merge.CvType_CheckConstraint: out = prolly.ArtifactTypeChkConsViol + case merge.CvType_NotNull: + out = prolly.ArtifactTypeNullViol default: panic("unhandled cv type") } diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go index a52dcf8375..3251163479 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries_merge.go @@ -1606,6 +1606,52 @@ var Dolt1MergeScripts = []queries.ScriptTest{ }, }, }, + { + Name: "try to merge a nullable field into a non-null column", + SetUpScript: []string{ + "SET dolt_force_transaction_commit = on;", + "create table test (pk int primary key, c0 int)", + "insert into test values (1,1),(3,3);", + "call dolt_commit('-Am', 'new table with NULL value');", + "call dolt_checkout('-b', 'other')", + "insert into test values (2,NULL);", + "call dolt_commit('-am', 'inserted null value')", + "call dolt_checkout('main');", + "alter table test modify c0 int not null;", + "insert into test values (4,4)", + "call dolt_commit('-am', 'modified column c0 to not null');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "call dolt_merge('other')", + Expected: []sql.Row{{0, 1}}, + }, + { + Query: "select violation_type, pk, violation_info from dolt_constraint_violations_test", + Expected: []sql.Row{ + {uint16(4), 2, types.JSONDocument{Val: merge.NullViolationMeta{Columns: []string{"c0"}}}}, + }, + }, + }, + }, + { + Name: "dolt_revert() detects not null violation (issue #4527)", + SetUpScript: []string{ + "create table test2 (pk int primary key, c0 int)", + "insert into test2 values (1,1),(2,NULL),(3,3);", + "call dolt_commit('-Am', 'new table with NULL value');", + "delete from test2 where pk = 2;", + "call dolt_commit('-am', 'deleted row with NULL value');", + "alter table test2 modify c0 int not null", + "call dolt_commit('-am', 'modified column c0 to not null');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "call dolt_revert('head~1');", + ExpectedErrStr: "revert currently does not handle constraint violations", + }, + }, + }, } var KeylessMergeCVsAndConflictsScripts = []queries.ScriptTest{ diff --git a/go/store/prolly/artifact_map.go b/go/store/prolly/artifact_map.go index 6f6b723ccd..b987d790bd 100644 --- a/go/store/prolly/artifact_map.go +++ b/go/store/prolly/artifact_map.go @@ -41,6 +41,11 @@ const ( ArtifactTypeUniqueKeyViol // ArtifactTypeChkConsViol is the type for check constraint violations. ArtifactTypeChkConsViol + // ArtifactTypeNullViol is the type for nullability violations. + ArtifactTypeNullViol +) + +const ( artifactMapPendingBufferSize = 650_000 ) @@ -191,11 +196,11 @@ func (m ArtifactMap) IterAll(ctx context.Context) (ArtifactIter, error) { } func (m ArtifactMap) IterAllCVs(ctx context.Context) (ArtifactIter, error) { - itr, err := m.iterAllOfTypes(ctx, ArtifactTypeForeignKeyViol, ArtifactTypeUniqueKeyViol, ArtifactTypeChkConsViol) - if err != nil { - return nil, err - } - return itr, nil + return m.iterAllOfTypes(ctx, + ArtifactTypeForeignKeyViol, + ArtifactTypeUniqueKeyViol, + ArtifactTypeChkConsViol, + ArtifactTypeNullViol) } // IterAllConflicts returns an iterator for the conflicts. @@ -495,7 +500,7 @@ var _ ArtifactIter = multiArtifactTypeItr{} // newMultiArtifactTypeItr creates an iter that iterates an artifact if its type exists in |types|. func newMultiArtifactTypeItr(itr ArtifactIter, types []ArtifactType) multiArtifactTypeItr { - members := make([]bool, 5) + members := make([]bool, 6) for _, t := range types { members[uint8(t)] = true } diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index b7a3b1fcef..7af7a38f18 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -176,6 +176,11 @@ func (td TupleDesc) GetFixedAccess() FixedAccess { return td.fast } +// WithoutFixedAccess returns a copy of |td| without fixed access metadata. +func (td TupleDesc) WithoutFixedAccess() TupleDesc { + return TupleDesc{Types: td.Types, cmp: td.cmp} +} + // GetBool reads a bool from the ith field of the Tuple. // If the ith field is NULL, |ok| is set to false. func (td TupleDesc) GetBool(i int, tup Tuple) (v bool, ok bool) {