From 49a434a090caac72c83d18792272fd2b95280595 Mon Sep 17 00:00:00 2001 From: Jason Fulghum Date: Mon, 13 Oct 2025 16:55:32 -0700 Subject: [PATCH] Relax column tag uniqueness to only require tags to be unique in each table --- go/cmd/dolt/commands/diff.go | 4 +- go/libraries/doltcore/diff/table_deltas.go | 1 + go/libraries/doltcore/doltdb/root_val.go | 34 ++----- go/libraries/doltcore/merge/merge_schema.go | 1 + .../doltcore/rowconv/row_converter.go | 1 + go/libraries/doltcore/schema/col_coll.go | 2 +- go/libraries/doltcore/schema/index_coll.go | 1 + go/libraries/doltcore/schema/schema.go | 1 + go/libraries/doltcore/sqle/alterschema.go | 14 +-- go/libraries/doltcore/sqle/database.go | 17 ---- .../doltcore/sqle/enginetest/dolt_queries.go | 98 +++++++++++++++++++ .../doltcore/sqle/index/noms_index_iter.go | 1 + go/libraries/doltcore/sqle/sqlfmt/row_fmt.go | 5 + .../doltcore/sqle/sqlfmt/schema_fmt.go | 1 + integration-tests/bats/merge.bats | 30 ------ 15 files changed, 120 insertions(+), 91 deletions(-) diff --git a/go/cmd/dolt/commands/diff.go b/go/cmd/dolt/commands/diff.go index e25416d009..d35a2756b4 100644 --- a/go/cmd/dolt/commands/diff.go +++ b/go/cmd/dolt/commands/diff.go @@ -1664,13 +1664,13 @@ func getColumnNames(fromTableInfo, toTableInfo *diff.TableInfo) (colNames []stri var cols []string if fromSch != nil { - fromSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { + fromSch.GetAllCols().Iter(func(_ uint64, col schema.Column) (stop bool, err error) { cols = append(cols, fmt.Sprintf("from_%s", col.Name)) return false, nil }) } if toSch != nil { - toSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { + toSch.GetAllCols().Iter(func(_ uint64, col schema.Column) (stop bool, err error) { cols = append(cols, fmt.Sprintf("to_%s", col.Name)) return false, nil }) diff --git a/go/libraries/doltcore/diff/table_deltas.go b/go/libraries/doltcore/diff/table_deltas.go index d26ed675be..201b555011 100644 --- a/go/libraries/doltcore/diff/table_deltas.go +++ b/go/libraries/doltcore/diff/table_deltas.go @@ -359,6 +359,7 @@ func schemasOverlap(from, to schema.Schema) bool { fromColSet := set.NewUint64Set(fromCols.Tags) toColSet := set.NewUint64Set(toCols.Tags) + // TAGS: We use tags here, but only over a single table overlappingTags := fromColSet.Intersection(toColSet) numOverlaps := overlappingTags.Size() for _, tag := range overlappingTags.AsSlice() { diff --git a/go/libraries/doltcore/doltdb/root_val.go b/go/libraries/doltcore/doltdb/root_val.go index da78ba7a53..4d24587696 100644 --- a/go/libraries/doltcore/doltdb/root_val.go +++ b/go/libraries/doltcore/doltdb/root_val.go @@ -408,7 +408,6 @@ func GenerateTagsForNewColumns( // If we found any existing columns set them in the newTags list. for _, col := range existingCols { - col := col for i := range newColNames { // Only re-use tags if the noms kind didn't change // TODO: revisit this when new storage format is further along @@ -607,24 +606,6 @@ func GetTableInsensitive(ctx context.Context, root RootValue, tName TableName) ( return tbl, resolvedName, ok, nil } -// GetTableByColTag looks for the table containing the given column tag. -func GetTableByColTag(ctx context.Context, root RootValue, tag uint64) (tbl *Table, name TableName, found bool, err error) { - err = root.IterTables(ctx, func(tn TableName, t *Table, s schema.Schema) (bool, error) { - _, found = s.GetAllCols().GetByTag(tag) - if found { - name, tbl = tn, t - } - - return found, nil - }) - - if err != nil { - return nil, TableName{}, false, err - } - - return tbl, name, found, nil -} - // GetAllTableNames retrieves all table names for a RootValue. Dolt only has a single schema (the default empty schema), // and does not have root objects, so both of those are ignored here specifically. func (root *rootValue) GetAllTableNames(ctx context.Context, _ bool) ([]TableName, error) { @@ -1366,7 +1347,7 @@ func GetSchemaHash(ctx context.Context, root RootValue, name TableName, override return root.GetTableSchemaHash(ctx, name) } -// ValidateTagUniqueness checks for tag collisions between the given table and the set of tables in then given root. +// ValidateTagUniqueness checks for tag collisions between columns in the given table. func ValidateTagUniqueness(ctx context.Context, root RootValue, tableName string, table *Table) error { prevTName := TableName{Name: tableName} prevHash, err := GetSchemaHash(ctx, root, prevTName, table.overriddenSchema) @@ -1394,16 +1375,13 @@ func ValidateTagUniqueness(ctx context.Context, root RootValue, tableName string return err } - existing, err := GetAllTagsForRoots(ctx, root) - if err != nil { - return err - } - + tagsSeen := make(map[uint64]struct{}) err = sch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { - oldTableName, ok := existing.Get(tag) - if ok && oldTableName != tableName { - return true, schema.NewErrTagPrevUsed(tag, col.Name, tableName, oldTableName) + if _, ok := tagsSeen[tag]; ok { + return true, schema.NewErrTagPrevUsed(tag, col.Name, tableName, tableName) } + + tagsSeen[tag] = struct{}{} return false, nil }) if err != nil { diff --git a/go/libraries/doltcore/merge/merge_schema.go b/go/libraries/doltcore/merge/merge_schema.go index 10c8c2fb7c..d8275ec6be 100644 --- a/go/libraries/doltcore/merge/merge_schema.go +++ b/go/libraries/doltcore/merge/merge_schema.go @@ -720,6 +720,7 @@ func mapColumns(ourCC, theirCC, ancCC *schema.ColCollection) (columnMappings, er theirCC = schema.NewColCollection(theirCC.GetColumns()...) theirTagsToCols := theirCC.TagToCol + // TAGS: Use of tags here is safe since it's constrained to a single table columnMappings := make(columnMappings, 0) _ = ourCC.Iter(func(tag uint64, ourCol schema.Column) (stop bool, err error) { theirCol, foundTheirByTag := theirCC.GetByTag(ourCol.Tag) diff --git a/go/libraries/doltcore/rowconv/row_converter.go b/go/libraries/doltcore/rowconv/row_converter.go index 74f8bf8061..eb5edc5648 100644 --- a/go/libraries/doltcore/rowconv/row_converter.go +++ b/go/libraries/doltcore/rowconv/row_converter.go @@ -64,6 +64,7 @@ func NewRowConverter(ctx context.Context, vrw types.ValueReadWriter, mapping *Fi convFuncs := make(map[uint64]types.MarshalCallback, len(mapping.SrcToDest)) for srcTag, destTag := range mapping.SrcToDest { + // TAGS: Use of tags here is safe since it's constrained to a single table destCol, destOk := mapping.DestSch.GetAllCols().GetByTag(destTag) srcCol, srcOk := mapping.SrcSch.GetAllCols().GetByTag(srcTag) diff --git a/go/libraries/doltcore/schema/col_coll.go b/go/libraries/doltcore/schema/col_coll.go index 07497d3a45..290416cb5f 100644 --- a/go/libraries/doltcore/schema/col_coll.go +++ b/go/libraries/doltcore/schema/col_coll.go @@ -165,7 +165,7 @@ func (cc *ColCollection) IndexOf(colName string) int { idx := -1 var i = 0 - _ = cc.Iter(func(tag uint64, col Column) (stop bool, err error) { + _ = cc.Iter(func(_ uint64, col Column) (stop bool, err error) { defer func() { i++ }() diff --git a/go/libraries/doltcore/schema/index_coll.go b/go/libraries/doltcore/schema/index_coll.go index 40e76dc2a8..f324fc34ab 100644 --- a/go/libraries/doltcore/schema/index_coll.go +++ b/go/libraries/doltcore/schema/index_coll.go @@ -210,6 +210,7 @@ func (ixc *indexCollectionImpl) AddIndexByColTags(indexName string, tags []uint6 return nil, fmt.Errorf("tags %v do not exist on this table", tags) } + // TAGS: Use of tags here is safe since it's constrained to a single table for _, tag := range tags { // we already validated the tag exists c, _ := ixc.colColl.GetByTag(tag) diff --git a/go/libraries/doltcore/schema/schema.go b/go/libraries/doltcore/schema/schema.go index 31d5fbddb7..a8ba13c751 100644 --- a/go/libraries/doltcore/schema/schema.go +++ b/go/libraries/doltcore/schema/schema.go @@ -315,6 +315,7 @@ func MapSchemaBasedOnTagAndName(inSch, outSch Schema) (val.OrdinalMapping, val.O return keyMapping, make([]int, inSch.GetNonPKCols().Size()), nil } + // TAGS: Use of tags here is confined to a single table err := inSch.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) { i := inSch.GetPKCols().TagToIdx[tag] if foundCol, ok := outSch.GetPKCols().GetByTag(tag); ok { diff --git a/go/libraries/doltcore/sqle/alterschema.go b/go/libraries/doltcore/sqle/alterschema.go index 5176758b35..80d13f65f8 100755 --- a/go/libraries/doltcore/sqle/alterschema.go +++ b/go/libraries/doltcore/sqle/alterschema.go @@ -141,19 +141,7 @@ func validateNewColumn( return false, nil }) - if err != nil { - return err - } - - _, oldTblName, found, err := doltdb.GetTableByColTag(ctx, root, tag) - if err != nil { - return err - } - if found { - return schema.NewErrTagPrevUsed(tag, newColName, tblName, oldTblName.Name) - } - - return nil + return err } var ErrPrimaryKeySetsIncompatible = errors.New("primary key sets incompatible") diff --git a/go/libraries/doltcore/sqle/database.go b/go/libraries/doltcore/sqle/database.go index 0bee4d6d19..446d4b2ce5 100644 --- a/go/libraries/doltcore/sqle/database.go +++ b/go/libraries/doltcore/sqle/database.go @@ -1888,23 +1888,6 @@ func (db Database) createDoltTable(ctx *sql.Context, tableName string, schemaNam return sql.ErrTableAlreadyExists.New(tableName) } - var conflictingTbls []string - _ = doltSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { - _, oldTableName, exists, err := doltdb.GetTableByColTag(ctx, root, tag) - if err != nil { - return true, err - } - if exists && oldTableName.Name != tableName { - errStr := schema.NewErrTagPrevUsed(tag, col.Name, tableName, oldTableName.Name).Error() - conflictingTbls = append(conflictingTbls, errStr) - } - return false, nil - }) - - if len(conflictingTbls) > 0 { - return fmt.Errorf("%s", strings.Join(conflictingTbls, "\n")) - } - newRoot, err := doltdb.CreateEmptyTable(ctx, root, doltdb.TableName{Name: tableName, Schema: schemaName}, doltSch) if err != nil { return err diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go index 124c8512c9..8286b03bed 100644 --- a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go +++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go @@ -1553,6 +1553,104 @@ var DoltScripts = []queries.ScriptTest{ }, }, }, + { + // Assert that tables can have the same tag values without causing any problems. + Name: "Overlapping column tags – errors", + SetUpScript: []string{ + "CREATE TABLE t2(pk int primary key, c1 int, c2 float);", + "INSERT INTO t2 VALUES (1, -1, 1.11), (2, -2, 2.22);", + "CALL dolt_update_column_tag('t2', 'pk', 1);", + "CALL dolt_update_column_tag('t2', 'c1', 2);", + "CALL dolt_update_column_tag('t2', 'c2', 3);", + }, + Assertions: []queries.ScriptTestAssertion{ + { + // re-using the same column tag in a table is not allowed + Query: "CALL dolt_update_column_tag('t2', 'c2', 2);", + ExpectedErrStr: "failed to update table schema: two different columns with the same tag", + }, + }, + }, + { + // Assert that tables can have the same tag values without causing any problems. + Name: "Overlapping column tags – join", + SetUpScript: []string{ + "CREATE TABLE t1(pk int primary key, c1 varchar(100), c2 int);", + "CREATE TABLE t2(pk int primary key, c1 int, c2 float);", + "INSERT INTO t1 VALUES (1, 'one', 1), (2, 'two', 2);", + "INSERT INTO t2 VALUES (1, -1, 1.11), (2, -2, 2.22);", + + // reset column tags so that each table has the same tags + "CALL dolt_update_column_tag('t1', 'pk', 1);", + "CALL dolt_update_column_tag('t1', 'c1', 2);", + "CALL dolt_update_column_tag('t1', 'c2', 3);", + "CALL dolt_update_column_tag('t2', 'pk', 1);", + "CALL dolt_update_column_tag('t2', 'c1', 2);", + "CALL dolt_update_column_tag('t2', 'c2', 3);", + }, + Assertions: []queries.ScriptTestAssertion{ + { + // Assert that we can pull out each column correctly, without confusing columns + Query: "SELECT t1.pk, t2.pk, t1.c1, t2.c1, t1.c2, t2.c2 from t1 join t2 on t1.pk = t2.pk;", + Expected: []sql.Row{ + {1, 1, "one", -1, 1, 1.11}, + {2, 2, "two", -2, 2, 2.22}, + }, + }, + { + Query: "SELECT dt1.pk, dt2.pk, dt1.c1, dt2.c1, dt1.c2, dt2.c2 from (SELECT * FROM t1) dt1 join (SELECT * FROM t2) dt2 on dt1.pk = dt2.pk;", + Expected: []sql.Row{ + {1, 1, "one", -1, 1, 1.11}, + {2, 2, "two", -2, 2, 2.22}, + }, + }, + }, + }, + { + // Assert that tables can have the same tag values without causing any problems. + Name: "Overlapping column tags – merge", + SetUpScript: []string{ + "CREATE TABLE t1(pk int primary key, c1 varchar(100), c2 int);", + "CREATE TABLE t2(pk int primary key, c1 int, c2 float);", + "INSERT INTO t1 VALUES (1, 'one', 1), (2, 'two', 2);", + "INSERT INTO t2 VALUES (1, -1, 1.11), (2, -2, 2.22);", + + // reset column tags so that each table has the same tags + "CALL dolt_update_column_tag('t1', 'pk', 1);", + "CALL dolt_update_column_tag('t1', 'c1', 2);", + "CALL dolt_update_column_tag('t1', 'c2', 3);", + "CALL dolt_update_column_tag('t2', 'pk', 1);", + "CALL dolt_update_column_tag('t2', 'c1', 2);", + "CALL dolt_update_column_tag('t2', 'c2', 3);", + + "CALL dolt_commit('-Am', 'initial tables');", + "CALL dolt_branch('b1');", + "CALL dolt_commit('--allow-empty', '-m', 'empty commit');", + + // Check out branch b1, make a change + "CALL dolt_checkout('b1');", + "UPDATE t1 SET c1 = 'UNO' where pk=1;", + "CALL dolt_commit('-Am', 'changes on b1');", + }, + Assertions: []queries.ScriptTestAssertion{ + { + Query: "CALL dolt_checkout('main');", + Expected: []sql.Row{{0, "Switched to branch 'main'"}}, + }, + { + Query: "CALL dolt_merge('b1');", + Expected: []sql.Row{{doltCommit, 0, 0, "merge successful"}}, + }, + { + // Assert that we can pull out each column correctly, without confusing columns + Query: "SELECT t1.pk, t2.pk, t1.c1, t2.c1, t1.c2, t2.c2 from t1 join t2 on t1.pk = t2.pk;", + Expected: []sql.Row{ + {1, 1, "UNO", -1, 1, 1.11}, + {2, 2, "two", -2, 2, 2.22}, + }, + }, + }, + }, } func makeLargeInsert(sz int) string { diff --git a/go/libraries/doltcore/sqle/index/noms_index_iter.go b/go/libraries/doltcore/sqle/index/noms_index_iter.go index df6ed72a5f..c946b02302 100644 --- a/go/libraries/doltcore/sqle/index/noms_index_iter.go +++ b/go/libraries/doltcore/sqle/index/noms_index_iter.go @@ -254,6 +254,7 @@ func NewCoveringIndexRowIterAdapter(ctx *sql.Context, idx DoltIndex, keyIter *no } } + // TAGS: Use of tags here is safe since it's constrained to a single table for i, col := range cols { _, partOfIndexKey := idxCols.GetByTag(col.Tag) _, partOfTableKeys := tblPKCols.GetByTag(col.Tag) diff --git a/go/libraries/doltcore/sqle/sqlfmt/row_fmt.go b/go/libraries/doltcore/sqle/sqlfmt/row_fmt.go index 63190ae35a..86db188039 100644 --- a/go/libraries/doltcore/sqle/sqlfmt/row_fmt.go +++ b/go/libraries/doltcore/sqle/sqlfmt/row_fmt.go @@ -76,6 +76,7 @@ func RowAsInsertStmt(r row.Row, tableName string, tableSch schema.Schema) (strin b.WriteString(" VALUES (") seenOne = false + // TAGS: Use of tags here is safe since it's constrained to a single table _, err = r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) { if seenOne { b.WriteRune(',') @@ -107,6 +108,7 @@ func RowAsDeleteStmt(r row.Row, tableName string, tableSch schema.Schema) (strin b.WriteString(" WHERE (") seenOne := false isKeyless := tableSch.GetPKCols().Size() == 0 + // TAGS: Use of tags here is safe since it's constrained to a single table _, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) { col, _ := tableSch.GetAllCols().GetByTag(tag) if col.IsPartOfPK || isKeyless { @@ -141,6 +143,7 @@ func RowAsUpdateStmt(r row.Row, tableName string, tableSch schema.Schema, colsTo b.WriteString("SET ") seenOne := false + // TAGS: Use of tags here is safe since it's constrained to a single table _, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) { col, _ := tableSch.GetAllCols().GetByTag(tag) exists := colsToUpdate.Contains(col.Name) @@ -166,6 +169,7 @@ func RowAsUpdateStmt(r row.Row, tableName string, tableSch schema.Schema, colsTo b.WriteString(" WHERE (") seenOne = false + // TAGS: Use of tags here is safe since it's constrained to a single table _, err = r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) { col, _ := tableSch.GetAllCols().GetByTag(tag) if col.IsPartOfPK { @@ -198,6 +202,7 @@ func RowAsTupleString(r row.Row, tableSch schema.Schema) (string, error) { b.WriteString("(") seenOne := false + // TAGS: Use of tags here is safe since it's constrained to a single table _, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) { if seenOne { b.WriteRune(',') diff --git a/go/libraries/doltcore/sqle/sqlfmt/schema_fmt.go b/go/libraries/doltcore/sqle/sqlfmt/schema_fmt.go index e0fbd4218a..14ad8870f9 100644 --- a/go/libraries/doltcore/sqle/sqlfmt/schema_fmt.go +++ b/go/libraries/doltcore/sqle/sqlfmt/schema_fmt.go @@ -227,6 +227,7 @@ func GenerateCreateTableIndexDefinition(index schema.Index) (string, bool) { // GenerateCreateTableForeignKeyDefinition returns foreign key definition for CREATE TABLE statement with indentation of 2 spaces func GenerateCreateTableForeignKeyDefinition(fk doltdb.ForeignKey, sch, parentSch schema.Schema) string { + // TAGS: Use of tags here is within a single table, so is safe var fkCols []string if fk.IsResolved() { for _, tag := range fk.TableColumns { diff --git a/integration-tests/bats/merge.bats b/integration-tests/bats/merge.bats index d72764dc88..7fce9774d0 100644 --- a/integration-tests/bats/merge.bats +++ b/integration-tests/bats/merge.bats @@ -1149,36 +1149,6 @@ SQL } -@test "merge: ourRoot modifies, theirRoot renames" { - dolt checkout -b merge_branch - dolt sql -q "ALTER TABLE test1 RENAME TO new_name" - dolt add . - dolt commit -am "rename test1" - - dolt checkout main - dolt sql -q "INSERT INTO test1 VALUES (0,1,2)" - dolt commit -am "add pk 0 to test1" - - run dolt merge merge_branch - log_status_eq 1 - [[ "$output" =~ "cannot merge, column pk on table new_name has duplicate tag as table test1. This was likely because one of the tables is a rename of the other" ]] || false -} - -@test "merge: ourRoot modifies the schema, theirRoot renames" { - dolt checkout -b merge_branch - dolt sql -q "ALTER TABLE test1 RENAME TO new_name" - dolt add . - dolt commit -am "rename test1" - - dolt checkout main - dolt sql -q "ALTER TABLE test1 DROP COLUMN c2;" - dolt commit -am "modify test1" - - run dolt merge merge_branch - log_status_eq 1 - [[ "$output" =~ "cannot merge, column pk on table new_name has duplicate tag as table test1. This was likely because one of the tables is a rename of the other" ]] || false -} - @test "merge: dolt merge commits successful non-fast-forward merge" { dolt branch other dolt sql -q "INSERT INTO test1 VALUES (1,2,3)"