Relax column tag uniqueness to only require tags to be unique in each table

This commit is contained in:
Jason Fulghum
2025-10-13 16:55:32 -07:00
parent fce9cc1c6e
commit 49a434a090
15 changed files with 120 additions and 91 deletions
+2 -2
View File
@@ -1664,13 +1664,13 @@ func getColumnNames(fromTableInfo, toTableInfo *diff.TableInfo) (colNames []stri
var cols []string
if fromSch != nil {
fromSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
fromSch.GetAllCols().Iter(func(_ uint64, col schema.Column) (stop bool, err error) {
cols = append(cols, fmt.Sprintf("from_%s", col.Name))
return false, nil
})
}
if toSch != nil {
toSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
toSch.GetAllCols().Iter(func(_ uint64, col schema.Column) (stop bool, err error) {
cols = append(cols, fmt.Sprintf("to_%s", col.Name))
return false, nil
})
@@ -359,6 +359,7 @@ func schemasOverlap(from, to schema.Schema) bool {
fromColSet := set.NewUint64Set(fromCols.Tags)
toColSet := set.NewUint64Set(toCols.Tags)
// TAGS: We use tags here, but only over a single table
overlappingTags := fromColSet.Intersection(toColSet)
numOverlaps := overlappingTags.Size()
for _, tag := range overlappingTags.AsSlice() {
+6 -28
View File
@@ -408,7 +408,6 @@ func GenerateTagsForNewColumns(
// If we found any existing columns set them in the newTags list.
for _, col := range existingCols {
col := col
for i := range newColNames {
// Only re-use tags if the noms kind didn't change
// TODO: revisit this when new storage format is further along
@@ -607,24 +606,6 @@ func GetTableInsensitive(ctx context.Context, root RootValue, tName TableName) (
return tbl, resolvedName, ok, nil
}
// GetTableByColTag looks for the table containing the given column tag.
func GetTableByColTag(ctx context.Context, root RootValue, tag uint64) (tbl *Table, name TableName, found bool, err error) {
err = root.IterTables(ctx, func(tn TableName, t *Table, s schema.Schema) (bool, error) {
_, found = s.GetAllCols().GetByTag(tag)
if found {
name, tbl = tn, t
}
return found, nil
})
if err != nil {
return nil, TableName{}, false, err
}
return tbl, name, found, nil
}
// GetAllTableNames retrieves all table names for a RootValue. Dolt only has a single schema (the default empty schema),
// and does not have root objects, so both of those are ignored here specifically.
func (root *rootValue) GetAllTableNames(ctx context.Context, _ bool) ([]TableName, error) {
@@ -1366,7 +1347,7 @@ func GetSchemaHash(ctx context.Context, root RootValue, name TableName, override
return root.GetTableSchemaHash(ctx, name)
}
// ValidateTagUniqueness checks for tag collisions between the given table and the set of tables in then given root.
// ValidateTagUniqueness checks for tag collisions between columns in the given table.
func ValidateTagUniqueness(ctx context.Context, root RootValue, tableName string, table *Table) error {
prevTName := TableName{Name: tableName}
prevHash, err := GetSchemaHash(ctx, root, prevTName, table.overriddenSchema)
@@ -1394,16 +1375,13 @@ func ValidateTagUniqueness(ctx context.Context, root RootValue, tableName string
return err
}
existing, err := GetAllTagsForRoots(ctx, root)
if err != nil {
return err
}
tagsSeen := make(map[uint64]struct{})
err = sch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
oldTableName, ok := existing.Get(tag)
if ok && oldTableName != tableName {
return true, schema.NewErrTagPrevUsed(tag, col.Name, tableName, oldTableName)
if _, ok := tagsSeen[tag]; ok {
return true, schema.NewErrTagPrevUsed(tag, col.Name, tableName, tableName)
}
tagsSeen[tag] = struct{}{}
return false, nil
})
if err != nil {
@@ -720,6 +720,7 @@ func mapColumns(ourCC, theirCC, ancCC *schema.ColCollection) (columnMappings, er
theirCC = schema.NewColCollection(theirCC.GetColumns()...)
theirTagsToCols := theirCC.TagToCol
// TAGS: Use of tags here is safe since it's constrained to a single table
columnMappings := make(columnMappings, 0)
_ = ourCC.Iter(func(tag uint64, ourCol schema.Column) (stop bool, err error) {
theirCol, foundTheirByTag := theirCC.GetByTag(ourCol.Tag)
@@ -64,6 +64,7 @@ func NewRowConverter(ctx context.Context, vrw types.ValueReadWriter, mapping *Fi
convFuncs := make(map[uint64]types.MarshalCallback, len(mapping.SrcToDest))
for srcTag, destTag := range mapping.SrcToDest {
// TAGS: Use of tags here is safe since it's constrained to a single table
destCol, destOk := mapping.DestSch.GetAllCols().GetByTag(destTag)
srcCol, srcOk := mapping.SrcSch.GetAllCols().GetByTag(srcTag)
+1 -1
View File
@@ -165,7 +165,7 @@ func (cc *ColCollection) IndexOf(colName string) int {
idx := -1
var i = 0
_ = cc.Iter(func(tag uint64, col Column) (stop bool, err error) {
_ = cc.Iter(func(_ uint64, col Column) (stop bool, err error) {
defer func() {
i++
}()
@@ -210,6 +210,7 @@ func (ixc *indexCollectionImpl) AddIndexByColTags(indexName string, tags []uint6
return nil, fmt.Errorf("tags %v do not exist on this table", tags)
}
// TAGS: Use of tags here is safe since it's constrained to a single table
for _, tag := range tags {
// we already validated the tag exists
c, _ := ixc.colColl.GetByTag(tag)
+1
View File
@@ -315,6 +315,7 @@ func MapSchemaBasedOnTagAndName(inSch, outSch Schema) (val.OrdinalMapping, val.O
return keyMapping, make([]int, inSch.GetNonPKCols().Size()), nil
}
// TAGS: Use of tags here is confined to a single table
err := inSch.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) {
i := inSch.GetPKCols().TagToIdx[tag]
if foundCol, ok := outSch.GetPKCols().GetByTag(tag); ok {
+1 -13
View File
@@ -141,19 +141,7 @@ func validateNewColumn(
return false, nil
})
if err != nil {
return err
}
_, oldTblName, found, err := doltdb.GetTableByColTag(ctx, root, tag)
if err != nil {
return err
}
if found {
return schema.NewErrTagPrevUsed(tag, newColName, tblName, oldTblName.Name)
}
return nil
return err
}
var ErrPrimaryKeySetsIncompatible = errors.New("primary key sets incompatible")
-17
View File
@@ -1888,23 +1888,6 @@ func (db Database) createDoltTable(ctx *sql.Context, tableName string, schemaNam
return sql.ErrTableAlreadyExists.New(tableName)
}
var conflictingTbls []string
_ = doltSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
_, oldTableName, exists, err := doltdb.GetTableByColTag(ctx, root, tag)
if err != nil {
return true, err
}
if exists && oldTableName.Name != tableName {
errStr := schema.NewErrTagPrevUsed(tag, col.Name, tableName, oldTableName.Name).Error()
conflictingTbls = append(conflictingTbls, errStr)
}
return false, nil
})
if len(conflictingTbls) > 0 {
return fmt.Errorf("%s", strings.Join(conflictingTbls, "\n"))
}
newRoot, err := doltdb.CreateEmptyTable(ctx, root, doltdb.TableName{Name: tableName, Schema: schemaName}, doltSch)
if err != nil {
return err
@@ -1553,6 +1553,104 @@ var DoltScripts = []queries.ScriptTest{
},
},
},
{
// Assert that tables can have the same tag values without causing any problems.
Name: "Overlapping column tags errors",
SetUpScript: []string{
"CREATE TABLE t2(pk int primary key, c1 int, c2 float);",
"INSERT INTO t2 VALUES (1, -1, 1.11), (2, -2, 2.22);",
"CALL dolt_update_column_tag('t2', 'pk', 1);",
"CALL dolt_update_column_tag('t2', 'c1', 2);",
"CALL dolt_update_column_tag('t2', 'c2', 3);",
},
Assertions: []queries.ScriptTestAssertion{
{
// re-using the same column tag in a table is not allowed
Query: "CALL dolt_update_column_tag('t2', 'c2', 2);",
ExpectedErrStr: "failed to update table schema: two different columns with the same tag",
},
},
},
{
// Assert that tables can have the same tag values without causing any problems.
Name: "Overlapping column tags join",
SetUpScript: []string{
"CREATE TABLE t1(pk int primary key, c1 varchar(100), c2 int);",
"CREATE TABLE t2(pk int primary key, c1 int, c2 float);",
"INSERT INTO t1 VALUES (1, 'one', 1), (2, 'two', 2);",
"INSERT INTO t2 VALUES (1, -1, 1.11), (2, -2, 2.22);",
// reset column tags so that each table has the same tags
"CALL dolt_update_column_tag('t1', 'pk', 1);",
"CALL dolt_update_column_tag('t1', 'c1', 2);",
"CALL dolt_update_column_tag('t1', 'c2', 3);",
"CALL dolt_update_column_tag('t2', 'pk', 1);",
"CALL dolt_update_column_tag('t2', 'c1', 2);",
"CALL dolt_update_column_tag('t2', 'c2', 3);",
},
Assertions: []queries.ScriptTestAssertion{
{
// Assert that we can pull out each column correctly, without confusing columns
Query: "SELECT t1.pk, t2.pk, t1.c1, t2.c1, t1.c2, t2.c2 from t1 join t2 on t1.pk = t2.pk;",
Expected: []sql.Row{
{1, 1, "one", -1, 1, 1.11},
{2, 2, "two", -2, 2, 2.22},
},
},
{
Query: "SELECT dt1.pk, dt2.pk, dt1.c1, dt2.c1, dt1.c2, dt2.c2 from (SELECT * FROM t1) dt1 join (SELECT * FROM t2) dt2 on dt1.pk = dt2.pk;",
Expected: []sql.Row{
{1, 1, "one", -1, 1, 1.11},
{2, 2, "two", -2, 2, 2.22},
},
},
},
},
{
// Assert that tables can have the same tag values without causing any problems.
Name: "Overlapping column tags merge",
SetUpScript: []string{
"CREATE TABLE t1(pk int primary key, c1 varchar(100), c2 int);",
"CREATE TABLE t2(pk int primary key, c1 int, c2 float);",
"INSERT INTO t1 VALUES (1, 'one', 1), (2, 'two', 2);",
"INSERT INTO t2 VALUES (1, -1, 1.11), (2, -2, 2.22);",
// reset column tags so that each table has the same tags
"CALL dolt_update_column_tag('t1', 'pk', 1);",
"CALL dolt_update_column_tag('t1', 'c1', 2);",
"CALL dolt_update_column_tag('t1', 'c2', 3);",
"CALL dolt_update_column_tag('t2', 'pk', 1);",
"CALL dolt_update_column_tag('t2', 'c1', 2);",
"CALL dolt_update_column_tag('t2', 'c2', 3);",
"CALL dolt_commit('-Am', 'initial tables');",
"CALL dolt_branch('b1');",
"CALL dolt_commit('--allow-empty', '-m', 'empty commit');",
// Check out branch b1, make a change
"CALL dolt_checkout('b1');",
"UPDATE t1 SET c1 = 'UNO' where pk=1;",
"CALL dolt_commit('-Am', 'changes on b1');",
},
Assertions: []queries.ScriptTestAssertion{
{
Query: "CALL dolt_checkout('main');",
Expected: []sql.Row{{0, "Switched to branch 'main'"}},
},
{
Query: "CALL dolt_merge('b1');",
Expected: []sql.Row{{doltCommit, 0, 0, "merge successful"}},
},
{
// Assert that we can pull out each column correctly, without confusing columns
Query: "SELECT t1.pk, t2.pk, t1.c1, t2.c1, t1.c2, t2.c2 from t1 join t2 on t1.pk = t2.pk;",
Expected: []sql.Row{
{1, 1, "UNO", -1, 1, 1.11},
{2, 2, "two", -2, 2, 2.22},
},
},
},
},
}
func makeLargeInsert(sz int) string {
@@ -254,6 +254,7 @@ func NewCoveringIndexRowIterAdapter(ctx *sql.Context, idx DoltIndex, keyIter *no
}
}
// TAGS: Use of tags here is safe since it's constrained to a single table
for i, col := range cols {
_, partOfIndexKey := idxCols.GetByTag(col.Tag)
_, partOfTableKeys := tblPKCols.GetByTag(col.Tag)
@@ -76,6 +76,7 @@ func RowAsInsertStmt(r row.Row, tableName string, tableSch schema.Schema) (strin
b.WriteString(" VALUES (")
seenOne = false
// TAGS: Use of tags here is safe since it's constrained to a single table
_, err = r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) {
if seenOne {
b.WriteRune(',')
@@ -107,6 +108,7 @@ func RowAsDeleteStmt(r row.Row, tableName string, tableSch schema.Schema) (strin
b.WriteString(" WHERE (")
seenOne := false
isKeyless := tableSch.GetPKCols().Size() == 0
// TAGS: Use of tags here is safe since it's constrained to a single table
_, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) {
col, _ := tableSch.GetAllCols().GetByTag(tag)
if col.IsPartOfPK || isKeyless {
@@ -141,6 +143,7 @@ func RowAsUpdateStmt(r row.Row, tableName string, tableSch schema.Schema, colsTo
b.WriteString("SET ")
seenOne := false
// TAGS: Use of tags here is safe since it's constrained to a single table
_, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) {
col, _ := tableSch.GetAllCols().GetByTag(tag)
exists := colsToUpdate.Contains(col.Name)
@@ -166,6 +169,7 @@ func RowAsUpdateStmt(r row.Row, tableName string, tableSch schema.Schema, colsTo
b.WriteString(" WHERE (")
seenOne = false
// TAGS: Use of tags here is safe since it's constrained to a single table
_, err = r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) {
col, _ := tableSch.GetAllCols().GetByTag(tag)
if col.IsPartOfPK {
@@ -198,6 +202,7 @@ func RowAsTupleString(r row.Row, tableSch schema.Schema) (string, error) {
b.WriteString("(")
seenOne := false
// TAGS: Use of tags here is safe since it's constrained to a single table
_, err := r.IterSchema(tableSch, func(tag uint64, val types.Value) (stop bool, err error) {
if seenOne {
b.WriteRune(',')
@@ -227,6 +227,7 @@ func GenerateCreateTableIndexDefinition(index schema.Index) (string, bool) {
// GenerateCreateTableForeignKeyDefinition returns foreign key definition for CREATE TABLE statement with indentation of 2 spaces
func GenerateCreateTableForeignKeyDefinition(fk doltdb.ForeignKey, sch, parentSch schema.Schema) string {
// TAGS: Use of tags here is within a single table, so is safe
var fkCols []string
if fk.IsResolved() {
for _, tag := range fk.TableColumns {
-30
View File
@@ -1149,36 +1149,6 @@ SQL
}
@test "merge: ourRoot modifies, theirRoot renames" {
dolt checkout -b merge_branch
dolt sql -q "ALTER TABLE test1 RENAME TO new_name"
dolt add .
dolt commit -am "rename test1"
dolt checkout main
dolt sql -q "INSERT INTO test1 VALUES (0,1,2)"
dolt commit -am "add pk 0 to test1"
run dolt merge merge_branch
log_status_eq 1
[[ "$output" =~ "cannot merge, column pk on table new_name has duplicate tag as table test1. This was likely because one of the tables is a rename of the other" ]] || false
}
@test "merge: ourRoot modifies the schema, theirRoot renames" {
dolt checkout -b merge_branch
dolt sql -q "ALTER TABLE test1 RENAME TO new_name"
dolt add .
dolt commit -am "rename test1"
dolt checkout main
dolt sql -q "ALTER TABLE test1 DROP COLUMN c2;"
dolt commit -am "modify test1"
run dolt merge merge_branch
log_status_eq 1
[[ "$output" =~ "cannot merge, column pk on table new_name has duplicate tag as table test1. This was likely because one of the tables is a rename of the other" ]] || false
}
@test "merge: dolt merge commits successful non-fast-forward merge" {
dolt branch other
dolt sql -q "INSERT INTO test1 VALUES (1,2,3)"