match keyless tables by column intersection, ddl diff bats

This commit is contained in:
Andy Arthur
2020-12-17 11:49:57 -08:00
parent 16955d80cc
commit b635b72e7b
3 changed files with 173 additions and 16 deletions

View File

@@ -82,6 +82,23 @@ SQL
[[ "${lines[4]}" = "9,2" ]] || false
}
# Adds column c2 and drops column c0 in one SQL batch, then checks the csv
# result set: header "c1,c2" followed by four rows whose c2 field is empty.
@test "keyless column add/drop" {
    run dolt --keyless sql <<SQL
ALTER TABLE keyless ADD COLUMN c2 int;
ALTER TABLE keyless DROP COLUMN c0;
SQL
    [ "$status" -eq 0 ]

    # Executed outside of `run`, so the result is not captured into $output/$lines.
    dolt --keyless sql -q "SELECT * FROM keyless ORDER BY c1;" -r csv

    run dolt --keyless sql -q "SELECT * FROM keyless ORDER BY c1;" -r csv
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "c1,c2" ]
    [ "${lines[1]}" = "0," ]
    [ "${lines[2]}" = "1," ]
    [ "${lines[3]}" = "1," ]
    [ "${lines[4]}" = "2," ]
}
# keyless tables allow duplicate rows
@test "keyless table import" {
skip "unimplemented"
@@ -187,6 +204,27 @@ SQL
[[ "${lines[10]}" = "2,2,," ]] || false
}
# Adds column c2 and drops column c0, then checks that `dolt diff` reports the
# schema change (c0 removed, c1 kept, c2 added) and the from/to column headers.
@test "keyless diff column add/drop" {
    run dolt --keyless sql <<SQL
ALTER TABLE keyless ADD COLUMN c2 int;
ALTER TABLE keyless DROP COLUMN c0;
SQL
    [ "$status" -eq 0 ]

    # Executed outside of `run`, so the diff is not captured into $output/$lines.
    dolt --keyless diff

    run dolt --keyless diff
    [ "$status" -eq 0 ]

    # Each check is a literal substring match against one line of the diff output.
    [[ "${lines[3]}" == *"CREATE TABLE keyless ("* ]] || false
    [[ "${lines[4]}" == *"- \`c0\` INT"* ]] || false
    [[ "${lines[5]}" == *" \`c1\` INT"* ]] || false
    [[ "${lines[6]}" == *"+ \`c2\` INT"* ]] || false
    [[ "${lines[7]}" == *" PRIMARY KEY ()"* ]] || false
    [[ "${lines[8]}" == *");"* ]] || false
    [[ "${lines[10]}" == *"| < | c1 | | c0 |"* ]] || false
    [[ "${lines[11]}" == *"| > | c1 | c2 | |"* ]] || false
}
@test "keyless merge fast-forward" {
skip "unimplemented"
dolt checkout -b other
@@ -210,6 +248,7 @@ SQL
dolt --keyless sql -q "INSERT INTO keyless VALUES (7,7),(8,8),(9,9);"
dolt --keyless commit -am "inserted on other"
dolt --keyless diff master
run dolt --keyless diff master
[ $status -eq 0 ]
[ "$output" = "" ]

View File

@@ -19,6 +19,8 @@ import (
"fmt"
"sort"
"github.com/dolthub/dolt/go/libraries/utils/set"
"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
@@ -187,6 +189,11 @@ type TableDelta struct {
// GetTableDeltas returns a slice of TableDelta objects for each table that changed between fromRoot and toRoot.
// Keyed tables are matched across roots using the tag of the first primary key column in the table's schema;
// keyless tables are matched separately by the column tags they have in common.
func GetTableDeltas(ctx context.Context, fromRoot, toRoot *doltdb.RootValue) (deltas []TableDelta, err error) {
deltas, err = getKeylessDeltas(ctx, fromRoot, toRoot)
if err != nil {
return nil, err
}
fromTables := make(map[uint64]*doltdb.Table)
fromTableNames := make(map[uint64]string)
fromTableFKs := make(map[uint64][]doltdb.ForeignKey)
@@ -198,6 +205,10 @@ func GetTableDeltas(ctx context.Context, fromRoot, toRoot *doltdb.RootValue) (de
}
err = fromRoot.IterTables(ctx, func(name string, table *doltdb.Table, sch schema.Schema) (stop bool, err error) {
if schema.IsKeyless(sch) {
return
}
th, err := table.HashOf()
if err != nil {
return true, err
@@ -220,26 +231,19 @@ func GetTableDeltas(ctx context.Context, fromRoot, toRoot *doltdb.RootValue) (de
}
err = toRoot.IterTables(ctx, func(name string, table *doltdb.Table, sch schema.Schema) (stop bool, err error) {
if schema.IsKeyless(sch) {
return
}
th, err := table.HashOf()
if err != nil {
return true, err
}
toFKs, _ := toFKC.KeysForTable(name)
toFksParentSch := make(map[string]schema.Schema)
for _, toFk := range toFKs {
toRefTable, _, ok, err := toRoot.GetTableInsensitive(ctx, toFk.ReferencedTableName)
if err != nil {
return true, err
}
if !ok {
continue // as the schemas are for display-only, we can skip on any missing parents (they were deleted, etc.)
}
toRefSch, err := toRefTable.GetSchema(ctx)
if err != nil {
return true, err
}
toFksParentSch[toFk.ReferencedTableName] = toRefSch
toFksParentSch, err := getFkParentSchs(ctx, toRoot, toFKs...)
if err != nil {
return false, err
}
pkTag := getUniqueTag(sch)
@@ -318,14 +322,118 @@ func GetStagedUnstagedTableDeltas(ctx context.Context, ddb *doltdb.DoltDB, rsr e
return staged, unstaged, nil
}
// getKeylessDeltas returns a TableDelta for every keyless table that was added,
// dropped, renamed or modified between fromRoot and toRoot. Tables whose schema
// is not keyless are ignored.
//
// We don't have any stable identifier for a keyless table, so we have to do an
// n^2 match: a table in toRoot is paired with a table in fromRoot when the two
// share at least one column tag.
// todo: this is a good reason to implement table tags
func getKeylessDeltas(ctx context.Context, fromRoot, toRoot *doltdb.RootValue) (deltas []TableDelta, err error) {
	type fromTable struct {
		tags *set.Uint64Set
		tbl  *doltdb.Table
		hsh  hash.Hash
	}

	// Index every keyless table of fromRoot by name.
	fromTables := make(map[string]fromTable)
	err = fromRoot.IterTables(ctx, func(name string, tbl *doltdb.Table, sch schema.Schema) (bool, error) {
		if !schema.IsKeyless(sch) {
			return false, nil
		}

		h, err := tbl.HashOf()
		if err != nil {
			return true, err
		}

		fromTables[name] = fromTable{
			tags: set.NewUint64Set(sch.GetAllCols().Tags),
			tbl:  tbl,
			hsh:  h,
		}
		return false, nil
	})
	if err != nil {
		return nil, err
	}

	err = toRoot.IterTables(ctx, func(name string, tbl *doltdb.Table, sch schema.Schema) (bool, error) {
		if !schema.IsKeyless(sch) {
			return false, nil
		}

		toTblHash, err := tbl.HashOf()
		if err != nil {
			return true, err
		}

		delta := TableDelta{
			ToName:  name,
			ToTable: tbl,
		}

		toTableTags := set.NewUint64Set(sch.GetAllCols().Tags)
		for fromName, fromTbl := range fromTables {
			// |tbl| and |fromTbl| have the same identity
			// if they have column tags in common
			if toTableTags.Intersection(fromTbl.tags).Size() == 0 {
				continue
			}

			// consume matched fromTable so it is not reported as a drop
			delete(fromTables, fromName)

			// Only skip when nothing changed at all: an equal hash under a
			// different name is a rename and must still produce a delta.
			if fromName == name && toTblHash.Equal(fromTbl.hsh) {
				return false, nil
			}

			delta.FromName = fromName
			delta.FromTable = fromTbl.tbl
			break
		}

		// append if matched (modified/renamed) or unmatched (added)
		deltas = append(deltas, delta)
		return false, nil
	})
	if err != nil {
		return nil, err
	}

	// All remaining fromTables are table drops. Map iteration order is random,
	// so emit them sorted by name to keep the result deterministic.
	dropped := make([]string, 0, len(fromTables))
	for name := range fromTables {
		dropped = append(dropped, name)
	}
	sort.Strings(dropped)

	for _, name := range dropped {
		deltas = append(deltas, TableDelta{
			FromName:  name,
			FromTable: fromTables[name].tbl,
		})
	}

	return deltas, nil
}
// getUniqueTag returns the column tag used to identify the table that sch belongs to:
// the first primary key column tag, or, for a keyless schema, the first non-primary-key
// column tag.
func getUniqueTag(sch schema.Schema) uint64 {
if schema.IsKeyless(sch) {
// todo: this will break for column changes
return sch.GetNonPKCols().Tags[0]
// NOTE(review): as written this panic is unreachable, because the return above always
// exits first. Presumably only one of the two statements is meant to remain — confirm
// which one before relying on either behavior.
panic("keyless tables have no stable column tags")
}
return sch.GetPKCols().Tags[0]
}
// getFkParentSchs looks up, in root, the table referenced by each foreign key in fks and
// returns the schemas of those tables keyed by the referenced table name. Foreign keys
// whose referenced table is not found in root are left out of the result.
func getFkParentSchs(ctx context.Context, root *doltdb.RootValue, fks ...doltdb.ForeignKey) (map[string]schema.Schema, error) {
	parents := make(map[string]schema.Schema)

	for _, fk := range fks {
		parentTbl, _, found, err := root.GetTableInsensitive(ctx, fk.ReferencedTableName)
		switch {
		case err != nil:
			return nil, err
		case !found:
			// as the schemas are for display-only, we can skip on any missing parents (they were deleted, etc.)
			continue
		}

		parentSch, err := parentTbl.GetSchema(ctx)
		if err != nil {
			return nil, err
		}

		parents[fk.ReferencedTableName] = parentSch
	}

	return parents, nil
}
// IsAdd returns true if the table was added between the fromRoot and toRoot.
func (td TableDelta) IsAdd() bool {
return td.FromTable == nil && td.ToTable != nil

View File

@@ -53,6 +53,16 @@ func (us *Uint64Set) Remove(i uint64) {
delete(us.uints, i)
}
// Intersection returns a new set containing only the members of us that other
// also contains. Neither us nor other is modified.
func (us *Uint64Set) Intersection(other *Uint64Set) *Uint64Set {
	common := &Uint64Set{uints: make(map[uint64]bool)}

	for candidate := range us.uints {
		if !other.Contains(candidate) {
			continue
		}
		common.Add(candidate)
	}

	return common
}
func (us *Uint64Set) AsSlice() []uint64 {
sl := make([]uint64, 0, us.Size())
for k := range us.uints {