improved super schema names (#221)

This commit is contained in:
Brian Hendriks
2019-11-21 15:09:05 -08:00
committed by GitHub
parent 39be831e85
commit 549e8ea54f
5 changed files with 170 additions and 93 deletions
+116 -44
View File
@@ -16,37 +16,17 @@ package rowconv
import (
"context"
"errors"
"fmt"
"github.com/liquidata-inc/dolt/go/libraries/utils/set"
"github.com/liquidata-inc/dolt/go/libraries/doltcore/doltdb"
"github.com/liquidata-inc/dolt/go/libraries/doltcore/schema"
"github.com/liquidata-inc/dolt/go/store/hash"
"github.com/liquidata-inc/dolt/go/store/types"
)
// RowConvForSuperSchema builds a RowConverter that transforms rows of the given schema into rows of the given
// super schema. Every column of sch is mapped, by name, to the super schema column called "<tag>_<kind>", where
// the tag and kind are taken from the input column itself.
func RowConvForSuperSchema(sch, super schema.Schema) (*RowConverter, error) {
	nameMapping := make(map[string]string)

	iterErr := sch.GetAllCols().Iter(func(_ uint64, c schema.Column) (bool, error) {
		nameMapping[c.Name] = fmt.Sprintf("%d_%s", c.Tag, c.Kind.String())
		return false, nil
	})
	if iterErr != nil {
		return nil, iterErr
	}

	mapping, err := NewFieldMappingFromNameMap(sch, super, nameMapping)
	if err != nil {
		return nil, err
	}

	return NewRowConverter(mapping)
}
// TagKindPair is a simple tuple that holds a tag and a NomsKind of a column
type TagKindPair struct {
// Tag is the tag of a column
@@ -65,31 +45,83 @@ type NameKindPair struct {
Kind types.NomsKind
}
// SuperSchema is an immutable schema generated by a SuperSchemaGen which defines methods for getting the schema
// and mapping another schema onto the super schema
type SuperSchema struct {
// sch is the generated schema; GetSchema panics while it is nil.
sch schema.Schema
// namedCols holds, for each tag/kind pair, the name of the super schema column representing that pair.
namedCols map[TagKindPair]string
}
// GetSchema returns the schema.Schema wrapped by this SuperSchema. It panics when no schema has been set, which
// is the case for a zero-value SuperSchema.
func (ss SuperSchema) GetSchema() schema.Schema {
	if generated := ss.sch; generated != nil {
		return generated
	}

	panic("Bug: super schema not generated.")
}
// RowConvForSchema creates a RowConverter for transforming rows with the given schema to this super schema.
// Each column of sch is matched, by its tag and kind, to the super schema column representing that tag/kind
// pair. An error is returned when sch contains a column whose tag/kind pair is not part of this super schema, or
// when the field mapping or the converter cannot be built.
func (ss SuperSchema) RowConvForSchema(sch schema.Schema) (*RowConverter, error) {
	inNameToOutName := make(map[string]string)

	err := sch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
		outName, ok := ss.namedCols[TagKindPair{Tag: tag, Kind: col.Kind}]
		if !ok {
			// Identify the offending column so the caller can tell which schema is out of sync with the
			// super schema instead of getting an anonymous failure.
			return false, fmt.Errorf("failed to map columns: column %q (tag %d, kind %s) is not in the super schema",
				col.Name, tag, col.Kind.String())
		}

		inNameToOutName[col.Name] = outName
		return false, nil
	})
	if err != nil {
		return nil, err
	}

	fm, err := NewFieldMappingFromNameMap(sch, ss.sch, inNameToOutName)
	if err != nil {
		return nil, err
	}

	return NewRowConverter(fm)
}
// SuperSchemaGen is a utility class used to generate the superset of several schemas.
type SuperSchemaGen struct {
// tagKindToDestTag maps each tag/kind pair seen so far to the tag assigned to it in the super schema.
tagKindToDestTag map[TagKindPair]uint64
// usedTags is the set of tags already assigned to a super schema column.
usedTags map[uint64]struct{}
// names collects, per tag/kind pair, every column name that pair was added under.
names map[TagKindPair]*set.StrSet
}
// NewSuperSchemaGen creates a new SuperSchemaGen whose tag, used-tag and column-name maps are allocated and empty.
func NewSuperSchemaGen() *SuperSchemaGen {
return &SuperSchemaGen{make(map[TagKindPair]uint64), make(map[uint64]struct{})}
return &SuperSchemaGen{
tagKindToDestTag: make(map[TagKindPair]uint64),
usedTags: make(map[uint64]struct{}),
names: make(map[TagKindPair]*set.StrSet),
}
}
// AddSchema will add a schema which will be incorporated into the superset of schemas
func (ss *SuperSchemaGen) AddSchema(sch schema.Schema) error {
func (ssg *SuperSchemaGen) AddSchema(sch schema.Schema) error {
err := sch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
tagKind := TagKindPair{Tag: tag, Kind: col.Kind}
_, exists := ss.tagKindToDestTag[tagKind]
_, exists := ssg.tagKindToDestTag[tagKind]
if !exists {
destTag := tag
for {
_, collides := ss.usedTags[destTag]
_, collides := ssg.usedTags[destTag]
if !collides {
ss.tagKindToDestTag[tagKind] = destTag
ss.usedTags[destTag] = struct{}{}
ssg.tagKindToDestTag[tagKind] = destTag
ssg.usedTags[destTag] = struct{}{}
ssg.names[tagKind] = set.NewStrSet([]string{col.Name})
return false, nil
}
@@ -99,6 +131,8 @@ func (ss *SuperSchemaGen) AddSchema(sch schema.Schema) error {
destTag++
}
}
} else {
ssg.names[tagKind].Add(col.Name)
}
return false, nil
@@ -111,19 +145,52 @@ func (ss *SuperSchemaGen) AddSchema(sch schema.Schema) error {
return nil
}
// nameCols chooses the super schema column name for every tag/kind pair that has been added.
//
// A pair that was only ever seen under a single column name keeps that name; otherwise its candidate name is
// "<tag>_<kind>". When several pairs end up with the same candidate, each of them is renamed to
// "<candidate>_<kind>_<tag>" so they can be told apart.
//
// NOTE(review): the disambiguated names are not checked against the names chosen for other pairs.
func (ssg *SuperSchemaGen) nameCols() map[TagKindPair]string {
	// Group the tag/kind pairs by candidate name so that clashes become visible.
	byCandidate := make(map[string][]TagKindPair)
	for tk, seen := range ssg.names {
		var candidate string
		if seen.Size() == 1 {
			candidate = seen.AsSlice()[0]
		} else {
			candidate = fmt.Sprintf("%d_%s", tk.Tag, tk.Kind.String())
		}

		byCandidate[candidate] = append(byCandidate[candidate], tk)
	}

	named := make(map[TagKindPair]string)
	for candidate, owners := range byCandidate {
		if len(owners) > 1 {
			// Shared candidate: qualify each owner with its own kind and tag.
			for _, tk := range owners {
				named[tk] = fmt.Sprintf("%s_%s_%d", candidate, tk.Kind.String(), tk.Tag)
			}
			continue
		}

		named[owners[0]] = candidate
	}

	return named
}
// GenerateSuperSchema takes all the accumulated schemas and generates a schema which is the superset of all of
// those schemas.
func (ss *SuperSchemaGen) GenerateSuperSchema(additionalCols ...NameKindPair) (schema.Schema, error) {
func (ssg *SuperSchemaGen) GenerateSuperSchema(additionalCols ...NameKindPair) (SuperSchema, error) {
namedCols := ssg.nameCols()
colColl, _ := schema.NewColCollection()
for tagKind, destTag := range ss.tagKindToDestTag {
colName := fmt.Sprintf("%d_%s", tagKind.Tag, tagKind.Kind.String())
for tagKind, colName := range namedCols {
destTag, ok := ssg.tagKindToDestTag[tagKind]
if !ok {
panic("mismatch between namedCols and tagKindToDestTag")
}
col := schema.NewColumn(colName, destTag, tagKind.Kind, false)
var err error
colColl, err = colColl.Append(col)
if err != nil {
return nil, err
return SuperSchema{}, err
}
}
@@ -131,35 +198,40 @@ func (ss *SuperSchemaGen) GenerateSuperSchema(additionalCols ...NameKindPair) (s
nextReserved := schema.ReservedTagMin
for _, nameKindPair := range additionalCols {
if _, ok := colColl.GetByName(nameKindPair.Name); ok {
return SuperSchema{}, errors.New("Additional column name collision: " + nameKindPair.Name)
}
for {
if _, ok := ss.usedTags[nextReserved]; !ok {
if _, ok := ssg.usedTags[nextReserved]; !ok {
break
}
nextReserved++
}
var err error
ss.usedTags[nextReserved] = struct{}{}
ssg.usedTags[nextReserved] = struct{}{}
colColl, err = colColl.Append(schema.NewColumn(nameKindPair.Name, nextReserved, nameKindPair.Kind, false))
if err != nil {
return nil, err
return SuperSchema{}, err
}
}
}
return schema.UnkeyedSchemaFromCols(colColl), nil
sch := schema.UnkeyedSchemaFromCols(colColl)
return SuperSchema{sch: sch, namedCols: namedCols}, nil
}
// AddHistoryOfTableAtCommit will traverse a commit graph adding all versions of a table's schema to the schemas being
// supersetted.
func (ss *SuperSchemaGen) AddHistoryOfTableAtCommit(ctx context.Context, tblName string, ddb *doltdb.DoltDB, cm *doltdb.Commit) error {
func (ssg *SuperSchemaGen) AddHistoryOfTableAtCommit(ctx context.Context, tblName string, ddb *doltdb.DoltDB, cm *doltdb.Commit) error {
addedSchemas := make(map[hash.Hash]struct{})
processedCommits := make(map[hash.Hash]struct{})
return ss.addHistoryOfTableAtCommit(ctx, tblName, addedSchemas, processedCommits, ddb, cm)
return ssg.addHistoryOfTableAtCommit(ctx, tblName, addedSchemas, processedCommits, ddb, cm)
}
func (ss *SuperSchemaGen) addHistoryOfTableAtCommit(ctx context.Context, tblName string, addedSchemas, processedCommits map[hash.Hash]struct{}, ddb *doltdb.DoltDB, cm *doltdb.Commit) error {
func (ssg *SuperSchemaGen) addHistoryOfTableAtCommit(ctx context.Context, tblName string, addedSchemas, processedCommits map[hash.Hash]struct{}, ddb *doltdb.DoltDB, cm *doltdb.Commit) error {
cmHash, err := cm.HashOf()
if err != nil {
@@ -200,7 +272,7 @@ func (ss *SuperSchemaGen) addHistoryOfTableAtCommit(ctx context.Context, tblName
return err
}
err = ss.AddSchema(sch)
err = ssg.AddSchema(sch)
if err != nil {
return err
@@ -221,7 +293,7 @@ func (ss *SuperSchemaGen) addHistoryOfTableAtCommit(ctx context.Context, tblName
return err
}
err = ss.addHistoryOfTableAtCommit(ctx, tblName, addedSchemas, processedCommits, ddb, cm)
err = ssg.addHistoryOfTableAtCommit(ctx, tblName, addedSchemas, processedCommits, ddb, cm)
if err != nil {
return err
@@ -233,7 +305,7 @@ func (ss *SuperSchemaGen) addHistoryOfTableAtCommit(ctx context.Context, tblName
// AddHistoryOfTable will traverse all commit graphs which have local branches associated with them and add all
// past versions of a table's schema to the schemas being supersetted
func (ss *SuperSchemaGen) AddHistoryOfTable(ctx context.Context, tblName string, ddb *doltdb.DoltDB) error {
func (ssg *SuperSchemaGen) AddHistoryOfTable(ctx context.Context, tblName string, ddb *doltdb.DoltDB) error {
refs, err := ddb.GetRefs(ctx)
if err != nil {
@@ -256,7 +328,7 @@ func (ss *SuperSchemaGen) AddHistoryOfTable(ctx context.Context, tblName string,
return err
}
err = ss.addHistoryOfTableAtCommit(ctx, tblName, addedSchemas, processedCommits, ddb, cm)
err = ssg.addHistoryOfTableAtCommit(ctx, tblName, addedSchemas, processedCommits, ddb, cm)
if err != nil {
return err
@@ -64,9 +64,9 @@ func TestSuperSchemaGen(t *testing.T) {
envtestutils.MustSchema(idColTag0TypeUUID, firstColTag1TypeStr, lastColTag2TypeStr),
},
mustSchemaFromTagAndKind(map[string]TagKindPair{
"0_UUID": TagKindPair{0, types.UUIDKind},
"1_String": TagKindPair{1, types.StringKind},
"2_String": TagKindPair{2, types.StringKind},
"id": TagKindPair{0, types.UUIDKind},
"first": TagKindPair{1, types.StringKind},
"last": TagKindPair{2, types.StringKind},
}),
},
{
@@ -76,10 +76,10 @@ func TestSuperSchemaGen(t *testing.T) {
envtestutils.MustSchema(idColTag0TypeUint, firstColTag1TypeStr, lastColTag2TypeStr),
},
mustSchemaFromTagAndKind(map[string]TagKindPair{
"0_UUID": TagKindPair{0, types.UUIDKind},
"0_Uint": TagKindPair{schema.ReservedTagMin, types.UintKind},
"1_String": TagKindPair{1, types.StringKind},
"2_String": TagKindPair{2, types.StringKind},
"id_UUID_0": TagKindPair{0, types.UUIDKind},
"id_Uint_0": TagKindPair{schema.ReservedTagMin, types.UintKind},
"first": TagKindPair{1, types.StringKind},
"last": TagKindPair{2, types.StringKind},
}),
},
{
@@ -90,11 +90,11 @@ func TestSuperSchemaGen(t *testing.T) {
envtestutils.MustSchema(idColTag0TypeUUID, firstColTag1TypeStr, lastColTag2TypeStr, ageColTag3TypeInt),
},
mustSchemaFromTagAndKind(map[string]TagKindPair{
"0_UUID": TagKindPair{0, types.UUIDKind},
"1_String": TagKindPair{1, types.StringKind},
"2_String": TagKindPair{2, types.StringKind},
"3_String": TagKindPair{3, types.StringKind},
"3_Int": TagKindPair{schema.ReservedTagMin, types.IntKind},
"id": TagKindPair{0, types.UUIDKind},
"first": TagKindPair{1, types.StringKind},
"last": TagKindPair{2, types.StringKind},
"addr": TagKindPair{3, types.StringKind},
"age": TagKindPair{schema.ReservedTagMin, types.IntKind},
}),
},
{
@@ -105,9 +105,9 @@ func TestSuperSchemaGen(t *testing.T) {
envtestutils.MustSchema(idColTag0TypeUUID, firstColTag1TypeStr, lastColTag2TypeStr, titleColTag3TypeStr),
},
mustSchemaFromTagAndKind(map[string]TagKindPair{
"0_UUID": TagKindPair{0, types.UUIDKind},
"1_String": TagKindPair{1, types.StringKind},
"2_String": TagKindPair{2, types.StringKind},
"id": TagKindPair{0, types.UUIDKind},
"first": TagKindPair{1, types.StringKind},
"last": TagKindPair{2, types.StringKind},
"3_String": TagKindPair{3, types.StringKind},
}),
},
@@ -121,13 +121,13 @@ func TestSuperSchemaGen(t *testing.T) {
envtestutils.MustSchema(idColTag0TypeUUID, firstColTag1TypeStr, lastColTag2TypeStr, addrColTag3TypeStr, ageColTag4TypeUint),
},
mustSchemaFromTagAndKind(map[string]TagKindPair{
"0_UUID": TagKindPair{0, types.UUIDKind},
"1_String": TagKindPair{1, types.StringKind},
"2_String": TagKindPair{2, types.StringKind},
"3_String": TagKindPair{3, types.StringKind},
"3_Int": TagKindPair{schema.ReservedTagMin, types.IntKind},
"4_Int": TagKindPair{4, types.IntKind},
"4_Uint": TagKindPair{schema.ReservedTagMin + 1, types.UintKind},
"id": TagKindPair{0, types.UUIDKind},
"first": TagKindPair{1, types.StringKind},
"last": TagKindPair{2, types.StringKind},
"addr": TagKindPair{3, types.StringKind},
"age_Int_3": TagKindPair{schema.ReservedTagMin, types.IntKind},
"age_Int_4": TagKindPair{4, types.IntKind},
"age_Uint_4": TagKindPair{schema.ReservedTagMin + 1, types.UintKind},
}),
},
}
@@ -141,9 +141,10 @@ func TestSuperSchemaGen(t *testing.T) {
require.NoError(t, err)
}
result, err := ssg.GenerateSuperSchema()
ss, err := ssg.GenerateSuperSchema()
require.NoError(t, err)
result := ss.GetSchema()
eq, err := schema.SchemasAreEqual(result, test.expected)
require.NoError(t, err)
assert.True(t, eq)
@@ -210,17 +211,19 @@ func TestSuperSchemaFromHistory(t *testing.T) {
err := ssg.AddHistoryOfTable(ctx, tblName, dEnv.DoltDB)
require.NoError(t, err)
result, err := ssg.GenerateSuperSchema(NameKindPair{"extra", types.StringKind})
ss, err := ssg.GenerateSuperSchema(NameKindPair{"extra", types.StringKind})
require.NoError(t, err)
result := ss.GetSchema()
expected := mustSchemaFromTagAndKind(map[string]TagKindPair{
"0_UUID": TagKindPair{0, types.UUIDKind},
"1_String": TagKindPair{1, types.StringKind},
"2_String": TagKindPair{2, types.StringKind},
"3_Int": TagKindPair{3, types.IntKind},
"3_String": TagKindPair{schema.ReservedTagMin, types.StringKind},
"4_Uint": TagKindPair{4, types.UintKind},
"extra": {schema.ReservedTagMin + 1, types.StringKind},
"id": TagKindPair{0, types.UUIDKind},
"first": TagKindPair{1, types.StringKind},
"last": TagKindPair{2, types.StringKind},
"age_Int_3": TagKindPair{3, types.IntKind},
"addr": TagKindPair{schema.ReservedTagMin, types.StringKind},
"age_Uint_4": TagKindPair{4, types.UintKind},
"extra": {schema.ReservedTagMin + 1, types.StringKind},
})
eq, err := schema.SchemasAreEqual(result, expected)
+12 -12
View File
@@ -354,19 +354,19 @@ var ageColTag3TypeInt = schema.NewColumn("age", 3, types.IntKind, false)
var ageColTag4TypeUint = schema.NewColumn("age", 4, types.UintKind, false)
var diffSchema = envtestutils.MustSchema(
schema.NewColumn("to_0_Int", 0, types.IntKind, false),
schema.NewColumn("to_1_String", 1, types.StringKind, false),
schema.NewColumn("to_2_String", 2, types.StringKind, false),
schema.NewColumn("to_3_Int", 3, types.IntKind, false),
schema.NewColumn("to_4_Uint", 4, types.UintKind, false),
schema.NewColumn("to_3_String", 5, types.StringKind, false),
schema.NewColumn("to_id", 0, types.IntKind, false),
schema.NewColumn("to_first", 1, types.StringKind, false),
schema.NewColumn("to_last", 2, types.StringKind, false),
schema.NewColumn("to_age_Int_3", 3, types.IntKind, false),
schema.NewColumn("to_age_Uint_4", 4, types.UintKind, false),
schema.NewColumn("to_addr", 5, types.StringKind, false),
schema.NewColumn("to_commit", 6, types.StringKind, false),
schema.NewColumn("from_0_Int", 7, types.IntKind, false),
schema.NewColumn("from_1_String", 8, types.StringKind, false),
schema.NewColumn("from_2_String", 9, types.StringKind, false),
schema.NewColumn("from_3_Int", 10, types.IntKind, false),
schema.NewColumn("from_4_Uint", 11, types.UintKind, false),
schema.NewColumn("from_3_String", 12, types.StringKind, false),
schema.NewColumn("from_id", 7, types.IntKind, false),
schema.NewColumn("from_first", 8, types.StringKind, false),
schema.NewColumn("from_last", 9, types.StringKind, false),
schema.NewColumn("from_age_Int_3", 10, types.IntKind, false),
schema.NewColumn("from_age_Uint_4", 11, types.UintKind, false),
schema.NewColumn("from_addr", 12, types.StringKind, false),
schema.NewColumn("from_commit", 13, types.StringKind, false),
)
+7 -5
View File
@@ -40,7 +40,7 @@ var _ sql.FilteredTable = (*DiffTable)(nil)
type DiffTable struct {
name string
dEnv *env.DoltEnv
superSch schema.Schema
ss rowconv.SuperSchema
joiner *rowconv.Joiner
fromRoot *doltdb.RootValue
toRoot *doltdb.RootValue
@@ -58,12 +58,14 @@ func NewDiffTable(name string, dEnv *env.DoltEnv) (*DiffTable, error) {
return nil, err
}
sch, err := ssg.GenerateSuperSchema(rowconv.NameKindPair{Name: "commit", Kind: types.StringKind})
ss, err := ssg.GenerateSuperSchema(rowconv.NameKindPair{Name: "commit", Kind: types.StringKind})
if err != nil {
panic(err)
}
sch := ss.GetSchema()
j, err := rowconv.NewJoiner(
[]rowconv.NamedSchema{{Name: diff.To, Sch: sch}, {Name: diff.From, Sch: sch}},
map[string]rowconv.ColNamingFunc{
@@ -87,7 +89,7 @@ func NewDiffTable(name string, dEnv *env.DoltEnv) (*DiffTable, error) {
return nil, err
}
return &DiffTable{name, dEnv, sch, j, root2, root1, "current", "HEAD", nil}, nil
return &DiffTable{name, dEnv, ss, j, root2, root1, "current", "HEAD", nil}, nil
}
func (dt *DiffTable) Name() string {
@@ -161,13 +163,13 @@ func (dt *DiffTable) PartitionRows(ctx *sql.Context, part sql.Partition) (sql.Ro
return nil, err
}
fromConv, err := rowconv.RowConvForSuperSchema(fromSch, dt.superSch)
fromConv, err := dt.ss.RowConvForSchema(fromSch)
if err != nil {
return nil, err
}
toConv, err := rowconv.RowConvForSuperSchema(toSch, dt.superSch)
toConv, err := dt.ss.RowConvForSchema(toSch)
if err != nil {
return nil, err
+1 -1
View File
@@ -45,7 +45,7 @@ func TestExecuteSelect(t *testing.T) {
}
}
func SkipTestExecuteSelectDiff(t *testing.T) {
func TestExecuteSelectDiff(t *testing.T) {
for _, test := range SelectDiffTests {
t.Run(test.Name, func(t *testing.T) {
testSelectDiffQuery(t, test)