Merge pull request #4626 from dolthub/james/import

more permissive constraints and types when importing
This commit is contained in:
James Cor
2022-10-26 16:46:01 -07:00
committed by GitHub
4 changed files with 51 additions and 82 deletions

View File

@@ -16,6 +16,7 @@ package actions
import (
"context"
"errors"
"io"
"math"
"strconv"
@@ -61,7 +62,6 @@ func InferColumnTypesFromTableReader(ctx context.Context, rd table.ReadCloser, a
var curr, prev row.Row
i := newInferrer(rd.GetSchema(), args)
OUTER:
for j := 0; true; j++ {
var err error
@@ -130,10 +130,8 @@ func (inf *inferrer) inferColumnTypes() (*schema.ColCollection, error) {
col.TypeInfo = inferredTypes[tag]
col.Tag = schema.ReservedTagMin + tag
col.Constraints = []schema.ColConstraint{schema.NotNullConstraint{}}
if inf.nullable.Contains(tag) {
col.Constraints = []schema.ColConstraint(nil)
}
// for large imports, it is possible to miss all the null values, so we cannot accurately add not null constraint
col.Constraints = []schema.ColConstraint(nil)
cols = append(cols, col)
return false, nil
@@ -218,32 +216,27 @@ func leastPermissiveNumericType(strVal string, floatThreshold float64) (ti typei
return ti
}
if strings.Contains(strVal, "-") {
i, err := strconv.ParseInt(strVal, 10, 64)
if err != nil {
return typeinfo.UnknownType
}
if i >= math.MinInt32 && i <= math.MaxInt32 {
return typeinfo.Int32Type
} else {
return typeinfo.Int64Type
}
// always parse as signed int
i, err := strconv.ParseInt(strVal, 10, 64)
// use string for out of range
if errors.Is(err, strconv.ErrRange) {
return typeinfo.StringDefaultType
}
if err != nil {
return typeinfo.UnknownType
}
// handle leading zero case
if len(strVal) > 1 && strVal[0] == '0' {
return typeinfo.StringDefaultType
}
if i >= math.MinInt32 && i <= math.MaxInt32 {
return typeinfo.Int32Type
} else {
ui, err := strconv.ParseUint(strVal, 10, 64)
if err != nil {
return typeinfo.UnknownType
}
// handle leading zero case
if len(strVal) > 1 && strVal[0] == '0' {
return typeinfo.StringDefaultType
}
if ui <= math.MaxUint32 {
return typeinfo.Uint32Type
} else {
return typeinfo.Uint64Type
}
return typeinfo.Int64Type
}
}
@@ -286,14 +279,13 @@ func chronoTypes() []typeinfo.TypeInfo {
func numericTypes() []typeinfo.TypeInfo {
// prefer:
// ints over floats
// unsigned over signed
// smaller over larger
return []typeinfo.TypeInfo{
//typeinfo.Uint8Type,
//typeinfo.Uint16Type,
//typeinfo.Uint24Type,
typeinfo.Uint32Type,
typeinfo.Uint64Type,
//typeinfo.Uint32Type,
//typeinfo.Uint64Type,
//typeinfo.Int8Type,
//typeinfo.Int16Type,
@@ -398,12 +390,6 @@ func findCommonNumericType(nums typeInfoSet) typeinfo.TypeInfo {
typeinfo.Int24Type,
typeinfo.Int16Type,
typeinfo.Int8Type,
typeinfo.Uint64Type,
typeinfo.Uint32Type,
typeinfo.Uint24Type,
typeinfo.Uint16Type,
typeinfo.Uint8Type,
}
for _, numType := range mostToLeast {
if setHasType(nums, numType) {

View File

@@ -49,14 +49,14 @@ func TestLeastPermissiveType(t *testing.T) {
{"lower bool", "true", 0.0, typeinfo.BoolType},
{"upper bool", "FALSE", 0.0, typeinfo.BoolType},
{"yes", "yes", 0.0, typeinfo.StringDefaultType},
{"one", "1", 0.0, typeinfo.Uint32Type},
{"one", "1", 0.0, typeinfo.Int32Type},
{"negative one", "-1", 0.0, typeinfo.Int32Type},
{"negative one point 0", "-1.0", 0.0, typeinfo.Float32Type},
{"negative one point 0 with FT of 0.1", "-1.0", 0.1, typeinfo.Int32Type},
{"negative one point one with FT of 0.1", "-1.1", 0.1, typeinfo.Float32Type},
{"negative one point 999 with FT of 1.0", "-1.999", 1.0, typeinfo.Int32Type},
{"zero point zero zero zero zero", "0.0000", 0.0, typeinfo.Float32Type},
{"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Uint64Type},
{"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Int64Type},
{"bigger than max int", strconv.FormatUint(math.MaxUint64, 10) + "0", 0.0, typeinfo.StringDefaultType},
}
@@ -75,7 +75,7 @@ func TestLeastPermissiveNumericType(t *testing.T) {
floatThreshold float64
expType typeinfo.TypeInfo
}{
{"zero", "0", 0.0, typeinfo.Uint32Type},
{"zero", "0", 0.0, typeinfo.Int32Type},
{"zero float", "0.0", 0.0, typeinfo.Float32Type},
{"zero float with floatThreshold of 0.1", "0.0", 0.1, typeinfo.Int32Type},
{"negative float", "-1.3451234", 0.0, typeinfo.Float32Type},
@@ -85,8 +85,8 @@ func TestLeastPermissiveNumericType(t *testing.T) {
{"all zeroes", "0000", 0.0, typeinfo.StringDefaultType},
{"leading zeroes", "01", 0.0, typeinfo.StringDefaultType},
{"negative int", "-1234", 0.0, typeinfo.Int32Type},
{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.Uint64Type},
{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.UnknownType},
{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
{"math.MinInt64", strconv.FormatInt(math.MinInt64, 10), 0.0, typeinfo.Int64Type},
}
@@ -142,14 +142,6 @@ func testFindCommonType(t *testing.T) {
},
expType: typeinfo.Int64Type,
},
{
name: "all unsigned ints",
inferSet: typeInfoSet{
typeinfo.Uint32Type: {},
typeinfo.Uint64Type: {},
},
expType: typeinfo.Uint64Type,
},
{
name: "all floats",
inferSet: typeInfoSet{
@@ -159,35 +151,31 @@ func testFindCommonType(t *testing.T) {
expType: typeinfo.Float64Type,
},
{
name: "32 bit ints and uints",
name: "32 bit ints",
inferSet: typeInfoSet{
typeinfo.Int32Type: {},
typeinfo.Uint32Type: {},
typeinfo.Int32Type: {},
},
expType: typeinfo.Int32Type,
},
{
name: "64 bit ints and uints",
name: "64 bit ints",
inferSet: typeInfoSet{
typeinfo.Int64Type: {},
typeinfo.Uint64Type: {},
typeinfo.Int64Type: {},
},
expType: typeinfo.Int64Type,
},
{
name: "32 bit ints, uints, and floats",
name: "32 bit ints and floats",
inferSet: typeInfoSet{
typeinfo.Int32Type: {},
typeinfo.Uint32Type: {},
typeinfo.Float32Type: {},
},
expType: typeinfo.Float32Type,
},
{
name: "64 bit ints, uints, and floats",
name: "64 bit ints and floats",
inferSet: typeInfoSet{
typeinfo.Int64Type: {},
typeinfo.Uint64Type: {},
typeinfo.Float64Type: {},
},
expType: typeinfo.Float64Type,
@@ -228,11 +216,6 @@ func testFindCommonType(t *testing.T) {
func testFindCommonTypeFromSingleType(t *testing.T) {
allTypes := []typeinfo.TypeInfo{
typeinfo.Uint8Type,
typeinfo.Uint16Type,
typeinfo.Uint24Type,
typeinfo.Uint32Type,
typeinfo.Uint64Type,
typeinfo.Int8Type,
typeinfo.Int16Type,
typeinfo.Int24Type,
@@ -388,7 +371,7 @@ func TestInferSchema(t *testing.T) {
},
map[string]typeinfo.TypeInfo{
"int": typeinfo.Int32Type,
"uint": typeinfo.Uint64Type,
"uint": typeinfo.StringDefaultType,
"uuid": typeinfo.UuidType,
"float": typeinfo.Float32Type,
"bool": typeinfo.BoolType,
@@ -404,7 +387,7 @@ func TestInferSchema(t *testing.T) {
floatThreshold: 0,
},
map[string]typeinfo.TypeInfo{
"mix": typeinfo.Uint64Type,
"mix": typeinfo.StringDefaultType,
"uuid": typeinfo.UuidType,
},
nil,
@@ -500,7 +483,7 @@ func TestInferSchema(t *testing.T) {
err = allCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
idx := schema.IndexOfConstraint(col.Constraints, schema.NotNullConstraintType)
assert.True(t, idx == -1 == test.nullableCols.Contains(col.Name), "%s unexpected nullability", col.Name)
assert.True(t, idx == -1, "%s unexpected not null constraint", col.Name)
return false, nil
})
require.NoError(t, err)

View File

@@ -169,7 +169,7 @@ pk,c1,c2,c3,c4,c5
9,1,2,3,4,5
DELIM
dolt table import -c --pk=pk test 1pk5col-ints.csv
run dolt sql -q "create table fktest(id int not null, tpk int unsigned, c2 int, primary key(id), foreign key (tpk) references test(pk))"
run dolt sql -q "create table fktest(id int not null, tpk int, c2 int, primary key(id), foreign key (tpk) references test(pk))"
[ "$status" -eq 0 ]
run dolt sql -q "insert into fktest values (1, 0, 1)"
[ "$status" -eq 0 ]
@@ -567,7 +567,7 @@ DELIM
[[ "$output" =~ "CREATE TABLE \`test\`" ]]
[[ "$output" =~ "\`pk\` int" ]]
[[ "$output" =~ "\`str\` varchar(16383)" ]]
[[ "$output" =~ "\`int\` int unsigned" ]]
[[ "$output" =~ "\`int\` int" ]]
[[ "$output" =~ "\`bool\` tinyint" ]]
[[ "$output" =~ "\`float\` float" ]]
[[ "$output" =~ "\`date\` date" ]]

View File

@@ -82,7 +82,7 @@ teardown() {
[[ "$output" =~ "\`string\` varchar(16383)" ]] || false
[[ "$output" =~ "\`boolean\` tinyint" ]] || false
[[ "$output" =~ "\`float\` float" ]] || false
[[ "$output" =~ "\`uint\` int unsigned" ]] || false
[[ "$output" =~ "\`uint\` int" ]] || false
[[ "$output" =~ "\`uuid\` char(36) CHARACTER SET ascii COLLATE ascii_bin" ]] || false
}
@@ -259,9 +259,9 @@ DELIM
run dolt diff --schema
[ "$status" -eq 0 ]
[[ "$output" =~ '+ `x` varchar(16383) NOT NULL,' ]] || false
[[ "$output" =~ '+ `y` float NOT NULL,' ]] || false
[[ "$output" =~ '+ `z` int NOT NULL,' ]] || false
[[ "$output" =~ '+ `x` varchar(16383),' ]] || false
[[ "$output" =~ '+ `y` float,' ]] || false
[[ "$output" =~ '+ `z` int,' ]] || false
# assert no columns were deleted/replaced
[[ ! "$output" = "- \`" ]] || false
@@ -282,9 +282,9 @@ DELIM
run dolt diff --schema
[ "$status" -eq 0 ]
[[ "$output" =~ '+ `x` varchar(16383) NOT NULL,' ]] || false
[[ "$output" =~ '+ `y` float NOT NULL,' ]] || false
[[ "$output" =~ '+ `z` int NOT NULL,' ]] || false
[[ "$output" =~ '+ `x` varchar(16383),' ]] || false
[[ "$output" =~ '+ `y` float,' ]] || false
[[ "$output" =~ '+ `z` int,' ]] || false
# assert no columns were deleted/replaced
[[ ! "$output" = "- \`" ]] || false
@@ -308,9 +308,9 @@ DELIM
run dolt diff --schema
[ "$status" -eq 0 ]
[[ "$output" =~ '- `a` varchar(16383) NOT NULL,' ]] || false
[[ "$output" =~ '- `b` float NOT NULL,' ]] || false
[[ "$output" =~ '- `c` tinyint NOT NULL,' ]] || false
[[ "$output" =~ '- `a` varchar(16383),' ]] || false
[[ "$output" =~ '- `b` float,' ]] || false
[[ "$output" =~ '- `c` tinyint,' ]] || false
# assert no columns were added
[[ ! "$output" = "+ \`" ]] || false
}