mirror of
https://github.com/dolthub/dolt.git
synced 2026-02-18 10:09:21 -06:00
Merge pull request #4626 from dolthub/james/import
more permissive constraints and types when importing
This commit is contained in:
@@ -16,6 +16,7 @@ package actions
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"math"
|
||||
"strconv"
|
||||
@@ -61,7 +62,6 @@ func InferColumnTypesFromTableReader(ctx context.Context, rd table.ReadCloser, a
|
||||
|
||||
var curr, prev row.Row
|
||||
i := newInferrer(rd.GetSchema(), args)
|
||||
|
||||
OUTER:
|
||||
for j := 0; true; j++ {
|
||||
var err error
|
||||
@@ -130,10 +130,8 @@ func (inf *inferrer) inferColumnTypes() (*schema.ColCollection, error) {
|
||||
col.TypeInfo = inferredTypes[tag]
|
||||
col.Tag = schema.ReservedTagMin + tag
|
||||
|
||||
col.Constraints = []schema.ColConstraint{schema.NotNullConstraint{}}
|
||||
if inf.nullable.Contains(tag) {
|
||||
col.Constraints = []schema.ColConstraint(nil)
|
||||
}
|
||||
// for large imports, it is possible to miss all the null values, so we cannot accurately add not null constraint
|
||||
col.Constraints = []schema.ColConstraint(nil)
|
||||
|
||||
cols = append(cols, col)
|
||||
return false, nil
|
||||
@@ -218,32 +216,27 @@ func leastPermissiveNumericType(strVal string, floatThreshold float64) (ti typei
|
||||
return ti
|
||||
}
|
||||
|
||||
if strings.Contains(strVal, "-") {
|
||||
i, err := strconv.ParseInt(strVal, 10, 64)
|
||||
if err != nil {
|
||||
return typeinfo.UnknownType
|
||||
}
|
||||
if i >= math.MinInt32 && i <= math.MaxInt32 {
|
||||
return typeinfo.Int32Type
|
||||
} else {
|
||||
return typeinfo.Int64Type
|
||||
}
|
||||
// always parse as signed int
|
||||
i, err := strconv.ParseInt(strVal, 10, 64)
|
||||
|
||||
// use string for out of range
|
||||
if errors.Is(err, strconv.ErrRange) {
|
||||
return typeinfo.StringDefaultType
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return typeinfo.UnknownType
|
||||
}
|
||||
|
||||
// handle leading zero case
|
||||
if len(strVal) > 1 && strVal[0] == '0' {
|
||||
return typeinfo.StringDefaultType
|
||||
}
|
||||
|
||||
if i >= math.MinInt32 && i <= math.MaxInt32 {
|
||||
return typeinfo.Int32Type
|
||||
} else {
|
||||
ui, err := strconv.ParseUint(strVal, 10, 64)
|
||||
if err != nil {
|
||||
return typeinfo.UnknownType
|
||||
}
|
||||
|
||||
// handle leading zero case
|
||||
if len(strVal) > 1 && strVal[0] == '0' {
|
||||
return typeinfo.StringDefaultType
|
||||
}
|
||||
|
||||
if ui <= math.MaxUint32 {
|
||||
return typeinfo.Uint32Type
|
||||
} else {
|
||||
return typeinfo.Uint64Type
|
||||
}
|
||||
return typeinfo.Int64Type
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,14 +279,13 @@ func chronoTypes() []typeinfo.TypeInfo {
|
||||
func numericTypes() []typeinfo.TypeInfo {
|
||||
// prefer:
|
||||
// ints over floats
|
||||
// unsigned over signed
|
||||
// smaller over larger
|
||||
return []typeinfo.TypeInfo{
|
||||
//typeinfo.Uint8Type,
|
||||
//typeinfo.Uint16Type,
|
||||
//typeinfo.Uint24Type,
|
||||
typeinfo.Uint32Type,
|
||||
typeinfo.Uint64Type,
|
||||
//typeinfo.Uint32Type,
|
||||
//typeinfo.Uint64Type,
|
||||
|
||||
//typeinfo.Int8Type,
|
||||
//typeinfo.Int16Type,
|
||||
@@ -398,12 +390,6 @@ func findCommonNumericType(nums typeInfoSet) typeinfo.TypeInfo {
|
||||
typeinfo.Int24Type,
|
||||
typeinfo.Int16Type,
|
||||
typeinfo.Int8Type,
|
||||
|
||||
typeinfo.Uint64Type,
|
||||
typeinfo.Uint32Type,
|
||||
typeinfo.Uint24Type,
|
||||
typeinfo.Uint16Type,
|
||||
typeinfo.Uint8Type,
|
||||
}
|
||||
for _, numType := range mostToLeast {
|
||||
if setHasType(nums, numType) {
|
||||
|
||||
@@ -49,14 +49,14 @@ func TestLeastPermissiveType(t *testing.T) {
|
||||
{"lower bool", "true", 0.0, typeinfo.BoolType},
|
||||
{"upper bool", "FALSE", 0.0, typeinfo.BoolType},
|
||||
{"yes", "yes", 0.0, typeinfo.StringDefaultType},
|
||||
{"one", "1", 0.0, typeinfo.Uint32Type},
|
||||
{"one", "1", 0.0, typeinfo.Int32Type},
|
||||
{"negative one", "-1", 0.0, typeinfo.Int32Type},
|
||||
{"negative one point 0", "-1.0", 0.0, typeinfo.Float32Type},
|
||||
{"negative one point 0 with FT of 0.1", "-1.0", 0.1, typeinfo.Int32Type},
|
||||
{"negative one point one with FT of 0.1", "-1.1", 0.1, typeinfo.Float32Type},
|
||||
{"negative one point 999 with FT of 1.0", "-1.999", 1.0, typeinfo.Int32Type},
|
||||
{"zero point zero zero zero zero", "0.0000", 0.0, typeinfo.Float32Type},
|
||||
{"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Uint64Type},
|
||||
{"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Int64Type},
|
||||
{"bigger than max int", strconv.FormatUint(math.MaxUint64, 10) + "0", 0.0, typeinfo.StringDefaultType},
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ func TestLeastPermissiveNumericType(t *testing.T) {
|
||||
floatThreshold float64
|
||||
expType typeinfo.TypeInfo
|
||||
}{
|
||||
{"zero", "0", 0.0, typeinfo.Uint32Type},
|
||||
{"zero", "0", 0.0, typeinfo.Int32Type},
|
||||
{"zero float", "0.0", 0.0, typeinfo.Float32Type},
|
||||
{"zero float with floatThreshold of 0.1", "0.0", 0.1, typeinfo.Int32Type},
|
||||
{"negative float", "-1.3451234", 0.0, typeinfo.Float32Type},
|
||||
@@ -85,8 +85,8 @@ func TestLeastPermissiveNumericType(t *testing.T) {
|
||||
{"all zeroes", "0000", 0.0, typeinfo.StringDefaultType},
|
||||
{"leading zeroes", "01", 0.0, typeinfo.StringDefaultType},
|
||||
{"negative int", "-1234", 0.0, typeinfo.Int32Type},
|
||||
{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.Uint64Type},
|
||||
{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.UnknownType},
|
||||
{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
|
||||
{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
|
||||
{"math.MinInt64", strconv.FormatInt(math.MinInt64, 10), 0.0, typeinfo.Int64Type},
|
||||
}
|
||||
|
||||
@@ -142,14 +142,6 @@ func testFindCommonType(t *testing.T) {
|
||||
},
|
||||
expType: typeinfo.Int64Type,
|
||||
},
|
||||
{
|
||||
name: "all unsigned ints",
|
||||
inferSet: typeInfoSet{
|
||||
typeinfo.Uint32Type: {},
|
||||
typeinfo.Uint64Type: {},
|
||||
},
|
||||
expType: typeinfo.Uint64Type,
|
||||
},
|
||||
{
|
||||
name: "all floats",
|
||||
inferSet: typeInfoSet{
|
||||
@@ -159,35 +151,31 @@ func testFindCommonType(t *testing.T) {
|
||||
expType: typeinfo.Float64Type,
|
||||
},
|
||||
{
|
||||
name: "32 bit ints and uints",
|
||||
name: "32 bit ints",
|
||||
inferSet: typeInfoSet{
|
||||
typeinfo.Int32Type: {},
|
||||
typeinfo.Uint32Type: {},
|
||||
typeinfo.Int32Type: {},
|
||||
},
|
||||
expType: typeinfo.Int32Type,
|
||||
},
|
||||
{
|
||||
name: "64 bit ints and uints",
|
||||
name: "64 bit ints",
|
||||
inferSet: typeInfoSet{
|
||||
typeinfo.Int64Type: {},
|
||||
typeinfo.Uint64Type: {},
|
||||
typeinfo.Int64Type: {},
|
||||
},
|
||||
expType: typeinfo.Int64Type,
|
||||
},
|
||||
{
|
||||
name: "32 bit ints, uints, and floats",
|
||||
name: "32 bit ints and floats",
|
||||
inferSet: typeInfoSet{
|
||||
typeinfo.Int32Type: {},
|
||||
typeinfo.Uint32Type: {},
|
||||
typeinfo.Float32Type: {},
|
||||
},
|
||||
expType: typeinfo.Float32Type,
|
||||
},
|
||||
{
|
||||
name: "64 bit ints, uints, and floats",
|
||||
name: "64 bit ints and floats",
|
||||
inferSet: typeInfoSet{
|
||||
typeinfo.Int64Type: {},
|
||||
typeinfo.Uint64Type: {},
|
||||
typeinfo.Float64Type: {},
|
||||
},
|
||||
expType: typeinfo.Float64Type,
|
||||
@@ -228,11 +216,6 @@ func testFindCommonType(t *testing.T) {
|
||||
|
||||
func testFindCommonTypeFromSingleType(t *testing.T) {
|
||||
allTypes := []typeinfo.TypeInfo{
|
||||
typeinfo.Uint8Type,
|
||||
typeinfo.Uint16Type,
|
||||
typeinfo.Uint24Type,
|
||||
typeinfo.Uint32Type,
|
||||
typeinfo.Uint64Type,
|
||||
typeinfo.Int8Type,
|
||||
typeinfo.Int16Type,
|
||||
typeinfo.Int24Type,
|
||||
@@ -388,7 +371,7 @@ func TestInferSchema(t *testing.T) {
|
||||
},
|
||||
map[string]typeinfo.TypeInfo{
|
||||
"int": typeinfo.Int32Type,
|
||||
"uint": typeinfo.Uint64Type,
|
||||
"uint": typeinfo.StringDefaultType,
|
||||
"uuid": typeinfo.UuidType,
|
||||
"float": typeinfo.Float32Type,
|
||||
"bool": typeinfo.BoolType,
|
||||
@@ -404,7 +387,7 @@ func TestInferSchema(t *testing.T) {
|
||||
floatThreshold: 0,
|
||||
},
|
||||
map[string]typeinfo.TypeInfo{
|
||||
"mix": typeinfo.Uint64Type,
|
||||
"mix": typeinfo.StringDefaultType,
|
||||
"uuid": typeinfo.UuidType,
|
||||
},
|
||||
nil,
|
||||
@@ -500,7 +483,7 @@ func TestInferSchema(t *testing.T) {
|
||||
|
||||
err = allCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
|
||||
idx := schema.IndexOfConstraint(col.Constraints, schema.NotNullConstraintType)
|
||||
assert.True(t, idx == -1 == test.nullableCols.Contains(col.Name), "%s unexpected nullability", col.Name)
|
||||
assert.True(t, idx == -1, "%s unexpected not null constraint", col.Name)
|
||||
return false, nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -169,7 +169,7 @@ pk,c1,c2,c3,c4,c5
|
||||
9,1,2,3,4,5
|
||||
DELIM
|
||||
dolt table import -c --pk=pk test 1pk5col-ints.csv
|
||||
run dolt sql -q "create table fktest(id int not null, tpk int unsigned, c2 int, primary key(id), foreign key (tpk) references test(pk))"
|
||||
run dolt sql -q "create table fktest(id int not null, tpk int, c2 int, primary key(id), foreign key (tpk) references test(pk))"
|
||||
[ "$status" -eq 0 ]
|
||||
run dolt sql -q "insert into fktest values (1, 0, 1)"
|
||||
[ "$status" -eq 0 ]
|
||||
@@ -567,7 +567,7 @@ DELIM
|
||||
[[ "$output" =~ "CREATE TABLE \`test\`" ]]
|
||||
[[ "$output" =~ "\`pk\` int" ]]
|
||||
[[ "$output" =~ "\`str\` varchar(16383)" ]]
|
||||
[[ "$output" =~ "\`int\` int unsigned" ]]
|
||||
[[ "$output" =~ "\`int\` int" ]]
|
||||
[[ "$output" =~ "\`bool\` tinyint" ]]
|
||||
[[ "$output" =~ "\`float\` float" ]]
|
||||
[[ "$output" =~ "\`date\` date" ]]
|
||||
|
||||
@@ -82,7 +82,7 @@ teardown() {
|
||||
[[ "$output" =~ "\`string\` varchar(16383)" ]] || false
|
||||
[[ "$output" =~ "\`boolean\` tinyint" ]] || false
|
||||
[[ "$output" =~ "\`float\` float" ]] || false
|
||||
[[ "$output" =~ "\`uint\` int unsigned" ]] || false
|
||||
[[ "$output" =~ "\`uint\` int" ]] || false
|
||||
[[ "$output" =~ "\`uuid\` char(36) CHARACTER SET ascii COLLATE ascii_bin" ]] || false
|
||||
}
|
||||
|
||||
@@ -259,9 +259,9 @@ DELIM
|
||||
|
||||
run dolt diff --schema
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '+ `x` varchar(16383) NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '+ `y` float NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '+ `z` int NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '+ `x` varchar(16383),' ]] || false
|
||||
[[ "$output" =~ '+ `y` float,' ]] || false
|
||||
[[ "$output" =~ '+ `z` int,' ]] || false
|
||||
# assert no columns were deleted/replaced
|
||||
[[ ! "$output" = "- \`" ]] || false
|
||||
|
||||
@@ -282,9 +282,9 @@ DELIM
|
||||
|
||||
run dolt diff --schema
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '+ `x` varchar(16383) NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '+ `y` float NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '+ `z` int NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '+ `x` varchar(16383),' ]] || false
|
||||
[[ "$output" =~ '+ `y` float,' ]] || false
|
||||
[[ "$output" =~ '+ `z` int,' ]] || false
|
||||
# assert no columns were deleted/replaced
|
||||
[[ ! "$output" = "- \`" ]] || false
|
||||
|
||||
@@ -308,9 +308,9 @@ DELIM
|
||||
|
||||
run dolt diff --schema
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ '- `a` varchar(16383) NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '- `b` float NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '- `c` tinyint NOT NULL,' ]] || false
|
||||
[[ "$output" =~ '- `a` varchar(16383),' ]] || false
|
||||
[[ "$output" =~ '- `b` float,' ]] || false
|
||||
[[ "$output" =~ '- `c` tinyint,' ]] || false
|
||||
# assert no columns were added
|
||||
[[ ! "$output" = "+ \`" ]] || false
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user