diff --git a/go/libraries/doltcore/env/actions/infer_schema.go b/go/libraries/doltcore/env/actions/infer_schema.go index fc4af28ad3..74578ae5b9 100644 --- a/go/libraries/doltcore/env/actions/infer_schema.go +++ b/go/libraries/doltcore/env/actions/infer_schema.go @@ -16,6 +16,7 @@ package actions import ( "context" + "errors" "io" "math" "strconv" @@ -61,7 +62,6 @@ func InferColumnTypesFromTableReader(ctx context.Context, rd table.ReadCloser, a var curr, prev row.Row i := newInferrer(rd.GetSchema(), args) - OUTER: for j := 0; true; j++ { var err error @@ -130,10 +130,8 @@ func (inf *inferrer) inferColumnTypes() (*schema.ColCollection, error) { col.TypeInfo = inferredTypes[tag] col.Tag = schema.ReservedTagMin + tag - col.Constraints = []schema.ColConstraint{schema.NotNullConstraint{}} - if inf.nullable.Contains(tag) { - col.Constraints = []schema.ColConstraint(nil) - } + // for large imports, it is possible to miss all the null values, so we cannot accurately add not null constraint + col.Constraints = []schema.ColConstraint(nil) cols = append(cols, col) return false, nil @@ -218,32 +216,27 @@ func leastPermissiveNumericType(strVal string, floatThreshold float64) (ti typei return ti } - if strings.Contains(strVal, "-") { - i, err := strconv.ParseInt(strVal, 10, 64) - if err != nil { - return typeinfo.UnknownType - } - if i >= math.MinInt32 && i <= math.MaxInt32 { - return typeinfo.Int32Type - } else { - return typeinfo.Int64Type - } + // always parse as signed int + i, err := strconv.ParseInt(strVal, 10, 64) + + // use string for out of range + if errors.Is(err, strconv.ErrRange) { + return typeinfo.StringDefaultType + } + + if err != nil { + return typeinfo.UnknownType + } + + // handle leading zero case + if len(strVal) > 1 && strVal[0] == '0' { + return typeinfo.StringDefaultType + } + + if i >= math.MinInt32 && i <= math.MaxInt32 { + return typeinfo.Int32Type } else { - ui, err := strconv.ParseUint(strVal, 10, 64) - if err != nil { - return typeinfo.UnknownType - } - - // handle leading zero case - if len(strVal) > 1 && strVal[0] == '0' { - return typeinfo.StringDefaultType - } - - if ui <= math.MaxUint32 { - return typeinfo.Uint32Type - } else { - return typeinfo.Uint64Type - } + return typeinfo.Int64Type } } @@ -286,14 +279,13 @@ func chronoTypes() []typeinfo.TypeInfo { func numericTypes() []typeinfo.TypeInfo { // prefer: // ints over floats - // unsigned over signed // smaller over larger return []typeinfo.TypeInfo{ //typeinfo.Uint8Type, //typeinfo.Uint16Type, //typeinfo.Uint24Type, - typeinfo.Uint32Type, - typeinfo.Uint64Type, + //typeinfo.Uint32Type, + //typeinfo.Uint64Type, //typeinfo.Int8Type, //typeinfo.Int16Type, @@ -398,12 +390,6 @@ func findCommonNumericType(nums typeInfoSet) typeinfo.TypeInfo { typeinfo.Int24Type, typeinfo.Int16Type, typeinfo.Int8Type, - - typeinfo.Uint64Type, - typeinfo.Uint32Type, - typeinfo.Uint24Type, - typeinfo.Uint16Type, - typeinfo.Uint8Type, } for _, numType := range mostToLeast { if setHasType(nums, numType) { diff --git a/go/libraries/doltcore/env/actions/infer_schema_test.go b/go/libraries/doltcore/env/actions/infer_schema_test.go index 0f15d995d3..5e78a4e6a9 100644 --- a/go/libraries/doltcore/env/actions/infer_schema_test.go +++ b/go/libraries/doltcore/env/actions/infer_schema_test.go @@ -49,14 +49,14 @@ func TestLeastPermissiveType(t *testing.T) { {"lower bool", "true", 0.0, typeinfo.BoolType}, {"upper bool", "FALSE", 0.0, typeinfo.BoolType}, {"yes", "yes", 0.0, typeinfo.StringDefaultType}, - {"one", "1", 0.0, typeinfo.Uint32Type}, + {"one", "1", 0.0, typeinfo.Int32Type}, {"negative one", "-1", 0.0, typeinfo.Int32Type}, {"negative one point 0", "-1.0", 0.0, typeinfo.Float32Type}, {"negative one point 0 with FT of 0.1", "-1.0", 0.1, typeinfo.Int32Type}, {"negative one point one with FT of 0.1", "-1.1", 0.1, typeinfo.Float32Type}, {"negative one point 999 with FT of 1.0", "-1.999", 1.0, typeinfo.Int32Type}, {"zero point zero zero zero zero", "0.0000", 0.0, typeinfo.Float32Type}, - {"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Uint64Type}, + {"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Int64Type}, {"bigger than max int", strconv.FormatUint(math.MaxUint64, 10) + "0", 0.0, typeinfo.StringDefaultType}, } @@ -75,7 +75,7 @@ func TestLeastPermissiveNumericType(t *testing.T) { floatThreshold float64 expType typeinfo.TypeInfo }{ - {"zero", "0", 0.0, typeinfo.Uint32Type}, + {"zero", "0", 0.0, typeinfo.Int32Type}, {"zero float", "0.0", 0.0, typeinfo.Float32Type}, {"zero float with floatThreshold of 0.1", "0.0", 0.1, typeinfo.Int32Type}, {"negative float", "-1.3451234", 0.0, typeinfo.Float32Type}, @@ -85,8 +85,8 @@ func TestLeastPermissiveNumericType(t *testing.T) { {"all zeroes", "0000", 0.0, typeinfo.StringDefaultType}, {"leading zeroes", "01", 0.0, typeinfo.StringDefaultType}, {"negative int", "-1234", 0.0, typeinfo.Int32Type}, - {"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.Uint64Type}, - {"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.UnknownType}, + {"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType}, + {"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType}, {"math.MinInt64", strconv.FormatInt(math.MinInt64, 10), 0.0, typeinfo.Int64Type}, } @@ -142,14 +142,6 @@ func testFindCommonType(t *testing.T) { }, expType: typeinfo.Int64Type, }, - { - name: "all unsigned ints", - inferSet: typeInfoSet{ - typeinfo.Uint32Type: {}, - typeinfo.Uint64Type: {}, - }, - expType: typeinfo.Uint64Type, - }, { name: "all floats", inferSet: typeInfoSet{ @@ -159,35 +151,31 @@ func testFindCommonType(t *testing.T) { expType: typeinfo.Float64Type, }, { - name: "32 bit ints and uints", + name: "32 bit ints", inferSet: typeInfoSet{ - typeinfo.Int32Type: {}, - typeinfo.Uint32Type: {}, + typeinfo.Int32Type: {}, }, expType: typeinfo.Int32Type, }, { - name: "64 bit ints and uints", + name: "64 bit ints", inferSet: typeInfoSet{ - typeinfo.Int64Type: {}, - typeinfo.Uint64Type: {}, + typeinfo.Int64Type: {}, }, expType: typeinfo.Int64Type, }, { - name: "32 bit ints, uints, and floats", + name: "32 bit ints and floats", inferSet: typeInfoSet{ typeinfo.Int32Type: {}, - typeinfo.Uint32Type: {}, typeinfo.Float32Type: {}, }, expType: typeinfo.Float32Type, }, { - name: "64 bit ints, uints, and floats", + name: "64 bit ints and floats", inferSet: typeInfoSet{ typeinfo.Int64Type: {}, - typeinfo.Uint64Type: {}, typeinfo.Float64Type: {}, }, expType: typeinfo.Float64Type, @@ -228,11 +216,6 @@ func testFindCommonType(t *testing.T) { func testFindCommonTypeFromSingleType(t *testing.T) { allTypes := []typeinfo.TypeInfo{ - typeinfo.Uint8Type, - typeinfo.Uint16Type, - typeinfo.Uint24Type, - typeinfo.Uint32Type, - typeinfo.Uint64Type, typeinfo.Int8Type, typeinfo.Int16Type, typeinfo.Int24Type, @@ -388,7 +371,7 @@ func TestInferSchema(t *testing.T) { }, map[string]typeinfo.TypeInfo{ "int": typeinfo.Int32Type, - "uint": typeinfo.Uint64Type, + "uint": typeinfo.StringDefaultType, "uuid": typeinfo.UuidType, "float": typeinfo.Float32Type, "bool": typeinfo.BoolType, @@ -404,7 +387,7 @@ func TestInferSchema(t *testing.T) { floatThreshold: 0, }, map[string]typeinfo.TypeInfo{ - "mix": typeinfo.Uint64Type, + "mix": typeinfo.StringDefaultType, "uuid": typeinfo.UuidType, }, nil, @@ -500,7 +483,7 @@ func TestInferSchema(t *testing.T) { err = allCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) { idx := schema.IndexOfConstraint(col.Constraints, schema.NotNullConstraintType) - assert.True(t, idx == -1 == test.nullableCols.Contains(col.Name), "%s unexpected nullability", col.Name) + assert.True(t, idx == -1, "%s unexpected not null constraint", col.Name) return false, nil }) require.NoError(t, err) diff --git a/integration-tests/bats/import-create-tables.bats b/integration-tests/bats/import-create-tables.bats index 3917525bc2..57297909ca 100755 --- a/integration-tests/bats/import-create-tables.bats +++ b/integration-tests/bats/import-create-tables.bats @@ -169,7 +169,7 @@ pk,c1,c2,c3,c4,c5 9,1,2,3,4,5 DELIM dolt table import -c --pk=pk test 1pk5col-ints.csv - run dolt sql -q "create table fktest(id int not null, tpk int unsigned, c2 int, primary key(id), foreign key (tpk) references test(pk))" + run dolt sql -q "create table fktest(id int not null, tpk int, c2 int, primary key(id), foreign key (tpk) references test(pk))" [ "$status" -eq 0 ] run dolt sql -q "insert into fktest values (1, 0, 1)" [ "$status" -eq 0 ] @@ -567,7 +567,7 @@ DELIM [[ "$output" =~ "CREATE TABLE \`test\`" ]] [[ "$output" =~ "\`pk\` int" ]] [[ "$output" =~ "\`str\` varchar(16383)" ]] - [[ "$output" =~ "\`int\` int unsigned" ]] + [[ "$output" =~ "\`int\` int" ]] [[ "$output" =~ "\`bool\` tinyint" ]] [[ "$output" =~ "\`float\` float" ]] [[ "$output" =~ "\`date\` date" ]] diff --git a/integration-tests/bats/schema-import.bats b/integration-tests/bats/schema-import.bats index 0fd97086da..83ffb3e5ae 100755 --- a/integration-tests/bats/schema-import.bats +++ b/integration-tests/bats/schema-import.bats @@ -82,7 +82,7 @@ teardown() { [[ "$output" =~ "\`string\` varchar(16383)" ]] || false [[ "$output" =~ "\`boolean\` tinyint" ]] || false [[ "$output" =~ "\`float\` float" ]] || false - [[ "$output" =~ "\`uint\` int unsigned" ]] || false + [[ "$output" =~ "\`uint\` int" ]] || false [[ "$output" =~ "\`uuid\` char(36) CHARACTER SET ascii COLLATE ascii_bin" ]] || false } @@ -259,9 +259,9 @@ DELIM run dolt diff --schema [ "$status" -eq 0 ] - [[ "$output" =~ '+ `x` varchar(16383) NOT NULL,' ]] || false - [[ "$output" =~ '+ `y` float NOT NULL,' ]] || false - [[ "$output" =~ '+ `z` int NOT NULL,' ]] || false + [[ "$output" =~ '+ `x` varchar(16383),' ]] || false + [[ "$output" =~ '+ `y` float,' ]] || false + [[ "$output" =~ '+ `z` int,' ]] || false # assert no columns were deleted/replaced [[ ! "$output" = "- \`" ]] || false @@ -282,9 +282,9 @@ DELIM run dolt diff --schema [ "$status" -eq 0 ] - [[ "$output" =~ '+ `x` varchar(16383) NOT NULL,' ]] || false - [[ "$output" =~ '+ `y` float NOT NULL,' ]] || false - [[ "$output" =~ '+ `z` int NOT NULL,' ]] || false + [[ "$output" =~ '+ `x` varchar(16383),' ]] || false + [[ "$output" =~ '+ `y` float,' ]] || false + [[ "$output" =~ '+ `z` int,' ]] || false # assert no columns were deleted/replaced [[ ! "$output" = "- \`" ]] || false @@ -308,9 +308,9 @@ DELIM run dolt diff --schema [ "$status" -eq 0 ] - [[ "$output" =~ '- `a` varchar(16383) NOT NULL,' ]] || false - [[ "$output" =~ '- `b` float NOT NULL,' ]] || false - [[ "$output" =~ '- `c` tinyint NOT NULL,' ]] || false + [[ "$output" =~ '- `a` varchar(16383),' ]] || false + [[ "$output" =~ '- `b` float,' ]] || false + [[ "$output" =~ '- `c` tinyint,' ]] || false # assert no columns were added [[ ! "$output" = "+ \`" ]] || false }