mirror of
https://github.com/dolthub/dolt.git
synced 2026-03-03 10:08:59 -06:00
dolt table import: json,csv: Support BOM file headers.
The semantics are as follows: For CSV files, the default import is an uninterpreted character encoding where newline has to match 0xa and the delimiters have to match. In general Dolt expects UTF8, but non-UTF8 characters in string fields can make it through to the imported table, for example with encodings which are close enough to ASCII. If there is a UTF8, UTF16LE or UTF16BE BOM header, then character decoding of the input stream switches to the indicated encoding. For JSON files, the default import is UTF8 character encoding. If there is a UTF8, UTF16LE or UTF16BE BOM header, then character decoding of the input stream switches to the indicated encoding.
This commit is contained in:
@@ -22,6 +22,8 @@ import (
|
||||
|
||||
"github.com/bcicen/jstream"
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/row"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
@@ -52,12 +54,17 @@ func OpenJSONReader(vrw types.ValueReadWriter, path string, fs filesys.ReadableF
|
||||
return NewJSONReader(vrw, r, sch)
|
||||
}
|
||||
|
||||
// The bytes of the supplied reader are treated as UTF-8. If there is a UTF8,
|
||||
// UTF16LE or UTF16BE BOM as the first bytes read, then it is stripped and the
|
||||
// remaining contents of the reader are treated as that encoding.
|
||||
func NewJSONReader(vrw types.ValueReadWriter, r io.ReadCloser, sch schema.Schema) (*JSONReader, error) {
|
||||
if sch == nil {
|
||||
return nil, errors.New("schema must be provided to JsonReader")
|
||||
}
|
||||
|
||||
decoder := jstream.NewDecoder(r, 2) // extract JSON values at a depth level of 1
|
||||
textReader := transform.NewReader(r, unicode.BOMOverride(unicode.UTF8.NewDecoder()))
|
||||
|
||||
decoder := jstream.NewDecoder(textReader, 2) // extract JSON values at a depth level of 1
|
||||
|
||||
return &JSONReader{vrw: vrw, closer: r, sch: sch, jsonStream: decoder}, nil
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
package json
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
@@ -24,6 +25,8 @@ import (
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/row"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
@@ -33,25 +36,7 @@ import (
|
||||
"github.com/dolthub/dolt/go/store/types"
|
||||
)
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
testJSON := `{
|
||||
"rows": [
|
||||
{
|
||||
"id": 0,
|
||||
"first name": "tim",
|
||||
"last name": "sehn"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"first name": "brian",
|
||||
"last name": "hendriks"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
fs := filesys.EmptyInMemFS("/")
|
||||
require.NoError(t, fs.WriteFile("file.json", []byte(testJSON), os.ModePerm))
|
||||
|
||||
func testGoodJSON(t *testing.T, getReader func(types.ValueReadWriter, schema.Schema) (*JSONReader, error)) {
|
||||
colColl := schema.NewColCollection(
|
||||
schema.Column{
|
||||
Name: "id",
|
||||
@@ -83,7 +68,7 @@ func TestReader(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
vrw := types.NewMemoryValueStore()
|
||||
reader, err := OpenJSONReader(vrw, "file.json", fs, sch)
|
||||
reader, err := getReader(vrw, sch)
|
||||
require.NoError(t, err)
|
||||
|
||||
verifySchema, err := reader.VerifySchema(sch)
|
||||
@@ -109,6 +94,75 @@ func TestReader(t *testing.T) {
|
||||
assert.Equal(t, enginetest.WidenRows(sqlSch.Schema, expectedRows), rows)
|
||||
}
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
testJSON := `{
|
||||
"rows": [
|
||||
{
|
||||
"id": 0,
|
||||
"first name": "tim",
|
||||
"last name": "sehn"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"first name": "brian",
|
||||
"last name": "hendriks"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
fs := filesys.EmptyInMemFS("/")
|
||||
require.NoError(t, fs.WriteFile("file.json", []byte(testJSON), os.ModePerm))
|
||||
|
||||
testGoodJSON(t, func(vrw types.ValueReadWriter, sch schema.Schema) (*JSONReader, error) {
|
||||
return OpenJSONReader(vrw, "file.json", fs, sch)
|
||||
})
|
||||
}
|
||||
|
||||
func TestReaderBOMHandling(t *testing.T) {
|
||||
testJSON := `{
|
||||
"rows": [
|
||||
{
|
||||
"id": 0,
|
||||
"first name": "tim",
|
||||
"last name": "sehn"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"first name": "brian",
|
||||
"last name": "hendriks"
|
||||
}
|
||||
]
|
||||
}`
|
||||
t.Run("UTF-8", func(t *testing.T) {
|
||||
bs := bytes.NewBuffer([]byte(testJSON))
|
||||
reader := transform.NewReader(bs, unicode.UTF8.NewEncoder())
|
||||
testGoodJSON(t, func(vrw types.ValueReadWriter, sch schema.Schema) (*JSONReader, error) {
|
||||
return NewJSONReader(vrw, io.NopCloser(reader), sch)
|
||||
})
|
||||
})
|
||||
t.Run("UTF-8 BOM", func(t *testing.T) {
|
||||
bs := bytes.NewBuffer([]byte(testJSON))
|
||||
reader := transform.NewReader(bs, unicode.UTF8BOM.NewEncoder())
|
||||
testGoodJSON(t, func(vrw types.ValueReadWriter, sch schema.Schema) (*JSONReader, error) {
|
||||
return NewJSONReader(vrw, io.NopCloser(reader), sch)
|
||||
})
|
||||
})
|
||||
t.Run("UTF-16 LE BOM", func(t *testing.T) {
|
||||
bs := bytes.NewBuffer([]byte(testJSON))
|
||||
reader := transform.NewReader(bs, unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder())
|
||||
testGoodJSON(t, func(vrw types.ValueReadWriter, sch schema.Schema) (*JSONReader, error) {
|
||||
return NewJSONReader(vrw, io.NopCloser(reader), sch)
|
||||
})
|
||||
})
|
||||
t.Run("UTF-16 BE BOM", func(t *testing.T) {
|
||||
bs := bytes.NewBuffer([]byte(testJSON))
|
||||
reader := transform.NewReader(bs, unicode.UTF16(unicode.BigEndian, unicode.UseBOM).NewEncoder())
|
||||
testGoodJSON(t, func(vrw types.ValueReadWriter, sch schema.Schema) (*JSONReader, error) {
|
||||
return NewJSONReader(vrw, io.NopCloser(reader), sch)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestReaderBadJson(t *testing.T) {
|
||||
testJSON := ` {
|
||||
"rows": [
|
||||
|
||||
@@ -27,6 +27,8 @@ import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/dolthub/go-mysql-server/sql"
|
||||
textunicode "golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/row"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/schema"
|
||||
@@ -73,6 +75,14 @@ func OpenCSVReader(nbf *types.NomsBinFormat, path string, fs filesys.ReadableFS,
|
||||
}
|
||||
|
||||
// NewCSVReader creates a CSVReader from a given ReadCloser. The CSVFileInfo should describe the csv file being read.
|
||||
//
|
||||
// The interpretation of the bytes of the supplied reader is a little murky. If
|
||||
// there is a UTF8, UTF16LE or UTF16BE BOM as the first bytes read, then the
|
||||
// BOM is stripped and the remaining contents of the reader are treated as that
|
||||
// encoding. If we are not in any of those marked encodings, then some of the
|
||||
// bytes go uninterpreted until we get to the SQL layer. It is currently the
|
||||
// case that newlines must be encoded as a '0xa' byte and the delimiter must
|
||||
// match |info.Delim|.
|
||||
func NewCSVReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *CSVFileInfo) (*CSVReader, error) {
|
||||
if len(info.Delim) < 1 {
|
||||
return nil, fmt.Errorf("delimiter '%s' has invalid length", info.Delim)
|
||||
@@ -81,7 +91,9 @@ func NewCSVReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *CSVFileInfo)
|
||||
return nil, fmt.Errorf("invalid delimiter: %s", string(info.Delim))
|
||||
}
|
||||
|
||||
br := bufio.NewReaderSize(r, ReadBufSize)
|
||||
textReader := transform.NewReader(r, textunicode.BOMOverride(transform.Nop))
|
||||
|
||||
br := bufio.NewReaderSize(textReader, ReadBufSize)
|
||||
colStrs, err := getColHeaders(br, info)
|
||||
|
||||
if err != nil {
|
||||
@@ -102,18 +114,6 @@ func NewCSVReader(nbf *types.NomsBinFormat, r io.ReadCloser, info *CSVFileInfo)
|
||||
}, nil
|
||||
}
|
||||
|
||||
// trimBOM checks if the given string starts with the Byte Order Mark, and removes it if it is present.
|
||||
// The BOM is present if the first 3 bytes are \xEF\xBB\xBF, which indicates that a file is in UTF-8 encoding.
|
||||
func trimBOM(s string) string {
|
||||
if len(s) < 3 {
|
||||
return s
|
||||
}
|
||||
if s[0] == '\xEF' && s[1] == '\xBB' && s[2] == '\xBF' {
|
||||
return s[3:]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func getColHeaders(br *bufio.Reader, info *CSVFileInfo) ([]string, error) {
|
||||
colStrs := info.Columns
|
||||
if info.HasHeaderLine {
|
||||
@@ -124,7 +124,6 @@ func getColHeaders(br *bufio.Reader, info *CSVFileInfo) ([]string, error) {
|
||||
} else if strings.TrimSpace(line) == "" {
|
||||
return nil, errors.New("Header line is empty")
|
||||
}
|
||||
line = trimBOM(line)
|
||||
colStrsFromFile, err := csvSplitLine(line, info.Delim, info.EscapeQuotes)
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -20,6 +20,11 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/row"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/table"
|
||||
"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped"
|
||||
@@ -67,6 +72,13 @@ func mustRow(r row.Row, err error) row.Row {
|
||||
return r
|
||||
}
|
||||
|
||||
func mustEncodeBytes(t *testing.T, bs []byte, enc encoding.Encoding) []byte {
|
||||
ret, n, err := transform.Bytes(enc.NewEncoder(), bs)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, n, len(bs))
|
||||
return ret
|
||||
}
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
colNames := []string{"name", "age", "title"}
|
||||
_, sch := untyped.NewUntypedSchema(colNames...)
|
||||
@@ -82,33 +94,42 @@ func TestReader(t *testing.T) {
|
||||
mustRow(untyped.NewRowFromStrings(types.Format_Default, sch, []string{"Jack Jackson", "27"})),
|
||||
}
|
||||
|
||||
utf8bomBytes := mustEncodeBytes(t, []byte(PersonDB1), unicode.UTF8BOM)
|
||||
require.Equal(t, utf8bomBytes[0:3], []byte{0xEF, 0xBB, 0xBF})
|
||||
utf16leBytes := mustEncodeBytes(t, []byte(PersonDB1), unicode.UTF16(unicode.LittleEndian, unicode.UseBOM))
|
||||
utf16beBytes := mustEncodeBytes(t, []byte(PersonDB1), unicode.UTF16(unicode.BigEndian, unicode.UseBOM))
|
||||
|
||||
tests := []struct {
|
||||
inputStr string
|
||||
input []byte
|
||||
expectedRows []row.Row
|
||||
info *CSVFileInfo
|
||||
}{
|
||||
{PersonDB1, goodExpectedRows, NewCSVInfo()},
|
||||
{PersonDB2, goodExpectedRows, NewCSVInfo()},
|
||||
{PersonDB3, goodExpectedRows, NewCSVInfo()},
|
||||
{[]byte(PersonDB1), goodExpectedRows, NewCSVInfo()},
|
||||
{[]byte(PersonDB2), goodExpectedRows, NewCSVInfo()},
|
||||
{[]byte(PersonDB3), goodExpectedRows, NewCSVInfo()},
|
||||
|
||||
{PersonDBWithBadRow, badExpectedRows, NewCSVInfo()},
|
||||
{PersonDBWithBadRow2, badExpectedRows, NewCSVInfo()},
|
||||
{PersonDBWithBadRow3, badExpectedRows, NewCSVInfo()},
|
||||
{utf8bomBytes, goodExpectedRows, NewCSVInfo()},
|
||||
{utf16leBytes, goodExpectedRows, NewCSVInfo()},
|
||||
{utf16beBytes, goodExpectedRows, NewCSVInfo()},
|
||||
|
||||
{[]byte(PersonDBWithBadRow), badExpectedRows, NewCSVInfo()},
|
||||
{[]byte(PersonDBWithBadRow2), badExpectedRows, NewCSVInfo()},
|
||||
{[]byte(PersonDBWithBadRow3), badExpectedRows, NewCSVInfo()},
|
||||
|
||||
{
|
||||
PersonDBWithoutHeaders,
|
||||
[]byte(PersonDBWithoutHeaders),
|
||||
goodExpectedRows,
|
||||
NewCSVInfo().SetHasHeaderLine(false).SetColumns(colNames),
|
||||
},
|
||||
{
|
||||
PersonDBDifferentHeaders,
|
||||
[]byte(PersonDBDifferentHeaders),
|
||||
goodExpectedRows,
|
||||
NewCSVInfo().SetHasHeaderLine(true).SetColumns(colNames),
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
rows, numBad, err := readTestRows(t, test.inputStr, test.info)
|
||||
rows, numBad, err := readTestRows(t, test.input, test.info)
|
||||
|
||||
if err != nil {
|
||||
t.Fatal("Unexpected Error:", err)
|
||||
@@ -136,11 +157,11 @@ func TestReader(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func readTestRows(t *testing.T, inputStr string, info *CSVFileInfo) ([]row.Row, int, error) {
|
||||
func readTestRows(t *testing.T, input []byte, info *CSVFileInfo) ([]row.Row, int, error) {
|
||||
const root = "/"
|
||||
const path = "/file.csv"
|
||||
|
||||
fs := filesys.NewInMemFS(nil, map[string][]byte{path: []byte(inputStr)}, root)
|
||||
fs := filesys.NewInMemFS(nil, map[string][]byte{path: input}, root)
|
||||
csvR, err := OpenCSVReader(types.Format_Default, path, fs, info)
|
||||
defer csvR.Close(context.Background())
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ import (
|
||||
"github.com/dolthub/dolt/go/libraries/utils/iohelp"
|
||||
)
|
||||
|
||||
// Application is the application ID used for all events emitted by this application. Other applications (not dolt)
|
||||
// Application is the application ID used for all events emitted by this application. Other applications (not dolt)
|
||||
// should set this once at initialization.
|
||||
var Application = eventsapi.AppID_APP_DOLT
|
||||
|
||||
|
||||
Binary file not shown.
|
Binary file not shown.
|
@@ -0,0 +1,4 @@
|
||||
id, title, start date, end date, first name, last name
|
||||
0, "ceo", "", "", "tim", "sehn"
|
||||
1, "founder", "", "", "aaron", "son"
|
||||
2, "founder", "", "", "brian", "hendriks"
|
||||
|
BIN
integration-tests/bats/helper/employees-tbl.utf16bebom.json
Normal file
BIN
integration-tests/bats/helper/employees-tbl.utf16bebom.json
Normal file
Binary file not shown.
BIN
integration-tests/bats/helper/employees-tbl.utf16lebom.json
Normal file
BIN
integration-tests/bats/helper/employees-tbl.utf16lebom.json
Normal file
Binary file not shown.
28
integration-tests/bats/helper/employees-tbl.utf8bom.json
Normal file
28
integration-tests/bats/helper/employees-tbl.utf8bom.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"rows": [
|
||||
{
|
||||
"id": 0,
|
||||
"first name": "tim",
|
||||
"last name": "sehn",
|
||||
"title": "ceo",
|
||||
"start date": "",
|
||||
"end date": ""
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"first name": "aaron",
|
||||
"last name": "son",
|
||||
"title": "founder",
|
||||
"start date": "",
|
||||
"end date": ""
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"first name": "brian",
|
||||
"last name": "hendricks",
|
||||
"title": "founder",
|
||||
"start date": "",
|
||||
"end date": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -60,23 +60,6 @@ teardown() {
|
||||
teardown_common
|
||||
}
|
||||
|
||||
@test "import-create-tables: correctly ignores byte order mark (BOM)" {
|
||||
printf '\xEF\xBB\xBF' > bom.csv
|
||||
cat <<DELIM >> bom.csv
|
||||
c1,c2
|
||||
1,2
|
||||
DELIM
|
||||
|
||||
run dolt table import -c bom bom.csv
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Rows Processed: 1, Additions: 1, Modifications: 0, Had No Effect: 0" ]] || false
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
|
||||
run dolt sql -q "select c1 from bom"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "1" ]] || false
|
||||
}
|
||||
|
||||
@test "import-create-tables: create a table with json import" {
|
||||
run dolt table import -c -s `batshelper employees-sch.sql` employees `batshelper employees-tbl.json`
|
||||
[ "$status" -eq 0 ]
|
||||
@@ -90,6 +73,46 @@ DELIM
|
||||
[ "${#lines[@]}" -eq 7 ]
|
||||
}
|
||||
|
||||
@test "import-create-tables: create a table with json import, utf8 with bom" {
|
||||
run dolt table import -c -s `batshelper employees-sch.sql` employees `batshelper employees-tbl.utf8bom.json`
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
run dolt ls
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "employees" ]] || false
|
||||
run dolt sql -q "select * from employees"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "tim" ]] || false
|
||||
[ "${#lines[@]}" -eq 7 ]
|
||||
}
|
||||
|
||||
@test "import-create-tables: create a table with json import, utf16le with bom" {
|
||||
run dolt table import -c -s `batshelper employees-sch.sql` employees `batshelper employees-tbl.utf16lebom.json`
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
run dolt ls
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "employees" ]] || false
|
||||
run dolt sql -q "select * from employees"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "tim" ]] || false
|
||||
[ "${#lines[@]}" -eq 7 ]
|
||||
}
|
||||
|
||||
@test "import-create-tables: create a table with json import, utf16be with bom" {
|
||||
run dolt table import -c -s `batshelper employees-sch.sql` employees `batshelper employees-tbl.utf16bebom.json`
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
run dolt ls
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "employees" ]] || false
|
||||
run dolt sql -q "select * from employees"
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "tim" ]] || false
|
||||
[ "${#lines[@]}" -eq 7 ]
|
||||
}
|
||||
|
||||
@test "import-create-tables: create a table with json import. no schema." {
|
||||
run dolt table import -c employees `batshelper employees-tbl.json`
|
||||
[ "$status" -ne 0 ]
|
||||
|
||||
@@ -270,6 +270,84 @@ SQL
|
||||
[[ "${lines[6]}" =~ "end date" ]] || false
|
||||
}
|
||||
|
||||
@test "import-update-tables: update table with a csv with columns in different order, utf8 with bom" {
|
||||
dolt sql <<SQL
|
||||
CREATE TABLE employees (
|
||||
\`id\` varchar(20) NOT NULL COMMENT 'tag:0',
|
||||
\`first name\` LONGTEXT COMMENT 'tag:1',
|
||||
\`last name\` LONGTEXT COMMENT 'tag:2',
|
||||
\`title\` LONGTEXT COMMENT 'tag:3',
|
||||
\`start date\` LONGTEXT COMMENT 'tag:4',
|
||||
\`end date\` LONGTEXT COMMENT 'tag:5',
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
SQL
|
||||
run dolt table import -u employees `batshelper employees-tbl-schema-unordered.utf8bom.csv`
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Rows Processed: 3, Additions: 3, Modifications: 0, Had No Effect: 0" ]] || false
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
run dolt schema export employees
|
||||
[[ "$status" -eq 0 ]] || false
|
||||
[[ "${lines[1]}" =~ "id" ]] || false
|
||||
[[ "${lines[2]}" =~ "first name" ]] || false
|
||||
[[ "${lines[3]}" =~ "last name" ]] || false
|
||||
[[ "${lines[4]}" =~ "title" ]] || false
|
||||
[[ "${lines[5]}" =~ "start date" ]] || false
|
||||
[[ "${lines[6]}" =~ "end date" ]] || false
|
||||
}
|
||||
|
||||
@test "import-update-tables: update table with a csv with columns in different order, utf16le with bom" {
|
||||
dolt sql <<SQL
|
||||
CREATE TABLE employees (
|
||||
\`id\` varchar(20) NOT NULL COMMENT 'tag:0',
|
||||
\`first name\` LONGTEXT COMMENT 'tag:1',
|
||||
\`last name\` LONGTEXT COMMENT 'tag:2',
|
||||
\`title\` LONGTEXT COMMENT 'tag:3',
|
||||
\`start date\` LONGTEXT COMMENT 'tag:4',
|
||||
\`end date\` LONGTEXT COMMENT 'tag:5',
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
SQL
|
||||
run dolt table import -u employees `batshelper employees-tbl-schema-unordered.utf16lebom.csv`
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Rows Processed: 3, Additions: 3, Modifications: 0, Had No Effect: 0" ]] || false
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
run dolt schema export employees
|
||||
[[ "$status" -eq 0 ]] || false
|
||||
[[ "${lines[1]}" =~ "id" ]] || false
|
||||
[[ "${lines[2]}" =~ "first name" ]] || false
|
||||
[[ "${lines[3]}" =~ "last name" ]] || false
|
||||
[[ "${lines[4]}" =~ "title" ]] || false
|
||||
[[ "${lines[5]}" =~ "start date" ]] || false
|
||||
[[ "${lines[6]}" =~ "end date" ]] || false
|
||||
}
|
||||
|
||||
@test "import-update-tables: update table with a csv with columns in different order, utf16be with bom" {
|
||||
dolt sql <<SQL
|
||||
CREATE TABLE employees (
|
||||
\`id\` varchar(20) NOT NULL COMMENT 'tag:0',
|
||||
\`first name\` LONGTEXT COMMENT 'tag:1',
|
||||
\`last name\` LONGTEXT COMMENT 'tag:2',
|
||||
\`title\` LONGTEXT COMMENT 'tag:3',
|
||||
\`start date\` LONGTEXT COMMENT 'tag:4',
|
||||
\`end date\` LONGTEXT COMMENT 'tag:5',
|
||||
PRIMARY KEY (id)
|
||||
);
|
||||
SQL
|
||||
run dolt table import -u employees `batshelper employees-tbl-schema-unordered.utf16bebom.csv`
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "$output" =~ "Rows Processed: 3, Additions: 3, Modifications: 0, Had No Effect: 0" ]] || false
|
||||
[[ "$output" =~ "Import completed successfully." ]] || false
|
||||
run dolt schema export employees
|
||||
[[ "$status" -eq 0 ]] || false
|
||||
[[ "${lines[1]}" =~ "id" ]] || false
|
||||
[[ "${lines[2]}" =~ "first name" ]] || false
|
||||
[[ "${lines[3]}" =~ "last name" ]] || false
|
||||
[[ "${lines[4]}" =~ "title" ]] || false
|
||||
[[ "${lines[5]}" =~ "start date" ]] || false
|
||||
[[ "${lines[6]}" =~ "end date" ]] || false
|
||||
}
|
||||
|
||||
@test "import-update-tables: updating table by inputting string longer than char column throws an error" {
|
||||
cat <<DELIM > 1pk1col-rpt-chars.csv
|
||||
pk,c
|
||||
|
||||
Reference in New Issue
Block a user