Expose Load data and add a bunch of tests. (#1419)

This PR exposes the LOAD DATA statement in Dolt. LOAD DATA is a relatively new path for importing data into a Dolt repo.
This commit is contained in:
Vinai Rachakonda
2021-03-09 16:22:01 -05:00
committed by GitHub
parent 7ab8a153f6
commit 3dcb68ce13
6 changed files with 304 additions and 10 deletions
+1 -1
View File
@@ -8,7 +8,7 @@ from multiprocessing import Process
def _connect(user, host, port, database):
return mysql.connector.connect(user=user, host=host, port=port, database=database)
return mysql.connector.connect(user=user, host=host, port=port, database=database, allow_local_infile=True)
def _print_err_and_exit(e):
+275
View File
@@ -0,0 +1,275 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
# Bats per-test hook: runs before every test in this file and delegates to
# the shared setup_common helper loaded from helper/common.bash.
setup() {
    setup_common
}
# Bats per-test hook: runs after every test in this file. Calls the
# assert_feature_version helper first, then the shared teardown_common helper.
teardown() {
    assert_feature_version
    teardown_common
}
@test "simple load data from file into table" {
    # Fixture: one header row plus two data rows, fields separated by '||'.
    cat > 1pk5col-ints.csv <<CSV
pk||c1||c2||c3||c4||c5
0||1||2||3||4||5
1||1||2||3||4||5
CSV

    # Allow reading from the working directory, create the target table and
    # load the fixture while skipping its header line.
    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk int primary key, c1 int, c2 int, c3 int, c4 int, c5 int);
LOAD DATA INFILE '1pk5col-ints.csv' INTO TABLE test CHARACTER SET UTF8MB4 FIELDS TERMINATED BY '||' ESCAPED BY '' LINES TERMINATED BY '\n' IGNORE 1 LINES;
EOSQL
    [ "$status" -eq 0 ]

    # Both data rows must come back from the table.
    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk,c1,c2,c3,c4,c5" ]
    [ "${lines[1]}" = "0,1,2,3,4,5" ]
    [ "${lines[2]}" = "1,1,2,3,4,5" ]
}
@test "load data into unknown table throws error" {
    # No CREATE TABLE is issued here, so the LOAD DATA target does not exist.
    run dolt sql <<EOSQL
SET secure_file_priv='./';
LOAD DATA INFILE '1pk5col-ints.csv' INTO TABLE test CHARACTER SET UTF8MB4 FIELDS TERMINATED BY '||' ESCAPED BY '' LINES TERMINATED BY '\n' IGNORE 1 LINES;
EOSQL

    # The statement must fail and name the missing table.
    [ "$status" -eq 1 ]
    [[ "$output" =~ "table not found: test" ]] || false
}
@test "load data with unknown file throws error" {
    skip "Different error msg on windows."

    # The table exists, but 'hello-ints.csv' is never created by this test.
    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk int primary key, c1 int, c2 int, c3 int, c4 int, c5 int);
LOAD DATA INFILE 'hello-ints.csv' INTO TABLE test CHARACTER SET UTF8MB4 FIELDS TERMINATED BY '||' ESCAPED BY '' LINES TERMINATED BY '\n' IGNORE 1 LINES;
EOSQL

    # Expect a failure whose message mentions the missing file.
    [ "$status" -eq 1 ]
    [[ "$output" =~ "no such file or directory" ]] || false
}
@test "load data works with enclosed terms" {
    # Fixture: every data field is wrapped in double quotes; the header is not.
    cat > 1pk5col-ints.csv <<CSV
pk||c1||c2||c3||c4||c5
"0"||"1"||"2"||"3"||"4"||"5"
"1"||"1"||"2"||"3"||"4"||"5"
CSV

    # ENCLOSED BY '"' tells LOAD DATA about the surrounding quotes.
    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk int primary key, c1 int, c2 int, c3 int, c4 int, c5 int);
LOAD DATA INFILE '1pk5col-ints.csv' INTO TABLE test CHARACTER SET UTF8MB4 FIELDS TERMINATED BY '||' ENCLOSED BY '"' ESCAPED BY '' LINES TERMINATED BY '\n' IGNORE 1 LINES;
EOSQL
    [ "$status" -eq 0 ]

    # The stored values are expected without the enclosing quotes.
    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk,c1,c2,c3,c4,c5" ]
    [ "${lines[1]}" = "0,1,2,3,4,5" ]
    [ "${lines[2]}" = "1,1,2,3,4,5" ]
}
@test "load data works with prefixed terms" {
    # Fixture: a header, three lines carrying the 'sss' prefix and one line
    # ("ignore me") without it.
    cat > prefixed.txt <<DATA
pk
sssHi
sssHello
ignore me
sssYo
DATA

    # LINES STARTING BY 'sss' is the clause under test.
    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk longtext);
LOAD DATA INFILE 'prefixed.txt' INTO TABLE test CHARACTER SET UTF8MB4 LINES STARTING BY 'sss' IGNORE 1 LINES;
EOSQL
    [ "$status" -eq 0 ]

    # Exactly the three prefixed values are expected, prefix removed; ORDER BY
    # makes the row order deterministic.
    run dolt sql -r csv -q "select * from test ORDER BY pk"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk" ]
    [ "${lines[1]}" = "Hello" ]
    [ "${lines[2]}" = "Hi" ]
    [ "${lines[3]}" = "Yo" ]
}
@test "load data works when the number of input columns in the file is less than the number of schema columns" {
    # Fixture: only two columns (pk, c1) although the table declares three.
    cat > 1pk2col-ints.csv <<CSV
pk,c1
0,1
1,1
CSV

    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk int primary key, c1 int, c2 int);
LOAD DATA INFILE '1pk2col-ints.csv' INTO TABLE test FIELDS TERMINATED BY ',' IGNORE 1 LINES;
EOSQL
    [ "$status" -eq 0 ]

    # c2 has no source data, so it is expected to render as an empty CSV field.
    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk,c1,c2" ]
    [ "${lines[1]}" = "0,1," ]
    [ "${lines[2]}" = "1,1," ]
}
@test "load data works with fields separated by tabs" {
    skip "This needs to be fixed."

    # Build the fixture with printf so the field separators are guaranteed to
    # be real tab characters. Literal tabs inside a heredoc are invisible and
    # are easily converted to spaces by editors or formatters, which would
    # silently stop this test from exercising FIELDS TERMINATED BY '\t'.
    printf 'pk\tc1\n0\t1\n1\t1\n' > 1pk2col-ints.csv

    run dolt sql << SQL
SET secure_file_priv='./';
CREATE TABLE test(pk int primary key, c1 int);
LOAD DATA INFILE '1pk2col-ints.csv' INTO TABLE test FIELDS TERMINATED BY '\t' IGNORE 1 LINES;
SQL
    [ "$status" -eq 0 ]

    # Both tab-separated rows must be split into their two columns.
    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk,c1" ]
    [ "${lines[1]}" = "0,1" ]
    [ "${lines[2]}" = "1,1" ]
}
@test "load data recognizes certain nulls" {
    # Fixture: a header followed by the two spellings under test, \N and NULL.
    # The heredoc delimiter is unquoted, but a backslash before 'N' is not a
    # heredoc escape, so the file receives the two characters \N verbatim.
    cat > 1pk2col-ints.csv <<DATA
pk
\N
NULL
DATA

    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk longtext);
LOAD DATA INFILE '1pk2col-ints.csv' INTO TABLE test FIELDS IGNORE 1 LINES;
EOSQL
    [ "$status" -eq 0 ]

    # Both rows are expected to have been stored as SQL NULL.
    run dolt sql -q "select COUNT(*) from test WHERE pk IS NULL"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "2" ]] || false
}
@test "load data works when column order is mismatched" {
    skip "This needs to be fixed."

    # Fixture: the text value comes first and the integer second, i.e. the
    # opposite of the table's (pk, c1) declaration order.
    cat > 1pk2col-ints.csv <<CSV
pk,c1
"hi","1"
"hello","2"
CSV

    # The trailing (c1,pk) column list maps file field 1 -> c1 and field 2 -> pk.
    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk int, c1 longtext);
LOAD DATA INFILE '1pk2col-ints.csv' INTO TABLE test FIELDS ENCLOSED BY '"' TERMINATED BY ',' IGNORE 1 LINES (c1,pk);
EOSQL
    [ "$status" -eq 0 ]

    # Rows are read back in table column order: integer pk, then text c1.
    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk,c1" ]
    [ "${lines[1]}" = "1,hi" ]
    [ "${lines[2]}" = "2,hello" ]
}
@test "load data with different column types that uses optionally" {
    skip "This functionality is not present yet."

    # Fixture: int, optionally-quoted text (containing commas and escaped
    # quotes) and float columns. Row 1 carries 100.20; rows 2-4 carry 102.20.
    cat <<DELIM > complex.csv
1,"a string",100.20
2,"a string containing a , comma",102.20
3,"a string containing a \" quote",102.20
4,"a string containing a \", quote and comma",102.20
DELIM

    run dolt sql << SQL
SET secure_file_priv='./';
CREATE TABLE test(pk int, c1 longtext, c2 float);
LOAD DATA INFILE 'complex.csv' INTO TABLE test FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"';
SQL
    [ "$status" -eq 0 ]

    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk,c1,c2" ]
    [ "${lines[1]}" = "1,a string,100.20" ]
    # Rows 2-4 must read back the 102.20 that the fixture actually contains
    # (the expectations previously repeated row 1's 100.20).
    # NOTE(review): when this test is un-skipped, confirm how the csv result
    # format renders floats and fields containing commas/quotes.
    [ "${lines[2]}" = "2,a string containing a , comma,102.20" ]
    [ "${lines[3]}" = "3,a string containing a \" quote,102.20" ]
    [ "${lines[4]}" = "4,a string containing a \", quote and comma,102.20" ]
}
@test "load data works with escaped columns" {
    skip "This functionality is not present yet."

    # Fixture: quoted values containing backslash escape sequences. The heredoc
    # delimiter is unquoted, so the shell itself collapses "\\" to a single
    # backslash before the file is written; the other backslashes are kept.
    cat <<DELIM > escape.txt
"hi"
"\hello"
"Try\\N"
"new\ns"
DELIM

    # Load the fixture written above from the working directory into the same
    # table the assertions query. (Previously this loaded 'test5.txt' from
    # './testdata' into 'loadtable', neither of which this test creates or reads.)
    run dolt sql << SQL
SET secure_file_priv='./';
CREATE TABLE test(pk longtext);
LOAD DATA INFILE 'escape.txt' INTO TABLE test FIELDS ENCLOSED BY '\"';
SQL
    [ "$status" -eq 0 ]

    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk" ]
    [ "${lines[1]}" = "hi" ]
    [ "${lines[2]}" = "hello" ]
    [ "${lines[3]}" = "TryN" ]
    # NOTE(review): confirm this expectation once escape handling exists; if
    # "\n" is decoded to a newline the value would span two output lines.
    [ "${lines[4]}" = "new\ns" ]
}
@test "load data when the number of input columns in the file is greater than the number of schema columns" {
    skip "This functionality is not present yet."

    # Fixture: each data row has seven fields while the table has six columns.
    cat > 1pk5col-ints.csv <<CSV
pk||c1||c2||c3||c4||c5
0||1||2||3||4||5||6
1||1||2||3||4||5||6
CSV

    run dolt sql <<EOSQL
SET secure_file_priv='./';
CREATE TABLE test(pk int primary key, c1 int, c2 int, c3 int, c4 int, c5 int);
LOAD DATA INFILE '1pk5col-ints.csv' INTO TABLE test CHARACTER SET UTF8MB4 FIELDS TERMINATED BY '||' ESCAPED BY '' LINES TERMINATED BY '\n' IGNORE 1 LINES;
EOSQL
    [ "$status" -eq 0 ]

    # The surplus seventh field is expected to be dropped.
    run dolt sql -r csv -q "select * from test"
    [ "$status" -eq 0 ]
    [ "${lines[0]}" = "pk,c1,c2,c3,c4,c5" ]
    [ "${lines[1]}" = "0,1,2,3,4,5" ]
    [ "${lines[2]}" = "1,1,2,3,4,5" ]
}
+16 -2
View File
@@ -514,12 +514,11 @@ SQL
}
@test "sql-server: DOLT_MERGE ff works" {
skiponwindows "Has dependencies that are missing on the Jenkins Windows installation."
skiponwindows "Has dependencies that are missing on the Jenkins Windows installation."
cd repo1
start_sql_server repo1
multi_query 1 "
CREATE TABLE test (
pk int primary key
@@ -538,4 +537,19 @@ SQL
server_query 1 "SELECT * FROM test" "pk\n1\n2\n3\n1000"
server_query 1 "SELECT COUNT(*) FROM dolt_log" "COUNT(*)\n3"
}
@test "sql-server: LOAD DATA LOCAL INFILE works" {
    skiponwindows "Has dependencies that are missing on the Jenkins Windows installation."

    cd repo1
    start_sql_server repo1

    # Create the table, enable local_infile for the session and load the
    # comma-separated helper fixture through the server, skipping its header.
    multi_query 1 "
CREATE TABLE test(pk int primary key, c1 int, c2 int, c3 int, c4 int, c5 int);
SET local_infile=1;
LOAD DATA LOCAL INFILE '$BATS_TEST_DIRNAME/helper/1pk5col-ints.csv' INTO TABLE test CHARACTER SET UTF8MB4 FIELDS TERMINATED BY ',' ESCAPED BY '' LINES TERMINATED BY '\n' IGNORE 1 LINES;
"

    # The header plus both data rows must be returned by the server.
    server_query 1 "SELECT * FROM test" "pk,c1,c2,c3,c4,c5\n0,1,2,3,4,5\n1,1,2,3,4,5"
}
+9 -1
View File
@@ -882,6 +882,12 @@ func processQuery(ctx *sql.Context, query string, se *sqlEngine) (sql.Schema, sq
return se.ddl(ctx, s, query)
case *sqlparser.DBDDL:
return se.dbddl(ctx, s, query)
case *sqlparser.Load:
if s.Local {
return nil, nil, fmt.Errorf("LOCAL supported only in sql-server mode")
}
return se.query(ctx, query)
default:
return nil, nil, fmt.Errorf("Unsupported SQL statement: '%v'.", query)
}
@@ -1105,6 +1111,8 @@ func canProcessAsBatchInsert(ctx *sql.Context, sqlStatement sqlparser.Statement,
return false, nil
}
return true, nil
case *sqlparser.Load:
return true, nil
default:
return false, nil
@@ -1197,7 +1205,7 @@ func updateBatchInsertOutput() {
// Updates the batch insert stats with the results of an INSERT, UPDATE, or DELETE statement.
func mergeResultIntoStats(statement sqlparser.Statement, rowIter sql.RowIter, s *stats) error {
switch statement.(type) {
case *sqlparser.Insert, *sqlparser.Delete, *sqlparser.Update:
case *sqlparser.Insert, *sqlparser.Delete, *sqlparser.Update, *sqlparser.Load:
break
default:
return nil
+1 -1
View File
@@ -18,7 +18,7 @@ require (
github.com/denisbrodbeck/machineid v1.0.1
github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi v0.0.0-20201005193433-3ee972b1d078
github.com/dolthub/fslock v0.0.2
github.com/dolthub/go-mysql-server v0.8.1-0.20210309004624-52301a405ca7
github.com/dolthub/go-mysql-server v0.8.1-0.20210309180032-824ffc2adbbd
github.com/dolthub/ishell v0.0.0-20210205014355-16a4ce758446
github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66
github.com/dolthub/sqllogictest/go v0.0.0-20201105013724-5123fc66e12c
+2 -5
View File
@@ -148,8 +148,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/dolthub/fslock v0.0.2 h1:8vUh47iKovgrtXNrXVIzsIoWLlspoXg+3nslhUzgKSw=
github.com/dolthub/fslock v0.0.2/go.mod h1:0i7bsNkK+XHwFL3dIsSWeXSV7sykVzzVr6+jq8oeEo0=
github.com/dolthub/go-mysql-server v0.8.1-0.20210309004624-52301a405ca7 h1:wXGcLyEL1fpd+0k5xK489LTTB3DUsy74zomeDL3i/0I=
github.com/dolthub/go-mysql-server v0.8.1-0.20210309004624-52301a405ca7/go.mod h1:WjNGQ7bj8F/x/d6gj2y5L52CTL08LkhazvO1Mbue43w=
github.com/dolthub/go-mysql-server v0.8.1-0.20210309180032-824ffc2adbbd h1:Q4visWiZ9OHvYmqhplWifrbW2oHnL2vzqGi/oySkP2M=
github.com/dolthub/go-mysql-server v0.8.1-0.20210309180032-824ffc2adbbd/go.mod h1:WjNGQ7bj8F/x/d6gj2y5L52CTL08LkhazvO1Mbue43w=
github.com/dolthub/ishell v0.0.0-20210205014355-16a4ce758446 h1:0ol5pj+QlKUKAtqs1LiPM3ZJKs+rHPgLSsMXmhTrCAM=
github.com/dolthub/ishell v0.0.0-20210205014355-16a4ce758446/go.mod h1:dhGBqcCEfK5kuFmeO5+WOx3hqc1k3M29c1oS/R7N4ms=
github.com/dolthub/mmap-go v1.0.4-0.20201107010347-f9f2a9588a66 h1:WRPDbpJWEnPxPmiuOTndT+lUWUeGjx6eoNOK9O4tQQQ=
@@ -383,7 +383,6 @@ github.com/jirfag/go-printf-func-name v0.0.0-20191110105641-45db9963cdd3/go.mod
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
github.com/jmoiron/sqlx v1.2.1-0.20190826204134-d7d95172beb5 h1:lrdPtrORjGv1HbbEvKWDUAy97mPpFm4B8hp77tcCUJY=
github.com/jmoiron/sqlx v1.2.1-0.20190826204134-d7d95172beb5/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
@@ -436,7 +435,6 @@ github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-b
github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/maratori/testpackage v1.0.1/go.mod h1:ddKdw+XG0Phzhx8BFDTKgpWP4i7MpApTE5fXSKAqwDU=
@@ -1091,7 +1089,6 @@ honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4 h1:UoveltGrhghAA7ePc+e+QYDHXrBps2PqFZiHkGR/xK8=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.5 h1:nI5egYTGJakVyOryqLs1cQO5dO0ksin5XXs2pspk75k=
honnef.co/go/tools v0.0.1-2020.1.5/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=