fix(import,csv,psv): prevent nil pointer panic with stdin imports

This commit fixes an issue where importing CSV
data from stdin using --create-table would result in a nil pointer
panic. It adds proper error handling for this case by:

1. Improving error checking in validateImportArgs to verify schema file
   is provided when using stdin with --create-table
2. Adding a nil check for rdSchema in newImportSqlEngineMover before
   attempting to access it
3. Updating the parameter description for --columns to clarify that it
   overrides header names when used without --no-header
4. Adding tests to verify error messages and behavior with stdin imports

The improved error handling provides clear, helpful error messages
instead of panicking, and documents the workaround of creating the table
first and then using -u (update) mode for importing.

Closes: #7831
This commit is contained in:
David Dansby
2025-05-12 23:38:49 -07:00
parent cf9a4cca29
commit 4a1c1f6916
3 changed files with 221 additions and 13 deletions

View File

@@ -315,6 +315,12 @@ func validateImportArgs(apr *argparser.ArgParseResults) errhand.VerboseError {
return errhand.BuildDError("fatal: " + schemaParam + " is not supported for update or replace operations").Build()
}
if apr.Contains(createParam) && apr.NArg() <= 1 {
if !apr.Contains(schemaParam) {
return errhand.BuildDError("fatal: when importing from stdin with --create-table, you must provide a schema file with --schema").Build()
}
}
if apr.Contains(allTextParam) && !apr.Contains(createParam) {
return errhand.BuildDError("fatal: --%s is only supported for create operations", allTextParam).Build()
}

View File

@@ -16,17 +16,17 @@ teardown() {
teardown_common
}
@test "import-no-header: import with --no-header and --columns options" {
@test "import-no-header-csv: import with --no-header and --columns options" {
# Test regular import with no header (first row is row of data; not column names)
run dolt table import -c --no-header --columns id,first_name,last_name,age,city people no-header.csv
[ "$status" -eq 0 ]
[[ "$output" =~ "Import completed successfully" ]] || false
# Verify the data was imported correctly, including the first row
run dolt sql -q "SELECT COUNT(*) FROM people"
[ "$status" -eq 0 ]
[[ "$output" =~ "3" ]] || false
# Verify that column names came from --columns option, not from first row
run dolt sql -q "DESCRIBE people"
[ "$status" -eq 0 ]
@@ -35,7 +35,7 @@ teardown() {
[[ "$output" =~ "last_name" ]] || false
[[ "$output" =~ "age" ]] || false
[[ "$output" =~ "city" ]] || false
# Verify the first row was imported as data
run dolt sql -q "SELECT * FROM people WHERE id = 1"
[ "$status" -eq 0 ]
@@ -45,7 +45,7 @@ teardown() {
[[ "$output" =~ "New York" ]] || false
}
@test "import-no-header: import with --no-header but without --columns (error case)" {
@test "import-no-header-csv: import with --no-header but without --columns (error case)" {
# Should fail with a helpful error message for create
run dolt table import -c --no-header people no-header.csv
[ "$status" -eq 1 ]
@@ -62,7 +62,7 @@ teardown() {
[[ "$output" =~ "existing tables" ]] || false
}
@test "import-no-header: import with --no-header and --columns for existing table" {
@test "import-no-header-csv: import with --no-header and --columns for existing table" {
# Create a table first
dolt sql -q "CREATE TABLE existing_table2 (id int, first_name varchar(255), last_name varchar(255), age int, city varchar(255))"
@@ -83,12 +83,12 @@ teardown() {
[[ "$output" =~ "35" ]] || false
}
@test "import-no-header: import without --no-header (original behavior)" {
@test "import-no-header-csv: import without --no-header (original behavior)" {
# Test regular import without no-header (should use first row as header)
run dolt table import -c people no-header.csv
[ "$status" -eq 0 ]
[[ "$output" =~ "Import completed successfully" ]] || false
# Verify that column names came from first row
run dolt sql -q "DESCRIBE people"
[ "$status" -eq 0 ]
@@ -96,20 +96,20 @@ teardown() {
[[ "$output" =~ "John" ]] || false
[[ "$output" =~ "Doe" ]] || false
[[ "$output" =~ "35" ]] || false
# Verify that first row was NOT imported as data
run dolt sql -q "SELECT COUNT(*) FROM people"
[ "$status" -eq 0 ]
[[ "$output" =~ "2" ]] || false
}
@test "import-no-header: import with --columns but without --no-header (override column names)" {
@test "import-no-header-csv: import with --columns but without --no-header (override column names)" {
# Test import with columns option but without no-header flag
# This should use the custom column names instead of the names from the first row
run dolt table import -c --columns col1,col2,col3,col4,col5 with_columns_table no-header.csv
[ "$status" -eq 0 ]
[[ "$output" =~ "Import completed successfully" ]] || false
# Verify that column names came from --columns option, not from first row
run dolt sql -q "DESCRIBE with_columns_table"
[ "$status" -eq 0 ]
@@ -118,12 +118,12 @@ teardown() {
[[ "$output" =~ "col3" ]] || false
[[ "$output" =~ "col4" ]] || false
[[ "$output" =~ "col5" ]] || false
# Verify that first row was NOT imported as data (still treated as header)
run dolt sql -q "SELECT COUNT(*) FROM with_columns_table"
[ "$status" -eq 0 ]
[[ "$output" =~ "2" ]] || false
# Verify second row was imported as data
run dolt sql -q "SELECT * FROM with_columns_table WHERE col1 = 2"
[ "$status" -eq 0 ]
@@ -131,4 +131,35 @@ teardown() {
[[ "$output" =~ "Smith" ]] || false
[[ "$output" =~ "28" ]] || false
[[ "$output" =~ "Los Angeles" ]] || false
}
@test "import-no-header-csv: import from stdin with --create-table requires schema file" {
    # Message expected whenever -c is combined with piped input and no --schema.
    local schema_required="fatal: when importing from stdin with --create-table, you must provide a schema file with --schema"

    # Piped data, -c, --no-header and --columns, but no schema file: must be rejected.
    run bash -c "cat no-header.csv | dolt table import -c --no-header --columns id,first_name,last_name,age,city stdin_table"
    [ "$status" -eq 1 ]
    [[ "$output" =~ "$schema_required" ]] || false

    # Same invocation minus --no-header: rejected with the identical message.
    run bash -c "cat no-header.csv | dolt table import -c --columns id,first_name,last_name,age,city stdin_table"
    [ "$status" -eq 1 ]
    [[ "$output" =~ "$schema_required" ]] || false

    # Workaround: define the table up front with SQL ...
    dolt sql -q "CREATE TABLE stdin_table (id int PRIMARY KEY, first_name text, last_name text, age int, city text)"

    # ... then feed the same data through the pipe in update (-u) mode.
    run bash -c "cat no-header.csv | dolt table import -u --no-header --columns id,first_name,last_name,age,city stdin_table"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Import completed successfully" ]] || false

    # The row count confirms the piped rows reached the table.
    run dolt sql -q "SELECT COUNT(*) FROM stdin_table"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "3" ]] || false
}

View File

@@ -0,0 +1,171 @@
#!/usr/bin/env bats
load $BATS_TEST_DIRNAME/helper/common.bash
# Per-test fixture: calls setup_common, then writes no-header.psv, a
# pipe-separated file with three data rows and no header line.
setup() {
    setup_common

    # Each argument below becomes exactly one newline-terminated line of the file.
    printf '%s\n' \
        '1|John|Doe|35|New York' \
        '2|Jane|Smith|28|Los Angeles' \
        '3|Bob|Johnson|42|Chicago' > no-header.psv
}
# Per-test cleanup hook: delegates entirely to teardown_common.
# NOTE(review): teardown_common is presumably defined in the loaded
# helper/common.bash - confirm there what it actually removes.
teardown() {
teardown_common
}
@test "import-no-header-psv: import with --no-header and --columns options" {
    local expected

    # Create the table from the headerless file; --columns supplies the names.
    run dolt table import -c --no-header --columns id,first_name,last_name,age,city people no-header.psv
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Import completed successfully" ]] || false

    # Every line of the file, the first one included, should be counted as a row.
    run dolt sql -q "SELECT COUNT(*) FROM people"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "3" ]] || false

    # The schema should list the names passed via --columns.
    run dolt sql -q "DESCRIBE people"
    [ "$status" -eq 0 ]
    for expected in "id" "first_name" "last_name" "age" "city"; do
        [[ "$output" =~ "$expected" ]] || false
    done

    # The first file line must be queryable as ordinary data.
    run dolt sql -q "SELECT * FROM people WHERE id = 1"
    [ "$status" -eq 0 ]
    for expected in "John" "Doe" "35" "New York"; do
        [[ "$output" =~ "$expected" ]] || false
    done
}
@test "import-no-header-psv: import with --no-header but without --columns (error case)" {
    local fragment

    # Create mode: --no-header without --columns must fail with a create-specific hint.
    run dolt table import -c --no-header people no-header.psv
    [ "$status" -eq 1 ]
    for fragment in "must also specify --columns" "create table"; do
        [[ "$output" =~ "$fragment" ]] || false
    done

    # Target table for the update-mode attempt below.
    dolt sql -q "CREATE TABLE existing_table (id int, first_name varchar(255), last_name varchar(255), age int, city varchar(255))"

    # Update mode: also fails, but the hint talks about existing tables instead.
    run dolt table import -u --no-header existing_table no-header.psv
    [ "$status" -eq 1 ]
    for fragment in "must also specify --columns" "existing tables"; do
        [[ "$output" =~ "$fragment" ]] || false
    done
}
@test "import-no-header-psv: import with --no-header and --columns for existing table" {
    local expected

    # Table that the update-mode import will fill.
    dolt sql -q "CREATE TABLE existing_table2 (id int, first_name varchar(255), last_name varchar(255), age int, city varchar(255))"

    # Update the pre-existing table from the headerless file, naming columns explicitly.
    run dolt table import -u --no-header --columns id,first_name,last_name,age,city existing_table2 no-header.psv
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Import completed successfully" ]] || false

    # All three file lines should now be rows.
    run dolt sql -q "SELECT COUNT(*) FROM existing_table2"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "3" ]] || false

    # Spot-check that the first file line was stored as a row.
    run dolt sql -q "SELECT * FROM existing_table2 WHERE id = 1"
    [ "$status" -eq 0 ]
    for expected in "John" "35"; do
        [[ "$output" =~ "$expected" ]] || false
    done
}
@test "import-no-header-psv: import without --no-header (original behavior)" {
    local expected

    # Without --no-header the importer treats the first file line as the header.
    run dolt table import -c people no-header.psv
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Import completed successfully" ]] || false

    # The values of the first line should therefore show up as column names.
    run dolt sql -q "DESCRIBE people"
    [ "$status" -eq 0 ]
    for expected in "1" "John" "Doe" "35"; do
        [[ "$output" =~ "$expected" ]] || false
    done

    # Only the remaining two lines count as data rows.
    run dolt sql -q "SELECT COUNT(*) FROM people"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "2" ]] || false
}
@test "import-no-header-psv: import with --columns but without --no-header (override column names)" {
    local expected

    # --columns alone: the first file line is still consumed as a header,
    # but the resulting column names come from the option.
    run dolt table import -c --columns col1,col2,col3,col4,col5 with_columns_table no-header.psv
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Import completed successfully" ]] || false

    # The schema should expose the custom names.
    run dolt sql -q "DESCRIBE with_columns_table"
    [ "$status" -eq 0 ]
    for expected in "col1" "col2" "col3" "col4" "col5"; do
        [[ "$output" =~ "$expected" ]] || false
    done

    # The header line is not data, so only two rows are expected.
    run dolt sql -q "SELECT COUNT(*) FROM with_columns_table"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "2" ]] || false

    # The second file line must have been stored as a row.
    run dolt sql -q "SELECT * FROM with_columns_table WHERE col1 = 2"
    [ "$status" -eq 0 ]
    for expected in "Jane" "Smith" "28" "Los Angeles"; do
        [[ "$output" =~ "$expected" ]] || false
    done
}
@test "import-no-header-psv: import from stdin with --create-table requires schema file" {
    # Message expected whenever -c is combined with piped input and no --schema.
    local schema_required="fatal: when importing from stdin with --create-table, you must provide a schema file with --schema"

    # Piped data, -c, --no-header and --columns, but no schema file: must be rejected.
    run bash -c "cat no-header.psv | dolt table import -c --no-header --columns id,first_name,last_name,age,city stdin_table"
    [ "$status" -eq 1 ]
    [[ "$output" =~ "$schema_required" ]] || false

    # Same invocation minus --no-header: rejected with the identical message.
    run bash -c "cat no-header.psv | dolt table import -c --columns id,first_name,last_name,age,city stdin_table"
    [ "$status" -eq 1 ]
    [[ "$output" =~ "$schema_required" ]] || false

    # Workaround: define the table up front with SQL ...
    dolt sql -q "CREATE TABLE stdin_table (id int PRIMARY KEY, first_name text, last_name text, age int, city text)"

    # ... then import in update (-u) mode.
    # NOTE(review): this step reads a copy of the data file by path rather than
    # from a pipe, so the stdin + -u combination is not exercised for PSV here.
    cat no-header.psv > psv_temp_file.psv
    run dolt table import -u --no-header --columns id,first_name,last_name,age,city stdin_table psv_temp_file.psv
    [ "$status" -eq 0 ]
    [[ "$output" =~ "Import completed successfully" ]] || false

    # Drop the scratch copy once the import has been checked.
    rm psv_temp_file.psv

    # The row count confirms the rows reached the table.
    run dolt sql -q "SELECT COUNT(*) FROM stdin_table"
    [ "$status" -eq 0 ]
    [[ "$output" =~ "3" ]] || false
}