diff --git a/.github/workflows/ci-bump-homebrew.yaml b/.github/workflows/cd-bump-homebrew.yaml similarity index 100% rename from .github/workflows/ci-bump-homebrew.yaml rename to .github/workflows/cd-bump-homebrew.yaml diff --git a/.github/workflows/ci-bump-winget.yaml b/.github/workflows/cd-bump-winget.yaml similarity index 100% rename from .github/workflows/ci-bump-winget.yaml rename to .github/workflows/cd-bump-winget.yaml diff --git a/.github/workflows/cd-release.yaml b/.github/workflows/cd-release.yaml index 506114c061..fdf8f2dfed 100644 --- a/.github/workflows/cd-release.yaml +++ b/.github/workflows/cd-release.yaml @@ -1,4 +1,4 @@ -name: Release +name: Release Dolt on: workflow_dispatch: diff --git a/.github/workflows/ci-email.yaml b/.github/workflows/email-report.yaml similarity index 100% rename from .github/workflows/ci-email.yaml rename to .github/workflows/email-report.yaml diff --git a/.github/workflows/ci-fuzzer.yaml b/.github/workflows/k8s-fuzzer.yaml similarity index 100% rename from .github/workflows/ci-fuzzer.yaml rename to .github/workflows/k8s-fuzzer.yaml diff --git a/.github/workflows/ci-import-benchmarks.yaml b/.github/workflows/k8s-import-benchmarks-pull-report.yaml similarity index 100% rename from .github/workflows/ci-import-benchmarks.yaml rename to .github/workflows/k8s-import-benchmarks-pull-report.yaml diff --git a/.github/workflows/ci-nightly.yaml b/.github/workflows/k8s-nightly-performance-benchmarks-email-report.yaml similarity index 100% rename from .github/workflows/ci-nightly.yaml rename to .github/workflows/k8s-nightly-performance-benchmarks-email-report.yaml diff --git a/.github/workflows/ci-performance-benchmarks-release.yaml b/.github/workflows/k8s-performance-benchmarks-email-report.yaml similarity index 100% rename from .github/workflows/ci-performance-benchmarks-release.yaml rename to .github/workflows/k8s-performance-benchmarks-email-report.yaml diff --git a/.github/workflows/ci-performance-benchmarks.yaml 
b/.github/workflows/k8s-performance-benchmarks-pull-report.yaml similarity index 100% rename from .github/workflows/ci-performance-benchmarks.yaml rename to .github/workflows/k8s-performance-benchmarks-pull-report.yaml diff --git a/.github/workflows/ci-sql-correctness-release.yaml b/.github/workflows/k8s-sql-correctness.yaml similarity index 99% rename from .github/workflows/ci-sql-correctness-release.yaml rename to .github/workflows/k8s-sql-correctness.yaml index ca08d061f6..2dfd22916d 100644 --- a/.github/workflows/ci-sql-correctness-release.yaml +++ b/.github/workflows/k8s-sql-correctness.yaml @@ -31,7 +31,7 @@ jobs: VERSION: ${{ github.event.inputs.version || github.event.client_payload.version }} ACTOR: ${{ github.event.client_payload.actor || github.actor }} - correctness-release: + correctness: runs-on: ubuntu-18.04 needs: set-version-actor name: Dolt SQL Correctness diff --git a/.github/workflows/ci-performance-benchmarks-report.yaml b/.github/workflows/pull-report.yaml similarity index 50% rename from .github/workflows/ci-performance-benchmarks-report.yaml rename to .github/workflows/pull-report.yaml index b4ce0f19aa..e8aed9e01f 100644 --- a/.github/workflows/ci-performance-benchmarks-report.yaml +++ b/.github/workflows/pull-report.yaml @@ -1,4 +1,4 @@ -# name: Performance Benchmark Reporter +name: Post to Pull Request on: repository_dispatch: @@ -47,40 +47,3 @@ jobs: env: ACTOR: ${{ github.event.client_payload.actor }} ISSUE_NUMBER: ${{ github.event.client_payload.issue_number }} - report-email: - name: Report Performance Benchmarks via Email - runs-on: ubuntu-18.04 - if: ${{ github.event.client_payload.issue_number == -1 }} - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-west-2 - - name: Get benchmark results - id: 
get-results - run: aws s3api get-object --bucket="$BUCKET" --key="$KEY" results.log - env: - KEY: ${{ github.event.client_payload.key }} - BUCKET: ${{ github.event.client_payload.bucket }} - - name: Send Email - if: ${{ github.event.client_payload.actor_email == '' }} - uses: ./.github/actions/ses-email-action - with: - region: us-west-2 - version: ${{ github.event.client_payload.dolt_version }} - format: ${{ github.event.client_payload.noms_bin_format }} - toAddresses: '["${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}"]' - dataFile: ${{ format('{0}/results.log', github.workspace) }} - - name: Send Email - if: ${{ github.event.client_payload.actor_email != '' }} - uses: ./.github/actions/ses-email-action - with: - region: us-west-2 - version: ${{ github.event.client_payload.dolt_version }} - format: ${{ github.event.client_payload.noms_bin_format }} - toAddresses: '["${{ github.event.client_payload.actor_email }}"]' - dataFile: ${{ format('{0}/results.log', github.workspace) }} diff --git a/go/cmd/dolt/commands/tblcmds/import.go b/go/cmd/dolt/commands/tblcmds/import.go index 4d0e8c9cc1..544d461626 100644 --- a/go/cmd/dolt/commands/tblcmds/import.go +++ b/go/cmd/dolt/commands/tblcmds/import.go @@ -544,7 +544,7 @@ func move(ctx context.Context, rd table.SqlRowReader, wr *mvdata.SqlEngineTableW r := pipeline.GetTransFailureSqlRow(trf) if r != nil { - cli.PrintErr(sql.FormatRow(r)) + cli.PrintErr(sql.FormatRow(r), "\n") } return false diff --git a/go/libraries/doltcore/table/untyped/csv/reader.go b/go/libraries/doltcore/table/untyped/csv/reader.go index b34baafd7b..de0549d928 100644 --- a/go/libraries/doltcore/table/untyped/csv/reader.go +++ b/go/libraries/doltcore/table/untyped/csv/reader.go @@ -187,7 +187,7 @@ func (csvr *CSVReader) ReadSqlRow(crx context.Context) (sql.Row, error) { return nil, io.EOF } - colVals, err := csvr.csvReadRecords(nil) + rowVals, err := csvr.csvReadRecords(nil) if err == io.EOF { csvr.isDone = true @@ -195,34 +195,49 @@ func (csvr *CSVReader) 
ReadSqlRow(crx context.Context) (sql.Row, error) { } schSize := csvr.sch.GetAllCols().Size() - if len(colVals) != schSize { + if len(rowVals) != schSize { var out strings.Builder - for _, cv := range colVals { + for _, cv := range rowVals { if cv != nil { out.WriteString(*cv) } out.WriteRune(',') } - return nil, table.NewBadRow(nil, - fmt.Sprintf("csv reader's schema expects %d fields, but line only has %d values.", schSize, len(colVals)), - fmt.Sprintf("line: '%s'", out.String()), + + badMpStr, unusedRowValues := interpretRowSizeError(csvr.sch, rowVals) + + args := []string{ + fmt.Sprintf("CSV reader expected %d values, but saw %d.", schSize, len(rowVals)), + fmt.Sprintf("row values: '%s'", badMpStr), + } + + if len(unusedRowValues) > 0 { + args = append(args, fmt.Sprintf("with the following values left over: '%v'", unusedRowValues)) + } + + return rowValsToSQLRows(rowVals), table.NewBadRow(nil, + args..., ) } if err != nil { - return nil, table.NewBadRow(nil, err.Error()) + return rowValsToSQLRows(rowVals), table.NewBadRow(nil, err.Error()) } + return rowValsToSQLRows(rowVals), nil +} + +func rowValsToSQLRows(rowVals []*string) sql.Row { var sqlRow sql.Row - for _, colVal := range colVals { - if colVal == nil { + for _, rowVal := range rowVals { + if rowVal == nil { sqlRow = append(sqlRow, nil) } else { - sqlRow = append(sqlRow, *colVal) + sqlRow = append(sqlRow, *rowVal) } } - return sqlRow, nil + return sqlRow } // GetSchema gets the schema of the rows that this reader will return @@ -455,3 +470,43 @@ func (csvr *CSVReader) parseQuotedField(rs *recordState) (kontinue bool, err err } } } + +// interpretRowSizeError returns a format map (written as a string) of a set of columns to their row values. It also +// returns a slice of an unused strings. 
+func interpretRowSizeError(schema schema.Schema, rowVals []*string) (string, []string) { + cols := schema.GetAllCols().GetColumns() + + keyValPairs := make([][]string, len(cols)) + unusedRowValues := make([]string, 0) + + // 1. Start by adding all cols to the map and their relevant pair + for i, col := range cols { + if i >= len(rowVals) { + keyValPairs[i] = []string{col.Name, ""} + } else { + keyValPairs[i] = []string{col.Name, *rowVals[i]} + } + } + + // 2. Append any unused row values to print to the user + for i := len(cols); i < len(rowVals); i++ { + if rowVals[i] == nil { + unusedRowValues = append(unusedRowValues, fmt.Sprintf("%q", "")) + } else { + unusedRowValues = append(unusedRowValues, fmt.Sprintf("%q", *rowVals[i])) + } + } + + // 3. Pretty print the column names to value pairings + var b bytes.Buffer + + b.Write([]byte("{\n")) + + for _, pair := range keyValPairs { + b.Write([]byte(fmt.Sprintf("\t%q: %q\n", pair[0], pair[1]))) + } + + b.Write([]byte("}\n")) + + return b.String(), unusedRowValues +} diff --git a/go/utils/remotesrv/main.go b/go/utils/remotesrv/main.go index 40f3c3a415..4cb6ba9cc9 100644 --- a/go/utils/remotesrv/main.go +++ b/go/utils/remotesrv/main.go @@ -42,9 +42,7 @@ func main() { err := os.Chdir(*dirParam) if err != nil { - log.Fatalln("failed to chdir to:", *dirParam) - log.Fatalln("error:", err.Error()) - os.Exit(1) + log.Fatalln("failed to chdir to:", *dirParam, "error:", err.Error()) } else { log.Println("cwd set to " + *dirParam) } diff --git a/integration-tests/bats/1pk5col-ints.bats b/integration-tests/bats/1pk5col-ints.bats index 0a2c720485..bf3173b20a 100755 --- a/integration-tests/bats/1pk5col-ints.bats +++ b/integration-tests/bats/1pk5col-ints.bats @@ -513,10 +513,11 @@ pk,c1,c2,c3,c4,c5 DELIM run dolt table import test -u badline.csv [ "$status" -eq 1 ] - [[ "${lines[0]}" =~ "Additions" ]] || false + echo $output + [[ "${lines[0]}" =~ "Additions: 2" ]] || false [[ "${lines[1]}" =~ "A bad row was encountered" ]] || false 
- [[ "${lines[2]}" =~ "expects 6 fields" ]] || false - [[ "${lines[2]}" =~ "line only has 1 value" ]] || false + [[ "${lines[2]}" =~ "Bad Row: [2]" ]] || false + [[ "${lines[3]}" =~ "CSV reader expected 6 values, but saw 1" ]] || false } @test "1pk5col-ints: import data from a csv file with a bad header" { diff --git a/integration-tests/bats/import-update-tables.bats b/integration-tests/bats/import-update-tables.bats index 5f702fcfd8..8706fb45e6 100644 --- a/integration-tests/bats/import-update-tables.bats +++ b/integration-tests/bats/import-update-tables.bats @@ -320,15 +320,15 @@ DELIM run dolt table import -u --continue persons persons.csv [ "$status" -eq 0 ] - [[ "$output" =~ "The following rows were skipped:" ]] || false - [[ "$output" =~ "[2,little,doe,10]" ]] || false - [[ "$output" =~ "[3,little,doe,4]" ]] || false - [[ "$output" =~ "[4,little,doe,1]" ]] || false - [[ "$output" =~ "Rows Processed: 1, Additions: 1, Modifications: 0, Had No Effect: 0" ]] || false - [[ "$output" =~ "Import completed successfully." ]] || false + [[ "${lines[0]}" =~ "The following rows were skipped:" ]] || false + [[ "${lines[1]}" =~ '[2,little,doe,10]' ]] || false + [[ "${lines[2]}" =~ '[3,little,doe,4]' ]] || false + [[ "${lines[3]}" =~ '[4,little,doe,1]' ]] || false + [[ "${lines[4]}" =~ "Rows Processed: 1, Additions: 1, Modifications: 0, Had No Effect: 0" ]] || false + [[ "${lines[5]}" =~ "Lines skipped: 3" ]] || false + [[ "${lines[6]}" =~ "Import completed successfully." 
]] || false run dolt sql -r csv -q "select * from persons" - skip "this only worked b/c no rollback on keyless tables; this also fails on primary key tables" [ "${#lines[@]}" -eq 2 ] [[ "$output" =~ "ID,LastName,FirstName,Age" ]] || false [[ "$output" =~ "1,jon,doe,20" ]] || false @@ -381,7 +381,8 @@ DELIM run dolt table import -u test bad-updates.csv [ "$status" -eq 1 ] [[ "$output" =~ "A bad row was encountered while moving data" ]] || false - [[ "$output" =~ "csv reader's schema expects 2 fields, but line only has 3 values" ]] || false + [[ "$output" =~ "CSV reader expected 2 values, but saw 3" ]] || false + [[ "$output" =~ "with the following values left over: '[\"\"]'" ]] || false run dolt table import -u --continue test bad-updates.csv [ "$status" -eq 0 ] @@ -1168,3 +1169,48 @@ [[ "$output" =~ "3,large" ]] || false [[ "$output" =~ "4,x-small" ]] || false # should be empty } + +@test "import-update-tables: test better error message for mismatching column count with schema" { + # Case where there are fewer values in a row than the number of columns in the schema + cat <<DELIM > bad-updates.csv +pk,v1, v2 +5,5 +6,5 +DELIM + + dolt sql -q "CREATE TABLE test(pk BIGINT PRIMARY KEY, v1 BIGINT DEFAULT 2 NOT NULL, v2 int)" + dolt sql -q "INSERT INTO test (pk, v1, v2) VALUES (1, 2, 3), (2, 3, 4)" + + run dolt table import -u test bad-updates.csv + [ "$status" -eq 1 ] + [[ "$output" =~ "A bad row was encountered while moving data" ]] || false + [[ "$output" =~ "CSV reader expected 3 values, but saw 2" ]] || false + [[ "$output" =~ "row values:" ]] + ! 
[[ "$output" =~ "with the following values left over: '[\"\"]'" ]] || false + + # Case there are more columns in the rows than the number of columns in the schema + cat <<DELIM > bad-updates.csv +pk,v1 +5,7,5 +6,5,5 +DELIM + + run dolt table import -u test bad-updates.csv + [ "$status" -eq 1 ] + [[ "$output" =~ "A bad row was encountered while moving data" ]] || false + [[ "$output" =~ "CSV reader expected 2 values, but saw 3" ]] || false + [[ "$output" =~ "row values:" ]] + [[ "$output" =~ '"pk": "5"' ]] + [[ "$output" =~ '"v1": "7"' ]] + [[ "$output" =~ "with the following values left over: '[\"5\"]'" ]] || false + + # Add a continue statement + run dolt table import -u --continue test bad-updates.csv + [ "$status" -eq 0 ] + [[ "${lines[2]}" =~ "The following rows were skipped:" ]] || false + [[ "${lines[3]}" =~ '[5,7,5]' ]] || false + [[ "${lines[4]}" =~ '[6,5,5]' ]] || false + [[ "${lines[5]}" =~ "Rows Processed: 0, Additions: 0, Modifications: 0, Had No Effect: 0" ]] || false + [[ "${lines[6]}" =~ "Lines skipped: 2" ]] || false + [[ "${lines[7]}" =~ "Import completed successfully." ]] || false +}