Merge remote-tracking branch 'origin/main' into taylor/jwtauth

This commit is contained in:
Taylor Bantle
2022-07-22 14:17:37 -07:00
16 changed files with 129 additions and 66 deletions

View File

@@ -1,4 +1,4 @@
name: Release
name: Release Dolt
on:
workflow_dispatch:

View File

@@ -31,7 +31,7 @@ jobs:
VERSION: ${{ github.event.inputs.version || github.event.client_payload.version }}
ACTOR: ${{ github.event.client_payload.actor || github.actor }}
correctness-release:
correctness:
runs-on: ubuntu-18.04
needs: set-version-actor
name: Dolt SQL Correctness

View File

@@ -1,4 +1,4 @@
# name: Performance Benchmark Reporter
name: Post to Pull Request
on:
repository_dispatch:
@@ -47,40 +47,3 @@ jobs:
env:
ACTOR: ${{ github.event.client_payload.actor }}
ISSUE_NUMBER: ${{ github.event.client_payload.issue_number }}
report-email:
name: Report Performance Benchmarks via Email
runs-on: ubuntu-18.04
if: ${{ github.event.client_payload.issue_number == -1 }}
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-west-2
- name: Get benchmark results
id: get-results
run: aws s3api get-object --bucket="$BUCKET" --key="$KEY" results.log
env:
KEY: ${{ github.event.client_payload.key }}
BUCKET: ${{ github.event.client_payload.bucket }}
- name: Send Email
if: ${{ github.event.client_payload.actor_email == '' }}
uses: ./.github/actions/ses-email-action
with:
region: us-west-2
version: ${{ github.event.client_payload.dolt_version }}
format: ${{ github.event.client_payload.noms_bin_format }}
toAddresses: '["${{ secrets.PERF_REPORTS_EMAIL_ADDRESS }}"]'
dataFile: ${{ format('{0}/results.log', github.workspace) }}
- name: Send Email
if: ${{ github.event.client_payload.actor_email != '' }}
uses: ./.github/actions/ses-email-action
with:
region: us-west-2
version: ${{ github.event.client_payload.dolt_version }}
format: ${{ github.event.client_payload.noms_bin_format }}
toAddresses: '["${{ github.event.client_payload.actor_email }}"]'
dataFile: ${{ format('{0}/results.log', github.workspace) }}

View File

@@ -544,7 +544,7 @@ func move(ctx context.Context, rd table.SqlRowReader, wr *mvdata.SqlEngineTableW
r := pipeline.GetTransFailureSqlRow(trf)
if r != nil {
cli.PrintErr(sql.FormatRow(r))
cli.PrintErr(sql.FormatRow(r), "\n")
}
return false

View File

@@ -187,7 +187,7 @@ func (csvr *CSVReader) ReadSqlRow(crx context.Context) (sql.Row, error) {
return nil, io.EOF
}
colVals, err := csvr.csvReadRecords(nil)
rowVals, err := csvr.csvReadRecords(nil)
if err == io.EOF {
csvr.isDone = true
@@ -195,34 +195,49 @@ func (csvr *CSVReader) ReadSqlRow(crx context.Context) (sql.Row, error) {
}
schSize := csvr.sch.GetAllCols().Size()
if len(colVals) != schSize {
if len(rowVals) != schSize {
var out strings.Builder
for _, cv := range colVals {
for _, cv := range rowVals {
if cv != nil {
out.WriteString(*cv)
}
out.WriteRune(',')
}
return nil, table.NewBadRow(nil,
fmt.Sprintf("csv reader's schema expects %d fields, but line only has %d values.", schSize, len(colVals)),
fmt.Sprintf("line: '%s'", out.String()),
badMpStr, unusedRowValues := interpretRowSizeError(csvr.sch, rowVals)
args := []string{
fmt.Sprintf("CSV reader expected %d values, but saw %d.", schSize, len(rowVals)),
fmt.Sprintf("row values: '%s'", badMpStr),
}
if len(unusedRowValues) > 0 {
args = append(args, fmt.Sprintf("with the following values left over: '%v'", unusedRowValues))
}
return rowValsToSQLRows(rowVals), table.NewBadRow(nil,
args...,
)
}
if err != nil {
return nil, table.NewBadRow(nil, err.Error())
return rowValsToSQLRows(rowVals), table.NewBadRow(nil, err.Error())
}
return rowValsToSQLRows(rowVals), nil
}
func rowValsToSQLRows(rowVals []*string) sql.Row {
var sqlRow sql.Row
for _, colVal := range colVals {
if colVal == nil {
for _, rowVal := range rowVals {
if rowVal == nil {
sqlRow = append(sqlRow, nil)
} else {
sqlRow = append(sqlRow, *colVal)
sqlRow = append(sqlRow, *rowVal)
}
}
return sqlRow, nil
return sqlRow
}
// GetSchema gets the schema of the rows that this reader will return
@@ -455,3 +470,43 @@ func (csvr *CSVReader) parseQuotedField(rs *recordState) (kontinue bool, err err
}
}
}
// interpretRowSizeError formats a CSV row whose value count does not match the schema, for use in
// error messages. It returns a pretty-printed mapping (written as a string) of each schema column
// name to the row value it would have received, plus a slice of any leftover row values that did
// not map to a column. Missing or nil values render as the empty string.
func interpretRowSizeError(schema schema.Schema, rowVals []*string) (string, []string) {
	cols := schema.GetAllCols().GetColumns()
	keyValPairs := make([][]string, len(cols))
	unusedRowValues := make([]string, 0)

	// 1. Pair every schema column with its row value. Guard against nil entries: CSV values may
	// legitimately be nil (see rowValsToSQLRows), so dereferencing unconditionally would panic.
	for i, col := range cols {
		if i >= len(rowVals) || rowVals[i] == nil {
			keyValPairs[i] = []string{col.Name, ""}
		} else {
			keyValPairs[i] = []string{col.Name, *rowVals[i]}
		}
	}

	// 2. Collect any row values beyond the schema's column count so they can be reported to the user.
	for i := len(cols); i < len(rowVals); i++ {
		if rowVals[i] == nil {
			unusedRowValues = append(unusedRowValues, fmt.Sprintf("%q", ""))
		} else {
			unusedRowValues = append(unusedRowValues, fmt.Sprintf("%q", *rowVals[i]))
		}
	}

	// 3. Pretty print the column-name-to-value pairings.
	var b bytes.Buffer
	b.WriteString("{\n")
	for _, pair := range keyValPairs {
		fmt.Fprintf(&b, "\t%q: %q\n", pair[0], pair[1])
	}
	b.WriteString("}\n")
	return b.String(), unusedRowValues
}

View File

@@ -42,9 +42,7 @@ func main() {
err := os.Chdir(*dirParam)
if err != nil {
log.Fatalln("failed to chdir to:", *dirParam)
log.Fatalln("error:", err.Error())
os.Exit(1)
log.Fatalln("failed to chdir to:", *dirParam, "error:", err.Error())
} else {
log.Println("cwd set to " + *dirParam)
}

View File

@@ -513,10 +513,11 @@ pk,c1,c2,c3,c4,c5
DELIM
run dolt table import test -u badline.csv
[ "$status" -eq 1 ]
[[ "${lines[0]}" =~ "Additions" ]] || false
echo $output
[[ "${lines[0]}" =~ "Additions: 2" ]] || false
[[ "${lines[1]}" =~ "A bad row was encountered" ]] || false
[[ "${lines[2]}" =~ "expects 6 fields" ]] || false
[[ "${lines[2]}" =~ "line only has 1 value" ]] || false
[[ "${lines[2]}" =~ "Bad Row: [2]" ]] || false
[[ "${lines[3]}" =~ "CSV reader expected 6 values, but saw 1" ]] || false
}
@test "1pk5col-ints: import data from a csv file with a bad header" {

View File

@@ -320,15 +320,15 @@ DELIM
run dolt table import -u --continue persons persons.csv
[ "$status" -eq 0 ]
[[ "$output" =~ "The following rows were skipped:" ]] || false
[[ "$output" =~ "[2,little,doe,10]" ]] || false
[[ "$output" =~ "[3,little,doe,4]" ]] || false
[[ "$output" =~ "[4,little,doe,1]" ]] || false
[[ "$output" =~ "Rows Processed: 1, Additions: 1, Modifications: 0, Had No Effect: 0" ]] || false
[[ "$output" =~ "Import completed successfully." ]] || false
[[ "${lines[0]}" =~ "The following rows were skipped:" ]] || false
[[ "${lines[1]}" =~ '[2,little,doe,10]' ]] || false
[[ "${lines[2]}" =~ '[3,little,doe,4]' ]] || false
[[ "${lines[3]}" =~ '[4,little,doe,1]' ]] || false
[[ "${lines[4]}" =~ "Rows Processed: 1, Additions: 1, Modifications: 0, Had No Effect: 0" ]] || false
[[ "${lines[5]}" =~ "Lines skipped: 3" ]] || false
[[ "${lines[6]}" =~ "Import completed successfully." ]] || false
run dolt sql -r csv -q "select * from persons"
skip "this only worked b/c no rollback on keyless tables; this also fails on primary key tables"
[ "${#lines[@]}" -eq 2 ]
[[ "$output" =~ "ID,LastName,FirstName,Age" ]] || false
[[ "$output" =~ "1,jon,doe,20" ]] || false
@@ -381,7 +381,8 @@ DELIM
run dolt table import -u test bad-updates.csv
[ "$status" -eq 1 ]
[[ "$output" =~ "A bad row was encountered while moving data" ]] || false
[[ "$output" =~ "csv reader's schema expects 2 fields, but line only has 3 values" ]] || false
[[ "$output" =~ "CSV reader expected 2 values, but saw 3" ]] || false
[[ "$output" =~ "with the following values left over: '[\"\"]'" ]] || false
run dolt table import -u --continue test bad-updates.csv
[ "$status" -eq 0 ]
@@ -1168,3 +1169,48 @@ DELIM
[[ "$output" =~ "3,large" ]] || false
[[ "$output" =~ "4,x-small" ]] || false # should be empty
}
@test "import-update-tables: test better error message for mismatching column count with schema" {
    # Case where there are fewer values in a row than the number of columns in the schema
    cat <<DELIM > bad-updates.csv
pk,v1, v2
5,5
6,5
DELIM
    dolt sql -q "CREATE TABLE test(pk BIGINT PRIMARY KEY, v1 BIGINT DEFAULT 2 NOT NULL, v2 int)"
    dolt sql -q "INSERT INTO test (pk, v1, v2) VALUES (1, 2, 3), (2, 3, 4)"
    run dolt table import -u test bad-updates.csv
    [ "$status" -eq 1 ]
    [[ "$output" =~ "A bad row was encountered while moving data" ]] || false
    [[ "$output" =~ "CSV reader expected 3 values, but saw 2" ]] || false
    # "|| false" is required on every assertion: a bare failing [[ ]] does not
    # reliably abort a bats test, so without it these checks are silently skipped.
    [[ "$output" =~ "row values:" ]] || false
    ! [[ "$output" =~ "with the following values left over: '[\"\"]'" ]] || false

    # Case there are more columns in the rows than the number of columns in the schema
    cat <<DELIM > bad-updates.csv
pk,v1
5,7,5
6,5,5
DELIM
    run dolt table import -u test bad-updates.csv
    [ "$status" -eq 1 ]
    [[ "$output" =~ "A bad row was encountered while moving data" ]] || false
    [[ "$output" =~ "CSV reader expected 2 values, but saw 3" ]] || false
    [[ "$output" =~ "row values:" ]] || false
    [[ "$output" =~ '"pk": "5"' ]] || false
    [[ "$output" =~ '"v1": "7"' ]] || false
    [[ "$output" =~ "with the following values left over: '[\"5\"]'" ]] || false

    # Add a continue statement
    run dolt table import -u --continue test bad-updates.csv
    [ "$status" -eq 0 ]
    [[ "${lines[2]}" =~ "The following rows were skipped:" ]] || false
    [[ "${lines[3]}" =~ '[5,7,5]' ]] || false
    [[ "${lines[4]}" =~ '[6,5,5]' ]] || false
    [[ "${lines[5]}" =~ "Rows Processed: 0, Additions: 0, Modifications: 0, Had No Effect: 0" ]] || false
    [[ "${lines[6]}" =~ "Lines skipped: 2" ]] || false
    [[ "${lines[7]}" =~ "Import completed successfully." ]] || false
}