From 1c40607a226c8b186661bfbd7f2e0d4081727b11 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 10:23:48 -0700 Subject: [PATCH 01/16] First pass at implementing a proper statement scanner that includes line numbers. Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql.go | 26 +---- go/cmd/dolt/commands/sql_statement_scanner.go | 105 ++++++++++++++++++ .../commands/sql_statement_scanner_test.go | 59 ++++++++++ 3 files changed, 165 insertions(+), 25 deletions(-) create mode 100755 go/cmd/dolt/commands/sql_statement_scanner.go create mode 100755 go/cmd/dolt/commands/sql_statement_scanner_test.go diff --git a/go/cmd/dolt/commands/sql.go b/go/cmd/dolt/commands/sql.go index 49cd830b22..962f264dd4 100644 --- a/go/cmd/dolt/commands/sql.go +++ b/go/cmd/dolt/commands/sql.go @@ -15,8 +15,6 @@ package commands import ( - "bufio" - "bytes" "context" "fmt" "io" @@ -553,31 +551,9 @@ func saveQuery(ctx context.Context, root *doltdb.RootValue, dEnv *env.DoltEnv, q return newRoot, nil } -// ScanStatements is a split function for a Scanner that returns each SQL statement in the input as a token. It doesn't -// work for strings that contain semi-colons. Supporting that requires implementing a state machine. -func scanStatements(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - if i := bytes.IndexByte(data, ';'); i >= 0 { - // We have a full ;-terminated line. - return i + 1, data[0:i], nil - } - // If we're at EOF, we have a final, non-terminated line. Return it. - if atEOF { - return len(data), data, nil - } - // Request more data. - return 0, nil, nil -} - // runBatchMode processes queries until EOF. The Root of the sqlEngine may be updated. func runBatchMode(ctx *sql.Context, se *sqlEngine, input io.Reader) error { - scanner := bufio.NewScanner(input) - const maxCapacity = 512 * 1024 - buf := make([]byte, maxCapacity) - scanner.Buffer(buf, maxCapacity) - scanner.Split(scanStatements) + scanner := NewSqlStatementScanner(input) var query string for scanner.Scan() { diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go new file mode 100755 index 0000000000..fde2be0225 --- /dev/null +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -0,0 +1,105 @@ +package commands + +import ( + "bufio" + "io" +) + +type statementScanner struct { + *bufio.Scanner + lastStatementLineNum int + lineNum int + quoteChar byte + lastChar byte + numConsecutiveQuoteChars int +} + +func NewSqlStatementScanner(input io.Reader) *statementScanner { + scanner := bufio.NewScanner(input) + const maxCapacity = 512 * 1024 + buf := make([]byte, maxCapacity) + scanner.Buffer(buf, maxCapacity) + + s := &statementScanner{ + Scanner: scanner, + } + scanner.Split(s.scanStatements) + + return s +} + +func (s *statementScanner) Scan() bool { + s.lineNum++ + scanned := s.Scanner.Scan() + return scanned +} + +const ( + sQuote byte = '\'' + dQuote = '"' + backslash = '\\' +) + +// ScanStatements is a split function for a Scanner that returns each SQL statement in the input as a token. +func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + + for i := range data { + switch data[i] { + case '\n': + s.lineNum++ + case ';': + if s.quoteChar == 0 { + return i + 1, data[0:i], nil + } + case backslash: + // escaped quote character + if s.lastChar == backslash { + break + } + case sQuote, dQuote: + // escaped quote character + if s.lastChar == backslash { + break + } + + // two quotes in a row + if s.lastChar == data[i] { + s.numConsecutiveQuoteChars++ + if s.numConsecutiveQuoteChars % 2 == 1 { + // escaped quote character + } + break + } + + // end quote + if s.quoteChar == data[i] { + s.quoteChar = 0 + s.numConsecutiveQuoteChars = 0 + break + } + + // embedded quote + if s.quoteChar != data[i] { + break + } + + // open quote + s.quoteChar = data[i] + default: + s.numConsecutiveQuoteChars = 0 + } + + s.lastChar = data[i] + } + + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), data, nil + } + + // Request more data. + return 0, nil, nil +} diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go new file mode 100755 index 0000000000..29c5044805 --- /dev/null +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -0,0 +1,59 @@ +package commands + +import ( + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "strings" + "testing" +) + +func TestScanStatements(t *testing.T) { + type testcase struct { + input string + statements []string + } + + testcases := []testcase { + { + input: `select * from foo; select baz from foo; +select +a from b; select 1`, + statements: []string{ + "select * from foo", + "select baz from foo", + "select \na from b", + "select 1", + }, + }, + { + input: `insert into foo values ('a', "b;", 'c;;"" +'); update foo set baz = bar, +qux = '"hello"""' where xyzzy = ";;';'"; +create table foo (a int not null default ';', +primary key (a));`, + statements: []string{ + `insert into foo values ('a', "b;", 'c;;"" +')`, + `update foo set baz = bar, +qux = '"hello"""' where xyzzy = ";;';'`, + `create table foo (a int not null default ';', +primary key (a))`, + }, + }, + } + + for _, tt := range testcases { + t.Run(tt.input, func(t *testing.T) { + reader := strings.NewReader(tt.input) + scanner := NewSqlStatementScanner(reader) + var i int + for scanner.Scan() { + require.True(t, i < len(tt.statements)) + assert.Equal(t, tt.statements[i], strings.TrimSpace(scanner.Text())) + i++ + } + + require.NoError(t, scanner.Err()) + }) + } +} \ No newline at end of file From 5d5957275abd9f9ba9937830a122929b293f4e65 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 11:56:54 -0700 Subject: [PATCH 02/16] Getting closer to statement parser Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 2 +- go/cmd/dolt/commands/sql_statement_scanner_test.go | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index fde2be0225..0e8e97d479 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -82,7 +82,7 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, } // embedded quote - if s.quoteChar != data[i] { + if s.quoteChar != 0 && s.quoteChar != data[i] { break } diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go index 29c5044805..28a1d9afc5 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner_test.go +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -25,6 +25,12 @@ a from b; select 1`, "select 1", }, }, + { + input: `insert into foo values (";;';'");`, + statements: []string{ + `insert into foo values (";;';'")`, + }, + }, { input: `insert into foo values ('a', "b;", 'c;;"" '); update foo set baz = bar, @@ -35,7 +41,7 @@ primary key (a));`, `insert into foo values ('a', "b;", 'c;;"" ')`, `update foo set baz = bar, -qux = '"hello"""' where xyzzy = ";;';'`, +qux = '"hello"""' where xyzzy = ";;';'"`, `create table foo (a int not null default ';', primary key (a))`, }, From 11ad423dce30c013eea5ce397303653b3fa767fc Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 13:57:47 -0700 Subject: [PATCH 03/16] More tests Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 12 +-- .../commands/sql_statement_scanner_test.go | 78 +++++++++++++++++-- 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index 0e8e97d479..aa0437606d 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -12,6 +12,7 @@ type statementScanner struct { quoteChar byte lastChar byte numConsecutiveQuoteChars int + numConsecutiveBackslashes int } func NewSqlStatementScanner(input io.Reader) *statementScanner { @@ -55,13 +56,14 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, return i + 1, data[0:i], nil } case backslash: - // escaped quote character - if s.lastChar == backslash { - break - } + s.numConsecutiveBackslashes++ + s.numConsecutiveQuoteChars = 0 case sQuote, dQuote: + numConsecutiveBackslashes := s.numConsecutiveBackslashes + s.numConsecutiveBackslashes = 0 + // escaped quote character - if s.lastChar == backslash { + if s.lastChar == backslash && numConsecutiveBackslashes % 2 == 1 { break } diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go index 28a1d9afc5..f3bb000722 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner_test.go +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -14,23 +14,85 @@ func TestScanStatements(t *testing.T) { } testcases := []testcase { + { + input: `insert into foo values (";;';'");`, + statements: []string{ + `insert into foo values (";;';'")`, + }, + }, + { + input: `select ''';;'; select ";\;"`, + statements: []string{ + `select ''';;'`, + `select ";\;"`, + }, + }, + { + input: `select '\\'''; select '";";'; select 1`, + statements: []string{ + `select '\\'''`, + `select '";";'`, + `select 1`, + }, + }, + { + input: `insert into foo values(''); select 1`, + statements: []string{ + `insert into foo values('')`, + `select 1`, + }, + }, + { + input: `insert into foo values('''); select 1`, + statements: []string{ + `insert into foo values('''); select 1`, + }, + }, + { + input: `insert into foo values(''''); select 1`, + statements: []string{ + `insert into foo values('''')`, + `select 1`, + }, + }, + { + input: `insert into foo values(""); select 1`, + statements: []string{ + `insert into foo values("")`, + `select 1`, + }, + }, + { + input: `insert into foo values("""); select 1`, + statements: []string{ + `insert into foo values("""); select 1`, + }, + }, + { + input: `insert into foo values(""""); select 1`, + statements: []string{ + `insert into foo values("""")`, + `select 1`, + }, + }, + { + input: `select '\''; select "hell\"o"`, + statements: []string{ + `select '\''`, + `select "hell\"o"`, + }, + }, { input: `select * from foo; select baz from foo; -select +select a from b; select 1`, statements: []string{ "select * from foo", "select baz from foo", - "select \na from b", + "select\na from b", "select 1", }, }, - { - input: `insert into foo values (";;';'");`, - statements: []string{ - `insert into foo values (";;';'")`, - }, - }, { input: `insert into foo values ('a', "b;", 'c;;"" '); update foo set baz = bar, From 7b5cd4926270ece8f9fbf4f5938a044fdb11d335 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 16:26:45 -0700 Subject: [PATCH 04/16] Fully working scanner minus line number verification Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 103 +++++++++++------- 1 file changed, 62 insertions(+), 41 deletions(-) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index aa0437606d..8f21cef4b0 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -11,8 +11,8 @@ type statementScanner struct { lineNum int quoteChar byte lastChar byte - numConsecutiveQuoteChars int numConsecutiveBackslashes int + ignoreNextChar bool; } func NewSqlStatementScanner(input io.Reader) *statementScanner { @@ -48,50 +48,62 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, } for i := range data { - switch data[i] { - case '\n': - s.lineNum++ - case ';': - if s.quoteChar == 0 { - return i + 1, data[0:i], nil - } - case backslash: - s.numConsecutiveBackslashes++ - s.numConsecutiveQuoteChars = 0 - case sQuote, dQuote: - numConsecutiveBackslashes := s.numConsecutiveBackslashes - s.numConsecutiveBackslashes = 0 - // escaped quote character - if s.lastChar == backslash && numConsecutiveBackslashes % 2 == 1 { - break - } - - // two quotes in a row - if s.lastChar == data[i] { - s.numConsecutiveQuoteChars++ - if s.numConsecutiveQuoteChars % 2 == 1 { - // escaped quote character + if !s.ignoreNextChar { + switch data[i] { + case '\n': + s.lineNum++ + case ';': + if s.quoteChar == 0 { + _, _, _ = s.resetState() + return i + 1, data[0:i], nil } - break - } + case backslash: + s.numConsecutiveBackslashes++ + case sQuote, dQuote: + numConsecutiveBackslashes := s.numConsecutiveBackslashes + s.numConsecutiveBackslashes = 0 - // end quote - if s.quoteChar == data[i] { - s.quoteChar = 0 - s.numConsecutiveQuoteChars = 0 - break - } + // escaped quote character + if s.lastChar == backslash && numConsecutiveBackslashes%2 == 1 { + break + } - // embedded quote - if s.quoteChar != 0 && s.quoteChar != data[i] { - break - } + // currently in a quoted string + if s.quoteChar != 0 { - // open quote - s.quoteChar = data[i] - default: - s.numConsecutiveQuoteChars = 0 + // end quote or two consecutive quote characters (a form of escaping quote chars) + if s.quoteChar == data[i] { + var nextChar byte = 0 + if i+1 < len(data) { + nextChar = data[i+1] + } + + if nextChar == s.quoteChar { + // escaped quote. skip the next character + s.ignoreNextChar = true + break + } else if atEOF || i+1 < len(data) { + // end quote + s.quoteChar = 0 + break + } else { + // need more data to make a decision + return s.resetState() + } + } + + // embedded quote ('"' or "'") + break + } + + // open quote + s.quoteChar = data[i] + default: + s.numConsecutiveBackslashes = 0 + } + } else { + s.ignoreNextChar = false } s.lastChar = data[i] @@ -103,5 +115,14 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, } // Request more data. - return 0, nil, nil + return s.resetState() } + +// resetState resets the internal state of the scanner and returns the "more data" response for a split function +func (s *statementScanner) resetState() (advance int, token []byte, err error) { + s.quoteChar = 0 + s.numConsecutiveBackslashes = 0 + s.ignoreNextChar = false + s.lastChar = 0 + return 0, nil, nil +} \ No newline at end of file From 1e390c794eb029aef11459dd93c62a19af7b87f5 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 16:32:10 -0700 Subject: [PATCH 05/16] More tests of weird string input Signed-off-by: Zach Musgrave --- .../dolt/commands/sql_statement_scanner_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go index f3bb000722..ccf10d02b2 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner_test.go +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -13,6 +13,7 @@ func TestScanStatements(t *testing.T) { statements []string } + // Some of these include malformed input (e.g. strings that aren't properly terminated) testcases := []testcase { { input: `insert into foo values (";;';'");`, @@ -27,6 +28,13 @@ func TestScanStatements(t *testing.T) { `select ";\;"`, }, }, + { + input: `select ''';;'; select ";\;`, + statements: []string{ + `select ''';;'`, + `select ";\;`, + }, + }, { input: `select '\\'''; select '";";'; select 1`, statements: []string{ @@ -35,6 +43,13 @@ func TestScanStatements(t *testing.T) { `select 1`, }, }, + { + input: `select '\\''; select '";";'; select 1`, + statements: []string{ + `select '\\''; select '";"`, + `'; select 1`, + }, + }, { input: `insert into foo values(''); select 1`, statements: []string{ From b2fe53eb82795dc7b9c12588d9457ae0d3cdb4ce Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 16:57:15 -0700 Subject: [PATCH 06/16] Fully replaced sql statement scanner Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql.go | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/go/cmd/dolt/commands/sql.go b/go/cmd/dolt/commands/sql.go index 962f264dd4..1771fade4e 100644 --- a/go/cmd/dolt/commands/sql.go +++ b/go/cmd/dolt/commands/sql.go @@ -561,11 +561,6 @@ func runBatchMode(ctx *sql.Context, se *sqlEngine, input io.Reader) error { if len(query) == 0 || query == "\n" { continue } - if !batchInsertEarlySemicolon(query) { - query += ";" - // TODO: We should fix this problem by properly implementing a state machine for scanStatements - continue - } if err := processBatchQuery(ctx, query, se); err != nil { verr := formatQueryError(query, err) cli.PrintErrln(verr.Verbose()) @@ -583,36 +578,6 @@ func runBatchMode(ctx *sql.Context, se *sqlEngine, input io.Reader) error { return flushBatchedEdits(ctx, se) } -// batchInsertEarlySemicolon loops through a string to check if Scan stopped too early on a semicolon -func batchInsertEarlySemicolon(query string) bool { - quotes := []uint8{'\'', '"'} - midQuote := false - queryLength := len(query) - for i := 0; i < queryLength; i++ { - for _, quote := range quotes { - if query[i] == quote { - i++ - midQuote = true - inEscapeMode := false - for ; i < queryLength; i++ { - if inEscapeMode { - inEscapeMode = false - } else { - if query[i] == quote { - midQuote = false - break - } else if query[i] == '\\' { - inEscapeMode = true - } - } - } - break - } - } - } - return !midQuote -} - // runShell starts a SQL shell. Returns when the user exits the shell. The Root of the sqlEngine may // be updated by any queries which were processed. func runShell(ctx *sql.Context, se *sqlEngine, mrEnv env.MultiRepoEnv) error { From c6a74f819fab9846866f4f1e21f8966ada70e023 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 20:16:03 -0700 Subject: [PATCH 07/16] Working line numbers with tests Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 40 +++++++++++------ .../commands/sql_statement_scanner_test.go | 45 +++++++++++++++++++ 2 files changed, 71 insertions(+), 14 deletions(-) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index 8f21cef4b0..470b66c220 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -3,16 +3,19 @@ package commands import ( "bufio" "io" + "unicode" ) type statementScanner struct { *bufio.Scanner - lastStatementLineNum int - lineNum int - quoteChar byte - lastChar byte - numConsecutiveBackslashes int - ignoreNextChar bool; + statementStartLine int // the line number of the first line of the last parsed statement + startLineNum int // the line number we began parsing the most recent token at + lineNum int // the current line being parsed in the input + quoteChar byte // the opening quote character of the current quote being parsed, or 0 if the current parse location isn't inside a quoted string + lastChar byte // the last character parsed + ignoreNextChar bool // whether to ignore the next character + numConsecutiveBackslashes int // the number of consecutive backslashes encountered + seenNonWhitespaceChar bool // whether we have encountered a non-whitespace character since we returned the last token } func NewSqlStatementScanner(input io.Reader) *statementScanner { @@ -23,22 +26,19 @@ func NewSqlStatementScanner(input io.Reader) *statementScanner { s := &statementScanner{ Scanner: scanner, + lineNum: 1, + // nextStatementLineNum: 1, } scanner.Split(s.scanStatements) return s } -func (s *statementScanner) Scan() bool { - s.lineNum++ - scanned := s.Scanner.Scan() - return scanned -} - const ( sQuote byte = '\'' dQuote = '"' backslash = '\\' + backtick = '`' ) // ScanStatements is a split function for a Scanner that returns each SQL statement in the input as a token. @@ -47,20 +47,29 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, return 0, nil, nil } - for i := range data { + s.startLineNum = s.lineNum + for i := range data { if !s.ignoreNextChar { + // this doesn't handle unicode characters correctly and will break on some things, but it's only used for line + // number reporting. + if !s.seenNonWhitespaceChar && !unicode.IsSpace(rune(data[i])) { + s.seenNonWhitespaceChar = true + s.statementStartLine = s.lineNum + } + switch data[i] { case '\n': s.lineNum++ case ';': if s.quoteChar == 0 { + s.startLineNum = s.lineNum _, _, _ = s.resetState() return i + 1, data[0:i], nil } case backslash: s.numConsecutiveBackslashes++ - case sQuote, dQuote: + case sQuote, dQuote, backtick: numConsecutiveBackslashes := s.numConsecutiveBackslashes s.numConsecutiveBackslashes = 0 @@ -124,5 +133,8 @@ func (s *statementScanner) resetState() (advance int, token []byte, err error) { s.numConsecutiveBackslashes = 0 s.ignoreNextChar = false s.lastChar = 0 + // rewind the line number to where we started parsing this token + s.lineNum = s.startLineNum + s.seenNonWhitespaceChar = false return 0, nil, nil } \ No newline at end of file diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go index ccf10d02b2..046a11e255 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner_test.go +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -11,6 +11,7 @@ func TestScanStatements(t *testing.T) { type testcase struct { input string statements []string + lineNums []int } // Some of these include malformed input (e.g. strings that aren't properly terminated) @@ -35,6 +36,15 @@ func TestScanStatements(t *testing.T) { `select ";\;`, }, }, + { + input: `select ''';;'; select ";\; +;`, + statements: []string{ + `select ''';;'`, + `select ";\; +;`, + }, + }, { input: `select '\\'''; select '";";'; select 1`, statements: []string{ @@ -107,11 +117,38 @@ a from b; select 1`, "select\na from b", "select 1", }, + lineNums: []int { + 1, 1, 2, 3, + }, + }, + { + input: "create table dumb (`hell\\`o;` int primary key);", + statements: []string{ + "create table dumb (`hell\\`o;` int primary key)", + }, + }, + { + input: "create table dumb (`hell``o;` int primary key); select \n" + + "baz from foo;\n" + + "\n" + + "select\n" + + "a from b; select 1\n\n", + statements: []string{ + "create table dumb (`hell``o;` int primary key)", + "select \nbaz from foo", + "select\na from b", + "select 1", + }, + lineNums: []int { + 1, 1, 4, 5, + }, }, { input: `insert into foo values ('a', "b;", 'c;;"" '); update foo set baz = bar, qux = '"hello"""' where xyzzy = ";;';'"; + + create table foo (a int not null default ';', primary key (a));`, statements: []string{ @@ -122,6 +159,9 @@ qux = '"hello"""' where xyzzy = ";;';'"`, `create table foo (a int not null default ';', primary key (a))`, }, + lineNums: []int { + 1, 2, 6, + }, }, } @@ -133,6 +173,11 @@ primary key (a))`, for scanner.Scan() { require.True(t, i < len(tt.statements)) assert.Equal(t, tt.statements[i], strings.TrimSpace(scanner.Text())) + if tt.lineNums != nil { + assert.Equal(t, tt.lineNums[i], scanner.statementStartLine) + } else { + assert.Equal(t, 1, scanner.statementStartLine) + } i++ } From 5f763c240af6bd0529f077e2c607c37f9e33d919 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 20:19:52 -0700 Subject: [PATCH 08/16] Got rid of unnecessary struct vars, made local Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index 470b66c220..c3999b5c48 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -10,12 +10,7 @@ type statementScanner struct { *bufio.Scanner statementStartLine int // the line number of the first line of the last parsed statement startLineNum int // the line number we began parsing the most recent token at - lineNum int // the current line being parsed in the input - quoteChar byte // the opening quote character of the current quote being parsed, or 0 if the current parse location isn't inside a quoted string - lastChar byte // the last character parsed - ignoreNextChar bool // whether to ignore the next character - numConsecutiveBackslashes int // the number of consecutive backslashes encountered - seenNonWhitespaceChar bool // whether we have encountered a non-whitespace character since we returned the last token + lineNum int // the current line number being parsed } func NewSqlStatementScanner(input io.Reader) *statementScanner { @@ -47,14 +42,22 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, return 0, nil, nil } + var ( + quoteChar byte // the opening quote character of the current quote being parsed, or 0 if the current parse location isn't inside a quoted string + lastChar byte // the last character parsed + ignoreNextChar bool // whether to ignore the next character + numConsecutiveBackslashes int // the number of consecutive backslashes encountered + seenNonWhitespaceChar bool // whether we have encountered a non-whitespace character since we returned the last token + ) + s.startLineNum = s.lineNum for i := range data { - if !s.ignoreNextChar { + if !ignoreNextChar { // this doesn't handle unicode characters correctly and will break on some things, but it's only used for line // number reporting. - if !s.seenNonWhitespaceChar && !unicode.IsSpace(rune(data[i])) { - s.seenNonWhitespaceChar = true + if !seenNonWhitespaceChar && !unicode.IsSpace(rune(data[i])) { + seenNonWhitespaceChar = true s.statementStartLine = s.lineNum } @@ -62,39 +65,39 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, case '\n': s.lineNum++ case ';': - if s.quoteChar == 0 { + if quoteChar == 0 { s.startLineNum = s.lineNum _, _, _ = s.resetState() return i + 1, data[0:i], nil } case backslash: - s.numConsecutiveBackslashes++ + numConsecutiveBackslashes++ case sQuote, dQuote, backtick: - numConsecutiveBackslashes := s.numConsecutiveBackslashes - s.numConsecutiveBackslashes = 0 + prevNumConsecutiveBackslashes := numConsecutiveBackslashes + numConsecutiveBackslashes = 0 // escaped quote character - if s.lastChar == backslash && numConsecutiveBackslashes%2 == 1 { + if lastChar == backslash && prevNumConsecutiveBackslashes%2 == 1 { break } // currently in a quoted string - if s.quoteChar != 0 { + if quoteChar != 0 { // end quote or two consecutive quote characters (a form of escaping quote chars) - if s.quoteChar == data[i] { + if quoteChar == data[i] { var nextChar byte = 0 if i+1 < len(data) { nextChar = data[i+1] } - if nextChar == s.quoteChar { + if nextChar == quoteChar { // escaped quote. skip the next character - s.ignoreNextChar = true + ignoreNextChar = true break } else if atEOF || i+1 < len(data) { // end quote - s.quoteChar = 0 + quoteChar = 0 break } else { // need more data to make a decision @@ -107,15 +110,15 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, } // open quote - s.quoteChar = data[i] + quoteChar = data[i] default: - s.numConsecutiveBackslashes = 0 + numConsecutiveBackslashes = 0 } } else { - s.ignoreNextChar = false + ignoreNextChar = false } - s.lastChar = data[i] + lastChar = data[i] } // If we're at EOF, we have a final, non-terminated line. Return it. @@ -129,12 +132,7 @@ func (s *statementScanner) scanStatements(data []byte, atEOF bool) (advance int, // resetState resets the internal state of the scanner and returns the "more data" response for a split function func (s *statementScanner) resetState() (advance int, token []byte, err error) { - s.quoteChar = 0 - s.numConsecutiveBackslashes = 0 - s.ignoreNextChar = false - s.lastChar = 0 // rewind the line number to where we started parsing this token s.lineNum = s.startLineNum - s.seenNonWhitespaceChar = false return 0, nil, nil } \ No newline at end of file From 38c309548c479df9a65413cd1c9d440d4fde5f66 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 20:20:35 -0700 Subject: [PATCH 09/16] Typo Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index c3999b5c48..df9ba60f2d 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -22,7 +22,6 @@ func NewSqlStatementScanner(input io.Reader) *statementScanner { s := &statementScanner{ Scanner: scanner, lineNum: 1, - // nextStatementLineNum: 1, } scanner.Split(s.scanStatements) From 8ddad0bb8dd34c23755d18a35a042d23ca796bbb Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 20:33:50 -0700 Subject: [PATCH 10/16] Unskipped and updated bats test for batch insert failure Signed-off-by: Zach Musgrave --- bats/sql-batch.bats | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/bats/sql-batch.bats b/bats/sql-batch.bats index 6b8d1eb058..d0d8ad9080 100644 --- a/bats/sql-batch.bats +++ b/bats/sql-batch.bats @@ -52,7 +52,25 @@ insert into test values poop; SQL [ "$status" -ne 0 ] [[ "$output" =~ "Error processing batch" ]] || false - skip "No line number and query on error" - [[ "$output" =~ " 3 " ]] || false - [[ "$output" =~ "insert into test values poop;" ]] || false + [[ "$output" =~ "error on line 3 for query" ]] || false + [[ "$output" =~ "insert into test values poop" ]] || false + + run dolt sql < Date: Wed, 22 Apr 2020 20:34:26 -0700 Subject: [PATCH 11/16] Slightly more robust test Signed-off-by: Zach Musgrave --- bats/sql-batch.bats | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bats/sql-batch.bats b/bats/sql-batch.bats index d0d8ad9080..4559e1c250 100644 --- a/bats/sql-batch.bats +++ b/bats/sql-batch.bats @@ -68,6 +68,8 @@ test values (2,0,0,0,0,0) insert into test values poop; + +insert into test values (3,0,0,0,0,0); SQL [ "$status" -ne 0 ] [[ "$output" =~ "Error processing batch" ]] || false From dcfa6b3f7062d2587f67040f112b176722b57e75 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 20:35:19 -0700 Subject: [PATCH 12/16] Print out line numbers for batch processing errors Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/sql.go b/go/cmd/dolt/commands/sql.go index 1771fade4e..26e4c9d103 100644 --- a/go/cmd/dolt/commands/sql.go +++ b/go/cmd/dolt/commands/sql.go @@ -402,7 +402,7 @@ func CollectDBs(mrEnv env.MultiRepoEnv, createDB createDBFunc) []dsqle.Database return dbs } -func formatQueryError(query string, err error) errhand.VerboseError { +func formatQueryError(message string, err error) errhand.VerboseError { const ( maxStatementLen = 128 maxPosWhenTruncated = 64 @@ -454,6 +454,9 @@ func formatQueryError(query string, err error) errhand.VerboseError { return verrBuilder.Build() } else { + if len(message) > 0 { + err = fmt.Errorf("%s: %s", message, err.Error()) + } return errhand.VerboseErrorFromError(err) } } @@ -562,7 +565,9 @@ func runBatchMode(ctx *sql.Context, se *sqlEngine, input io.Reader) error { continue } if err := processBatchQuery(ctx, query, se); err != nil { - verr := formatQueryError(query, err) + // TODO: this line number will not be accurate for errors that occur when flushing a batch of inserts (as opposed + // to processing the query) + verr := formatQueryError(fmt.Sprintf("error on line %d for query %s", scanner.statementStartLine, query), err) cli.PrintErrln(verr.Verbose()) return err } From ba85935138a19ab636b27fe38bf195a5e306f58d Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 22 Apr 2020 20:37:50 -0700 Subject: [PATCH 13/16] Formatting Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 16 +++++------ .../commands/sql_statement_scanner_test.go | 27 ++++++++++--------- .../doltcore/sql/sqltestutil/selectqueries.go | 12 ++++----- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index df9ba60f2d..067a4be53f 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -8,9 +8,9 @@ import ( type statementScanner struct { *bufio.Scanner - statementStartLine int // the line number of the first line of the last parsed statement - startLineNum int // the line number we began parsing the most recent token at - lineNum int // the current line number being parsed + statementStartLine int // the line number of the first line of the last parsed statement + startLineNum int // the line number we began parsing the most recent token at + lineNum int // the current line number being parsed } func NewSqlStatementScanner(input io.Reader) *statementScanner { @@ -29,10 +29,10 @@ func NewSqlStatementScanner(input io.Reader) *statementScanner { } const ( - sQuote byte = '\'' - dQuote = '"' - backslash = '\\' - backtick = '`' + sQuote byte = '\'' + dQuote = '"' + backslash = '\\' + backtick = '`' ) // ScanStatements is a split function for a Scanner that returns each SQL statement in the input as a token. @@ -134,4 +134,4 @@ func (s *statementScanner) resetState() (advance int, token []byte, err error) { // rewind the line number to where we started parsing this token s.lineNum = s.startLineNum return 0, nil, nil -} \ No newline at end of file +} diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go index 046a11e255..f6db9e0371 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner_test.go +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -1,21 +1,22 @@ package commands import ( - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "strings" "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestScanStatements(t *testing.T) { type testcase struct { - input string + input string statements []string - lineNums []int + lineNums []int } // Some of these include malformed input (e.g. strings that aren't properly terminated) - testcases := []testcase { + testcases := []testcase{ { input: `insert into foo values (";;';'");`, statements: []string{ @@ -117,7 +118,7 @@ a from b; select 1`, "select\na from b", "select 1", }, - lineNums: []int { + lineNums: []int{ 1, 1, 2, 3, }, }, @@ -129,17 +130,17 @@ a from b; select 1`, }, { input: "create table dumb (`hell``o;` int primary key); select \n" + - "baz from foo;\n" + - "\n" + - "select\n" + - "a from b; select 1\n\n", + "baz from foo;\n" + + "\n" + + "select\n" + + "a from b; select 1\n\n", statements: []string{ "create table dumb (`hell``o;` int primary key)", "select \nbaz from foo", "select\na from b", "select 1", }, - lineNums: []int { + lineNums: []int{ 1, 1, 4, 5, }, }, @@ -159,7 +160,7 @@ qux = '"hello"""' where xyzzy = ";;';'"`, `create table foo (a int not null default ';', primary key (a))`, }, - lineNums: []int { + lineNums: []int{ 1, 2, 6, }, }, @@ -184,4 +185,4 @@ primary key (a))`, require.NoError(t, scanner.Err()) }) } -} \ No newline at end of file +} diff --git a/go/libraries/doltcore/sql/sqltestutil/selectqueries.go b/go/libraries/doltcore/sql/sqltestutil/selectqueries.go index c2a29a34c0..2c157b3687 100644 --- a/go/libraries/doltcore/sql/sqltestutil/selectqueries.go +++ b/go/libraries/doltcore/sql/sqltestutil/selectqueries.go @@ -952,9 +952,9 @@ var CaseSensitivityTests = []SelectTest{ AdditionalSetup: CreateTableFn("MiXeDcAsE", NewSchema("test", types.StringKind), NewRow(types.String("1"))), - Query: "select mixedcAse.TeSt from MIXEDCASE", - ExpectedSchema: NewResultSetSchema("TeSt", types.StringKind), - ExpectedRows: Rs(NewResultSetRow(types.String("1"))), + Query: "select mixedcAse.TeSt from MIXEDCASE", + ExpectedSchema: NewResultSetSchema("TeSt", types.StringKind), + ExpectedRows: Rs(NewResultSetRow(types.String("1"))), SkipOnSqlEngine: true, }, { @@ -972,9 +972,9 @@ var CaseSensitivityTests = []SelectTest{ AdditionalSetup: CreateTableFn("MiXeDcAsE", NewSchema("test", types.StringKind), NewRow(types.String("1"))), - Query: "select mC.TeSt from MIXEDCASE as MC", - ExpectedSchema: NewResultSetSchema("TeSt", types.StringKind), - ExpectedRows: Rs(NewResultSetRow(types.String("1"))), + Query: "select mC.TeSt from MIXEDCASE as MC", + ExpectedSchema: NewResultSetSchema("TeSt", types.StringKind), + ExpectedRows: Rs(NewResultSetRow(types.String("1"))), SkipOnSqlEngine: true, }, { From 11c7670dcb1f6492386dbb4399a1112b2ceeb540 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Thu, 23 Apr 2020 09:39:20 -0700 Subject: [PATCH 14/16] Fixed tags test (error message format change) Signed-off-by: Zach Musgrave --- bats/tags.bats | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bats/tags.bats b/bats/tags.bats index ec18321305..0579471c10 100644 --- a/bats/tags.bats +++ b/bats/tags.bats @@ -383,12 +383,12 @@ CREATE TABLE test ( PRIMARY KEY (pk)); SQL [ $status -ne 0 ] - [[ "${lines[0]}" =~ "Cannot create column pk, the tag 1234 was already used in table aaa" ]] || false - [[ "${lines[1]}" =~ "Cannot create column c1, the tag 5678 was already used in table bbb" ]] || false + [[ "$output" =~ "Cannot create column pk, the tag 1234 was already used in table aaa" ]] || false + [[ "$output" =~ "Cannot create column c1, the tag 5678 was already used in table bbb" ]] || false run dolt sql -q "ALTER TABLE aaa ADD COLUMN c1 INT COMMENT 'tag:5678';" [ $status -ne 0 ] - [[ "${lines[0]}" =~ "Cannot create column c1, the tag 5678 was already used in table bbb" ]] || false + [[ "$output" =~ "Cannot create column c1, the tag 5678 was already used in table bbb" ]] || false } @test "Deterministic tag generation produces consistent results" { From 0467446dd8d359fdb922d7406e5f86668387da29 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Thu, 23 Apr 2020 09:39:51 -0700 Subject: [PATCH 15/16] Don't echo the failed query in the error message except during batch mode Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/cmd/dolt/commands/sql.go b/go/cmd/dolt/commands/sql.go index 26e4c9d103..0b3b41b4ba 100644 --- a/go/cmd/dolt/commands/sql.go +++ b/go/cmd/dolt/commands/sql.go @@ -369,7 +369,7 @@ func execQuery(sqlCtx *sql.Context, mrEnv env.MultiRepoEnv, roots map[string]*do sqlSch, rowIter, err := processQuery(sqlCtx, query, se) if err != nil { - verr := formatQueryError(query, err) + verr := formatQueryError("", err) return nil, verr } @@ -641,7 +641,7 @@ func runShell(ctx *sql.Context, se *sqlEngine, mrEnv env.MultiRepoEnv) error { } if sqlSch, rowIter, err := processQuery(ctx, query, se); err != nil { - verr := formatQueryError(query, err) + verr := formatQueryError("", err) shell.Println(verr.Verbose()) } else if rowIter != nil { defer rowIter.Close() From 50fc95838896bd9a4b760ce2ac93067b5438e64e Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Thu, 23 Apr 2020 09:58:51 -0700 Subject: [PATCH 16/16] copyright headers Signed-off-by: Zach Musgrave --- go/cmd/dolt/commands/sql_statement_scanner.go | 14 ++++++++++++++ go/cmd/dolt/commands/sql_statement_scanner_test.go | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index 067a4be53f..a5820b8fe1 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -1,3 +1,17 @@ +// Copyright 2020 Liquidata, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package commands import ( diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go index f6db9e0371..a76d609b1a 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner_test.go +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -1,3 +1,17 @@ +// Copyright 2020 Liquidata, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package commands import (