From ccb8c9ee97ab917550081001c3d64146699fd944 Mon Sep 17 00:00:00 2001 From: Martin Kleusberg Date: Mon, 15 Mar 2021 19:38:36 +0100 Subject: [PATCH] Speed up removing of comments from SQL queries This commit replaces the regular expressions for removing end of line and block comments from SQL queries provided by the user by a hand written state machine. This makes the code a lot faster, especially for longer SQL scripts with many statements and many comments in them. It is also not harder to read than the rather complex regular expressions from before - possibly even easier to read. See issue #2619. --- src/sqlitetablemodel.cpp | 78 ++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/src/sqlitetablemodel.cpp b/src/sqlitetablemodel.cpp index 1f4175c5..b3422328 100644 --- a/src/sqlitetablemodel.cpp +++ b/src/sqlitetablemodel.cpp @@ -719,58 +719,50 @@ void SqliteTableModel::updateAndRunQuery() void SqliteTableModel::removeCommentsFromQuery(QString& query) { + // Store the current size so we can easily check later if the string has been changed int oldSize = query.size(); - // first remove block comments + // This implements a simple state machine to strip the query from comments + QChar quote; + for(int i=0;i -1) + // We are currently not in quote state + + // So are we starting a quote? + if((query.at(i) == '\'' || query.at(i) == '\"' || query.at(i) == '[') && (i == 0 || query.at(i-1) != '\\')) { - result += rxSQL.cap(1) + " "; - query = rxSQL.cap(3); - } else { - result += query; - query.clear(); + // Quoted text is beginning. Switch to the quote state + + quote = query.at(i); + } else if(query.at(i) == '-' && i+1 < query.size() && query.at(i+1) == '-') { + // This is an end of line comment. Remove anything till the end of the line or the end of the string if this is the last line + + int pos_next_line_break = query.indexOf('\n', i); + if(pos_next_line_break == -1) + query = query.left(i); + else + query.remove(i, pos_next_line_break - i); // The \n is left in intentionally + } else if(query.at(i) == '/' && i+1 < query.size() && query.at(i+1) == '*') { + // This is a block comment. Remove anything till the end of the block or the end of the string if the block is not closed + int pos_end_comment = query.indexOf("*/", i); + if(pos_end_comment == -1) + query = query.left(i); + else + query.remove(i, pos_end_comment - i + 2); // Add 2 to include the */ } + } else { + // We are currently in quote state + + // If this is the closing quote character, switch back to normal state + if((query.at(i) == quote) && (i == 0 || query.at(i-1) != '\\')) + quote = 0; } - query = result; } - // deal with end-of-line comments - { - /* The regular expression for removing end of line comments works like this: - * ^((?:(?:[^'-]|-(?!-))*|(?:'[^']*'))*)(--.*)$ - * ^ $ # anchor beginning and end of string so we use it all - * ( )( ) # two separate capture groups for code and comment - * --.* # comment starts with -- and consumes everything afterwards - * (?: | )* # code is none or many strings alternating with non-strings - * (?:'[^']*') # a string is a quote, followed by none or more non-quotes, followed by a quote - * (?:[^'-]|-(?!-))* # non-string is a sequence of characters which aren't quotes or hyphens, - */ - - QRegExp rxSQL("^((?:(?:[^'-]|-(?!-))*|(?:'[^']*'))*)(--[^\\r\\n]*)([\\r\\n]*)(.*)$"); // set up regex to find end-of-line comment - QString result; - - while(query.size() != 0) - { - int pos = rxSQL.indexIn(query); - if(pos > -1) - { - result += rxSQL.cap(1) + rxSQL.cap(3); - query = rxSQL.cap(4); - } else { - result += query; - query.clear(); - } - } - - query = result.trimmed(); - } + query = query.trimmed(); if (oldSize != query.size()) { // Remove multiple line breaks that might have been created by deleting comments till the end of the line but not including the line break