From d8bc658df78beac0f95d9292d026c7f8b90adfd8 Mon Sep 17 00:00:00 2001 From: Moritz Hader Date: Tue, 8 Jul 2014 16:08:12 +0200 Subject: [PATCH] Fixed a problem with the import of .csv-files Up until now, DBBrowserDB::decodeCSV loaded the csv-file character by character using the getChar() method of the QFile-class. Unfortunately, this approach caused multibyte characters, as used in UTF-8 encoding, to be split and displayed incorrectly. The fix uses the QTextStream-class to load the file line by line. Each line is then iterated character by character, and the old algorithm is applied to every character. Using this approach, the characters are loaded and decoded properly. Splitting multibyte characters is thus prevented. --- src/sqlitedb.cpp | 81 ++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/src/sqlitedb.cpp b/src/sqlitedb.cpp index 88bc385b..7acba6d9 100644 --- a/src/sqlitedb.cpp +++ b/src/sqlitedb.cpp @@ -839,16 +839,32 @@ QStringList DBBrowserDB::decodeCSV(const QString & csvfilename, char sep, char q QStringList result; QString current = ""; *numfields = 0; + int recs = 0; - if ( file.open( QIODevice::ReadWrite ) ) { - QProgressDialog progress(QObject::tr("Decoding CSV file..."), QObject::tr("Cancel"), 0, file.size()); - progress.setWindowModality(Qt::ApplicationModal); - char c=0; + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { + return result; + } + + //Other than QFile, the QTextStream-class properly detects 2-Byte QChars and converts them accordingly (UTF-8) + QTextStream inStream(&file); + + QProgressDialog progress(QObject::tr("Decoding CSV file..."), QObject::tr("Cancel"), 0, file.size()); + progress.setWindowModality(Qt::ApplicationModal); + + QString line = ""; + inStream >> line; + + while (!inStream.atEnd()) { + + //For every Line, we iterate over the single QChars + QString::ConstIterator i = line.begin(); bool inquotemode = false; bool inescapemode = false; - int recs = 0; - 
while(file.getChar(&c)) - { + + while (i != line.end()) { + + QChar c = *i; + if (c==quote){ if (inquotemode){ if (inescapemode){ @@ -857,14 +873,15 @@ QStringList DBBrowserDB::decodeCSV(const QString & csvfilename, char sep, char q current.append(c); } else { //are we escaping, or just finishing the quote? - char d; - file.getChar(&d); + i++; //Performing lookahead using the iterator + QChar d = *i; + if (d==quote) { inescapemode = true; } else { inquotemode = false; } - file.ungetChar(d); + i--; } } else { inquotemode = true; @@ -882,22 +899,7 @@ QStringList DBBrowserDB::decodeCSV(const QString & csvfilename, char sep, char q if (inquotemode){ //add the newline current.append(c); - } else { - //not quoting, start new record - result << current; - current = ""; - //for the first line, store the field count - if (*numfields == 0){ - *numfields = result.count(); - } - recs++; - progress.setValue(file.pos()); - qApp->processEvents(); - if (progress.wasCanceled()) break; - if ((recs>maxrecords)&&(maxrecords!=-1)) { - break; - } - } + } } else if (c==13) { if (inquotemode){ //add the carrier return if in quote mode only @@ -906,13 +908,32 @@ QStringList DBBrowserDB::decodeCSV(const QString & csvfilename, char sep, char q } else {//another character type current.append(c); } + + i++; } - file.close(); - //do we still have a last result, not appended? - //proper csv files should end with a linefeed , so this is not necessary - //if (current.length()>0) result << current; + + //Moved this block from (c==10), as line-separation is now handeled by the outer-loop + result << current; + current = ""; + + if (*numfields == 0){ + *numfields = result.count(); + } + recs++; + progress.setValue(file.pos()); + qApp->processEvents(); + if (progress.wasCanceled()) break; + if ((recs>maxrecords)&&(maxrecords!=-1)) { + break; + } + + inStream >> line; } + + file.close(); + return result; + } QString DBBrowserDB::getPragma(const QString& pragma)