diff --git a/src/ImportCsvDialog.cpp b/src/ImportCsvDialog.cpp index f2e37047..4c47545a 100644 --- a/src/ImportCsvDialog.cpp +++ b/src/ImportCsvDialog.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -85,12 +84,10 @@ namespace { void rollback( ImportCsvDialog* dialog, DBBrowserDB* pdb, - QProgressDialog& progress, const QString& savepointName, size_t nRecord, const QString& message) { - progress.hide(); QApplication::restoreOverrideCursor(); // restore original cursor if(!message.isEmpty()) { @@ -110,7 +107,7 @@ public: explicit CSVImportProgress(size_t filesize) { m_pProgressDlg = new QProgressDialog( - QObject::tr("Decoding CSV file..."), + QObject::tr("Importing CSV file..."), QObject::tr("Cancel"), 0, filesize); @@ -183,16 +180,10 @@ void ImportCsvDialog::updatePreview() ui->editCustomSeparator->setVisible(ui->comboSeparator->currentIndex() == ui->comboSeparator->count()-1); ui->editCustomEncoding->setVisible(ui->comboEncoding->currentIndex() == ui->comboEncoding->count()-1); - // Get preview data - QFile file(selectedFile); - file.open(QIODevice::ReadOnly); - - CSVParser csv(ui->checkBoxTrimFields->isChecked(), currentSeparatorChar(), currentQuoteChar()); - - QTextStream tstream(&file); - tstream.setCodec(currentEncoding().toUtf8()); - csv.parse(tstream, 20); - file.close(); + // Reset preview widget + ui->tablePreview->clear(); + ui->tablePreview->setColumnCount(0); + ui->tablePreview->setRowCount(0); // Analyse CSV file sqlb::FieldVector fieldList = generateFieldList(selectedFile); @@ -205,36 +196,40 @@ void ImportCsvDialog::updatePreview() if(fieldList.size() == 0) return; - // Use first row as header if necessary - CSVParser::TCSVResult::const_iterator itBegin = csv.csv().begin(); - if(ui->checkboxHeader->isChecked()) - { - ui->tablePreview->setHorizontalHeaderLabels(*itBegin); - ++itBegin; - } + // Set horizontal header data + QStringList horizontalHeader; + foreach(const sqlb::FieldPtr& field, fieldList) + horizontalHeader.push_back(field->name()); + ui->tablePreview->setHorizontalHeaderLabels(horizontalHeader); - // Fill data section - ui->tablePreview->setRowCount(std::distance(itBegin, csv.csv().end())); + // Parse file + parseCSV(selectedFile, [this](size_t rowNum, const QStringList& data) -> bool { + // Skip first row if it is to be used as header + if(rowNum == 0 && ui->checkboxHeader->isChecked()) + return true; - for(CSVParser::TCSVResult::const_iterator ct = itBegin; - ct != csv.csv().end(); - ++ct) - { - for(QStringList::const_iterator it = ct->begin(); it != ct->end(); ++it) + // Decrease the row number by one if the header checkbox is checked to take into account that the first row was used for the table header labels + // and therefore all data rows move one row up. + if(ui->checkboxHeader->isChecked()) + rowNum--; + + // Fill data section + ui->tablePreview->setRowCount(ui->tablePreview->rowCount() + 1); + for(QStringList::const_iterator it=data.begin();it!=data.end();++it) { - int rowNum = std::distance(itBegin, ct); - if(it == ct->begin()) - { - ui->tablePreview->setVerticalHeaderItem( - rowNum, - new QTableWidgetItem(QString::number(rowNum + 1))); - } + // Generate vertical header items + if(it == data.begin()) + ui->tablePreview->setVerticalHeaderItem(rowNum, new QTableWidgetItem(QString::number(rowNum + 1))); + + // Add table item ui->tablePreview->setItem( rowNum, - std::distance(ct->begin(), it), + std::distance(data.begin(), it), new QTableWidgetItem(*it)); } - } + + return true; + }, 20); } void ImportCsvDialog::checkInput() @@ -325,69 +320,61 @@ void ImportCsvDialog::matchSimilar() checkInput(); } -CSVParser ImportCsvDialog::parseCSV(const QString &fileName, qint64 count) +CSVParser::ParserResult ImportCsvDialog::parseCSV(const QString &fileName, std::function rowFunction, qint64 count) { // Parse all csv data QFile file(fileName); file.open(QIODevice::ReadOnly); CSVParser csv(ui->checkBoxTrimFields->isChecked(), currentSeparatorChar(), currentQuoteChar()); - // If count is one, we only want the header, no need to see progress - if (count != 1) csv.setCSVProgress(new CSVImportProgress(file.size())); + + // Only show progress dialog if we parse all rows. The assumption here is that if a row count limit has been set, it won't be a very high one. + if(count == -1) + csv.setCSVProgress(new CSVImportProgress(file.size())); QTextStream tstream(&file); tstream.setCodec(currentEncoding().toUtf8()); - csv.parse(tstream, count); - file.close(); - return csv; + return csv.parse(rowFunction, tstream, count); } sqlb::FieldVector ImportCsvDialog::generateFieldList(const QString& filename) { + sqlb::FieldVector fieldList; // List of fields in the file + // Parse the first couple of records of the CSV file and only analyse them - CSVParser parser = parseCSV(filename, 20); - - // If there is no data, we don't return any fields - if(parser.csv().size() == 0) - return sqlb::FieldVector(); - - // How many columns are there in the CSV file? - int columns = 0; - for(int i=0;i columns) - columns = parser.csv().at(i).size(); - } - - // Generate field names. These are either taken from the first CSV row or are generated in the format of "fieldXY" depending on the user input - sqlb::FieldVector fieldList; - for(int i=0;icheckboxHeader->isChecked() && i < parser.csv().at(0).size()) + parseCSV(filename, [this, &fieldList](size_t rowNum, const QStringList& data) -> bool { + // Has this row more columns than the previous one? Then add more fields to the field list as necessary. + for(int i=fieldList.size();icheckboxHeader->isChecked()) + { + // Take field name from CSV and remove invalid characters + fieldname = data.at(i); + fieldname.replace("`", ""); + fieldname.replace(" ", ""); + fieldname.replace('"', ""); + fieldname.replace("'",""); + fieldname.replace(",",""); + fieldname.replace(";",""); + } + + // If we don't have a field name by now, generate one + if(fieldname.isEmpty()) + fieldname = QString("field%1").arg(i+1); + + // Add field to the column list + fieldList.push_back(sqlb::FieldPtr(new sqlb::Field(fieldname, ""))); + } // TODO Here's also the place to do some sort of data type analysation of the CSV data - // Add field to the column list - fieldList.push_back(sqlb::FieldPtr(new sqlb::Field(fieldname, ""))); - } + // All good + return true; + }, 20); return fieldList; } @@ -396,6 +383,7 @@ void ImportCsvDialog::importCsv(const QString& fileName, const QString &name) { #ifdef CSV_BENCHMARK // If benchmark mode is enabled start measuring the performance now + qint64 timesRowFunction = 0; QElapsedTimer timer; timer.start(); #endif @@ -415,19 +403,8 @@ void ImportCsvDialog::importCsv(const QString& fileName, const QString &name) // Analyse CSV file sqlb::FieldVector fieldList = generateFieldList(fileName); - - // Parse entire file - CSVParser csv = parseCSV(fileName); - if (csv.csv().size() == 0) return; - -#ifdef CSV_BENCHMARK - qint64 timer_after_parsing = timer.elapsed(); -#endif - - // Show progress dialog - QProgressDialog progress(tr("Inserting data..."), tr("Cancel"), 0, csv.csv().size()); - progress.setWindowModality(Qt::ApplicationModal); - progress.show(); + if(fieldList.size() == 0) + return; // Are we importing into an existing table? bool importToExistingTable = false; @@ -452,22 +429,18 @@ void ImportCsvDialog::importCsv(const QString& fileName, const QString &name) } } -#ifdef CSV_BENCHMARK - qint64 timer_before_insert = timer.elapsed(); -#endif - // Create a savepoint, so we can rollback in case of any errors during importing // db needs to be saved or an error will occur QString restorepointName = pdb->generateSavepointName("csvimport"); if(!pdb->setSavepoint(restorepointName)) - return rollback(this, pdb, progress, restorepointName, 0, tr("Creating restore point failed: %1").arg(pdb->lastError())); + return rollback(this, pdb, restorepointName, 0, tr("Creating restore point failed: %1").arg(pdb->lastError())); // Create table QStringList nullValues; if(!importToExistingTable) { if(!pdb->createTable(sqlb::ObjectIdentifier("main", tableName), fieldList)) - return rollback(this, pdb, progress, restorepointName, 0, tr("Creating the table failed: %1").arg(pdb->lastError())); + return rollback(this, pdb, restorepointName, 0, tr("Creating the table failed: %1").arg(pdb->lastError())); } else { // Importing into an existing table. So find out something about it's structure. @@ -497,21 +470,29 @@ void ImportCsvDialog::importCsv(const QString& fileName, const QString &name) sqlite3_stmt* stmt; sqlite3_prepare_v2(pdb->_db, sQuery.toUtf8(), sQuery.toUtf8().length(), &stmt, nullptr); - // now lets import all data, one row at a time - CSVParser::TCSVResult::const_iterator itBegin = csv.csv().begin(); - if(ui->checkboxHeader->isChecked()) // If the first row contains the field names we should skip it here because this is the data import - ++itBegin; - for(CSVParser::TCSVResult::const_iterator it = itBegin; - it != csv.csv().end(); - ++it) - { + // Parse entire file + size_t lastRowNum = 0; + CSVParser::ParserResult result = parseCSV(fileName, [&](size_t rowNum, const QStringList& data) -> bool { + // Process the parser results row by row + +#ifdef CSV_BENCHMARK + qint64 timeAtStartOfRowFunction = timer.elapsed(); +#endif + + // Save row num for later use. This is used in the case of an error to tell the user in which row the error ocurred + lastRowNum = rowNum; + + // If this is the first row and we want to use the first row as table header, skip it now because this is the data import, not the header parsing + if(rowNum == 0 && ui->checkboxHeader->isChecked()) + return true; + // Bind all values unsigned int bound_fields = 0; - for(int i=0;isize();i++,bound_fields++) + for(int i=0;iat(i).isEmpty() && nullValues.size() > i) + if(importToExistingTable && data.at(i).isEmpty() && nullValues.size() > i) { // This is an empty value. We'll need to look up how to handle it depending on the field to be inserted into. QString val = nullValues.at(i); @@ -519,46 +500,48 @@ void ImportCsvDialog::importCsv(const QString& fileName, const QString &name) sqlite3_bind_text(stmt, i+1, val.toUtf8(), val.toUtf8().size(), SQLITE_TRANSIENT); } else { // This is a non-empty value. Just add it to the statement - sqlite3_bind_text(stmt, i+1, static_cast(it->at(i).toUtf8()), it->at(i).toUtf8().size(), SQLITE_TRANSIENT); + sqlite3_bind_text(stmt, i+1, static_cast(data.at(i).toUtf8()), data.at(i).toUtf8().size(), SQLITE_TRANSIENT); } } // Insert row if(sqlite3_step(stmt) != SQLITE_DONE) - { - sqlite3_finalize(stmt); - return rollback(this, pdb, progress, restorepointName, std::distance(itBegin, it) + 1, tr("Inserting row failed: %1").arg(pdb->lastError())); - } + return false; // Reset statement for next use. Also reset all bindings to NULL. This is important, so we don't need to bind missing columns or empty values in NULL // columns manually. sqlite3_reset(stmt); sqlite3_clear_bindings(stmt); - // Update progress bar and check if cancel button was clicked - unsigned int prog = std::distance(csv.csv().begin(), it); - if(prog % 100 == 0) - progress.setValue(prog); - if(progress.wasCanceled()) - { - sqlite3_finalize(stmt); - return rollback(this, pdb, progress, restorepointName, std::distance(itBegin, it) + 1, ""); - } +#ifdef CSV_BENCHMARK + timesRowFunction += timer.elapsed() - timeAtStartOfRowFunction; +#endif + + return true; + }); + + // Success? + if(result != CSVParser::ParserResult::ParserResultSuccess) + { + // Some error occurred or the user cancelled the action + + // Rollback the entire import. If the action was cancelled, don't show an error message. If it errored, show an error message. + sqlite3_finalize(stmt); + if(result == CSVParser::ParserResult::ParserResultCancelled) + return rollback(this, pdb, restorepointName, 0, QString()); + else + return rollback(this, pdb, restorepointName, lastRowNum, tr("Inserting row failed: %1").arg(pdb->lastError())); } // Clean up prepared statement sqlite3_finalize(stmt); #ifdef CSV_BENCHMARK - // If benchmark mode is enabled calculate the results now - qint64 timer_after_insert = timer.elapsed(); - QMessageBox::information(this, qApp->applicationName(), - tr("Importing the file '%1' took %2ms. The parser took %3ms and the insertion took %4ms.") + tr("Importing the file '%1' took %2ms. Of this %3ms were spent in the row function.") .arg(fileName) - .arg(timer_after_insert) - .arg(timer_after_parsing) - .arg(timer_after_insert-timer_before_insert)); + .arg(timer.elapsed()) + .arg(timesRowFunction)); #endif } diff --git a/src/ImportCsvDialog.h b/src/ImportCsvDialog.h index c6780753..3eec6cb2 100644 --- a/src/ImportCsvDialog.h +++ b/src/ImportCsvDialog.h @@ -1,12 +1,14 @@ #ifndef IMPORTCSVDIALOG_H #define IMPORTCSVDIALOG_H +#include "csvparser.h" #include "sqlitetypes.h" + #include +#include class DBBrowserDB; class QCompleter; -class CSVParser; class QListWidgetItem; namespace Ui { @@ -37,7 +39,7 @@ private: DBBrowserDB* pdb; QCompleter* encodingCompleter; - CSVParser parseCSV(const QString &f, qint64 count = -1); + CSVParser::ParserResult parseCSV(const QString& fileName, std::function rowFunction, qint64 count = -1); sqlb::FieldVector generateFieldList(const QString& filename); void importCsv(const QString& f, const QString &n = QString()); diff --git a/src/csvparser.cpp b/src/csvparser.cpp index 55e1825b..cdd6e0c1 100644 --- a/src/csvparser.cpp +++ b/src/csvparser.cpp @@ -28,9 +28,10 @@ inline void addColumn(QStringList& r, QString& field, bool trim) } } -bool CSVParser::parse(QTextStream& stream, qint64 nMaxRecords) +CSVParser::ParserResult CSVParser::parse(csvRowFunction insertFunction, QTextStream& stream, qint64 nMaxRecords) { - m_vCSVData.clear(); + m_iParsedRows = 0; + m_insertFunction = insertFunction; ParseStates state = StateNormal; QString fieldbuf; QStringList record; @@ -83,14 +84,16 @@ bool CSVParser::parse(QTextStream& stream, qint64 nMaxRecords) { addColumn(record, fieldbuf, m_bTrimFields); - addRow(record); + if(!addRow(record)) + return ParserResult::ParserResultError; } } else if(c == '\n') { addColumn(record, fieldbuf, m_bTrimFields); - addRow(record); + if(!addRow(record)) + return ParserResult::ParserResultError; } else { @@ -127,7 +130,8 @@ bool CSVParser::parse(QTextStream& stream, qint64 nMaxRecords) state = StateNormal; addColumn(record, fieldbuf, m_bTrimFields); - addRow(record); + if(!addRow(record)) + return ParserResult::ParserResultError; } else if(c == '\r') { @@ -147,7 +151,8 @@ bool CSVParser::parse(QTextStream& stream, qint64 nMaxRecords) { addColumn(record, fieldbuf, m_bTrimFields); - addRow(record); + if(!addRow(record)) + return ParserResult::ParserResultError; } } else @@ -159,14 +164,14 @@ bool CSVParser::parse(QTextStream& stream, qint64 nMaxRecords) break; } - if(nMaxRecords != -1 && m_vCSVData.size() >= nMaxRecords) - return true; + if(nMaxRecords != -1 && m_iParsedRows >= nMaxRecords) + return ParserResult::ParserResultSuccess; } - if(m_pCSVProgress && m_vCSVData.size() % 100 == 0) + if(m_pCSVProgress && m_iParsedRows % 100 == 0) { if(!m_pCSVProgress->update(stream.pos())) - return false; + return ParserResult::ParserResultCancelled; } } @@ -174,11 +179,12 @@ bool CSVParser::parse(QTextStream& stream, qint64 nMaxRecords) { addColumn(record, fieldbuf, m_bTrimFields); - addRow(record); + if(!addRow(record)) + return ParserResult::ParserResultError; } if(m_pCSVProgress) m_pCSVProgress->end(); - return state == StateNormal; + return (state == StateNormal) ? ParserResult::ParserResultSuccess : ParserResult::ParserResultError; } diff --git a/src/csvparser.h b/src/csvparser.h index 6a6c9ca0..4f8fad57 100644 --- a/src/csvparser.h +++ b/src/csvparser.h @@ -2,8 +2,8 @@ #define CSVPARSER_H #include -#include #include +#include class QTextStream; @@ -25,24 +25,27 @@ public: class CSVParser { public: - typedef QVector TCSVResult; + typedef std::function csvRowFunction; CSVParser(bool trimfields = true, const QChar& fieldseparator = ',', const QChar& quotechar = '"'); ~CSVParser(); - /*! - * \brief parse the given stream - * \param stream Stream with the CSV parser - * \param nMaxRecords Max records too read, -1 if unlimited - * \return True if parsing worked. - */ - bool parse(QTextStream& stream, qint64 nMaxRecords = -1); + enum ParserResult + { + ParserResultSuccess, + ParserResultCancelled, + ParserResultError + }; /*! - * \brief csv - * \return The parse result + * \brief parse the given stream + * @param insertFunction A function pointer that is called for each parsed row. It is passed two parameters, the row number and a list of all parsed columns + * in the row. The called function may return false if an error ocurred to stop the import process. Otherwise it should return true. + * \param stream Stream with the CSV parser + * \param nMaxRecords Max records too read, -1 if unlimited + * \return ParserResult value that indicated whether action finished normally, was cancelled or errored. */ - const TCSVResult& csv() const { return m_vCSVData; } + ParserResult parse(csvRowFunction insertFunction, QTextStream& stream, qint64 nMaxRecords = -1); void setCSVProgress(CSVProgress* csvp) { m_pCSVProgress = csvp; } @@ -54,10 +57,14 @@ private: StateEndQuote }; - inline void addRow(QStringList& r) + inline bool addRow(QStringList& r) { - m_vCSVData.append(r); + if(!m_insertFunction(m_iParsedRows, r)) + return false; + r.clear(); + m_iParsedRows++; + return true; } private: @@ -65,8 +72,9 @@ private: QChar m_cFieldSeparator; QChar m_cQuoteChar; CSVProgress* m_pCSVProgress; + csvRowFunction m_insertFunction; - TCSVResult m_vCSVData; + qint64 m_iParsedRows; // Number of rows parsed so far size_t m_nBufferSize; //! internal buffer read size }; diff --git a/src/tests/TestImport.cpp b/src/tests/TestImport.cpp index d7624a17..001af450 100644 --- a/src/tests/TestImport.cpp +++ b/src/tests/TestImport.cpp @@ -4,14 +4,13 @@ #include #include #include +#include #include "csvparser.h" #include "TestImport.h" QTEST_MAIN(TestImport) -Q_DECLARE_METATYPE(CSVParser::TCSVResult) - TestImport::TestImport() { } @@ -34,9 +33,9 @@ void TestImport::csvImport() QTemporaryFile file; QVERIFY(file.open()); { - QTextStream out(&file); - out.setCodec(encoding.toUtf8()); - out << csv; + QTextStream out(&file); + out.setCodec(encoding.toUtf8()); + out << csv; } file.flush(); @@ -44,10 +43,19 @@ void TestImport::csvImport() file.seek(0); QTextStream tstream(&file); tstream.setCodec(encoding.toUtf8()); - csvparser.parse(tstream); + + QVector parsedCsv; + int parsedCsvColumns = 0; + csvparser.parse([&parsedCsv, &parsedCsvColumns](size_t /*rowNum*/, const QStringList& data) -> bool { + parsedCsv.push_back(data); + if(data.size() > parsedCsvColumns) + parsedCsvColumns = data.size(); + return true; + }, tstream); // Check return values - QCOMPARE(csvparser.csv(), result); + QCOMPARE(parsedCsvColumns, numfields); + QCOMPARE(parsedCsv, result); } void TestImport::csvImport_data() @@ -57,9 +65,9 @@ void TestImport::csvImport_data() QTest::addColumn("quote"); QTest::addColumn("encoding"); QTest::addColumn("numfields"); - QTest::addColumn("result"); + QTest::addColumn>("result"); - CSVParser::TCSVResult result; + QVector result; result.append(QStringList() << "a" << "b" << "c"); result.append(QStringList() << "d" << "e" << "f"); result.append(QStringList() << "g" << "h" << "i");