// CSV parser implementation (CSVParser plus file-local helpers in an anonymous namespace).
//
// NOTE(review): this copy of the file appears to be damaged. All line breaks have been collapsed, so
// every "//" comment now swallows the code that follows it on the same physical line, and text that
// originally stood between a '<' and the next '>' seems to be missing: the second #include has no
// header name, both static_cast expressions have no target type, the loop at the end of
// increaseRowSize() runs straight into the body of the field-finalising helper, and the read loop of
// CSVParser::parse() is absent. Restore the file from version control before changing any code here.
//
// What the visible code shows:
//  - CSVParser::CSVParser stores the trim flag, field separator and quote character, sets the
//    progress handler to nullptr and the read buffer size to 4096.
//  - CSVParser::~CSVParser deletes the progress handler.
//  - addChar(field, c) appends one character to the field buffer. When the buffer is full it is
//    enlarged by 64 bytes with realloc(); the realloc() result is used without a null check.
//  - increaseRowSize(r) enlarges the field array by 5 entries with realloc() when
//    num_fields >= max_num_fields (again without a null check) and then initialises the new entries;
//    the initialisation itself is not visible in this copy.
//  - The field-finalising helper (called as addColumn(record, field, m_bTrimFields); its signature is
//    not visible here) points data/data_length at the buffer contents and, if trimming is requested,
//    first skips whitespace at the START of the data and then drops whitespace at the END. The inline
//    comments label these two steps "trailing" and "pending", which is misleading. It then increments
//    num_fields, resets buffer_length to 0, calls increaseRowSize() and returns the next free field.
//    NOTE(review): isspace() is called on plain char values - confirm that bytes >= 0x80 cannot reach
//    it as negative values.
//  - addRow(f, r, rowCount) calls the row callback with the current row count and the row. If the
//    callback returns false it returns nullptr; otherwise it resets num_fields to 0, increments
//    rowCount and returns the first field of the row for reuse.
//  - CSVParser::parse(insertFunction, stream, nMaxRecords) calls start() on the progress handler if
//    one is set, allocates the row buffer, and registers a local FieldBufferDealloc object for
//    cleanup. In the visible remainder it returns ParserResultSuccess once nMaxRecords > 0 and
//    parsedRows >= nMaxRecords, returns ParserResultCancelled when the progress handler's update()
//    returns false (checked when parsedRows % 100 == 0), flushes a pending row after the loop
//    (ParserResultError if the callback rejects it), calls end() on the progress handler, and finally
//    returns ParserResultSuccess only if the parser state is StateNormal, ParserResultError otherwise.
#include "csvparser.h" #include CSVParser::CSVParser(bool trimfields, char fieldseparator, char quotechar) : m_bTrimFields(trimfields) , m_cFieldSeparator(fieldseparator) , m_cQuoteChar(quotechar) , m_pCSVProgress(nullptr) , m_nBufferSize(4096) { } CSVParser::~CSVParser() { delete m_pCSVProgress; } namespace { // This function adds a character to an existing field structure. If necessary, it extends the buffer size. inline void addChar(CSVField* field, char c) { // Increase buffer size if it is too small if(field->buffer_length >= field->buffer_max_length) { field->buffer_max_length += 64; field->buffer = static_cast(realloc(field->buffer, field->buffer_max_length)); } // Add char to the end of the buffer and increase length by one field->buffer[field->buffer_length++] = c; } // This function increases the size of the field list of an existing row. However, it only does so if the field list is currently full. inline void increaseRowSize(CSVRow& r) { // Check if field list is full if(r.num_fields >= r.max_num_fields) { // Increase field list size r.max_num_fields += 5; r.fields = static_cast(realloc(r.fields, r.max_num_fields*sizeof(CSVField))); // Initialise the newly created field structures for(size_t i=r.num_fields;idata = field->buffer; field->data_length = field->buffer_length; // If we have to trim the field, do this by manipulating the data start and data length variables if(trim) { // Check for trailing spaces and omit them while(field->data_length && isspace(*field->data)) { field->data++; field->data_length--; } // Check for pending spaces and omit them while(field->data_length && isspace(field->data[field->data_length-1])) field->data_length--; } // We assume here that the field object has been constructed in-place. So all we need to do for adding it to the row structure // is increasing the field count by one to make sure the newly constructed field object is used. 
r.num_fields++; // Clear field buffer for next use field->buffer_length = 0; // Increase field list size if it is too small increaseRowSize(r); // Return pointer to the next field return &r.fields[r.num_fields]; } // This function takes a parsed CSV row and hands it back to the caller of the CSV parser. It returns a null pointer if the parsing should be // aborted, otherwise it returns a pointer to a new field object that can be used for storing the contents of the first field of the next row. inline CSVField* addRow(CSVParser::csvRowFunction& f, CSVRow& r, size_t& rowCount) { // Call row function if(!f(rowCount, r)) return nullptr; // Reset the field list by setting the field count to 0. No need to deconstruct anything else. r.num_fields = 0; // Increase row count by one, as we're now starting to parse the next row rowCount++; // Return a pointer to the first field in the row object because we're starting with the first field of the next row now return r.fields; } } CSVParser::ParserResult CSVParser::parse(csvRowFunction insertFunction, QTextStream& stream, size_t nMaxRecords) { ParseStates state = StateNormal; // State of the parser QByteArray sBuffer; // Buffer for reading the file CSVRow record; // Buffer for parsing the current row size_t parsedRows = 0; // Number of rows parsed so far CSVField* field; // Buffer for parsing the current field if(m_pCSVProgress) m_pCSVProgress->start(); // Initialise row buffer and get pointer to the first field record = { nullptr, 0, 0 }; increaseRowSize(record); field = record.fields; // Make sure all buffers are freed when we're done here class FieldBufferDealloc { public: explicit FieldBufferDealloc(CSVRow& row) : m_row(row) {} ~FieldBufferDealloc() { for(size_t i=0;i 0 && parsedRows >= nMaxRecords) return ParserResult::ParserResultSuccess; } if(m_pCSVProgress && parsedRows % 100 == 0) { if(!m_pCSVProgress->update(bufferPos)) return ParserResult::ParserResultCancelled; } } if(record.num_fields) { field = addColumn(record, 
field, m_bTrimFields); if(!(field = addRow(insertFunction, record, parsedRows))) return ParserResult::ParserResultError; } if(m_pCSVProgress) m_pCSVProgress->end(); return (state == StateNormal) ? ParserResult::ParserResultSuccess : ParserResult::ParserResultError; }