diff --git a/src/Data.cpp b/src/Data.cpp
index 117e02ce..33254219 100644
--- a/src/Data.cpp
+++ b/src/Data.cpp
@@ -4,6 +4,10 @@
 
 bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
 {
+    // If the data starts with a Unicode BOM, we always assume it is text
+    if(startsWithBom(data))
+        return true;
+
     // Truncate to the first couple of bytes for quick testing
     if(quickTest)
         data = data.left(512);
@@ -15,3 +19,37 @@ bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
     // Perform check
     return QString(data).toUtf8() == data;
 }
+
+bool startsWithBom(const QByteArray& data)
+{
+    // Note that these aren't all possible BOMs. But they are probably the most common ones.
+    // Every pattern is built with an explicit length: the const char* overload of startsWith() stops at the first
+    // NUL byte, so "\x00\x00\xFE\xFF" would become an empty prefix that matches any data and "\xFF\xFE\x00\x00"
+    // would be cut down to the UTF-16 LE BOM.
+    return data.startsWith(QByteArray("\xEF\xBB\xBF", 3)) ||
+           data.startsWith(QByteArray("\xFE\xFF", 2)) || data.startsWith(QByteArray("\xFF\xFE", 2)) ||
+           data.startsWith(QByteArray("\x00\x00\xFE\xFF", 4)) || data.startsWith(QByteArray("\xFF\xFE\x00\x00", 4));
+}
+
+QByteArray removeBom(QByteArray& data)
+{
+    // Determine the length of the BOM, if there is any. The patterns carry an explicit length because of their
+    // embedded NUL bytes, and the UTF-32 BOMs are tested before the UTF-16 ones because the UTF-32 LE BOM starts
+    // with the bytes of the UTF-16 LE BOM.
+    int bomLength = 0;
+    if(data.startsWith(QByteArray("\xEF\xBB\xBF", 3)))
+        bomLength = 3;
+    else if(data.startsWith(QByteArray("\x00\x00\xFE\xFF", 4)) || data.startsWith(QByteArray("\xFF\xFE\x00\x00", 4)))
+        bomLength = 4;
+    else if(data.startsWith(QByteArray("\xFE\xFF", 2)) || data.startsWith(QByteArray("\xFF\xFE", 2)))
+        bomLength = 2;
+
+    // No BOM found: return an empty byte array and leave the data untouched
+    if(bomLength == 0)
+        return QByteArray();
+
+    // Cut the BOM off the data and hand it back to the caller
+    QByteArray bom = data.left(bomLength);
+    data.remove(0, bomLength);
+    return bom;
+}
diff --git a/src/Data.h b/src/Data.h
index e6de8d88..2cbf9ddc 100644
--- a/src/Data.h
+++ b/src/Data.h
@@ -9,4 +9,12 @@
 // text but makes it less reliable
 bool isTextOnly(QByteArray data, const QString& encoding = QString(), bool quickTest = false);
 
+// This function returns true if the data in the data parameter starts with a Unicode BOM. Otherwise it returns false.
+bool startsWithBom(const QByteArray& data);
+
+// This function checks if the data in the data parameter starts with a Unicode BOM. If so, the BOM is removed from the
+// byte array and passed back to the caller separately as the return value of the function. If the data does not start
+// with a BOM an empty byte array is returned and the original data is not modified.
+QByteArray removeBom(QByteArray& data);
+
 #endif
diff --git a/src/EditDialog.cpp b/src/EditDialog.cpp
index 7328a06e..2a56b3ab 100644
--- a/src/EditDialog.cpp
+++ b/src/EditDialog.cpp
@@ -93,6 +93,9 @@ void EditDialog::loadData(const QByteArray& data)
     QImage img;
     QString textData;
 
+    // Clear previously removed BOM
+    removedBom.clear();
+
     // Determine the data type, saving that info in the class variable
     dataType = checkDataType(data);
 
@@ -150,25 +153,28 @@ void EditDialog::loadData(const QByteArray& data)
 
     case Text:
     case JSON:
-
         // Set enabled any of the text widgets
         ui->editorText->setEnabled(true);
         jsonEdit->setEnabled(true);
 
         switch (editMode) {
         case TextEditor:
+        {
             // The text widget buffer is now the main data source
             dataSource = TextBuffer;
 
-            // Load the text into the text editor
-            textData = QString::fromUtf8(data.constData(), data.size());
+            // Load the text into the text editor, remove BOM first if there is one
+            QByteArray dataWithoutBom = data;
+            removedBom = removeBom(dataWithoutBom);
+
+            textData = QString::fromUtf8(dataWithoutBom.constData(), dataWithoutBom.size());
             ui->editorText->setPlainText(textData);
 
             // Select all of the text by default
             ui->editorText->selectAll();
 
             break;
-
+        }
         case JsonEditor:
             // The JSON widget buffer is now the main data source
             dataSource = JsonBuffer;
@@ -373,6 +379,7 @@ void EditDialog::setNull()
     hexEdit->setData(QByteArray());
     jsonEdit->clear();
     dataType = Null;
+    removedBom.clear();
 
     // Check if in text editor mode
     int editMode = ui->editorStack->currentIndex();
@@ -425,10 +432,10 @@ void EditDialog::accept()
         } else {
             // It's not NULL, so proceed with normal text string checking
             QString oldData = currentIndex.data(Qt::EditRole).toString();
-            QString newData = ui->editorText->toPlainText();
+            QString newData = removedBom + ui->editorText->toPlainText();
             if (oldData != newData)
                 // The data is different, so commit it back to the database
-                emit recordTextUpdated(currentIndex, newData.toUtf8(), false);
+                emit recordTextUpdated(currentIndex, removedBom + newData.toUtf8(), false);
         }
         break;
     case JsonBuffer:
@@ -509,7 +516,7 @@ void EditDialog::editModeChanged(int newMode)
 
         case HexEditor: // Switching to the hex editor
             // Convert the text widget buffer for the hex widget
-            hexEdit->setData(ui->editorText->toPlainText().toUtf8());
+            hexEdit->setData(removedBom + ui->editorText->toPlainText().toUtf8());
 
             // The hex widget buffer is now the main data source
             dataSource = HexBuffer;
diff --git a/src/EditDialog.h b/src/EditDialog.h
index 56981abb..490f3eae 100644
--- a/src/EditDialog.h
+++ b/src/EditDialog.h
@@ -57,6 +57,7 @@ private:
     bool textNullSet;
     bool isReadOnly;
     bool mustIndentAndCompact;
+    QByteArray removedBom;
 
     enum DataSources {
         TextBuffer,