mirror of
https://github.com/sqlitebrowser/sqlitebrowser.git
synced 2026-01-20 11:00:44 -06:00
Improve handling of BOMs in table cells
Detect some Unicode BOMs and always treat data starting with a BOM as text. We might need to fine-tune this later but it should be an improvement already. In the Edit Dialog remove the BOM from the text editor but keep it in the hex editor. Also add it back to the text when saving changes in text mode. This way the BOM is out of the way for text edits but is not lost either when editing a cell.
This commit is contained in:
36
src/Data.cpp
36
src/Data.cpp
@@ -4,6 +4,10 @@
|
||||
|
||||
bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
|
||||
{
|
||||
// If the data starts with a Unicode BOM, we always assume it is text
|
||||
if(startsWithBom(data))
|
||||
return true;
|
||||
|
||||
// Truncate to the first couple of bytes for quick testing
|
||||
if(quickTest)
|
||||
data = data.left(512);
|
||||
@@ -15,3 +19,35 @@ bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
|
||||
// Perform check
|
||||
return QString(data).toUtf8() == data;
|
||||
}
|
||||
|
||||
bool startsWithBom(const QByteArray& data)
|
||||
{
|
||||
// Note that these aren't all possible BOMs. But they are probably the most common ones.
|
||||
|
||||
if(data.startsWith("\xEF\xBB\xBF") ||
|
||||
data.startsWith("\xFE\xFF") || data.startsWith("\xFF\xFE") ||
|
||||
data.startsWith("\x00\x00\xFE\xFF") || data.startsWith("\xFF\xFE\x00\x00"))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
// Checks whether the data starts with a Unicode BOM (UTF-8, UTF-16 BE/LE or UTF-32 BE/LE). If so, the BOM is
// removed from the byte array and returned to the caller. If the data does not start with a BOM, an empty byte
// array is returned and the data is left untouched.
QByteArray removeBom(QByteArray& data)
{
    // All patterns are wrapped with an explicit length: QByteArray::startsWith(const char*) stops at the first
    // NUL byte, which would turn the UTF-32 BE BOM into an empty string that matches any data.
    int bomLength = 0;
    if(data.startsWith(QByteArray::fromRawData("\xEF\xBB\xBF", 3)))
    {
        // UTF-8
        bomLength = 3;
    } else if(data.startsWith(QByteArray::fromRawData("\x00\x00\xFE\xFF", 4)) ||
              data.startsWith(QByteArray::fromRawData("\xFF\xFE\x00\x00", 4))) {
        // UTF-32 BE / LE. This must be tested before UTF-16 because the UTF-32 LE BOM begins with the
        // UTF-16 LE BOM and would otherwise only be removed halfway.
        bomLength = 4;
    } else if(data.startsWith(QByteArray::fromRawData("\xFE\xFF", 2)) ||
              data.startsWith(QByteArray::fromRawData("\xFF\xFE", 2))) {
        // UTF-16 BE / LE
        bomLength = 2;
    }

    // No known BOM found: leave the data alone
    if(bomLength == 0)
        return QByteArray();

    // Split the BOM off the front of the data and hand it back separately
    QByteArray bom = data.left(bomLength);
    data.remove(0, bomLength);
    return bom;
}
|
||||
|
||||
@@ -9,4 +9,12 @@
|
||||
// text but makes it less reliable
|
||||
bool isTextOnly(QByteArray data, const QString& encoding = QString(), bool quickTest = false);
|
||||
|
||||
// This function returns true if the data in the data parameter starts with a Unicode BOM. Otherwise it returns false.
|
||||
bool startsWithBom(const QByteArray& data);
|
||||
|
||||
// This function checks if the data in the data parameter starts with a Unicode BOM. If so, the BOM is removed from the
|
||||
// byte array and passed back to the caller separately as the return value of the function. If the data does not start
|
||||
// with a BOM an empty byte array is returned and the original data is not modified.
|
||||
QByteArray removeBom(QByteArray& data);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -93,6 +93,9 @@ void EditDialog::loadData(const QByteArray& data)
|
||||
QImage img;
|
||||
QString textData;
|
||||
|
||||
// Clear previously removed BOM
|
||||
removedBom.clear();
|
||||
|
||||
// Determine the data type, saving that info in the class variable
|
||||
dataType = checkDataType(data);
|
||||
|
||||
@@ -150,25 +153,28 @@ void EditDialog::loadData(const QByteArray& data)
|
||||
|
||||
case Text:
|
||||
case JSON:
|
||||
|
||||
// Set enabled any of the text widgets
|
||||
ui->editorText->setEnabled(true);
|
||||
jsonEdit->setEnabled(true);
|
||||
|
||||
switch (editMode) {
|
||||
case TextEditor:
|
||||
{
|
||||
// The text widget buffer is now the main data source
|
||||
dataSource = TextBuffer;
|
||||
|
||||
// Load the text into the text editor
|
||||
textData = QString::fromUtf8(data.constData(), data.size());
|
||||
// Load the text into the text editor, remove BOM first if there is one
|
||||
QByteArray dataWithoutBom = data;
|
||||
removedBom = removeBom(dataWithoutBom);
|
||||
|
||||
textData = QString::fromUtf8(dataWithoutBom.constData(), dataWithoutBom.size());
|
||||
ui->editorText->setPlainText(textData);
|
||||
|
||||
// Select all of the text by default
|
||||
ui->editorText->selectAll();
|
||||
|
||||
break;
|
||||
|
||||
}
|
||||
case JsonEditor:
|
||||
// The JSON widget buffer is now the main data source
|
||||
dataSource = JsonBuffer;
|
||||
@@ -373,6 +379,7 @@ void EditDialog::setNull()
|
||||
hexEdit->setData(QByteArray());
|
||||
jsonEdit->clear();
|
||||
dataType = Null;
|
||||
removedBom.clear();
|
||||
|
||||
// Check if in text editor mode
|
||||
int editMode = ui->editorStack->currentIndex();
|
||||
@@ -425,10 +432,10 @@ void EditDialog::accept()
|
||||
} else {
|
||||
// It's not NULL, so proceed with normal text string checking
|
||||
QString oldData = currentIndex.data(Qt::EditRole).toString();
|
||||
QString newData = ui->editorText->toPlainText();
|
||||
QString newData = removedBom + ui->editorText->toPlainText();
|
||||
if (oldData != newData)
|
||||
// The data is different, so commit it back to the database
|
||||
emit recordTextUpdated(currentIndex, newData.toUtf8(), false);
|
||||
emit recordTextUpdated(currentIndex, removedBom + newData.toUtf8(), false);
|
||||
}
|
||||
break;
|
||||
case JsonBuffer:
|
||||
@@ -509,7 +516,7 @@ void EditDialog::editModeChanged(int newMode)
|
||||
|
||||
case HexEditor: // Switching to the hex editor
|
||||
// Convert the text widget buffer for the hex widget
|
||||
hexEdit->setData(ui->editorText->toPlainText().toUtf8());
|
||||
hexEdit->setData(removedBom + ui->editorText->toPlainText().toUtf8());
|
||||
|
||||
// The hex widget buffer is now the main data source
|
||||
dataSource = HexBuffer;
|
||||
|
||||
@@ -57,6 +57,7 @@ private:
|
||||
bool textNullSet;
|
||||
bool isReadOnly;
|
||||
bool mustIndentAndCompact;
|
||||
QByteArray removedBom;
|
||||
|
||||
enum DataSources {
|
||||
TextBuffer,
|
||||
|
||||
Reference in New Issue
Block a user