mirror of
https://github.com/sqlitebrowser/sqlitebrowser.git
synced 2026-05-18 19:48:23 -05:00
Fix text detection check
Truncating the text at byte boundaries for the quick test was breaking text detection for Russian and probably for any script whose characters are encoded in more than one byte. The problem most likely occurred when a multibyte character was cut at the 512-byte boundary. This is rather improbable in Latin-based languages like German or Spanish, most of whose characters take a single byte, but very likely in other scripts, like Cyrillic, whose characters are encoded in more than one byte. The new approach relies on QTextCodec to find invalid characters using the current encoding, which seems immune to the truncation problem. According to callgrind, it also performs better, probably because it does not involve a memory comparison. See issue #1731
This commit is contained in:
+6
-8
@@ -1,6 +1,7 @@
|
||||
#include "Data.h"
|
||||
|
||||
#include <QTextCodec>
|
||||
#include <algorithm>
|
||||
|
||||
// Note that these aren't all possible BOMs. But they are probably the most common ones.
|
||||
// The size is needed at least for the ones with character zero in them.
|
||||
@@ -17,14 +18,11 @@ bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
|
||||
return true;
|
||||
|
||||
// Truncate to the first couple of bytes for quick testing
|
||||
if(quickTest)
|
||||
data = data.left(512);
|
||||
|
||||
// Convert to Unicode if necessary
|
||||
data = decodeString(data, encoding);
|
||||
|
||||
// Perform check
|
||||
return QString(data).toUtf8() == data;
|
||||
int testSize = quickTest? std::min(512, data.size()) : data.size();
|
||||
QTextCodec::ConverterState state;
|
||||
QTextCodec *codec = encoding.isEmpty()? QTextCodec::codecForName("UTF-8") : QTextCodec::codecForName(encoding.toUtf8());
|
||||
const QString text = codec->toUnicode(data.constData(), testSize, &state);
|
||||
return state.invalidChars <= 0;
|
||||
}
|
||||
|
||||
bool startsWithBom(const QByteArray& data)
|
||||
|
||||
Reference in New Issue
Block a user