-QString decodeString(const QByteArray &input, QTextCodec *codec) {
- // First, we check if it's utf8. It is very improbable to encounter a string that looks like
- // valid utf8, but in fact is not. This means that if the input string passes as valid utf8, it
- // is safe to assume that it is.
- // Q_ASSERT(sizeof(const char) == sizeof(quint8)); // In God we trust...
- bool isUtf8 = true;
- int cnt = 0;
- for(int i = 0; i < input.size(); i++) {
- if(cnt) {
- // We check a part of a multibyte char. These need to be of the form 10yyyyyy.
- if((input[i] & 0xc0) != 0x80) { isUtf8 = false; break; }
- cnt--;
- continue;
+
+QString decodeString(const QByteArray &input, QTextCodec *codec)
+{
+ if (codec && utf8DetectionBlacklist.contains(codec->mibEnum()))
+ return codec->toUnicode(input);
+
+ // First, we check if it's utf8. It is very improbable to encounter a string that looks like
+ // valid utf8, but in fact is not. This means that if the input string passes as valid utf8, it
+ // is safe to assume that it is.
+ // Q_ASSERT(sizeof(const char) == sizeof(quint8)); // In God we trust...
+ bool isUtf8 = true;
+ int cnt = 0;
+ for (int i = 0; i < input.size(); i++) {
+ if (cnt) {
+ // We check a part of a multibyte char. These need to be of the form 10yyyyyy.
+ if ((input[i] & 0xc0) != 0x80) { isUtf8 = false; break; }
+ cnt--;
+ continue;
+ }
+ if ((input[i] & 0x80) == 0x00) continue; // 7 bit is always ok
+ if ((input[i] & 0xf8) == 0xf0) { cnt = 3; continue; } // 4-byte char 11110xxx 10yyyyyy 10zzzzzz 10vvvvvv
+ if ((input[i] & 0xf0) == 0xe0) { cnt = 2; continue; } // 3-byte char 1110xxxx 10yyyyyy 10zzzzzz
+ if ((input[i] & 0xe0) == 0xc0) { cnt = 1; continue; } // 2-byte char 110xxxxx 10yyyyyy
+ isUtf8 = false; break; // 8 bit char, but not utf8!
+ }
+ if (isUtf8 && cnt == 0) {
+ QString s = QString::fromUtf8(input);
+ //qDebug() << "Detected utf8:" << s;
+ return s;