1
0
mirror of https://github.com/qTox/qTox.git synced 2024-03-22 14:00:36 +08:00

fix(smileys): fix flag emojis

Fix two small problems that break flag emojis

(1) Multi-char emojis are accidentally grouped together with emoticons
(such as :smile:). This is fixed by using a more robust isAscii(...) function.
(2) Smiley regexp construction is nondeterministic, causing multi-char emojis to
sometimes be processed first, sometimes last. This is fixed by explicitly
inserting multi-char patterns first.

Issue: #5147
This commit is contained in:
Waris Boonyasiriwat 2021-02-14 17:38:59 -08:00
parent 177f37b0a9
commit 034d7671a5

View File

@ -105,6 +105,13 @@ QString getAsRichText(const QString& key)
return RICH_TEXT_PATTERN.arg(key); return RICH_TEXT_PATTERN.arg(key);
} }
bool isAscii(const QString& string)
{
constexpr auto asciiExtMask = 0x80;
return (string.toUtf8()[0] & asciiExtMask) == 0;
}
SmileyPack::SmileyPack() SmileyPack::SmileyPack()
: cleanupTimer{new QTimer(this)} : cleanupTimer{new QTimer(this)}
{ {
@ -257,19 +264,30 @@ bool SmileyPack::load(const QString& filename)
void SmileyPack::constructRegex() void SmileyPack::constructRegex()
{ {
QString allPattern = QStringLiteral("("); QString allPattern = QStringLiteral("(");
QString regularPatterns;
QString multiCharacterEmojiPatterns;
// construct one big regex that matches on every emoticon // construct one big regex that matches on every emoticon
for (const QString& emote : emoticonToPath.keys()) { for (const QString& emote : emoticonToPath.keys()) {
if (!isAscii(emote)) {
if (emote.toUcs4().length() == 1) { if (emote.toUcs4().length() == 1) {
// UTF-8 emoji regularPatterns.append(emote);
allPattern = allPattern % emote; regularPatterns.append(QStringLiteral("|"));
}
else {
multiCharacterEmojiPatterns.append(emote);
multiCharacterEmojiPatterns.append(QStringLiteral("|"));
}
} else { } else {
// patterns like ":)" or ":smile:", don't match inside a word or else will hit punctuation and html tags // patterns like ":)" or ":smile:", don't match inside a word or else will hit punctuation and html tags
allPattern = allPattern % QStringLiteral(R"((?<=^|\s))") % QRegularExpression::escape(emote) % QStringLiteral(R"((?=$|\s))"); regularPatterns.append(QStringLiteral(R"((?<=^|\s))") % QRegularExpression::escape(emote) % QStringLiteral(R"((?=$|\s))"));
regularPatterns.append(QStringLiteral("|"));
} }
allPattern = allPattern % QStringLiteral("|");
} }
// Regexps are evaluated from left to right, insert multichar emojis first so they are evaluated first
allPattern.append(multiCharacterEmojiPatterns);
allPattern.append(regularPatterns);
allPattern[allPattern.size() - 1] = QChar(')'); allPattern[allPattern.size() - 1] = QChar(')');
// compile and optimize regex // compile and optimize regex
@ -297,6 +315,7 @@ QString SmileyPack::smileyfied(const QString& msg)
result.replace(startPos + replaceDiff, keyLength, imgRichText); result.replace(startPos + replaceDiff, keyLength, imgRichText);
replaceDiff += imgRichText.length() - keyLength; replaceDiff += imgRichText.length() - keyLength;
} }
return result; return result;
} }