1 /***************************************************************************
2 * Copyright (C) 2005-2018 by the Quassel Project *
3 * devel@quassel-irc.org *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) version 3. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
21 #include "expressionmatch.h"
26 #include <QStringList>
28 #include "logmessage.h"
// Constructs an ExpressionMatch from a source expression, a match mode, and a case-sensitivity
// flag.
//
// Parameters:
//   expression     Source expression text; stored in _sourceExpression.
//   mode           Match mode used to interpret the expression (see MatchMode).
//   caseSensitive  Case-sensitivity flag; stored in _sourceCaseSensitive.
//
// NOTE(review): only the assignments of `expression` and `caseSensitive` are visible in this
// view. Presumably `mode` is stored and the internal regex is calculated here as well, as the
// comments below describe - confirm against the full file.
30 ExpressionMatch::ExpressionMatch(const QString &expression, MatchMode mode, bool caseSensitive)
32 // Store the original parameters for later reference
33 _sourceExpression = expression;
35 _sourceCaseSensitive = caseSensitive;
37 // Calculate the internal regex
39 // Do this now instead of on-demand to provide immediate feedback on errors when editing
40 // highlight and ignore rules.
// Tests whether the given string satisfies this expression.
//
// Parameters:
//   string      Text to test against the cached regular expressions.
//   matchEmpty  Consulted when the source expression is empty (see the comment in the body).
//
// Evaluation order visible here: the empty-expression case is handled first, then the inverted
// rule (_matchInvertRegEx) when it is active and valid, then the regular rule (_matchRegEx) when
// it is active and valid, whose match result is returned directly.
45 bool ExpressionMatch::match(const QString &string, bool matchEmpty) const
47 // Handle empty expression strings
48 if (_sourceExpressionEmpty) {
49 // Match found if matching empty is allowed, otherwise no match found
54 // Can't match on an invalid rule
58 // We have "_matchRegEx", "_matchInvertRegEx", or both due to isValid() check above
60 // If specified, first check inverted rules
61 if (_matchInvertRegExActive && _matchInvertRegEx.isValid()) {
62 // Check inverted match rule
63 if (_matchInvertRegEx.match(string).hasMatch()) {
64 // Inverted rule matched, the rest of the rule cannot match
69 if (_matchRegExActive && _matchRegEx.isValid()) {
70 // Check regular match rule
71 return _matchRegEx.match(string).hasMatch();
74 // If no valid regular rules exist, due to the isValid() check there must be valid inverted
75 // rules that did not match. Count this as properly matching (implicit wildcard).
// Trims whitespace from every component of a multi-wildcard rule and returns the recombined
// rule text.
//
// The rule is walked one character at a time while counting consecutive backslashes, so that
// escaped separators can be told apart from real ones. Each non-empty, trimmed component is
// appended to the result followed by "; " or a newline, depending on which separator ended it.
// The result itself is trimmed before being returned.
//
// Parameters:
//   originalRule  Multi-wildcard rule text; a local copy is made before processing.
//
// Returns the rule with whitespace trimmed around each component.
81 QString ExpressionMatch::trimMultiWildcardWhitespace(const QString &originalRule)
83 // This gets handled in two steps:
85 // 1. Break apart ";"-separated list into components
86 // 2. Combine whitespace-trimmed components into wildcard expression
88 // Let's start by making the list...
90 // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping
93 // Escaped list rules (where "[\n]" represents newline):
99 // \\; | Split (keep as "\\")
100 // \\\ | Keep as "\\" + "\", set consecutive slashes to 1
102 // \[\n] | Split (keep as "\")
103 // \\[\n] | Split (keep as "\\")
104 // ... | Keep as "..."
105 // \... | Keep as "\..."
106 // \\... | Keep as "\\..."
108 // Strings are forced to end with "\n", always applying "\..." and "\\..." rules
109 // "..." also includes another "\" character
111 // All whitespace is trimmed from each component
113 // "\\" and "\" are not downconverted to allow for other escape codes to be detected in
114 // ExpressionMatch::wildcardToRegEx
119 // norm; norm-space ; newline-space [\n] ;escape \; sep ; slash-end-split\\; quad\\\\norm;
120 // newline-split-slash\\[\n] slash-at-end\\ [line does not continue]
127 // slash-end-split\\ [line does not continue]
129 // newline-split-slash\\ [line does not continue]
130 // slash-at-end\\ [line does not continue]
132 // > Trimmed wildcard rule
133 // norm; norm-space; newline-space[\n]escape \; sep; slash-end-split\\; quad\\\\norm;
134 // newline-split-slash\\[\n]slash-at-end\\ [line does not continue]
136 // (Newlines are encoded as "[\n]". Ignore linebreaks for the sake of comment wrapping.)
138 // Note: R"(\\)" results in the literal of "\\", two backslash characters. Anything inside the
139 // brackets is treated as a literal. Outside the brackets but inside the quotes is still
142 // See https://en.cppreference.com/w/cpp/language/string_literal
146 QString rule(originalRule);
148 // Force a termination at the end of the string to trigger a split
149 // Don't check for ";" splits as they may be escaped
150 if (!rule.endsWith("\n")) {
159 QString curString = {};
161 int sourceLength = rule.length();
162 // Consecutive "\" characters
163 int consecutiveSlashes = 0;
165 // We know it's going to be the same length or smaller, so reserve the same size as the string
166 result.reserve(sourceLength);
168 // For every character...
169 for (int i = 0; i < sourceLength; i++) {
171 curChar = rule.at(i);
172 // Check if it's on the list of special list characters, converting to Unicode for use
173 // in the switch statement
175 // See https://doc.qt.io/qt-5/qchar.html#unicode
176 switch (curChar.unicode()) {
179 switch (consecutiveSlashes) {
184 // "\\;" -> Split (keep as "\\")
185 // Not escaped separator, split into a new item
187 // Apply the additional "\\" if needed
188 if (consecutiveSlashes == 2) {
189 // "\\;" -> Split (keep as "\\")
190 curString.append(R"(\\)");
193 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
194 curString = curString.trimmed();
197 if (!curString.isEmpty()) {
198 // Add to list with the same separator used
199 result.append(curString + "; ");
201 // Reset the current list item
205 // "\;" -> Keep as "\;"
206 curString.append(R"(\;)");
209 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
210 qWarning() << Q_FUNC_INFO << "Wildcard rule"
211 << rule << "resulted in rule component"
212 << curString << "with unexpected count of consecutive '\\' ("
213 << consecutiveSlashes << "), ignoring" << curChar << "character!";
216 consecutiveSlashes = 0;
220 // Increase consecutive slash count
221 consecutiveSlashes++;
222 // Check if we've reached "\\\"...
223 if (consecutiveSlashes == 3) {
224 // "\\\" -> Keep as "\\" + "\"
225 curString.append(R"(\\)");
226 // Set consecutive slashes to 1, recognizing the trailing "\"
227 consecutiveSlashes = 1;
229 else if (consecutiveSlashes > 3) {
230 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
231 qWarning() << Q_FUNC_INFO << "Wildcard rule"
232 << rule << "resulted in rule component"
233 << curString << "with unexpected count of consecutive '\\' ("
234 << consecutiveSlashes << "), ignoring" << curChar << "character!";
240 // Preserve the characters as they are now
243 // "\[\n]" -> Split (keep as "\")
244 // "\\[\n]" -> Split (keep as "\\")
246 switch (consecutiveSlashes) {
252 // Apply the additional "\" or "\\"
253 curString.append(QString(R"(\)").repeated(consecutiveSlashes));
256 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
257 qWarning() << Q_FUNC_INFO << "Wildcard rule"
258 << rule << "resulted in rule component"
259 << curString << "with unexpected count of consecutive '\\' ("
260 << consecutiveSlashes << "), applying newline split anyways!";
264 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
265 curString = curString.trimmed();
268 if (!curString.isEmpty()) {
269 // Add to list with the same separator used
270 result.append(curString + "\n");
272 // Reset the current list item
274 consecutiveSlashes = 0;
277 // Preserve the characters as they are now
278 switch (consecutiveSlashes) {
280 // "..." -> Keep as "..."
281 curString.append(curChar);
285 // "\..." -> Keep as "\..."
286 // "\\..." -> Keep as "\\..."
287 curString.append(QString("\\").repeated(consecutiveSlashes) + curChar);
290 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
291 qWarning() << Q_FUNC_INFO << "Wildcard rule"
292 << rule << "resulted in rule component"
293 << curString << "with unexpected count of consecutive '\\' ("
294 << consecutiveSlashes << "), ignoring " << curChar << "char escape!";
297 consecutiveSlashes = 0;
302 // Remove any trailing separators
303 if (result.endsWith("; ")) {
307 // Remove any trailing whitespace
308 return result.trimmed();
// Recalculates the cached regular expressions from the stored source expression, match mode,
// and case-sensitivity flag.
//
// Both "active" flags are cleared first. An empty source expression needs no regular expression.
// Otherwise the expression is converted according to _sourceMode:
//   MatchPhrase        - escaped phrase wrapped in "(?:^|\W)" and "(?:\W|$)"
//   MatchMultiPhrase   - converted via convertFromMultiPhrase()
//   MatchWildcard      - converted via wildcardToRegEx() and anchored with "^" and "$"
//   MatchMultiWildcard - delegated to generateFromMultiWildcard()
//   MatchRegEx         - used as a regular expression as-is
// In the MatchWildcard and MatchRegEx modes a leading "!" selects the inverted expression, and
// a leading escaped "!" is handled by skipping past the backslash.
//
// If the expression is non-empty and the result is not valid, an info-level message is logged.
312 void ExpressionMatch::cacheRegEx()
314 _matchRegExActive = false;
315 _matchInvertRegExActive = false;
317 _sourceExpressionEmpty = _sourceExpression.isEmpty();
318 if (_sourceExpressionEmpty) {
319 // No need to calculate anything for empty strings
323 // Convert the given expression to a regular expression based on the mode
324 switch (_sourceMode) {
325 case MatchMode::MatchPhrase:
326 // Match entire phrase, noninverted
327 // Don't trim whitespace for phrase matching as someone might want to match on " word ", a
328 // more-specific request than "word".
329 _matchRegEx = regExFactory("(?:^|\\W)" + regExEscape(_sourceExpression) + "(?:\\W|$)",
330 _sourceCaseSensitive);
331 _matchRegExActive = true;
333 case MatchMode::MatchMultiPhrase:
334 // Match multiple entire phrases, noninverted
335 // Convert from multiple-phrase rules
336 _matchRegEx = regExFactory(convertFromMultiPhrase(_sourceExpression), _sourceCaseSensitive);
337 _matchRegExActive = true;
339 case MatchMode::MatchWildcard:
340 // Match as wildcard expression
341 // Convert from wildcard rules for a single wildcard
342 if (_sourceExpression.startsWith("!")) {
343 // Inverted rule: take the remainder of the string
344 // "^" + invertComponents.at(0) + "$"
345 _matchInvertRegEx = regExFactory("^" + wildcardToRegEx(_sourceExpression.mid(1)) + "$",
346 _sourceCaseSensitive);
347 _matchInvertRegExActive = true;
350 // Normal rule: take the whole string
351 // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past
352 // escaped "\" (i.e. "\\!")
354 regExFactory("^" + wildcardToRegEx(_sourceExpression.startsWith("\\!")
355 ? _sourceExpression.mid(1)
356 : _sourceExpression) + "$",
357 _sourceCaseSensitive);
358 _matchRegExActive = true;
361 case MatchMode::MatchMultiWildcard:
362 // Match as multiple wildcard expressions
363 // Convert from wildcard rules for multiple wildcards
364 // (The generator function handles setting matchRegEx/matchInvertRegEx)
365 generateFromMultiWildcard(_sourceExpression, _sourceCaseSensitive);
367 case MatchMode::MatchRegEx:
368 // Match as regular expression
369 if (_sourceExpression.startsWith("!")) {
370 // Inverted rule: take the remainder of the string
371 _matchInvertRegEx = regExFactory(_sourceExpression.mid(1), _sourceCaseSensitive);
372 _matchInvertRegExActive = true;
375 // Normal rule: take the whole string
376 // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past
377 // escaped "\" (i.e. "\\!")
379 regExFactory(_sourceExpression.startsWith("\\!") ? _sourceExpression.mid(1)
381 _sourceCaseSensitive);
382 _matchRegExActive = true;
386 // This should never happen if you keep the above consistent
387 qWarning() << Q_FUNC_INFO << "Unknown MatchMode" << (int)_sourceMode << "!";
391 if (!_sourceExpressionEmpty && !isValid()) {
392 // This can happen with invalid regex, so make it a bit more user-friendly. Set it to Info
393 // level as ideally someone's not just going to leave a broken match rule around. For
394 // MatchRegEx, they probably need to fix their regex rule. For the other modes, there's
395 // probably a bug in the parsing routines (which should also be fixed).
396 quInfo() << "Could not parse expression match rule"
397 << _sourceExpression << "(match mode:" << (int)_sourceMode
398 << "), this rule will be ignored";
// Builds a QRegularExpression from the given pattern string.
//
// Parameters:
//   regExString    Pattern text for the regular expression.
//   caseSensitive  When true, NoPatternOption is used; otherwise CaseInsensitiveOption.
//
// An invalid pattern is reported through qDebug(); the expression is not explicitly optimized
// (see the discussion in the body).
//
// NOTE(review): the first parameter's name on the signature line looks mis-encoded (an
// ampersand followed by "reg" appears to have been rendered as a single symbol); the body
// refers to it as regExString - confirm and repair against the original file.
403 QRegularExpression ExpressionMatch::regExFactory(const QString ®ExString,
406 // Construct the regular expression object, setting case sensitivity as appropriate
407 QRegularExpression newRegEx = QRegularExpression(regExString, caseSensitive ?
408 QRegularExpression::PatternOption::NoPatternOption
409 : QRegularExpression::PatternOption::CaseInsensitiveOption);
411 // Check if rule is valid
412 if (!newRegEx.isValid()) {
413 // This can happen with invalid regex, so make it a bit more user-friendly. Keep this
414 // distinct from the main info-level message for easier debugging in case a regex component
415 // in Wildcard or Phrase mode breaks.
416 qDebug() << "Internal regular expression component" << regExString
417 << "is invalid and will be ignored";
419 // Qt offers explicit control over when QRegularExpression objects get optimized.
420 // By default, patterns are only optimized after some number of uses as defined
421 // within Qt internals.
423 // In the context of ExpressionMatch, some regular expressions might go unused, e.g. a highlight
424 // rule might never match a channel pattern, resulting in the contents pattern being untouched.
425 // It should be safe to let Qt handle optimization, taking a non-deterministic, one-off
426 // performance penalty on optimization for the sake of saving memory usage on patterns that
429 // If profiling shows expressions are generally used and/or the automatic optimization
430 // incurs too high of a penalty (unlikely given we've created regular expression
431 // objects willy-nilly before now), this can be revisited to explicitly call...
434 // // Optimize regex now
435 // newRegEx.optimize();
438 // NOTE: This should only be called if the expression is valid! Apply within an "else" of the
439 // inverted isValid() check above.
441 // See https://doc.qt.io/qt-5/qregularexpression.html#optimize
// Escapes a phrase so it can be embedded literally in a regular expression.
//
// Parameters:
//   phrase  Text to escape.
//
// Returns the result of QRegularExpression::escape() applied to `phrase`.
447 QString ExpressionMatch::regExEscape(const QString &phrase)
449 // Escape the given phrase of any special regular expression characters
450 return QRegularExpression::escape(phrase);
// Converts a newline-separated list of phrases into a single regular expression string.
//
// The rule is split on newlines with empty parts skipped; each remaining phrase is regex-escaped
// without trimming. The phrases are then wrapped between "(?:^|\W)" and "(?:\W|$)", joined with
// "|" inside a non-capturing group when there is more than one.
//
// Parameters:
//   originalRule  Multi-phrase rule text, one phrase per line.
//
// Returns the regular expression text for the combined phrases.
454 QString ExpressionMatch::convertFromMultiPhrase(const QString &originalRule)
456 // Convert the multi-phrase rule into regular expression format
457 // Split apart the original rule into components
458 // Use QStringList instead of std::vector<QString> to make use of Qt's built-in .join() method
459 QStringList components = {};
461 for (auto &&component : originalRule.split("\n", QString::SkipEmptyParts)) {
462 // Don't trim whitespace to maintain consistency with single phrase matching
463 // As trimming is not performed, empty components will already be skipped. This means " "
464 // is considered a valid matching phrase.
466 // Take the whole string, escaping any regex
467 components.append(regExEscape(component));
470 // Create full regular expression by...
471 // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)"
472 // > Flattening normal and inverted rules using the regex OR character "...|..."
474 // Before: [foo, bar, baz]
475 // After: (?:^|\W)(?:foo|bar|baz)(?:\W|$)
477 if (components.count() == 1) {
478 // Single item, skip the noncapturing group
479 return "(?:^|\\W)" + components.at(0) + "(?:\\W|$)";
482 return "(?:^|\\W)(?:" + components.join("|") + ")(?:\\W|$)";
// Builds the normal and inverted match expressions from a multi-wildcard rule.
//
// The rule is walked one character at a time while counting consecutive backslashes, so that
// escaped ";" and "!" characters can be told apart from real separators and inverters. Each
// non-empty, trimmed component is converted with wildcardToRegEx() and collected as either a
// normal or an inverted component. Duplicates are removed, then each non-empty list is joined
// with "|", anchored with "^" and "$", and compiled through regExFactory(); the matching
// "active" flag is set for every list that produced an expression.
//
// Parameters:
//   originalRule   Multi-wildcard rule text; a local copy is made before processing.
//   caseSensitive  Passed through to regExFactory() for both expressions.
487 void ExpressionMatch::generateFromMultiWildcard(const QString &originalRule, bool caseSensitive)
489 // Convert the wildcard rule into regular expression format
490 // First, reset the existing match expressions
492 _matchInvertRegEx = {};
493 _matchRegExActive = false;
494 _matchInvertRegExActive = false;
496 // This gets handled in three steps:
498 // 1. Break apart ";"-separated list into components
499 // 2. Convert components from wildcard format into regular expression format
500 // 3. Combine normal/invert components into normal/invert regular expressions
502 // Let's start by making the list...
504 // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping
505 // escaped characters
507 // Escaped list rules (where "[\n]" represents newline):
512 // \; | Replace with ";"
513 // \\; | Split (keep as "\\")
514 // ! | At start: mark as inverted
515 // \! | At start: replace with "!"
516 // \\! | At start: keep as "\\!" (replaced with "\!" in wildcard conversion)
517 // ! | Elsewhere: keep as "!"
518 // \! | Elsewhere: keep as "\!"
519 // \\! | Elsewhere: keep as "\\!"
520 // \\\ | Keep as "\\" + "\", set consecutive slashes to 1
522 // \[\n] | Split (keep as "\")
523 // \\[\n] | Split (keep as "\\")
524 // ... | Keep as "..."
525 // \... | Keep as "\..."
526 // \\... | Keep as "\\..."
528 // Strings are forced to end with "\n", always applying "\..." and "\\..." rules
529 // "..." also includes another "\" character
531 // All whitespace is trimmed from each component
533 // "\\" and "\" are not downconverted to allow for other escape codes to be detected in
534 // ExpressionMatch::wildcardToRegEx
540 // norm;!invert; norm-space ; !invert-space ;;!;\!norm-escaped;\\!slash-invert;\\\\double;
541 // escape\;sep;slash-end-split\\;quad\\\\!noninvert;newline-split[\n]newline-split-slash\\[\n]
542 // slash-at-end\\ [line does not continue]
544 // (Newlines are encoded as "[\n]". Ignore linebreaks for the sake of comment wrapping.)
547 // > Normal components without wildcard conversion
554 // slash-end-split\\ [line does not continue]
555 // quad\\\\!noninvert
557 // newline-split-slash\\ [line does not continue]
558 // slash-at-end\\ [line does not continue]
560 // > Inverted components without wildcard conversion
565 // > Normal components with wildcard conversion
572 // slash\-end\-split\\ [line does not continue]
573 // quad\\\\\!noninvert
575 // newline\-split\-slash\\ [line does not continue]
576 // slash\-at\-end\\ [line does not continue]
578 // > Inverted components with wildcard conversion
583 // > Normal wildcard-converted regex
584 // ^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double|escape\;sep|
585 // slash\-end\-split\\|quad\\\\\!noninvert|newline\-split|newline\-split\-slash\\|
586 // slash\-at\-end\\)$
588 // > Inverted wildcard-converted regex
589 // ^(?:invert|invert\-space)$
591 // Note: R"(\\)" results in the literal of "\\", two backslash characters. Anything inside the
592 // brackets is treated as a literal. Outside the brackets but inside the quotes is still
595 // See https://en.cppreference.com/w/cpp/language/string_literal
599 QString rule(originalRule);
601 // Force a termination at the end of the string to trigger a split
602 // Don't check for ";" splits as they may be escaped
603 if (!rule.endsWith("\n")) {
607 // Result, sorted into normal and inverted rules
608 // Use QStringList instead of std::vector<QString> to make use of Qt's built-in .join() method
609 QStringList normalComponents = {}, invertComponents = {};
614 QString curString = {};
616 int sourceLength = rule.length();
617 // Consecutive "\" characters
618 int consecutiveSlashes = 0;
619 // Whether or not this marks an inverted rule
620 bool isInverted = false;
621 // Whether or not we're at the beginning of the rule (for detecting "!" and "\!")
622 bool isRuleStart = true;
624 // We know it's going to have ";"-count items or less, so reserve ";"-count items for both.
625 // Without parsing it's not easily possible to tell which are escaped or not, and among the
626 // non-escaped entries, which are inverted or not. These get destroyed once out of scope of
627 // this function, so balancing towards performance over memory usage should be okay, hopefully.
628 int separatorCount = rule.count(";");
629 normalComponents.reserve(separatorCount);
630 invertComponents.reserve(separatorCount);
632 // For every character...
633 for (int i = 0; i < sourceLength; i++) {
635 curChar = rule.at(i);
636 // Check if it's on the list of special list characters, converting to Unicode for use
637 // in the switch statement
639 // See https://doc.qt.io/qt-5/qchar.html#unicode
640 switch (curChar.unicode()) {
643 switch (consecutiveSlashes) {
648 // "\\;" -> Split (keep as "\\")
649 // Not escaped separator, split into a new item
651 // Apply the additional "\\" if needed
652 if (consecutiveSlashes == 2) {
653 // "\\;" -> Split (keep as "\\")
654 curString.append(R"(\\)");
657 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
658 curString = curString.trimmed();
661 if (!curString.isEmpty()) {
662 // Add to inverted/normal list
664 invertComponents.append(wildcardToRegEx(curString));
667 normalComponents.append(wildcardToRegEx(curString));
670 // Reset the current list item
676 // "\;" -> Replace with ";"
677 curString.append(";");
681 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
682 qWarning() << Q_FUNC_INFO << "Wildcard rule"
683 << rule << "resulted in rule component"
684 << curString << "with unexpected count of consecutive '\\' ("
685 << consecutiveSlashes << "), ignoring" << curChar << "character!";
689 consecutiveSlashes = 0;
692 // Rule inverter found
694 // Apply the inverting logic
695 switch (consecutiveSlashes) {
697 // "!" -> At start: mark as inverted
699 // Don't include the "!" character
702 // "\!" -> At start: replace with "!"
703 curString.append("!");
706 // "\\!" -> At start: keep as "\\!" (replaced with "\!" in wildcard conversion)
707 curString.append(R"(\\!)");
710 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
711 qWarning() << Q_FUNC_INFO << "Wildcard rule"
712 << rule << "resulted in rule component"
713 << curString << "with unexpected count of consecutive '\\' ("
714 << consecutiveSlashes << "), ignoring" << curChar << "character!";
719 // Preserve the characters as they are now
720 switch (consecutiveSlashes) {
722 // "!" -> Elsewhere: keep as "!"
723 curString.append("!");
727 // "\!" -> Elsewhere: keep as "\!"
728 // "\\!" -> Elsewhere: keep as "\\!"
729 curString.append(QString(R"(\)").repeated(consecutiveSlashes) + "!");
732 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
733 qWarning() << Q_FUNC_INFO << "Wildcard rule"
734 << rule << "resulted in rule component"
735 << curString << "with unexpected count of consecutive '\\' ("
736 << consecutiveSlashes << "), ignoring" << curChar << "character!";
741 consecutiveSlashes = 0;
745 // Increase consecutive slash count
746 consecutiveSlashes++;
747 // Check if we've reached "\\\"...
748 if (consecutiveSlashes == 3) {
749 // "\\\" -> Keep as "\\" + "\"
750 curString.append(R"(\\)");
751 // No longer at the rule start
753 // Set consecutive slashes to 1, recognizing the trailing "\"
754 consecutiveSlashes = 1;
756 else if (consecutiveSlashes > 3) {
757 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
758 qWarning() << Q_FUNC_INFO << "Wildcard rule"
759 << rule << "resulted in rule component"
760 << curString << "with unexpected count of consecutive '\\' ("
761 << consecutiveSlashes << "), ignoring" << curChar << "character!";
764 // Don't set "isRuleStart" here as "\" is used in escape sequences
768 // Preserve the characters as they are now
771 // "\[\n]" -> Split (keep as "\")
772 // "\\[\n]" -> Split (keep as "\\")
774 switch (consecutiveSlashes) {
780 // Apply the additional "\" or "\\"
781 curString.append(QString(R"(\)").repeated(consecutiveSlashes));
784 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
785 qWarning() << Q_FUNC_INFO << "Wildcard rule"
786 << rule << "resulted in rule component"
787 << curString << "with unexpected count of consecutive '\\' ("
788 << consecutiveSlashes << "), applying newline split anyways!";
792 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
793 curString = curString.trimmed();
796 if (!curString.isEmpty()) {
797 // Add to inverted/normal list
799 invertComponents.append(wildcardToRegEx(curString));
802 normalComponents.append(wildcardToRegEx(curString));
805 // Reset the current list item
809 consecutiveSlashes = 0;
812 // Preserve the characters as they are now
813 switch (consecutiveSlashes) {
815 // "..." -> Keep as "..."
816 curString.append(curChar);
820 // "\..." -> Keep as "\..."
821 // "\\..." -> Keep as "\\..."
822 curString.append(QString("\\").repeated(consecutiveSlashes) + curChar);
825 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
826 qWarning() << Q_FUNC_INFO << "Wildcard rule"
827 << rule << "resulted in rule component"
828 << curString << "with unexpected count of consecutive '\\' ("
829 << consecutiveSlashes << "), ignoring " << curChar << "char escape!";
832 // Don't mark as past rule start for whitespace (whitespace gets trimmed)
833 if (!curChar.isSpace()) {
836 consecutiveSlashes = 0;
841 // Clean up any duplicates
842 normalComponents.removeDuplicates();
843 invertComponents.removeDuplicates();
845 // Create full regular expressions by...
846 // > Anchoring to start and end of string to mimic QRegExp's .exactMatch() handling, "^...$"
847 // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)"
848 // > Flattening normal and inverted rules using the regex OR character "...|..."
850 // Before: [foo, bar, baz]
851 // After: ^(?:foo|bar|baz)$
853 // See https://doc.qt.io/qt-5/qregularexpression.html#porting-from-qregexp-exactmatch
854 // And https://regex101.com/
856 // Any empty/invalid regex are handled within ExpressionMatch::match()
857 if (!normalComponents.isEmpty()) {
858 // Create normal match regex
859 if (normalComponents.count() == 1) {
860 // Single item, skip the noncapturing group
861 _matchRegEx = regExFactory("^" + normalComponents.at(0) + "$", caseSensitive);
864 _matchRegEx = regExFactory("^(?:" + normalComponents.join("|") + ")$", caseSensitive);
866 _matchRegExActive = true;
868 if (!invertComponents.isEmpty()) {
869 // Create invert match regex
870 if (invertComponents.count() == 1) {
871 // Single item, skip the noncapturing group
872 _matchInvertRegEx = regExFactory("^" + invertComponents.at(0) + "$", caseSensitive);
876 regExFactory("^(?:" + invertComponents.join("|") + ")$", caseSensitive);
878 _matchInvertRegExActive = true;
// Converts a single wildcard expression into regular expression text.
//
// The expression is first regex-escaped via regExEscape(), then the escaped text is walked one
// character at a time while counting consecutive backslashes. Per the rule table in the body,
// an unescaped "*" or "?" becomes its regex wildcard form, an escaped "*" or "?" stays a
// literal, an escaped backslash stays a literal backslash, and other characters are appended
// as-is (with a backslash kept for regex-escaped special characters).
//
// Parameters:
//   expression  Wildcard expression to convert.
//
// No "^" or "$" anchors are added here.
//
// NOTE(review): the end of this function is not visible in this view; presumably it returns the
// accumulated result string - confirm against the full file.
883 QString ExpressionMatch::wildcardToRegEx(const QString &expression)
885 // Convert the wildcard expression into regular expression format
887 // We're taking a slightly different route...
889 // Original QRegExp::Wildcard rules:
890 // --------------------------
891 // Wildcard | Regex | Outcome
892 // ---------|-------|--------
893 // * | .* | zero or more of any character
894 // ? | . | any single character
896 // NOTE 1: This is QRegExp::Wildcard, not QRegExp::WildcardUnix
898 // NOTE 2: We are ignoring the "[...]" character-class matching functionality of
899 // QRegExp::Wildcard as that feature's a bit more complex and can be handled with full-featured
902 // See https://doc.qt.io/qt-5/qregexp.html#wildcard-matching
904 // Quassel originally did not use QRegExp::WildcardUnix, which prevented escaping "*" and "?" in
905 // messages. Unfortunately, spam messages might decide to use both, so offering a way to escape
908 // On the flip-side, that means to match "\" requires escaping as "\\", breaking backwards
911 // Quassel's Wildcard rules
912 // ------------------------------------------
913 // Wildcard | Regex escaped | Regex | Outcome
914 // ---------|---------------|-------|--------
915 // * | \* | .* | zero or more of any character
916 // ? | \? | . | any single character
917 // \* | \\\* | \* | literal "*"
918 // \? | \\\? | \? | literal "?"
919 // \[...] | \\[...] | [...] | invalid escape, ignore it
920 // \\ | \\\\ | \\ | literal "\"
922 // In essence, "*" and "?" need to be changed only if not escaped, "\\" collapses into "\", "\" gets
923 // ignored; other characters escape normally.
928 // never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval
930 // ("\\\\" represents "\\", "\\" represents "\", and "\\\" is valid+invalid, "\")
932 // > Regex escaped wildcard rule
933 // never\?gonna\*give\\\*you\\\?up\\\\test\|y\\yeah\\\\1\\\\\\\\2\\\\\\1inval
935 // > Expected correct regex
936 // never.gonna.*give\*you\?up\\test\|yyeah\\1\\\\2\\1inval
938 // > Undoing regex escaping of "\" as "\\" (i.e. simple replace, with special escapes intact)
939 // never.gonna.*give\*you\?up\test\|yyeah\1\\2\1inval
941 // Escape string according to regex
942 QString regExEscaped(regExEscape(expression));
946 // NOTE: In theory, regular expression lookbehind could solve this. Unfortunately, QRegExp does
947 // not support lookbehind, and it's theoretically inefficient, anyways. Just use an approach
948 // similar to that taken by QRegExp's official wildcard mode.
950 // Lookbehind example (that we can't use):
951 // (?<!abc)test Negative lookbehind - don't match if "test" is preceded by "abc"
953 // See https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/tools/qregexp.cpp
955 // NOTE: We don't copy QRegExp's mode as QRegularExpression has more special characters. We
956 // can't use the same escaping code, hence calling the appropriate QReg[...]::escape() above.
965 int sourceLength = regExEscaped.length();
966 // Consecutive "\" characters
967 int consecutiveSlashes = 0;
969 // We know it's going to be the same length or smaller, so reserve the same size as the string
970 result.reserve(sourceLength);
972 // For every character...
973 for (int i = 0; i < sourceLength; i++) {
975 curChar = regExEscaped.at(i);
976 // Check if it's on the list of special wildcard characters, converting to Unicode for use
977 // in the switch statement
979 // See https://doc.qt.io/qt-5/qchar.html#unicode
980 switch (curChar.unicode()) {
983 switch (consecutiveSlashes) {
985 // "?" -> "\?" -> "."
986 // Convert from regex escaped "?" to regular expression
990 // "\?" -> "\\\?" -> "\?"
991 // Convert from regex escaped "\?" to literal string
992 result.append(R"(\?)");
995 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
996 qWarning() << Q_FUNC_INFO << "Wildcard rule"
997 << expression << "resulted in escaped regular expression string"
998 << regExEscaped << " with unexpected count of consecutive '\\' ("
999 << consecutiveSlashes << "), ignoring" << curChar << "character!";
1002 consecutiveSlashes = 0;
1006 switch (consecutiveSlashes) {
1008 // "*" -> "\*" -> ".*"
1009 // Convert from regex escaped "*" to regular expression
1010 result.append(".*");
1013 // "\*" -> "\\\*" -> "\*"
1014 // Convert from regex escaped "\*" to literal string
1015 result.append(R"(\*)");
1018 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
1019 qWarning() << Q_FUNC_INFO << "Wildcard rule"
1020 << expression << "resulted in escaped regular expression string"
1021 << regExEscaped << " with unexpected count of consecutive '\\' ("
1022 << consecutiveSlashes << "), ignoring" << curChar << "character!";
1025 consecutiveSlashes = 0;
1029 // Increase consecutive slash count
1030 consecutiveSlashes++;
1031 // Check if we've hit an escape sequence
1032 if (consecutiveSlashes == 4) {
1033 // "\\" -> "\\\\" -> "\\"
1034 // Convert from regex escaped "\\" to literal string
1035 result.append(R"(\\)");
1036 // Reset slash count
1037 consecutiveSlashes = 0;
1041 // Any other character
1042 switch (consecutiveSlashes) {
1045 // "[...]" -> "[...]" -> "[...]"
1047 // "\[...]" -> "\\[...]" -> "[...]"
1048 // Either just print the character itself, or convert from regex-escaped invalid
1049 // wildcard escape sequence to the character itself
1051 // Both mean doing nothing, the actual character [...] gets appended below
1054 // "[...]" -> "\[...]" -> "\"
1055 // Keep regex-escaped special character "[...]" as literal string
1056 // (Where "[...]" represents any non-wildcard regex special character)
1057 result.append(R"(\)");
1058 // The actual character [...] gets appended below
1061 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
1062 qWarning() << Q_FUNC_INFO << "Wildcard rule"
1063 << expression << "resulted in escaped regular expression string"
1064 << regExEscaped << " with unexpected count of consecutive '\\' ("
1065 << consecutiveSlashes << "), ignoring" << curChar << "char escape!";
1068 consecutiveSlashes = 0;
1069 // Add the character itself
1070 result.append(curChar);
1075 // Anchoring to simulate QRegExp::exactMatch() is handled by the callers,
1076 // ExpressionMatch::cacheRegEx() and ExpressionMatch::generateFromMultiWildcard()