1 /***************************************************************************
2 * Copyright (C) 2005-2019 by the Quassel Project *
3 * devel@quassel-irc.org *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) version 3. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
21 #include "expressionmatch.h"
26 #include <QStringList>
// Builds a matcher for the given expression.
// Keeps the source expression and the case-sensitivity flag on the instance so they can be
// referenced later; the internal regular expression is meant to be prepared eagerly (see below).
28 ExpressionMatch::ExpressionMatch(const QString& expression, MatchMode mode, bool caseSensitive)
30 // Store the original parameters for later reference
31 _sourceExpression = expression;
33 _sourceCaseSensitive = caseSensitive;
35 // Calculate the internal regex
37 // Do this now instead of on-demand to provide immediate feedback on errors when editing
38 // highlight and ignore rules.
// Tests whether "string" satisfies this expression.
// Evaluation order, as far as the checks below show: an empty source expression is decided by
// "matchEmpty"; otherwise the inverted regex (when active and valid) is consulted before the
// normal regex (when active and valid).
42 bool ExpressionMatch::match(const QString& string, bool matchEmpty) const
44 // Handle empty expression strings
45 if (_sourceExpressionEmpty) {
46 // Match found if matching empty is allowed, otherwise no match found
51 // Can't match on an invalid rule
55 // We have "_matchRegEx", "_matchInvertRegEx", or both due to isValid() check above
57 // If specified, first check inverted rules
58 if (_matchInvertRegExActive && _matchInvertRegEx.isValid()) {
59 // Check inverted match rule
60 if (_matchInvertRegEx.match(string).hasMatch()) {
61 // Inverted rule matched, the rest of the rule cannot match
66 if (_matchRegExActive && _matchRegEx.isValid()) {
67 // Check regular match rule
68 return _matchRegEx.match(string).hasMatch();
71 // If no valid regular rules exist, due to the isValid() check there must be valid inverted
72 // rules that did not match. Count this as properly matching (implicit wildcard).
// Normalizes the whitespace of a multi-wildcard rule.
// The rule is walked character by character; each component is trimmed and non-empty components
// are re-appended to the result followed by "; " (for ";" splits) or a newline (for newline
// splits). Backslash escapes are carried through unchanged so later wildcard conversion can
// still interpret them. The trimmed result string is returned.
77 QString ExpressionMatch::trimMultiWildcardWhitespace(const QString& originalRule)
79 // This gets handled in two steps:
81 // 1. Break apart ";"-separated list into components
82 // 2. Combine whitespace-trimmed components into wildcard expression
84 // Let's start by making the list...
86 // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping
89 // Escaped list rules (where "[\n]" represents newline):
95 // \\; | Split (keep as "\\")
96 // \\\ | Keep as "\\" + "\", set consecutive slashes to 1
98 // \[\n] | Split (keep as "\")
99 // \\[\n] | Split (keep as "\\")
100 // ... | Keep as "..."
101 // \... | Keep as "\..."
102 // \\... | Keep as "\\..."
104 // Strings are forced to end with "\n", always applying "\..." and "\\..." rules
105 // "..." also includes another "\" character
107 // All whitespace is trimmed from each component
109 // "\\" and "\" are not downconverted to allow for other escape codes to be detected in
110 // ExpressionMatch::wildcardToRegEx
115 // norm; norm-space ; newline-space [\n] ;escape \; sep ; slash-end-split\\; quad\\\\norm;
116 // newline-split-slash\\[\n] slash-at-end\\ [line does not continue]
123 // slash-end-split\\ [line does not continue]
125 // newline-split-slash\\ [line does not continue]
126 // slash-at-end\\ [line does not continue]
128 // > Trimmed wildcard rule
129 // norm; norm-space; newline-space[\n]escape \; sep; slash-end-split\\; quad\\\\norm;
130 // newline-split-slash\\[\n]slash-at-end\\ [line does not continue]
132 // (Newlines are encoded as "[\n]". Ignore linebreaks for the sake of comment wrapping.)
134 // Note: R"(\\)" results in the literal of "\\", two backslash characters. Anything inside the
135 // brackets is treated as a literal. Outside the brackets but inside the quotes is still
138 // See https://en.cppreference.com/w/cpp/language/string_literal
142 QString rule(originalRule);
144 // Force a termination at the end of the string to trigger a split
145 // Don't check for ";" splits as they may be escaped
146 if (!rule.endsWith("\n")) {
155 QString curString = {};
157 int sourceLength = rule.length();
158 // Consecutive "\" characters
159 int consecutiveSlashes = 0;
161 // We know it's going to be the same length or smaller, so reserve the same size as the string
// NOTE(review): components are re-joined with "; " (two characters) below, so a tightly packed
// rule such as "a;b;c" can produce a result longer than the source. reserve() is only a capacity
// hint, so this affects allocation behavior, not correctness.
162 result.reserve(sourceLength);
164 // For every character...
165 for (int i = 0; i < sourceLength; i++) {
167 curChar = rule.at(i);
168 // Check if it's on the list of special list characters, converting to Unicode for use
169 // in the switch statement
171 // See https://doc.qt.io/qt-5/qchar.html#unicode
172 switch (curChar.unicode()) {
175 switch (consecutiveSlashes) {
180 // "\\;" -> Split (keep as "\\")
181 // Not escaped separator, split into a new item
183 // Apply the additional "\\" if needed
184 if (consecutiveSlashes == 2) {
185 // "\\;" -> Split (keep as "\\")
186 curString.append(R"(\\)");
189 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
190 curString = curString.trimmed();
193 if (!curString.isEmpty()) {
194 // Add to list with the same separator used
195 result.append(curString + "; ");
197 // Reset the current list item
201 // "\;" -> Keep as "\;"
202 curString.append(R"(\;)");
205 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
206 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
207 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring" << curChar
211 consecutiveSlashes = 0;
215 // Increase consecutive slash count
216 consecutiveSlashes++;
217 // Check if we've reached "\\\"...
218 if (consecutiveSlashes == 3) {
219 // "\\\" -> Keep as "\\" + "\"
220 curString.append(R"(\\)");
221 // Set consecutive slashes to 1, recognizing the trailing "\"
222 consecutiveSlashes = 1;
224 else if (consecutiveSlashes > 3) {
225 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
226 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
227 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring" << curChar
234 // Preserve the characters as they are now
237 // "\[\n]" -> Split (keep as "\")
238 // "\\[\n]" -> Split (keep as "\\")
240 switch (consecutiveSlashes) {
246 // Apply the additional "\" or "\\"
247 curString.append(QString(R"(\)").repeated(consecutiveSlashes));
250 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
251 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
252 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), applying newline split anyways!";
256 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
257 curString = curString.trimmed();
260 if (!curString.isEmpty()) {
261 // Add to list with the same separator used
262 result.append(curString + "\n");
264 // Reset the current list item
266 consecutiveSlashes = 0;
269 // Preserve the characters as they are now
270 switch (consecutiveSlashes) {
272 // "..." -> Keep as "..."
273 curString.append(curChar);
277 // "\..." -> Keep as "\..."
278 // "\\..." -> Keep as "\\..."
279 curString.append(QString("\\").repeated(consecutiveSlashes) + curChar);
282 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
283 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
284 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring " << curChar
288 consecutiveSlashes = 0;
293 // Remove any trailing separators
294 if (result.endsWith("; ")) {
298 // Remove any trailing whitespace
299 return result.trimmed();
// Rebuilds the cached regular expression(s) from the stored source expression, match mode and
// case sensitivity.
// Both "active" flags are cleared first. An empty source expression needs no regex; otherwise
// the mode decides which of _matchRegEx / _matchInvertRegEx gets built and marked active. A
// leading "!" selects the inverted regex in the wildcard and regex modes, and a leading "\!"
// is treated as an escaped literal "!". If the outcome is not valid, an info-level message is
// logged.
302 void ExpressionMatch::cacheRegEx()
304 _matchRegExActive = false;
305 _matchInvertRegExActive = false;
307 _sourceExpressionEmpty = _sourceExpression.isEmpty();
308 if (_sourceExpressionEmpty) {
309 // No need to calculate anything for empty strings
313 // Convert the given expression to a regular expression based on the mode
314 switch (_sourceMode) {
315 case MatchMode::MatchPhrase:
316 // Match entire phrase, noninverted
317 // Don't trim whitespace for phrase matching as someone might want to match on " word ", a
318 // more-specific request than "word".
319 _matchRegEx = regExFactory("(?:^|\\W)" + regExEscape(_sourceExpression) + "(?:\\W|$)", _sourceCaseSensitive);
320 _matchRegExActive = true;
322 case MatchMode::MatchMultiPhrase:
323 // Match multiple entire phrases, noninverted
324 // Convert from multiple-phrase rules
325 _matchRegEx = regExFactory(convertFromMultiPhrase(_sourceExpression), _sourceCaseSensitive);
326 _matchRegExActive = true;
328 case MatchMode::MatchWildcard:
329 // Match as wildcard expression
330 // Convert from wildcard rules for a single wildcard
331 if (_sourceExpression.startsWith("!")) {
332 // Inverted rule: take the remainder of the string
333 // "^" + invertComponents.at(0) + "$"
334 _matchInvertRegEx = regExFactory("^" + wildcardToRegEx(_sourceExpression.mid(1)) + "$", _sourceCaseSensitive);
335 _matchInvertRegExActive = true;
338 // Normal rule: take the whole string
339 // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past
340 // escaped "\" (i.e. "\\!")
341 _matchRegEx = regExFactory("^" + wildcardToRegEx(_sourceExpression.startsWith("\\!") ? _sourceExpression.mid(1) : _sourceExpression)
343 _sourceCaseSensitive);
344 _matchRegExActive = true;
347 case MatchMode::MatchMultiWildcard:
348 // Match as multiple wildcard expressions
349 // Convert from wildcard rules for multiple wildcards
350 // (The generator function handles setting matchRegEx/matchInvertRegEx)
351 generateFromMultiWildcard(_sourceExpression, _sourceCaseSensitive);
353 case MatchMode::MatchRegEx:
354 // Match as regular expression
355 if (_sourceExpression.startsWith("!")) {
356 // Inverted rule: take the remainder of the string
357 _matchInvertRegEx = regExFactory(_sourceExpression.mid(1), _sourceCaseSensitive);
358 _matchInvertRegExActive = true;
361 // Normal rule: take the whole string
362 // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past
363 // escaped "\" (i.e. "\\!")
364 _matchRegEx = regExFactory(_sourceExpression.startsWith("\\!") ? _sourceExpression.mid(1) : _sourceExpression,
365 _sourceCaseSensitive);
366 _matchRegExActive = true;
370 // This should never happen if you keep the above consistent
371 qWarning() << Q_FUNC_INFO << "Unknown MatchMode" << (int)_sourceMode << "!";
375 if (!_sourceExpressionEmpty && !isValid()) {
376 // This can happen with invalid regex, so make it a bit more user-friendly. Set it to Info
377 // level as ideally someone's not just going to leave a broken match rule around. For
378 // MatchRegEx, they probably need to fix their regex rule. For the other modes, there's
379 // probably a bug in the parsing routines (which should also be fixed).
380 qInfo() << "Could not parse expression match rule" << _sourceExpression << "(match mode:" << (int)_sourceMode
381 << "), this rule will be ignored";
// Creates a QRegularExpression from "regExString".
// Matching is case-insensitive unless "caseSensitive" is true. An invalid pattern is reported
// with a debug-level message. Optimization of the pattern is intentionally left to Qt (see the
// discussion below).
385 QRegularExpression ExpressionMatch::regExFactory(const QString& regExString, bool caseSensitive)
387 // Construct the regular expression object, setting case sensitivity as appropriate
388 QRegularExpression newRegEx = QRegularExpression(regExString,
389 caseSensitive ? QRegularExpression::PatternOption::NoPatternOption
390 : QRegularExpression::PatternOption::CaseInsensitiveOption);
392 // Check if rule is valid
393 if (!newRegEx.isValid()) {
394 // This can happen with invalid regex, so make it a bit more user-friendly. Keep this
395 // distinct from the main info-level message for easier debugging in case a regex component
396 // in Wildcard or Phrase mode breaks.
397 qDebug() << "Internal regular expression component" << regExString << "is invalid and will be ignored";
399 // Qt offers explicit control over when QRegularExpression objects get optimized.
400 // By default, patterns are only optimized after some number of uses as defined
401 // within Qt internals.
403 // In the context of ExpressionMatch, some regular expressions might go unused, e.g. a highlight
404 // rule might never match a channel pattern, resulting in the contents pattern being untouched.
405 // It should be safe to let Qt handle optimization, taking a non-deterministic, one-off
406 // performance penalty on optimization for the sake of saving memory usage on patterns that
409 // If profiling shows expressions are generally used and/or the automatic optimization
410 // incurs too high of a penalty (unlikely given we've created regular expression
411 // objects willy-nilly before now), this can be revisited to explicitly call...
414 // // Optimize regex now
415 // newRegEx.optimize();
418 // NOTE: This should only be called if the expression is valid! Apply within an "else" of the
419 // inverted isValid() check above.
421 // See https://doc.qt.io/qt-5/qregularexpression.html#optimize
// Returns "phrase" with all regular expression special characters escaped.
// Thin wrapper around QRegularExpression::escape().
426 QString ExpressionMatch::regExEscape(const QString& phrase)
428 // Escape the given phrase of any special regular expression characters
429 return QRegularExpression::escape(phrase);
// Converts a newline-separated list of phrases into a single regular expression pattern string.
// Each non-empty line is regex-escaped as-is (no trimming). The escaped phrases are wrapped
// between "(?:^|\W)" and "(?:\W|$)"; when there is more than one phrase they are joined with "|"
// inside a non-capturing group.
432 QString ExpressionMatch::convertFromMultiPhrase(const QString& originalRule)
434 // Convert the multi-phrase rule into regular expression format
435 // Split apart the original rule into components
436 // Use QStringList instead of std::vector<QString> to make use of Qt's built-in .join() method
437 QStringList components = {};
439 for (auto&& component : originalRule.split("\n", QString::SkipEmptyParts)) {
440 // Don't trim whitespace to maintain consistency with single phrase matching
441 // As trimming is not performed, empty components will already be skipped. This means " "
442 // is considered a valid matching phrase.
444 // Take the whole string, escaping any regex
445 components.append(regExEscape(component));
448 // Create full regular expression by...
449 // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)"
450 // > Flattening the phrases using the regex OR character "...|..."
452 // Before: [foo, bar, baz]
453 // After: (?:^|\W)(?:foo|bar|baz)(?:\W|$)
455 if (components.count() == 1) {
456 // Single item, skip the noncapturing group
457 return "(?:^|\\W)" + components.at(0) + "(?:\\W|$)";
460 return "(?:^|\\W)(?:" + components.join("|") + ")(?:\\W|$)";
// Parses a multi-wildcard rule and stores the resulting regular expressions on the instance.
// The rule is walked character by character, split on unescaped ";" and on newlines. Each
// trimmed, non-empty component is converted with wildcardToRegEx() and collected as either a
// normal or an inverted component. After de-duplication, each non-empty list is combined into
// one anchored regex ("^...$", alternatives joined with "|") and assigned to _matchRegEx or
// _matchInvertRegEx, with the matching "active" flag set.
464 void ExpressionMatch::generateFromMultiWildcard(const QString& originalRule, bool caseSensitive)
466 // Convert the wildcard rule into regular expression format
467 // First, reset the existing match expressions
469 _matchInvertRegEx = {};
470 _matchRegExActive = false;
471 _matchInvertRegExActive = false;
473 // This gets handled in three steps:
475 // 1. Break apart ";"-separated list into components
476 // 2. Convert components from wildcard format into regular expression format
477 // 3. Combine normal/invert components into normal/invert regular expressions
479 // Let's start by making the list...
481 // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping
482 // escaped characters
484 // Escaped list rules (where "[\n]" represents newline):
489 // \; | Replace with ";"
490 // \\; | Split (keep as "\\")
491 // ! | At start: mark as inverted
492 // \! | At start: replace with "!"
493 // \\! | At start: keep as "\\!" (replaced with "\!" in wildcard conversion)
494 // ! | Elsewhere: keep as "!"
495 // \! | Elsewhere: keep as "\!"
496 // \\! | Elsewhere: keep as "\\!"
497 // \\\ | Keep as "\\" + "\", set consecutive slashes to 1
499 // \[\n] | Split (keep as "\")
500 // \\[\n] | Split (keep as "\\")
501 // ... | Keep as "..."
502 // \... | Keep as "\..."
503 // \\... | Keep as "\\..."
505 // Strings are forced to end with "\n", always applying "\..." and "\\..." rules
506 // "..." also includes another "\" character
508 // All whitespace is trimmed from each component
510 // "\\" and "\" are not downconverted to allow for other escape codes to be detected in
511 // ExpressionMatch::wildcardToRegEx
516 // norm;!invert; norm-space ; !invert-space ;;!;\!norm-escaped;\\!slash-invert;\\\\double;
517 // escape\;sep;slash-end-split\\;quad\\\\!noninvert;newline-split[\n]newline-split-slash\\[\n]
518 // slash-at-end\\ [line does not continue]
520 // (Newlines are encoded as "[\n]". Ignore linebreaks for the sake of comment wrapping.)
523 // > Normal components without wildcard conversion
530 // slash-end-split\\ [line does not continue]
531 // quad\\\\!noninvert
533 // newline-split-slash\\ [line does not continue]
534 // slash-at-end\\ [line does not continue]
536 // > Inverted components without wildcard conversion
541 // > Normal components with wildcard conversion
548 // slash\-end\-split\\ [line does not continue]
549 // quad\\\\\!noninvert
551 // newline\-split\-slash\\ [line does not continue]
552 // slash\-at\-end\\ [line does not continue]
554 // > Inverted components with wildcard conversion
559 // > Normal wildcard-converted regex
560 // ^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double|escape\;sep|
561 // slash\-end\-split\\|quad\\\\\!noninvert|newline\-split|newline\-split\-slash\\|
562 // slash\-at\-end\\)$
564 // > Inverted wildcard-converted regex
565 // ^(?:invert|invert\-space)$
567 // Note: R"(\\)" results in the literal of "\\", two backslash characters. Anything inside the
568 // brackets is treated as a literal. Outside the brackets but inside the quotes is still
571 // See https://en.cppreference.com/w/cpp/language/string_literal
575 QString rule(originalRule);
577 // Force a termination at the end of the string to trigger a split
578 // Don't check for ";" splits as they may be escaped
579 if (!rule.endsWith("\n")) {
583 // Result, sorted into normal and inverted rules
584 // Use QStringList instead of std::vector<QString> to make use of Qt's built-in .join() method
585 QStringList normalComponents = {}, invertComponents = {};
590 QString curString = {};
592 int sourceLength = rule.length();
593 // Consecutive "\" characters
594 int consecutiveSlashes = 0;
595 // Whether or not this marks an inverted rule
596 bool isInverted = false;
597 // Whether or not we're at the beginning of the rule (for detecting "!" and "\!")
598 bool isRuleStart = true;
600 // We know it's going to have ";"-count items or less, so reserve ";"-count items for both.
601 // Without parsing it's not easily possible to tell which are escaped or not, and among the
602 // non-escaped entries, which are inverted or not. These get destroyed once out of scope of
603 // this function, so balancing towards performance over memory usage should be okay, hopefully.
604 int separatorCount = rule.count(";");
605 normalComponents.reserve(separatorCount);
606 invertComponents.reserve(separatorCount);
608 // For every character...
609 for (int i = 0; i < sourceLength; i++) {
611 curChar = rule.at(i);
612 // Check if it's on the list of special list characters, converting to Unicode for use
613 // in the switch statement
615 // See https://doc.qt.io/qt-5/qchar.html#unicode
616 switch (curChar.unicode()) {
619 switch (consecutiveSlashes) {
624 // "\\;" -> Split (keep as "\\")
625 // Not escaped separator, split into a new item
627 // Apply the additional "\\" if needed
628 if (consecutiveSlashes == 2) {
629 // "\\;" -> Split (keep as "\\")
630 curString.append(R"(\\)");
633 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
634 curString = curString.trimmed();
637 if (!curString.isEmpty()) {
638 // Add to inverted/normal list
640 invertComponents.append(wildcardToRegEx(curString));
643 normalComponents.append(wildcardToRegEx(curString));
646 // Reset the current list item
652 // "\;" -> Replace with ";"
653 curString.append(";");
657 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
658 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
659 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring" << curChar
664 consecutiveSlashes = 0;
667 // Rule inverter found
669 // Apply the inverting logic
670 switch (consecutiveSlashes) {
672 // "!" -> At start: mark as inverted
674 // Don't include the "!" character
677 // "\!" -> At start: replace with "!"
678 curString.append("!");
681 // "\\!" -> At start: keep as "\\!" (replaced with "\!" in wildcard conversion)
682 curString.append(R"(\\!)");
685 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
686 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
687 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring" << curChar
693 // Preserve the characters as they are now
694 switch (consecutiveSlashes) {
696 // "!" -> Elsewhere: keep as "!"
697 curString.append("!");
701 // "\!" -> Elsewhere: keep as "\!"
702 // "\\!" -> Elsewhere: keep as "\\!"
703 curString.append(QString(R"(\)").repeated(consecutiveSlashes) + "!");
706 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
707 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
708 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring" << curChar
714 consecutiveSlashes = 0;
718 // Increase consecutive slash count
719 consecutiveSlashes++;
720 // Check if we've reached "\\\"...
721 if (consecutiveSlashes == 3) {
722 // "\\\" -> Keep as "\\" + "\"
723 curString.append(R"(\\)");
724 // No longer at the rule start
726 // Set consecutive slashes to 1, recognizing the trailing "\"
727 consecutiveSlashes = 1;
729 else if (consecutiveSlashes > 3) {
730 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
731 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
732 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring" << curChar
736 // Don't set "isRuleStart" here as "\" is used in escape sequences
740 // Preserve the characters as they are now
743 // "\[\n]" -> Split (keep as "\")
744 // "\\[\n]" -> Split (keep as "\\")
746 switch (consecutiveSlashes) {
752 // Apply the additional "\" or "\\"
753 curString.append(QString(R"(\)").repeated(consecutiveSlashes));
756 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
757 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
758 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), applying newline split anyways!";
762 // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
763 curString = curString.trimmed();
766 if (!curString.isEmpty()) {
767 // Add to inverted/normal list
769 invertComponents.append(wildcardToRegEx(curString));
772 normalComponents.append(wildcardToRegEx(curString));
775 // Reset the current list item
779 consecutiveSlashes = 0;
782 // Preserve the characters as they are now
783 switch (consecutiveSlashes) {
785 // "..." -> Keep as "..."
786 curString.append(curChar);
790 // "\..." -> Keep as "\..."
791 // "\\..." -> Keep as "\\..."
792 curString.append(QString("\\").repeated(consecutiveSlashes) + curChar);
795 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
796 qWarning() << Q_FUNC_INFO << "Wildcard rule" << rule << "resulted in rule component" << curString
797 << "with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring " << curChar
801 // Don't mark as past rule start for whitespace (whitespace gets trimmed)
802 if (!curChar.isSpace()) {
805 consecutiveSlashes = 0;
810 // Clean up any duplicates
811 normalComponents.removeDuplicates();
812 invertComponents.removeDuplicates();
814 // Create full regular expressions by...
815 // > Anchoring to start and end of string to mimic QRegExp's .exactMatch() handling, "^...$"
816 // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)"
817 // > Flattening normal and inverted rules using the regex OR character "...|..."
819 // Before: [foo, bar, baz]
820 // After: ^(?:foo|bar|baz)$
822 // See https://doc.qt.io/qt-5/qregularexpression.html#porting-from-qregexp-exactmatch
823 // And https://regex101.com/
825 // Any empty/invalid regex are handled within ExpressionMatch::match()
826 if (!normalComponents.isEmpty()) {
827 // Create normal match regex
828 if (normalComponents.count() == 1) {
829 // Single item, skip the noncapturing group
830 _matchRegEx = regExFactory("^" + normalComponents.at(0) + "$", caseSensitive);
833 _matchRegEx = regExFactory("^(?:" + normalComponents.join("|") + ")$", caseSensitive);
835 _matchRegExActive = true;
837 if (!invertComponents.isEmpty()) {
838 // Create invert match regex
839 if (invertComponents.count() == 1) {
840 // Single item, skip the noncapturing group
841 _matchInvertRegEx = regExFactory("^" + invertComponents.at(0) + "$", caseSensitive);
844 _matchInvertRegEx = regExFactory("^(?:" + invertComponents.join("|") + ")$", caseSensitive);
846 _matchInvertRegExActive = true;
// Converts a single wildcard expression into regular expression syntax.
// The expression is first regex-escaped via regExEscape(), then the escaped text is scanned
// character by character while counting consecutive backslashes. That count tells an unescaped
// wildcard ("*" -> ".*", "?" -> ".") apart from an escaped one ("\*", "\?", kept as a literal),
// and "\\" is kept as a literal backslash. The tables below list the exact mapping.
850 QString ExpressionMatch::wildcardToRegEx(const QString& expression)
852 // Convert the wildcard expression into regular expression format
854 // We're taking a little bit different of a route...
856 // Original QRegExp::Wildcard rules:
857 // --------------------------
858 // Wildcard | Regex | Outcome
859 // ---------|-------|--------
860 // * | .* | zero or more of any character
861 // ? | . | any single character
863 // NOTE 1: This is QRegExp::Wildcard, not QRegExp::WildcardUnix
865 // NOTE 2: We are ignoring the "[...]" character-class matching functionality of
866 // QRegExp::Wildcard as that feature's a bit more complex and can be handled with full-featured
869 // See https://doc.qt.io/qt-5/qregexp.html#wildcard-matching
871 // Quassel originally did not use QRegExp::WildcardUnix, which prevented escaping "*" and "?" in
872 // messages. Unfortunately, spam messages might decide to use both, so offering a way to escape
875 // On the flip-side, that means to match "\" requires escaping as "\\", breaking backwards
878 // Quassel's Wildcard rules
879 // ------------------------------------------
880 // Wildcard | Regex escaped | Regex | Outcome
881 // ---------|---------------|-------|--------
882 // * | \* | .* | zero or more of any character
883 // ? | \? | . | any single character
884 // \* | \\\* | \* | literal "*"
885 // \? | \\\? | \? | literal "?"
886 // \[...] | \\[...] | [...] | invalid escape, ignore it
887 // \\ | \\\\ | \\ | literal "\"
889 // In essence, "*" and "?" need to be changed only if not escaped, "\\" collapses into "\", "\"
890 // gets ignored; other characters escape normally.
895 // never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval
897 // ("\\\\" represents "\\", "\\" represents "\", and "\\\" is valid+invalid, "\")
899 // > Regex escaped wildcard rule
900 // never\?gonna\*give\\\*you\\\?up\\\\test\|y\\yeah\\\\1\\\\\\\\2\\\\\\1inval
902 // > Expected correct regex
903 // never.gonna.*give\*you\?up\\test\|yyeah\\1\\\\2\\1inval
905 // > Undoing regex escaping of "\" as "\\" (i.e. simple replace, with special escapes intact)
906 // never.gonna.*give\*you\?up\test\|yyeah\1\\2\1inval
908 // Escape string according to regex
909 QString regExEscaped(regExEscape(expression));
913 // NOTE: In theory, regular expression lookbehind could solve this. Unfortunately, QRegExp does
914 // not support lookbehind, and it's theoretically inefficient, anyways. Just use an approach
915 // similar to that taken by QRegExp's official wildcard mode.
917 // Lookbehind example (that we can't use):
918 // (?<!abc)test Negative lookbehind - don't match if "test" is preceded by "abc"
920 // See https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/tools/qregexp.cpp
922 // NOTE: We don't copy QRegExp's mode as QRegularExpression has more special characters. We
923 // can't use the same escaping code, hence calling the appropriate QReg[...]::escape() above.
932 int sourceLength = regExEscaped.length();
933 // Consecutive "\" characters
934 int consecutiveSlashes = 0;
936 // We know it's going to be the same length or smaller, so reserve the same size as the string
937 result.reserve(sourceLength);
939 // For every character...
940 for (int i = 0; i < sourceLength; i++) {
942 curChar = regExEscaped.at(i);
943 // Check if it's on the list of special wildcard characters, converting to Unicode for use
944 // in the switch statement
946 // See https://doc.qt.io/qt-5/qchar.html#unicode
947 switch (curChar.unicode()) {
950 switch (consecutiveSlashes) {
952 // "?" -> "\?" -> "."
953 // Convert from regex escaped "?" to regular expression
957 // "\?" -> "\\\?" -> "\?"
958 // Convert from regex escaped "\?" to literal string
959 result.append(R"(\?)");
962 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
963 qWarning() << Q_FUNC_INFO << "Wildcard rule" << expression << "resulted in escaped regular expression string"
964 << regExEscaped << " with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring"
965 << curChar << "character!";
968 consecutiveSlashes = 0;
972 switch (consecutiveSlashes) {
974 // "*" -> "\*" -> ".*"
975 // Convert from regex escaped "*" to regular expression
979 // "\*" -> "\\\*" -> "\*"
980 // Convert from regex escaped "\*" to literal string
981 result.append(R"(\*)");
984 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
985 qWarning() << Q_FUNC_INFO << "Wildcard rule" << expression << "resulted in escaped regular expression string"
986 << regExEscaped << " with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring"
987 << curChar << "character!";
990 consecutiveSlashes = 0;
994 // Increase consecutive slash count
995 consecutiveSlashes++;
996 // Check if we've hit an escape sequence
997 if (consecutiveSlashes == 4) {
998 // "\\" -> "\\\\" -> "\\"
999 // Convert from regex escaped "\\" to literal string
1000 result.append(R"(\\)");
1001 // Reset slash count
1002 consecutiveSlashes = 0;
1006 // Any other character
1007 switch (consecutiveSlashes) {
1010 // "[...]" -> "[...]" -> "[...]"
1012 // "\[...]" -> "\\[...]" -> "[...]"
1013 // Either just print the character itself, or convert from regex-escaped invalid
1014 // wildcard escape sequence to the character itself
1016 // Both mean doing nothing, the actual character [...] gets appended below
1019 // "[...]" -> "\[...]" -> "\"
1020 // Keep regex-escaped special character "[...]" as literal string
1021 // (Where "[...]" represents any non-wildcard regex special character)
1022 result.append(R"(\)");
1023 // The actual character [...] gets appended below
1026 // This shouldn't ever happen (even with invalid wildcard rules), log a warning
1027 qWarning() << Q_FUNC_INFO << "Wildcard rule" << expression << "resulted in escaped regular expression string"
1028 << regExEscaped << " with unexpected count of consecutive '\\' (" << consecutiveSlashes << "), ignoring"
1029 << curChar << "char escape!";
1032 consecutiveSlashes = 0;
1033 // Add the character itself
1034 result.append(curChar);
1039 // Anchoring to simulate QRegExp::exactMatch() is handled in
1040 // ExpressionMatch::convertFromWildcard()