common: Add auto-caching ExpressionMatch class

author Shane Synan <digitalcircuit36939@gmail.com>

Sun, 2 Sep 2018 16:52:12 +0000 (11:52 -0500)

committer Manuel Nickschas <sputnick@quassel-irc.org>

Mon, 3 Sep 2018 20:12:02 +0000 (22:12 +0200)
author Shane Synan <digitalcircuit36939@gmail.com>
Sun, 2 Sep 2018 16:52:12 +0000 (11:52 -0500)
committer Manuel Nickschas <sputnick@quassel-irc.org>
Mon, 3 Sep 2018 20:12:02 +0000 (22:12 +0200)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt

index caca158..7e078d9 100644 (file)
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SOURCES
      dccconfig.cpp
      event.cpp
      eventmanager.cpp
+    expressionmatch.cpp
      highlightrulemanager.cpp
      identity.cpp
      ignorelistmanager.cpp
diff --git a/src/common/expressionmatch.cpp b/src/common/expressionmatch.cpp

new file mode 100644 (file)

index 0000000..bbcd1da
--- /dev/null
+++ b/src/common/expressionmatch.cpp
@@ -0,0 +1,1118 @@
+/***************************************************************************
+ *   Copyright (C) 2005-2018 by the Quassel Project                        *
+ *   devel@quassel-irc.org                                                 *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) version 3.                                           *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.         *
+ ***************************************************************************/
+
+#include "expressionmatch.h"
+
+#include <QChar>
+#include <QDebug>
+#include <QString>
+#include <QStringList>
+
+#if QT_VERSION >= 0x050000
+#include <QRegularExpression>
+#else
+#include <QRegExp>
+#endif
+
+#include "logmessage.h"
+
+ExpressionMatch::ExpressionMatch(const QString &expression, MatchMode mode, bool caseSensitive)
+{
+    // Store the original parameters for later reference
+    _sourceExpression = expression;
+    _sourceMode = mode;
+    _sourceCaseSensitive = caseSensitive;
+
+    // Calculate the internal regex
+    //
+    // Do this now instead of on-demand to provide immediate feedback on errors when editing
+    // highlight and ignore rules.
+    cacheRegEx();
+}
+
+
+bool ExpressionMatch::match(const QString &string, bool matchEmpty) const
+{
+    // Handle empty expression strings
+    if (_sourceExpressionEmpty) {
+        // Match found if matching empty is allowed, otherwise no match found
+        return matchEmpty;
+    }
+
+    if (!isValid()) {
+        // Can't match on an invalid rule
+        return false;
+    }
+
+    // We have "_matchRegEx", "_matchInvertRegEx", or both due to isValid() check above
+
+    // If specified, first check inverted rules
+    if (_matchInvertRegExActive && _matchInvertRegEx.isValid()) {
+        // Check inverted match rule
+
+        // See _matchRegEx section below for explanations of QRegExp vs. QRegularExpression
+        if (
+#if QT_VERSION >= 0x050000
+                _matchInvertRegEx.match(string).hasMatch()
+#else
+                _matchInvertRegEx.indexIn(string) != -1
+#endif
+                ) {
+            // Inverted rule matched, the rest of the rule cannot match
+            return false;
+        }
+    }
+
+    if (_matchRegExActive && _matchRegEx.isValid()) {
+        // Check regular match rule
+    #if QT_VERSION >= 0x050000
+        // QRegularExpression does partial matching by default (exact matching requires anchoring
+        // expressions to be added)
+        // See https://doc.qt.io/qt-5/qregularexpression.html#porting-from-qregexp-exactmatch
+        return _matchRegEx.match(string).hasMatch();
+    #else
+        // QRegExp partial matching is done via indexIn
+        // See https://doc.qt.io/qt-5/qregexp.html#indexIn
+        return (_matchRegEx.indexIn(string) != -1);
+    #endif
+    } else {
+        // If no valid regular rules exist, due to the isValid() check there must be valid inverted
+        // rules that did not match.  Count this as properly matching (implicit wildcard).
+        return true;
+    }
+}
+
+
+QString ExpressionMatch::trimMultiWildcardWhitespace(const QString &originalRule)
+{
+    // This gets handled in two steps:
+    //
+    // 1.  Break apart ";"-separated list into components
+    // 2.  Combine whitespace-trimmed components into wildcard expression
+    //
+    // Let's start by making the list...
+
+    // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping
+    // escaped characters
+
+    // Escaped list rules (where "[\n]" represents newline):
+    // ---------------
+    // Token  | Outcome
+    // -------|--------
+    // ;      | Split
+    // \;     | Keep as "\;"
+    // \\;    | Split (keep as "\\")
+    // \\\    | Keep as "\\" + "\", set consecutive slashes to 1
+    // [\n]   | Split
+    // \[\n]  | Split (keep as "\")
+    // \\[\n] | Split (keep as "\\")
+    // ...    | Keep as "..."
+    // \...   | Keep as "\..."
+    // \\...  | Keep as "\\..."
+    //
+    // Strings are forced to end with "\n", always applying "\..." and "\\..." rules
+    // "..." also includes another "\" character
+    //
+    // All whitespace is trimmed from each component
+
+    // "\\" and "\" are not downconverted to allow for other escape codes to be detected in
+    // ExpressionMatch::wildcardToRegex
+
+    // Example:
+    //
+    // > Wildcard rule
+    // norm; norm-space ; newline-space [\n] ;escape \; sep ; slash-end-split\\; quad\\\\norm;
+    // newline-split-slash\\[\n] slash-at-end\\                       [line does not continue]
+    //
+    // > Components
+    //   norm
+    //   norm-space
+    //   newline-space
+    //   escape \; sep
+    //   slash-end-split\\          [line does not continue]
+    //   quad\\\\norm
+    //   newline-split-slash\\      [line does not continue]
+    //   slash-at-end\\             [line does not continue]
+    //
+    // > Trimmed wildcard rule
+    // norm; norm-space; newline-space[\n]escape \; sep; slash-end-split\\; quad\\\\norm;
+    // newline-split-slash\\[\n]slash-at-end\\                        [line does not continue]
+    //
+    // (Newlines are encoded as "[\n]".  Ignore linebreaks for the sake of comment wrapping.)
+
+    // Note: R"(\\)" results in the literal of "\\", two backslash characters.  Anything inside the
+    // brackets is treated as a literal.  Outside the brackets but inside the quotes is still
+    // escaped.
+    //
+    // See https://en.cppreference.com/w/cpp/language/string_literal
+
+    // Prepare to loop!
+
+    QString rule(originalRule);
+
+    // Force a termination at the end of the string to trigger a split
+    // Don't check for ";" splits as they may be escaped
+    if (!rule.endsWith("\n")) {
+        rule.append("\n");
+    }
+
+    // Result
+    QString result = {};
+    // Current character
+    QChar curChar = {};
+    // Current string
+    QString curString = {};
+    // Max length
+    int sourceLength = rule.length();
+    // Consecutive "\" characters
+    int consecutiveSlashes = 0;
+
+    // We know it's going to be the same length or smaller, so reserve the same size as the string
+    result.reserve(sourceLength);
+
+    // For every character...
+    for (int i = 0; i < sourceLength; i++) {
+        // Get the character
+        curChar = rule.at(i);
+        // Check if it's on the list of special list characters, converting to Unicode for use
+        // in the switch statement
+        //
+        // See https://doc.qt.io/qt-5/qchar.html#unicode
+        switch (curChar.unicode()) {
+        case ';':
+            // Separator found
+            switch (consecutiveSlashes) {
+            case 0:
+            case 2:
+                // ";"   -> Split
+                // ...or...
+                // "\\;" -> Split (keep as "\\")
+                // Not escaped separator, split into a new item
+
+                // Apply the additional "\\" if needed
+                if (consecutiveSlashes == 2) {
+                    // "\\;" -> Split (keep as "\\")
+                    curString.append(R"(\\)");
+                }
+
+                // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
+                curString = curString.trimmed();
+
+                // Skip empty items
+                if (!curString.isEmpty()) {
+                    // Add to list with the same separator used
+                    result.append(curString + "; ");
+                }
+                // Reset the current list item
+                curString.clear();
+                break;
+            case 1:
+                // "\;" -> Keep as "\;"
+                curString.append(R"(\;)");
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                break;
+            }
+            consecutiveSlashes = 0;
+            break;
+        case '\\':
+            // Split escape
+            // Increase consecutive slash count
+            consecutiveSlashes++;
+            // Check if we've reached "\\\"...
+            if (consecutiveSlashes == 3) {
+                // "\\\" -> Keep as "\\" + "\"
+                curString.append(R"(\\)");
+                // Set consecutive slashes to 1, recognizing the trailing "\"
+                consecutiveSlashes = 1;
+            }
+            else if (consecutiveSlashes > 3) {
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                break;
+            }
+            break;
+        case '\n':
+            // Newline found
+            // Preserve the characters as they are now
+
+            // "[\n]"   -> Split
+            // "\[\n]"  -> Split (keep as "\")
+            // "\\[\n]" -> Split (keep as "\\")
+
+            switch (consecutiveSlashes) {
+            case 0:
+                // Keep string as is
+                break;
+            case 1:
+            case 2:
+                // Apply the additional "\" or "\\"
+                curString.append(QString(R"(\)").repeated(consecutiveSlashes));
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), applying newline split anyways!";
+                break;
+            }
+
+            // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
+            curString = curString.trimmed();
+
+            // Skip empty items
+            if (!curString.isEmpty()) {
+                // Add to list with the same separator used
+                result.append(curString + "\n");
+            }
+            // Reset the current list item
+            curString.clear();
+            consecutiveSlashes = 0;
+            break;
+        default:
+            // Preserve the characters as they are now
+            switch (consecutiveSlashes) {
+            case 0:
+                // "..."   -> Keep as "..."
+                curString.append(curChar);
+                break;
+            case 1:
+            case 2:
+                // "\..."  -> Keep as "\..."
+                // "\\..." -> Keep as "\\..."
+                curString.append(QString("\\").repeated(consecutiveSlashes) + curChar);
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring " << curChar << "char escape!";
+                break;
+            }
+            consecutiveSlashes = 0;
+            break;
+        }
+    }
+
+    // Remove any trailing separators
+    if (result.endsWith("; ")) {
+        result.chop(2);
+    }
+
+    // Remove any trailing whitespace
+    return result.trimmed();
+}
+
+
+void ExpressionMatch::cacheRegEx()
+{
+    _matchRegExActive = false;
+    _matchInvertRegExActive = false;
+
+    _sourceExpressionEmpty = _sourceExpression.isEmpty();
+    if (_sourceExpressionEmpty) {
+        // No need to calculate anything for empty strings
+        return;
+    }
+
+    // Convert the given expression to a regular expression based on the mode
+    switch (_sourceMode) {
+    case MatchMode::MatchPhrase:
+        // Match entire phrase, noninverted
+        // Don't trim whitespace for phrase matching as someone might want to match on " word ", a
+        // more-specific request than "word".
+        _matchRegEx = regExFactory("(?:^|\\W)" + regExEscape(_sourceExpression) + "(?:\\W|$)",
+                                   _sourceCaseSensitive);
+        _matchRegExActive = true;
+        break;
+    case MatchMode::MatchMultiPhrase:
+        // Match multiple entire phrases, noninverted
+        // Convert from multiple-phrase rules
+        _matchRegEx = regExFactory(convertFromMultiPhrase(_sourceExpression), _sourceCaseSensitive);
+        _matchRegExActive = true;
+        break;
+    case MatchMode::MatchWildcard:
+        // Match as wildcard expression
+        // Convert from wildcard rules for a single wildcard
+        if (_sourceExpression.startsWith("!")) {
+            // Inverted rule: take the remainder of the string
+            // "^" + invertComponents.at(0) + "$"
+            _matchInvertRegEx = regExFactory("^" + wildcardToRegEx(_sourceExpression.mid(1)) + "$",
+                                             _sourceCaseSensitive);
+            _matchInvertRegExActive = true;
+        }
+        else {
+            // Normal rule: take the whole string
+            // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past
+            // escaped "\" (i.e. "\\!")
+            _matchRegEx =
+                    regExFactory("^" + wildcardToRegEx(_sourceExpression.startsWith("\\!")
+                                                       ? _sourceExpression.mid(1)
+                                                       : _sourceExpression) + "$",
+                                 _sourceCaseSensitive);
+            _matchRegExActive = true;
+        }
+        break;
+    case MatchMode::MatchMultiWildcard:
+        // Match as multiple wildcard expressions
+        // Convert from wildcard rules for multiple wildcards
+        // (The generator function handles setting matchRegEx/matchInvertRegEx)
+        generateFromMultiWildcard(_sourceExpression, _sourceCaseSensitive);
+        break;
+    case MatchMode::MatchRegEx:
+        // Match as regular expression
+        if (_sourceExpression.startsWith("!")) {
+            // Inverted rule: take the remainder of the string
+            _matchInvertRegEx = regExFactory(_sourceExpression.mid(1), _sourceCaseSensitive);
+            _matchInvertRegExActive = true;
+        }
+        else {
+            // Normal rule: take the whole string
+            // Account for any escaped "!" (i.e. "\!") by skipping past the "\", but don't skip past
+            // escaped "\" (i.e. "\\!")
+            _matchRegEx =
+                    regExFactory(_sourceExpression.startsWith("\\!") ? _sourceExpression.mid(1)
+                                                                     : _sourceExpression,
+                                 _sourceCaseSensitive);
+            _matchRegExActive = true;
+        }
+        break;
+    default:
+        // This should never happen if you keep the above consistent
+        qWarning() << Q_FUNC_INFO << "Unknown MatchMode" << (int)_sourceMode << "!";
+        break;
+    }
+
+    if (!_sourceExpressionEmpty && !isValid()) {
+        // This can happen with invalid regex, so make it a bit more user-friendly.  Set it to Info
+        // level as ideally someone's not just going to leave a broken match rule around.  For
+        // MatchRegEx, they probably need to fix their regex rule.  For the other modes, there's
+        // probably a bug in the parsing routines (which should also be fixed).
+        quInfo() << "Could not parse expression match rule"
+                 << _sourceExpression << "(match mode:" << (int)_sourceMode
+                 << "), this rule will be ignored";
+    }
+}
+
+
+#if QT_VERSION >= 0x050000
+QRegularExpression ExpressionMatch::regExFactory(const QString &regExString,
+                                                 bool caseSensitive)
+#else
+QRegExp ExpressionMatch::regExFactory(const QString &regExString, bool caseSensitive)
+#endif
+{
+    // Construct the regular expression object, setting case sensitivity as appropriate
+#if QT_VERSION >= 0x050000
+    QRegularExpression newRegEx =
+            QRegularExpression(regExString, caseSensitive ?
+                                   QRegularExpression::PatternOption::NoPatternOption
+                                 : QRegularExpression::PatternOption::CaseInsensitiveOption );
+#else
+    QRegExp newRegEx = QRegExp(regExString, caseSensitive ?
+                                   Qt::CaseSensitivity::CaseSensitive
+                                 : Qt::CaseSensitivity::CaseInsensitive);
+#endif
+
+    // Check if rule is valid
+    if (!newRegEx.isValid()) {
+        // This can happen with invalid regex, so make it a bit more user-friendly.  Keep this
+        // distinct from the main info-level message for easier debugging in case a regex component
+        // in Wildcard or Phrase mode breaks.
+        qDebug() << "Internal regular expression component" << regExString
+                 << "is invalid and will be ignored";
+    }
+    // Qt 5.4 (QT_VERSION >= 0x050400) offers explicit control over when QRegularExpression objects
+    // get optimized.  By default, patterns are only optimized after some number of uses as defined
+    // within Qt internals.
+    //
+    // In the context of ExpressionMatch, some regular expressions might go unused, e.g. a highlight
+    // rule might never match a channel pattern, resulting in the contents pattern being untouched.
+    // It should be safe to let Qt handle optimization, taking a non-deterministic, one-off
+    // performance penalty on optimization for the sake of saving memory usage on patterns that
+    // don't get used.
+    //
+    // If profiling shows expressions are generally used and/or the automatic optimization
+    // interferes incurs too high of a penalty (unlikely given we've created regular expression
+    // objects willy-nilly before now), this can be revisited to explicitly call...
+    //
+    // else {
+    //     // Optimize regex now
+    // #if QT_VERSION >= 0x050400
+    //     newRegEx.optimize();
+    // #endif
+    //
+    // }
+    //
+    // NOTE: This should only be called if the expression is valid!  Apply within an "else" of the
+    // inverted isValid() check above.
+    //
+    // See https://doc.qt.io/qt-5/qregularexpression.html#optimize
+
+    return newRegEx;
+}
+
+
+QString ExpressionMatch::regExEscape(const QString &phrase)
+{
+    // Escape the given phrase of any special regular expression characters
+#if QT_VERSION >= 0x050000
+    return QRegularExpression::escape(phrase);
+#else
+    return QRegExp::escape(phrase);
+#endif
+}
+
+
+QString ExpressionMatch::convertFromMultiPhrase(const QString &originalRule)
+{
+    // Convert the multi-phrase rule into regular expression format
+    // Split apart the original rule into components
+    // Use QStringList instead of std::vector<QString> to make use of Qt's built-in .join() method
+    QStringList components = {};
+    // Split on "\n"
+    for (auto &&component : originalRule.split("\n", QString::SkipEmptyParts)) {
+        // Don't trim whitespace to maintain consistency with single phrase matching
+        // As trimming is not performed, empty components will already be skipped.  This means " "
+        // is considered a valid matching phrase.
+
+        // Take the whole string, escaping any regex
+        components.append(regExEscape(component));
+    }
+
+    // Create full regular expression by...
+    // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)"
+    // > Flattening normal and inverted rules using the regex OR character "...|..."
+    //
+    // Before: [foo, bar, baz]
+    // After:  (?:^|\W)(?:foo|bar|baz)(?:\W|$)
+
+    if (components.count() == 1) {
+        // Single item, skip the noncapturing group
+        return "(?:^|\\W)" + components.at(0) + "(?:\\W|$)";
+    }
+    else {
+        return "(?:^|\\W)(?:" + components.join("|") + ")(?:\\W|$)";
+    }
+}
+
+
+void ExpressionMatch::generateFromMultiWildcard(const QString &originalRule, bool caseSensitive)
+{
+    // Convert the wildcard rule into regular expression format
+    // First, reset the existing match expressions
+    _matchRegEx = {};
+    _matchInvertRegEx = {};
+    _matchRegExActive = false;
+    _matchInvertRegExActive = false;
+
+    // This gets handled in three steps:
+    //
+    // 1.  Break apart ";"-separated list into components
+    // 2.  Convert components from wildcard format into regular expression format
+    // 3.  Combine normal/invert components into normal/invert regular expressions
+    //
+    // Let's start by making the list...
+
+    // Convert a ";"-separated list into an actual list, splitting on newlines and unescaping
+    // escaped characters
+
+    // Escaped list rules (where "[\n]" represents newline):
+    // ---------------
+    // Token  | Outcome
+    // -------|--------
+    // ;      | Split
+    // \;     | Replace with ";"
+    // \\;    | Split (keep as "\\")
+    // !      | At start: mark as inverted
+    // \!     | At start: replace with "!"
+    // \\!    | At start: keep as "\\!" (replaced with "\!" in wildcard conversion)
+    // !      | Elsewhere: keep as "!"
+    // \!     | Elsewhere: keep as "\!"
+    // \\!    | Elsewhere: keep as "\\!"
+    // \\\    | Keep as "\\" + "\", set consecutive slashes to 1
+    // [\n]   | Split
+    // \[\n]  | Split (keep as "\")
+    // \\[\n] | Split (keep as "\\")
+    // ...    | Keep as "..."
+    // \...   | Keep as "\..."
+    // \\...  | Keep as "\\..."
+    //
+    // Strings are forced to end with "\n", always applying "\..." and "\\..." rules
+    // "..." also includes another "\" character
+    //
+    // All whitespace is trimmed from each component
+
+    // "\\" and "\" are not downconverted to allow for other escape codes to be detected in
+    // ExpressionMatch::wildcardToRegex
+
+
+    // Example:
+    //
+    // > Wildcard rule
+    // norm;!invert; norm-space ; !invert-space ;;!;\!norm-escaped;\\!slash-invert;\\\\double;
+    // escape\;sep;slash-end-split\\;quad\\\\!noninvert;newline-split[\n]newline-split-slash\\[\n]
+    // slash-at-end\\               [line does not continue]
+    //
+    // (Newlines are encoded as "[\n]".  Ignore linebreaks for the sake of comment wrapping.)
+    //
+    //
+    // > Normal components without wildcard conversion
+    //   norm
+    //   norm-space
+    //   !norm-escaped
+    //   \\!slash-invert
+    //   \\\\double
+    //   escape;sep
+    //   slash-end-split\\          [line does not continue]
+    //   quad\\\\!noninvert
+    //   newline-split
+    //   newline-split-slash\\      [line does not continue]
+    //   slash-at-end\\             [line does not continue]
+    //
+    // > Inverted components without wildcard conversion
+    //   invert
+    //   invert-space
+    //
+    //
+    // > Normal components with wildcard conversion
+    //   norm
+    //   norm\-space
+    //   \!norm\-escaped
+    //   \\\!slash\-invert
+    //   \\\\double
+    //   escape\;sep
+    //   slash\-end\-split\\        [line does not continue]
+    //   quad\\\\\!noninvert
+    //   newline\-split
+    //   newline\-split\-slash\\    [line does not continue]
+    //   slash\-at\-end\\           [line does not continue]
+    //
+    // > Inverted components with wildcard conversion
+    //   invert
+    //   invert\-space
+    //
+    //
+    // > Normal wildcard-converted regex
+    // ^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double|escape\;sep|
+    // slash\-end\-split\\|quad\\\\\!noninvert|newline\-split|newline\-split\-slash\\|
+    // slash\-at\-end\\)$
+    //
+    // > Inverted wildcard-converted regex
+    // ^(?:invert|invert\-space)$
+
+    // Note: R"(\\)" results in the literal of "\\", two backslash characters.  Anything inside the
+    // brackets is treated as a literal.  Outside the brackets but inside the quotes is still
+    // escaped.
+    //
+    // See https://en.cppreference.com/w/cpp/language/string_literal
+
+    // Prepare to loop!
+
+    QString rule(originalRule);
+
+    // Force a termination at the end of the string to trigger a split
+    // Don't check for ";" splits as they may be escaped
+    if (!rule.endsWith("\n")) {
+        rule.append("\n");
+    }
+
+    // Result, sorted into normal and inverted rules
+    // Use QStringList instead of std::vector<QString> to make use of Qt's built-in .join() method
+    QStringList normalComponents = {}, invertComponents = {};
+
+    // Current character
+    QChar curChar = {};
+    // Current string
+    QString curString = {};
+    // Max length
+    int sourceLength = rule.length();
+    // Consecutive "\" characters
+    int consecutiveSlashes = 0;
+    // Whether or not this marks an inverted rule
+    bool isInverted = false;
+    // Whether or not we're at the beginning of the rule (for detecting "!" and "\!")
+    bool isRuleStart = true;
+
+    // We know it's going to have ";"-count items or less, so reserve ";"-count items for both.
+    // Without parsing it's not easily possible to tell which are escaped or not, and among the
+    // non-escaped entries, which are inverted or not.  These get destroyed once out of scope of
+    // this function, so balancing towards performance over memory usage should be okay, hopefully.
+    int separatorCount = rule.count(";");
+    normalComponents.reserve(separatorCount);
+    invertComponents.reserve(separatorCount);
+
+    // For every character...
+    for (int i = 0; i < sourceLength; i++) {
+        // Get the character
+        curChar = rule.at(i);
+        // Check if it's on the list of special list characters, converting to Unicode for use
+        // in the switch statement
+        //
+        // See https://doc.qt.io/qt-5/qchar.html#unicode
+        switch (curChar.unicode()) {
+        case ';':
+            // Separator found
+            switch (consecutiveSlashes) {
+            case 0:
+            case 2:
+                // ";"   -> Split
+                // ...or...
+                // "\\;" -> Split (keep as "\\")
+                // Not escaped separator, split into a new item
+
+                // Apply the additional "\\" if needed
+                if (consecutiveSlashes == 2) {
+                    // "\\;" -> Split (keep as "\\")
+                    curString.append(R"(\\)");
+                }
+
+                // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
+                curString = curString.trimmed();
+
+                // Skip empty items
+                if (!curString.isEmpty()) {
+                    // Add to inverted/normal list
+                    if (isInverted) {
+                        invertComponents.append(wildcardToRegEx(curString));
+                    }
+                    else {
+                        normalComponents.append(wildcardToRegEx(curString));
+                    }
+                }
+                // Reset the current list item
+                curString.clear();
+                isInverted = false;
+                isRuleStart = true;
+                break;
+            case 1:
+                // "\;" -> Replace with ";"
+                curString.append(";");
+                isRuleStart = false;
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                isRuleStart = false;
+                break;
+            }
+            consecutiveSlashes = 0;
+            break;
+        case '!':
+            // Rule inverter found
+            if (isRuleStart) {
+                // Apply the inverting logic
+                switch (consecutiveSlashes) {
+                case 0:
+                    // "!"   -> At start: mark as inverted
+                    isInverted = true;
+                    // Don't include the "!" character
+                    break;
+                case 1:
+                    // "\!"  -> At start: replace with "!"
+                    curString.append("!");
+                    break;
+                case 2:
+                    // "\\!" -> At start: keep as "\\!" (replaced with "\!" in wildcard conversion)
+                    curString.append(R"(\\!)");
+                    break;
+                default:
+                    // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                    qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                             << rule << "resulted in rule component"
+                             << curString << "with unexpected count of consecutive '\\' ("
+                             << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                    break;
+                }
+            }
+            else {
+                // Preserve the characters as they are now
+                switch (consecutiveSlashes) {
+                case 0:
+                    // "!"    -> Elsewhere: keep as "!"
+                    curString.append("!");
+                    break;
+                case 1:
+                case 2:
+                     // "\!"  -> Elsewhere: keep as "\!"
+                     // "\\!" -> Elsewhere: keep as "\\!"
+                    curString.append(QString(R"(\)").repeated(consecutiveSlashes) + "!");
+                    break;
+                default:
+                    // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                    qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                               << rule << "resulted in rule component"
+                               << curString << "with unexpected count of consecutive '\\' ("
+                               << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                    break;
+                }
+            }
+            isRuleStart = false;
+            consecutiveSlashes = 0;
+            break;
+        case '\\':
+            // Split escape
+            // Increase consecutive slash count
+            consecutiveSlashes++;
+            // Check if we've reached "\\\"...
+            if (consecutiveSlashes == 3) {
+                // "\\\" -> Keep as "\\" + "\"
+                curString.append(R"(\\)");
+                // No longer at the rule start
+                isRuleStart = false;
+                // Set consecutive slashes to 1, recognizing the trailing "\"
+                consecutiveSlashes = 1;
+            }
+            else if (consecutiveSlashes > 3) {
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                break;
+            }
+            // Don't set "isRuleStart" here as "\" is used in escape sequences
+            break;
+        case '\n':
+            // Newline found
+            // Preserve the characters as they are now
+
+            // "[\n]"   -> Split
+            // "\[\n]"  -> Split (keep as "\")
+            // "\\[\n]" -> Split (keep as "\\")
+
+            switch (consecutiveSlashes) {
+            case 0:
+                // Keep string as is
+                break;
+            case 1:
+            case 2:
+                // Apply the additional "\" or "\\"
+                curString.append(QString(R"(\)").repeated(consecutiveSlashes));
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), applying newline split anyways!";
+                break;
+            }
+
+            // Remove any whitespace, e.g. "item1; item2" -> " item2" -> "item2"
+            curString = curString.trimmed();
+
+            // Skip empty items
+            if (!curString.isEmpty()) {
+                // Add to inverted/normal list
+                if (isInverted) {
+                    invertComponents.append(wildcardToRegEx(curString));
+                }
+                else {
+                    normalComponents.append(wildcardToRegEx(curString));
+                }
+            }
+            // Reset the current list item
+            curString.clear();
+            isInverted = false;
+            isRuleStart = true;
+            consecutiveSlashes = 0;
+            break;
+        default:
+            // Preserve the characters as they are now
+            switch (consecutiveSlashes) {
+            case 0:
+                // "..."   -> Keep as "..."
+                curString.append(curChar);
+                break;
+            case 1:
+            case 2:
+                // "\..."  -> Keep as "\..."
+                // "\\..." -> Keep as "\\..."
+                curString.append(QString("\\").repeated(consecutiveSlashes) + curChar);
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << rule << "resulted in rule component"
+                           << curString << "with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring " << curChar << "char escape!";
+                break;
+            }
+            // Don't mark as past rule start for whitespace (whitespace gets trimmed)
+            if (!curChar.isSpace()) {
+                isRuleStart = false;
+            }
+            consecutiveSlashes = 0;
+            break;
+        }
+    }
+
+    // Clean up any duplicates
+    normalComponents.removeDuplicates();
+    invertComponents.removeDuplicates();
+
+    // Create full regular expressions by...
+    // > Anchoring to start and end of string to mimic QRegExp's .exactMatch() handling, "^...$"
+    // > Enclosing within a non-capturing group to avoid overhead of text extraction, "(?:...)"
+    // > Flattening normal and inverted rules using the regex OR character "...|..."
+    //
+    // Before: [foo, bar, baz]
+    // After:  ^(?:foo|bar|baz)$
+    //
+    // See https://doc.qt.io/qt-5/qregularexpression.html#porting-from-qregexp-exactmatch
+    // And https://regex101.com/
+
+    // Any empty/invalid regex are handled within ExpressionMatch::match()
+    if (!normalComponents.isEmpty()) {
+        // Create normal match regex
+        if (normalComponents.count() == 1) {
+            // Single item, skip the noncapturing group
+            _matchRegEx = regExFactory("^" + normalComponents.at(0) + "$", caseSensitive);
+        }
+        else {
+            _matchRegEx = regExFactory("^(?:" + normalComponents.join("|") + ")$", caseSensitive);
+        }
+        _matchRegExActive = true;
+    }
+    if (!invertComponents.isEmpty()) {
+        // Create invert match regex
+        if (invertComponents.count() == 1) {
+            // Single item, skip the noncapturing group
+            _matchInvertRegEx = regExFactory("^" + invertComponents.at(0) + "$", caseSensitive);
+        }
+        else {
+            _matchInvertRegEx =
+                    regExFactory("^(?:" + invertComponents.join("|") + ")$", caseSensitive);
+        }
+        _matchInvertRegExActive = true;
+    }
+}
+
+
+QString ExpressionMatch::wildcardToRegEx(const QString &expression)
+{
+    // Convert the wildcard expression into regular expression format
+
+    // We're taking a little bit different of a route...
+    //
+    // Original QRegExp::Wildcard rules:
+    // --------------------------
+    // Wildcard | Regex | Outcome
+    // ---------|-------|--------
+    // *        | .*    | zero or more of any character
+    // ?        | .     | any single character
+    //
+    // NOTE 1: This is QRegExp::Wildcard, not QRegExp::WildcardUnix
+    //
+    // NOTE 2: We are ignoring the "[...]" character-class matching functionality of
+    // QRegExp::Wildcard as that feature's a bit more complex and can be handled with full-featured
+    // regexes.
+    //
+    // See https://doc.qt.io/qt-5/qregexp.html#wildcard-matching
+    //
+    // Quassel originally did not use QRegExp::WildcardUnix, which prevented escaping "*" and "?" in
+    // messages.  Unfortunately, spam messages might decide to use both, so offering a way to escape
+    // makes sense.
+    //
+    // On the flip-side, that means to match "\" requires escaping as "\\", breaking backwards
+    // compatibility.
+    //
+    // Quassel's Wildcard rules
+    // ------------------------------------------
+    // Wildcard | Regex escaped | Regex | Outcome
+    // ---------|---------------|-------|--------
+    // *        | \*            | .*    | zero or more of any character
+    // ?        | \?            | .     | any single character
+    // \*       | \\\*          | \*    | literal "*"
+    // \?       | \\\?          | \?    | literal "?"
+    // \[...]   | \\[...]       | [...] | invalid escape, ignore it
+    // \\       | \\\\          | \\    | literal "\"
+    //
+    // In essence, "*" and "?" need changed only if not escaped, "\\" collapses into "\", "\" gets
+    // ignored; other characters escape normally.
+    //
+    // Example:
+    //
+    // > Wildcard rule
+    // never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval
+    //
+    // ("\\\\" represents "\\", "\\" represents "\", and "\\\" is valid+invalid, "\")
+    //
+    // > Regex escaped wildcard rule
+    // never\?gonna\*give\\\*you\\\?up\\\\test\|y\\yeah\\\\1\\\\\\\\2\\\\\\1inval
+    //
+    // > Expected correct regex
+    // never.gonna.*give\*you\?up\\test\|yyeah\\1\\\\2\\1inval
+    //
+    // > Undoing regex escaping of "\" as "\\" (i.e. simple replace, with special escapes intact)
+    // never.gonna.*give\*you\?up\test\|yyeah\1\\2\1inval
+
+    // Escape string according to regex
+    QString regExEscaped(regExEscape(expression));
+
+    // Fix up the result
+    //
+    // NOTE: In theory, regular expression lookbehind could solve this.  Unfortunately, QRegExp does
+    // not support lookbehind, and it's theoretically inefficient, anyways.  Just use an approach
+    // similar to that taken by QRegExp's official wildcard mode.
+    //
+    // Lookbehind example (that we can't use):
+    // (?<!abc)test    Negative lookbehind - don't match if "test" is proceeded by "abc"
+    //
+    // See https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/tools/qregexp.cpp
+    //
+    // NOTE: We don't copy QRegExp's mode as QRegularExpression has more special characters.  We
+    // can't use the same escaping code, hence calling the appropriate QReg[...]::escape() above.
+
+    // Prepare to loop!
+
+    // Result
+    QString result = {};
+    // Current character
+    QChar curChar = {};
+    // Max length
+    int sourceLength = regExEscaped.length();
+    // Consecutive "\" characters
+    int consecutiveSlashes = 0;
+
+    // We know it's going to be the same length or smaller, so reserve the same size as the string
+    result.reserve(sourceLength);
+
+    // For every character...
+    for (int i = 0; i < sourceLength; i++) {
+        // Get the character
+        curChar = regExEscaped.at(i);
+        // Check if it's on the list of special wildcard characters, converting to Unicode for use
+        // in the switch statement
+        //
+        // See https://doc.qt.io/qt-5/qchar.html#unicode
+        switch (curChar.unicode()) {
+        case '?':
+            // Wildcard "?"
+            switch (consecutiveSlashes) {
+            case 1:
+                // "?" -> "\?" -> "."
+                // Convert from regex escaped "?" to regular expression
+                result.append(".");
+                break;
+            case 3:
+                // "\?" -> "\\\?" -> "\?"
+                // Convert from regex escaped "\?" to literal string
+                result.append(R"(\?)");
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << expression << "resulted in escaped regular expression string"
+                           << regExEscaped << " with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                break;
+            }
+            consecutiveSlashes = 0;
+            break;
+        case '*':
+            // Wildcard "*"
+            switch (consecutiveSlashes) {
+            case 1:
+                // "*" -> "\*" -> ".*"
+                // Convert from regex escaped "*" to regular expression
+                result.append(".*");
+                break;
+            case 3:
+                // "\*" -> "\\\*" -> "\*"
+                // Convert from regex escaped "\*" to literal string
+                result.append(R"(\*)");
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << expression << "resulted in escaped regular expression string"
+                           << regExEscaped << " with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring" << curChar << "character!";
+                break;
+            }
+            consecutiveSlashes = 0;
+            break;
+        case '\\':
+            // Wildcard escape
+            // Increase consecutive slash count
+            consecutiveSlashes++;
+            // Check if we've hit an escape sequence
+            if (consecutiveSlashes == 4) {
+                // "\\" -> "\\\\" -> "\\"
+                // Convert from regex escaped "\\" to literal string
+                result.append(R"(\\)");
+                // Reset slash count
+                consecutiveSlashes = 0;
+            }
+            break;
+        default:
+            // Any other character
+            switch (consecutiveSlashes) {
+            case 0:
+            case 2:
+                // "[...]"  -> "[...]"   -> "[...]"
+                // ...or...
+                // "\[...]" -> "\\[...]" -> "[...]"
+                // Either just print the character itself, or convert from regex-escaped invalid
+                // wildcard escape sequence to the character itself
+                //
+                // Both mean doing nothing, the actual character [...] gets appended below
+                break;
+            case 1:
+                // "[...]" -> "\[...]" -> "\"
+                // Keep regex-escaped special character "[...]" as literal string
+                // (Where "[...]" represents any non-wildcard regex special character)
+                result.append(R"(\)");
+                // The actual character [...] gets appended below
+                break;
+            default:
+                // This shouldn't ever happen (even with invalid wildcard rules), log a warning
+                qWarning() << Q_FUNC_INFO << "Wildcard rule"
+                           << expression << "resulted in escaped regular expression string"
+                           << regExEscaped << " with unexpected count of consecutive '\\' ("
+                           << consecutiveSlashes << "), ignoring" << curChar << "char escape!";
+                break;
+            }
+            consecutiveSlashes = 0;
+            // Add the character itself
+            result.append(curChar);
+            break;
+        }
+    }
+
+    // Anchoring to simulate QRegExp::exactMatch() is handled in
+    // ExpressionMatch::convertFromWildcard()
+    return result;
+}
diff --git a/src/common/expressionmatch.h b/src/common/expressionmatch.h

new file mode 100644 (file)

index 0000000..0f5acfc
--- /dev/null
+++ b/src/common/expressionmatch.h
@@ -0,0 +1,261 @@
+/***************************************************************************
+ *   Copyright (C) 2005-2018 by the Quassel Project                        *
+ *   devel@quassel-irc.org                                                 *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) version 3.                                           *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.         *
+ ***************************************************************************/
+
+#pragma once
+
+#include <QString>
+#include <QStringList>
+
+#if QT_VERSION >= 0x050000
+#include <QRegularExpression>
+#else
+#include <QRegExp>
+#endif
+
+/**
+ * Expression matcher with multiple modes of operation and automatic caching for performance
+ */
+class ExpressionMatch
+{
+
+public:
+    /// Expression matching mode
+#if QT_VERSION >= 0x050000
+    enum class MatchMode {
+#else
+    enum MatchMode {
+#endif
+        MatchPhrase,        ///< Match phrase as specified, no special handling
+        MatchMultiPhrase,   ///< Match phrase as specified, split on \n only
+        MatchWildcard,      ///< Match wildcards, "!" at start inverts, "\" escapes
+        MatchMultiWildcard, ///< Match wildcards, split ; or \n, "!" at start inverts, "\" escapes
+        MatchRegEx          ///< Match as regular expression, "!..." invert regex, "\" escapes
+    };
+
+    /**
+     * Construct an empty ExpressionMatch
+     */
+    ExpressionMatch() {}
+
+    /**
+     * Construct an Expression match with the given parameters
+     *
+     * @param expression     A phrase, wildcard expression, or regular expression
+     * @param mode
+     * @parblock
+     * Expression matching mode
+     * @see ExpressionMatch::MatchMode
+     * @endparblock
+     * @param caseSensitive  If true, match case-sensitively, otherwise ignore case when matching
+     */
+    ExpressionMatch(const QString &expression, MatchMode mode, bool caseSensitive);
+
+    /**
+     * Check if the given string matches the stored expression
+     *
+     * @param string      String to check
+     * @param matchEmpty  If true, always match when the expression is empty, otherwise never match
+     * @return            True if match found, otherwise false
+     */
+    bool match(const QString &string, bool matchEmpty = false) const;
+
+    /**
+     * Gets if the source expression is empty
+     *
+     * @return True if source expression is empty, otherwise false
+     */
+    inline bool isEmpty() const { return (_sourceExpressionEmpty); }
+
+    /**
+     * Gets if the source expression and parameters resulted in a valid expression matcher
+     *
+     * @return True if given expression is valid, otherwise false
+     */
+    inline bool isValid() const {
+        // Either this must be empty, or normal or inverted rules must be valid and active
+        return (_sourceExpressionEmpty
+                || (_matchRegExActive && _matchRegEx.isValid())
+                || (_matchInvertRegExActive && _matchInvertRegEx.isValid()));
+    }
+
+    /**
+     * Gets the original expression match string
+     *
+     * @return QString of the source expression match string
+     */
+    inline QString sourceExpression() const { return _sourceExpression; }
+
+    /**
+     * Sets the expression match string
+     *
+     * @param expression A phrase, wildcard expression, or regular expression
+     */
+    void setSourceExpression(const QString &expression) {
+        if (_sourceExpression != expression) {
+            _sourceExpression = expression;
+            cacheRegEx();
+        }
+    }
+
+    /**
+     * Gets the original expression match mode
+     *
+     * @return MatchMode of the source expression
+     */
+    inline MatchMode sourceMode() const { return _sourceMode; }
+
+    /**
+     * Sets the expression match mode
+     *
+     * @param mode
+     * @parblock
+     * Expression matching mode
+     * @see ExpressionMatch::MatchMode
+     * @endparblock
+     */
+    void setSourceMode(MatchMode mode) {
+        if (_sourceMode != mode) {
+            _sourceMode = mode;
+            cacheRegEx();
+        }
+    }
+
+    /**
+     * Gets the original expression case-sensitivity
+     *
+     * @return True if case-sensitive, otherwise false
+     */
+    inline bool sourceCaseSensitive() const { return _sourceCaseSensitive; }
+
+    /**
+     * Sets the expression match as case sensitive or not
+     *
+     * @param caseSensitive If true, match case-sensitively, otherwise ignore case when matching
+     */
+    void setSourceCaseSensitive(bool caseSensitive) {
+        if (_sourceCaseSensitive != caseSensitive) {
+            _sourceCaseSensitive = caseSensitive;
+            cacheRegEx();
+        }
+    }
+
+    bool operator!=(const ExpressionMatch &other) const
+    {
+        return (_sourceExpression != other._sourceExpression ||
+                _sourceMode != other._sourceMode ||
+                _sourceCaseSensitive != other._sourceCaseSensitive);
+    }
+
+    /**
+     * Trim extraneous whitespace from individual rules within a given MultiWildcard expression
+     *
+     * This respects the ";" escaping rules with "\".  It is safe to call this multiple times; a
+     * trimmed string should remain unchanged.
+     *
+     * @see ExpressionMatch::MatchMode::MatchMultiWildcard
+     *
+     * @param originalRule MultiWildcard rule list, ";"-separated
+     * @return Trimmed MultiWildcard rule list
+     */
+    static QString trimMultiWildcardWhitespace(const QString &originalRule);
+
+private:
+    /**
+     * Calculates internal regular expressions
+     *
+     * Will always run when called, no cache validity checks performed.
+     */
+    void cacheRegEx();
+
+    /**
+     * Creates a regular expression object of appropriate type and case-sensitivity
+     *
+     * @param regExString    Regular expression string
+     * @param caseSensitive  If true, match case-sensitively, otherwise ignore case when matching
+     * @return Configured QRegExp class on Qt 4, QRegularExpression on Qt 5
+     */
+#if QT_VERSION >= 0x050000
+    static QRegularExpression regExFactory(const QString &regExString, bool caseSensitive);
+#else
+    static QRegExp regExFactory(const QString &regExString, bool caseSensitive);
+#endif
+
+    /**
+     * Escapes any regular expression characters in a string so they have no special meaning
+     *
+     * @param phrase String containing potential regular expression special characters
+     * @return QString with all regular expression characters escaped
+     */
+    static QString regExEscape(const QString &phrase);
+
+    /**
+     * Converts a multiple-phrase rule into a regular expression
+     *
+     * Unconditionally splits phrases on "\n", whitespace is preserved
+     *
+     * @param originalRule MultiPhrase rule list, "\n"-separated
+     * @return A regular expression matching the given phrases
+     */
+    static QString convertFromMultiPhrase(const QString &originalRule);
+
+    /**
+     * Internally converts a wildcard rule into regular expressions
+     *
+     * Splits wildcards on ";" and "\n", "!..." inverts section, "\" escapes
+     *
+     * @param originalRule   MultiWildcard rule list, ";"-separated
+     * @param caseSensitive  If true, match case-sensitively, otherwise ignore case when matching
+     */
+    void generateFromMultiWildcard(const QString &originalRule, bool caseSensitive);
+
+    /**
+     * Converts a wildcard expression into a regular expression
+     *
+     * NOTE:  Does not handle Quassel's extended scope matching and splitting.
+     *
+     * @see ExpressionMatch::convertFromWildcard()
+     * @return QString with all regular expression characters escaped
+     */
+    static QString wildcardToRegEx(const QString &expression);
+
+    // Original/source components
+    QString _sourceExpression = {};                  ///< Expression match string given on creation
+    MatchMode _sourceMode = MatchMode::MatchPhrase;  ///< Expression match mode given on creation
+    bool _sourceCaseSensitive = false;               ///< Expression case sensitive on creation
+
+    // Derived components
+    bool _sourceExpressionEmpty = false;             ///< Cached expression match string is empty
+
+    /// Underlying regular expression matching instance for normal (noninverted) rules
+#if QT_VERSION >= 0x050000
+    QRegularExpression _matchRegEx = {};
+#else
+    QRegExp _matchRegEx = {};
+#endif
+    bool _matchRegExActive = false;                  ///< If true, use normal expression in matching
+
+    /// Underlying regular expression matching instance for inverted rules
+#if QT_VERSION >= 0x050000
+    QRegularExpression _matchInvertRegEx = {};
+#else
+    QRegExp _matchInvertRegEx = {};
+#endif
+    bool _matchInvertRegExActive = false;            ///< If true, use invert expression in matching
+};
author	Shane Synan <digitalcircuit36939@gmail.com>
	Sun, 2 Sep 2018 16:52:12 +0000 (11:52 -0500)
committer	Manuel Nickschas <sputnick@quassel-irc.org>
	Mon, 3 Sep 2018 20:12:02 +0000 (22:12 +0200)
src/common/CMakeLists.txt		patch \| blob \| history
src/common/expressionmatch.cpp	[new file with mode: 0644]	patch \| blob
src/common/expressionmatch.h	[new file with mode: 0644]	patch \| blob