cmake: avoid de-duplication of user's CXXFLAGS
[quassel.git] / tests / common / expressionmatchtest.cpp
1 /***************************************************************************
2  *   Copyright (C) 2005-2022 by the Quassel Project                        *
3  *   devel@quassel-irc.org                                                 *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) version 3.                                           *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.         *
19  ***************************************************************************/
20
21 #include <vector>
22
23 #include <QString>
24 #include <QStringList>
25
26 #include "testglobal.h"
27 #include "expressionmatch.h"
28
29 TEST(ExpressionMatchTest, emptyPattern)
30 {
31     // Empty pattern
32     ExpressionMatch emptyMatch =
33             ExpressionMatch("", ExpressionMatch::MatchMode::MatchPhrase, false);
34
35     // Assert empty is valid
36     ASSERT_TRUE(emptyMatch.isValid());
37     // Assert empty
38     EXPECT_TRUE(emptyMatch.isEmpty());
39     // Assert default match fails (same as setting match empty to false)
40     EXPECT_FALSE(emptyMatch.match("something"));
41     // Assert match empty succeeds
42     EXPECT_TRUE(emptyMatch.match("something", true));
43 }
44
45 TEST(ExpressionMatchTest, matchPhrase)
46 {
47     // Simple phrase, case-insensitive
48     ExpressionMatch simpleMatch =
49             ExpressionMatch("test", ExpressionMatch::MatchMode::MatchPhrase, false);
50     // Simple phrase, case-sensitive
51     ExpressionMatch simpleMatchCS =
52             ExpressionMatch("test", ExpressionMatch::MatchMode::MatchPhrase, true);
53     // Phrase with space, case-insensitive
54     ExpressionMatch simpleMatchSpace =
55             ExpressionMatch(" space ", ExpressionMatch::MatchMode::MatchPhrase, true);
56     // Complex phrase
57     QString complexMatchFull(R"(^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double"
58                               "|escape\;sep|slash\-end\-split\\|quad\\\\\!noninvert|newline\-split"
59                               "|newline\-split\-slash\\|slash\-at\-end\\)$)");
60     ExpressionMatch complexMatch =
61             ExpressionMatch(complexMatchFull, ExpressionMatch::MatchMode::MatchPhrase, false);
62
63     // Assert valid and not empty
64     ASSERT_TRUE(simpleMatch.isValid());
65     EXPECT_FALSE(simpleMatch.isEmpty());
66     ASSERT_TRUE(simpleMatchCS.isValid());
67     EXPECT_FALSE(simpleMatchCS.isEmpty());
68     ASSERT_TRUE(simpleMatchSpace.isValid());
69     EXPECT_FALSE(simpleMatchSpace.isEmpty());
70     ASSERT_TRUE(complexMatch.isValid());
71     EXPECT_FALSE(complexMatch.isEmpty());
72
73     // Assert match succeeds
74     EXPECT_TRUE(simpleMatch.match("test"));
75     EXPECT_TRUE(simpleMatch.match("other test;"));
76     EXPECT_TRUE(simpleMatchSpace.match(" space "));
77     // Assert partial match fails
78     EXPECT_FALSE(simpleMatch.match("testing"));
79     EXPECT_FALSE(simpleMatchSpace.match("space"));
80     // Assert unrelated fails
81     EXPECT_FALSE(simpleMatch.match("not above"));
82
83     // Assert case sensitivity followed
84     EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
85     EXPECT_TRUE(simpleMatch.match("TeSt"));
86     EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
87     EXPECT_FALSE(simpleMatchCS.match("TeSt"));
88
89     // Assert complex phrases are escaped properly
90     EXPECT_TRUE(complexMatch.match(complexMatchFull));
91     EXPECT_FALSE(complexMatch.match("norm"));
92 }
93
94
95 TEST(ExpressionMatchTest, matchMultiPhrase)
96 {
97     // Simple phrases, case-insensitive
98     ExpressionMatch simpleMatch =
99             ExpressionMatch("test\nOther ", ExpressionMatch::MatchMode::MatchMultiPhrase, false);
100     // Simple phrases, case-sensitive
101     ExpressionMatch simpleMatchCS =
102             ExpressionMatch("test\nOther ", ExpressionMatch::MatchMode::MatchMultiPhrase, true);
103     // Complex phrases
104     QString complexMatchFullA(R"(^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double)"
105                               R"(|escape\;sep|slash\-end\-split\\|quad\\\\\!noninvert)"
106                               R"(|newline\-split|newline\-split\-slash\\|slash\-at\-end\\)$)");
107     QString complexMatchFullB(R"(^(?:invert|invert\-space)$)$)");
108     ExpressionMatch complexMatch =
109             ExpressionMatch(complexMatchFullA + "\n" + complexMatchFullB,
110                             ExpressionMatch::MatchMode::MatchMultiPhrase, false);
111
112     // Assert valid and not empty
113     ASSERT_TRUE(simpleMatch.isValid());
114     EXPECT_FALSE(simpleMatch.isEmpty());
115     ASSERT_TRUE(simpleMatchCS.isValid());
116     EXPECT_FALSE(simpleMatchCS.isEmpty());
117     ASSERT_TRUE(complexMatch.isValid());
118     EXPECT_FALSE(complexMatch.isEmpty());
119
120     // Assert match succeeds
121     EXPECT_TRUE(simpleMatch.match("test"));
122     EXPECT_TRUE(simpleMatch.match("test[suffix]"));
123     EXPECT_TRUE(simpleMatch.match("other test;"));
124     EXPECT_TRUE(simpleMatch.match("Other "));
125     EXPECT_TRUE(simpleMatch.match(".Other !"));
126     // Assert partial match fails
127     EXPECT_FALSE(simpleMatch.match("testing"));
128     EXPECT_FALSE(simpleMatch.match("Other!"));
129     // Assert unrelated fails
130     EXPECT_FALSE(simpleMatch.match("not above"));
131
132     // Assert case sensitivity followed
133     EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
134     EXPECT_TRUE(simpleMatch.match("TeSt"));
135     EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
136     EXPECT_FALSE(simpleMatchCS.match("TeSt"));
137
138     // Assert complex phrases are escaped properly
139     EXPECT_TRUE(complexMatch.match(complexMatchFullA));
140     EXPECT_TRUE(complexMatch.match(complexMatchFullB));
141     EXPECT_FALSE(complexMatch.match("norm"));
142     EXPECT_FALSE(complexMatch.match("invert"));
143 }
144
145
146 TEST(ExpressionMatchTest, matchWildcard)
147 {
148     // Simple wildcard, case-insensitive
149     ExpressionMatch simpleMatch =
150             ExpressionMatch("?test*", ExpressionMatch::MatchMode::MatchWildcard, false);
151     // Simple wildcard, case-sensitive
152     ExpressionMatch simpleMatchCS =
153             ExpressionMatch("?test*", ExpressionMatch::MatchMode::MatchWildcard, true);
154     // Escaped wildcard, case-insensitive
155     ExpressionMatch simpleMatchEscape =
156             ExpressionMatch(R"(\?test\*)", ExpressionMatch::MatchMode::MatchWildcard, false);
157     // Inverted wildcard, case-insensitive
158     ExpressionMatch simpleMatchInvert =
159             ExpressionMatch("!test*", ExpressionMatch::MatchMode::MatchWildcard, false);
160     // Not inverted wildcard, case-insensitive
161     ExpressionMatch simpleMatchNoInvert =
162             ExpressionMatch(R"(\!test*)", ExpressionMatch::MatchMode::MatchWildcard, false);
163     // Not inverted wildcard literal slash, case-insensitive
164     ExpressionMatch simpleMatchNoInvertSlash =
165             ExpressionMatch(R"(\\!test*)", ExpressionMatch::MatchMode::MatchWildcard, false);
166     // Complex wildcard
167     ExpressionMatch complexMatch =
168             ExpressionMatch(R"(never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval)",
169                             ExpressionMatch::MatchMode::MatchWildcard, false);
170
171     // Assert valid and not empty
172     ASSERT_TRUE(simpleMatch.isValid());
173     EXPECT_FALSE(simpleMatch.isEmpty());
174     ASSERT_TRUE(simpleMatchCS.isValid());
175     EXPECT_FALSE(simpleMatchCS.isEmpty());
176     ASSERT_TRUE(simpleMatchEscape.isValid());
177     EXPECT_FALSE(simpleMatchEscape.isEmpty());
178     ASSERT_TRUE(simpleMatchInvert.isValid());
179     EXPECT_FALSE(simpleMatchInvert.isEmpty());
180     ASSERT_TRUE(simpleMatchNoInvert.isValid());
181     EXPECT_FALSE(simpleMatchNoInvert.isEmpty());
182     ASSERT_TRUE(simpleMatchNoInvertSlash.isValid());
183     EXPECT_FALSE(simpleMatchNoInvertSlash.isEmpty());
184     ASSERT_TRUE(complexMatch.isValid());
185     EXPECT_FALSE(complexMatch.isEmpty());
186
187     // Assert match succeeds
188     EXPECT_TRUE(simpleMatch.match("@test"));
189     EXPECT_TRUE(simpleMatch.match("@testing"));
190     EXPECT_TRUE(simpleMatch.match("!test"));
191     EXPECT_TRUE(simpleMatchEscape.match("?test*"));
192     EXPECT_TRUE(simpleMatchInvert.match("atest"));
193     EXPECT_TRUE(simpleMatchNoInvert.match("!test"));
194     EXPECT_TRUE(simpleMatchNoInvertSlash.match(R"(\!test)"));
195     // Assert partial match fails
196     EXPECT_FALSE(simpleMatch.match("test"));
197     // Assert unrelated fails
198     EXPECT_FALSE(simpleMatch.match("not above"));
199     // Assert escaped wildcard fails
200     EXPECT_FALSE(simpleMatchEscape.match("@testing"));
201     EXPECT_FALSE(simpleMatchNoInvert.match("test"));
202     EXPECT_FALSE(simpleMatchNoInvert.match("anything"));
203     EXPECT_FALSE(simpleMatchNoInvertSlash.match("!test"));
204     EXPECT_FALSE(simpleMatchNoInvertSlash.match("test"));
205     EXPECT_FALSE(simpleMatchNoInvertSlash.match("anything"));
206     // Assert non-inverted fails
207     EXPECT_FALSE(simpleMatchInvert.match("testing"));
208
209     // Assert case sensitivity followed
210     EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
211     EXPECT_TRUE(simpleMatch.match("@TeSt"));
212     EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
213     EXPECT_FALSE(simpleMatchCS.match("@TeSt"));
214
215     // Assert complex match
216     EXPECT_TRUE(complexMatch.match(R"(neverAgonnaBBBgive*you?up\test|yyeah\1\\2\1inval)"));
217     // Assert complex not literal match
218     EXPECT_FALSE(complexMatch.match(R"(never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval)"));
219     // Assert complex unrelated not match
220     EXPECT_FALSE(complexMatch.match("other"));
221 }
222
223
224 TEST(ExpressionMatchTest, matchMultiWildcard)
225 {
226     // Simple wildcards, case-insensitive
227     ExpressionMatch simpleMatch =
228             ExpressionMatch("?test*;another?",
229                             ExpressionMatch::MatchMode::MatchMultiWildcard, false);
230     // Simple wildcards, case-sensitive
231     ExpressionMatch simpleMatchCS =
232             ExpressionMatch("?test*;another?",
233                             ExpressionMatch::MatchMode::MatchMultiWildcard, true);
234     // Escaped wildcards, case-insensitive
235     ExpressionMatch simpleMatchEscape =
236             ExpressionMatch(R"(\?test\*\;*thing\*)",
237                             ExpressionMatch::MatchMode::MatchMultiWildcard, false);
238     // Inverted wildcards, case-insensitive
239     ExpressionMatch simpleMatchInvert =
240             ExpressionMatch(R"(test*;!testing)",
241                             ExpressionMatch::MatchMode::MatchMultiWildcard, false);
242     // Implicit wildcards, case-insensitive
243     ExpressionMatch simpleMatchImplicit =
244             ExpressionMatch(R"(!testing*)",
245                             ExpressionMatch::MatchMode::MatchMultiWildcard, false);
246     // Complex wildcard
247     QString complexMatchFull(R"(norm;!invert; norm-space ; !invert-space ;;!;\!norm-escaped;)"
248                              R"(\\!slash-invert;\\\\double; escape\;sep;slash-end-split\\;)"
249                              R"(quad\\\\!noninvert;newline-split)""\n"
250                              R"(newline-split-slash\\)""\n"
251                              R"(slash-at-end\\)");
252     // Match normal components
253     QStringList complexMatchNormal = {
254         R"(norm)",
255         R"(norm-space)",
256         R"(!norm-escaped)",
257         R"(\!slash-invert)",
258         R"(\\double)",
259         R"(escape;sep)",
260         R"(slash-end-split\)",
261         R"(quad\\!noninvert)",
262         R"(newline-split)",
263         R"(newline-split-slash\)",
264         R"(slash-at-end\)"
265     };
266     // Match negating components
267     QStringList complexMatchInvert = {
268         R"(invert)",
269         R"(invert-space)"
270     };
271     ExpressionMatch complexMatch =
272             ExpressionMatch(complexMatchFull, ExpressionMatch::MatchMode::MatchMultiWildcard,
273                             false);
274
275     // Assert valid and not empty
276     ASSERT_TRUE(simpleMatch.isValid());
277     EXPECT_FALSE(simpleMatch.isEmpty());
278     ASSERT_TRUE(simpleMatchCS.isValid());
279     EXPECT_FALSE(simpleMatchCS.isEmpty());
280     ASSERT_TRUE(simpleMatchEscape.isValid());
281     EXPECT_FALSE(simpleMatchEscape.isEmpty());
282     ASSERT_TRUE(simpleMatchInvert.isValid());
283     EXPECT_FALSE(simpleMatchInvert.isEmpty());
284     ASSERT_TRUE(simpleMatchImplicit.isValid());
285     EXPECT_FALSE(simpleMatchImplicit.isEmpty());
286     ASSERT_TRUE(complexMatch.isValid());
287     EXPECT_FALSE(complexMatch.isEmpty());
288
289     // Assert match succeeds
290     EXPECT_TRUE(simpleMatch.match("@test"));
291     EXPECT_TRUE(simpleMatch.match("@testing"));
292     EXPECT_TRUE(simpleMatch.match("!test"));
293     EXPECT_TRUE(simpleMatch.match("anotherA"));
294     EXPECT_TRUE(simpleMatchEscape.match("?test*;thing*"));
295     EXPECT_TRUE(simpleMatchEscape.match("?test*;AAAAAthing*"));
296     EXPECT_TRUE(simpleMatchInvert.match("test"));
297     EXPECT_TRUE(simpleMatchInvert.match("testing things"));
298     // Assert implicit wildcard succeeds
299     EXPECT_TRUE(simpleMatchImplicit.match("AAAAAA"));
300     // Assert partial match fails
301     EXPECT_FALSE(simpleMatch.match("test"));
302     EXPECT_FALSE(simpleMatch.match("another"));
303     EXPECT_FALSE(simpleMatch.match("anotherBB"));
304     // Assert unrelated fails
305     EXPECT_FALSE(simpleMatch.match("not above"));
306     // Assert escaped wildcard fails
307     EXPECT_FALSE(simpleMatchEscape.match("@testing"));
308     // Assert inverted match fails
309     EXPECT_FALSE(simpleMatchInvert.match("testing"));
310     EXPECT_FALSE(simpleMatchImplicit.match("testing"));
311
312     // Assert case sensitivity followed
313     EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
314     EXPECT_TRUE(simpleMatch.match("@TeSt"));
315     EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
316     EXPECT_FALSE(simpleMatchCS.match("@TeSt"));
317
318     // Assert complex match
319     for (auto&& normMatch : complexMatchNormal) {
320         // Each normal component should match
321         EXPECT_TRUE(complexMatch.match(normMatch));
322     }
323
324     for (auto&& invertMatch : complexMatchInvert) {
325         // Each invert component should not match
326         EXPECT_FALSE(complexMatch.match(invertMatch));
327     }
328
329     // Assert complex not literal match
330     EXPECT_FALSE(complexMatch.match(complexMatchFull));
331     // Assert complex unrelated not match
332     EXPECT_FALSE(complexMatch.match("other"));
333 }
334
335
336 TEST(ExpressionMatchTest, matchRegEx)
337 {
338     // Simple regex, case-insensitive
339     ExpressionMatch simpleMatch =
340             ExpressionMatch(R"(simple.\*escape-match.*)",
341                             ExpressionMatch::MatchMode::MatchRegEx, false);
342     // Simple regex, case-sensitive
343     ExpressionMatch simpleMatchCS =
344             ExpressionMatch(R"(simple.\*escape-match.*)",
345                             ExpressionMatch::MatchMode::MatchRegEx, true);
346     // Inverted regex, case-insensitive
347     ExpressionMatch simpleMatchInvert =
348             ExpressionMatch(R"(!invert.\*escape-match.*)",
349                             ExpressionMatch::MatchMode::MatchRegEx, false);
350     // Non-inverted regex, case-insensitive
351     ExpressionMatch simpleMatchNoInvert =
352             ExpressionMatch(R"(\!simple.\*escape-match.*)",
353                             ExpressionMatch::MatchMode::MatchRegEx, false);
354     // Non-inverted regex literal slash, case-insensitive
355     ExpressionMatch simpleMatchNoInvertSlash =
356             ExpressionMatch(R"(\\!simple.\*escape-match.*)",
357                             ExpressionMatch::MatchMode::MatchRegEx, false);
358
359     // Assert valid and not empty
360     ASSERT_TRUE(simpleMatch.isValid());
361     EXPECT_FALSE(simpleMatch.isEmpty());
362     ASSERT_TRUE(simpleMatchCS.isValid());
363     EXPECT_FALSE(simpleMatchCS.isEmpty());
364     ASSERT_TRUE(simpleMatchInvert.isValid());
365     EXPECT_FALSE(simpleMatchInvert.isEmpty());
366     ASSERT_TRUE(simpleMatchNoInvert.isValid());
367     EXPECT_FALSE(simpleMatchNoInvert.isEmpty());
368     ASSERT_TRUE(simpleMatchNoInvertSlash.isValid());
369     EXPECT_FALSE(simpleMatchNoInvertSlash.isEmpty());
370
371     // Assert match succeeds
372     EXPECT_TRUE(simpleMatch.match("simpleA*escape-match"));
373     EXPECT_TRUE(simpleMatch.match("simpleA*escape-matchBBBB"));
374     EXPECT_TRUE(simpleMatchInvert.match("not above"));
375     EXPECT_TRUE(simpleMatchNoInvert.match("!simpleA*escape-matchBBBB"));
376     EXPECT_TRUE(simpleMatchNoInvertSlash.match(R"(\!simpleA*escape-matchBBBB)"));
377     // Assert partial match fails
378     EXPECT_FALSE(simpleMatch.match("simpleA*escape-mat"));
379     EXPECT_FALSE(simpleMatch.match("simple*escape-match"));
380     // Assert unrelated fails
381     EXPECT_FALSE(simpleMatch.match("not above"));
382     // Assert escaped wildcard fails
383     EXPECT_FALSE(simpleMatch.match("simpleABBBBescape-matchBBBB"));
384     // Assert inverted fails
385     EXPECT_FALSE(simpleMatchInvert.match("invertA*escape-match"));
386     EXPECT_FALSE(simpleMatchInvert.match("invertA*escape-matchBBBB"));
387     EXPECT_FALSE(simpleMatchNoInvert.match("simpleA*escape-matchBBBB"));
388     EXPECT_FALSE(simpleMatchNoInvert.match("anything"));
389     EXPECT_FALSE(simpleMatchNoInvertSlash.match("!simpleA*escape-matchBBBB"));
390     EXPECT_FALSE(simpleMatchNoInvertSlash.match("anything"));
391
392     // Assert case sensitivity followed
393     EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
394     EXPECT_TRUE(simpleMatch.match("SiMpLEA*escape-MATCH"));
395     EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
396     EXPECT_FALSE(simpleMatchCS.match("SiMpLEA*escape-MATCH"));
397 }
398
399
400 TEST(ExpressionMatchTest, trimMultiWildcardWhitespace)
401 {
402     // Patterns
403     static constexpr uint PATTERN_SOURCE = 0;
404     static constexpr uint PATTERN_RESULT = 1;
405     std::vector<std::vector<QString>> patterns = {
406         // Literal
407         {"literal",
408          "literal"},
409         // Simple semicolon cleanup
410         {"simple1  ;simple2; simple3 ",
411          "simple1; simple2; simple3"},
412         // Simple newline cleanup
413         {"simple1  \nsimple2\n simple3 ",
414          "simple1\nsimple2\nsimple3"},
415         // Complex cleanup
416         {R"(norm; norm-space ; newline-space )""\n"
417          R"( ;escape \; sep ; slash-end-split\\; quad\\\\norm; newline-split-slash\\)""\n"
418          R"(slash-at-end\\)",
419          R"(norm; norm-space; newline-space)""\n"
420          R"(escape \; sep; slash-end-split\\; quad\\\\norm; newline-split-slash\\)""\n"
421          R"(slash-at-end\\)"}
422     };
423
424     // Check every source string...
425     QString result;
426     for (auto&& patternPair : patterns) {
427         // Make sure data is valid
428         ASSERT_TRUE(patternPair.size() == 2);
429         // Run transformation
430         result = ExpressionMatch::trimMultiWildcardWhitespace(patternPair[PATTERN_SOURCE]);
431         // Assert that source trims into expected pattern
432         EXPECT_EQ(patternPair[PATTERN_RESULT], result);
433         // Assert that re-trimming expected pattern gives the same result
434         EXPECT_EQ(ExpressionMatch::trimMultiWildcardWhitespace(result), result);
435     }
436 }
437
438
439 TEST(ExpressionMatchTest, testInvalidRegEx)
440 {
441     // Invalid regular expression pattern
442     ExpressionMatch invalidRegExMatch =
443             ExpressionMatch("*network", ExpressionMatch::MatchMode::MatchRegEx, false);
444
445     // Assert not valid
446     ASSERT_FALSE(invalidRegExMatch.isValid());
447     // Assert not empty
448     EXPECT_FALSE(invalidRegExMatch.isEmpty());
449     // Assert default match fails
450     EXPECT_FALSE(invalidRegExMatch.match(""));
451     // Assert wildcard match fails
452     EXPECT_FALSE(invalidRegExMatch.match("network"));
453     // Assert literal match fails
454     EXPECT_FALSE(invalidRegExMatch.match("*network"));
455 }
456
457
458 TEST(ExpressionMatchTest, matchPhraseUnicode)
459 {
460     // Escape Unicode color emoji as a workaround for bug with libXft, otherwise, color emoji may
461     // crash "git gui" and "gitk"
462     // This has no impact on Quassel or the tests themselves
463     //
464     // See https://unix.stackexchange.com/questions/629281/gitk-crashes-when-viewing-commit-containing-emoji-x-error-of-failed-request-ba
465     const QString UnicodeEmojiFire("\xf0\x9f\x94\xa5");
466     const QString UnicodeEmojiFox("\xf0\x9f\xa6\x8a");
467
468     // Simple phrase, case-insensitive, ASCII
469     ExpressionMatch simpleMatchASCII =
470             ExpressionMatch("V", ExpressionMatch::MatchMode::MatchPhrase, false);
471     // Simple phrase, case-insensitive, ASCII Unicode mix
472     ExpressionMatch simpleMatchUniASCII =
473             ExpressionMatch("räv", ExpressionMatch::MatchMode::MatchPhrase, false);
474     // Simple phrase, case-insensitive, full Unicode
475     ExpressionMatch simpleMatchUnicode =
476             ExpressionMatch("狐", ExpressionMatch::MatchMode::MatchPhrase, false);
477     // Simple phrase, case-insensitive, emoji
478     ExpressionMatch simpleMatchEmoji =
479             ExpressionMatch(UnicodeEmojiFox, ExpressionMatch::MatchMode::MatchPhrase, false);
480
481     // Assert valid and not empty
482     ASSERT_TRUE(simpleMatchASCII.isValid());
483     EXPECT_FALSE(simpleMatchASCII.isEmpty());
484     ASSERT_TRUE(simpleMatchUniASCII.isValid());
485     EXPECT_FALSE(simpleMatchUniASCII.isEmpty());
486     ASSERT_TRUE(simpleMatchUnicode.isValid());
487     EXPECT_FALSE(simpleMatchUnicode.isEmpty());
488     ASSERT_TRUE(simpleMatchEmoji.isValid());
489     EXPECT_FALSE(simpleMatchEmoji.isEmpty());
490
491     // Assert basic match succeeds
492     EXPECT_TRUE(simpleMatchASCII.match("V"));
493     EXPECT_TRUE(simpleMatchUniASCII.match("räv"));
494     EXPECT_TRUE(simpleMatchUnicode.match("狐"));
495     EXPECT_TRUE(simpleMatchEmoji.match(UnicodeEmojiFox));
496
497     // Assert classic word boundaries succeed
498     EXPECT_TRUE(simpleMatchASCII.match("V: hello"));
499     EXPECT_TRUE(simpleMatchUniASCII.match("\"räv\""));
500     EXPECT_TRUE(simpleMatchUnicode.match("狐."));
501     EXPECT_TRUE(simpleMatchEmoji.match("(" + UnicodeEmojiFox  + ")"));
502
503     // Assert non-word-boundary Unicode is NOT treated as a word boundary
504     // > ASCII nickname (most common case with spam)
505     EXPECT_FALSE(simpleMatchASCII.match("TÜV Västra"));
506     // > ASCII/Unicode mix
507     EXPECT_FALSE(simpleMatchUniASCII.match("rävīoli"));
508     // > Full unicode
509     EXPECT_FALSE(simpleMatchUnicode.match("九尾の狐"));
510
511     // Assert emoji are treated as word boundaries
512     EXPECT_TRUE(simpleMatchASCII.match(UnicodeEmojiFire + "V" + UnicodeEmojiFire));
513     EXPECT_TRUE(simpleMatchEmoji.match(UnicodeEmojiFire + UnicodeEmojiFox));
514
515     // Assert Unicode case folding does NOT happen (ä -> a)
516     EXPECT_FALSE(simpleMatchUniASCII.match("rav"));
517 }
518
519
520 TEST(ExpressionMatchTest, matchRegExUnicode)
521 {
522     // Word character (letter and digit) regex, case-insensitive
523     ExpressionMatch simpleMatchSixWordChar =
524             ExpressionMatch(R"(\w{6})",
525                             ExpressionMatch::MatchMode::MatchRegEx, false);
526     // Digit regex, case-insensitive
527     ExpressionMatch simpleMatchThreeDigit =
528             ExpressionMatch(R"(\d{3})",
529                             ExpressionMatch::MatchMode::MatchRegEx, false);
530     ExpressionMatch simpleMatchAnyDigit =
531             ExpressionMatch(R"(\d+)",
532                             ExpressionMatch::MatchMode::MatchRegEx, false);
533
534     // Assert valid and not empty
535     ASSERT_TRUE(simpleMatchSixWordChar.isValid());
536     EXPECT_FALSE(simpleMatchSixWordChar.isEmpty());
537     ASSERT_TRUE(simpleMatchThreeDigit.isValid());
538     EXPECT_FALSE(simpleMatchThreeDigit.isEmpty());
539     ASSERT_TRUE(simpleMatchAnyDigit.isValid());
540     EXPECT_FALSE(simpleMatchAnyDigit.isEmpty());
541
542     // Assert ASCII matches
543     EXPECT_TRUE(simpleMatchSixWordChar.match("abc123"));
544     EXPECT_TRUE(simpleMatchThreeDigit.match("123"));
545     EXPECT_TRUE(simpleMatchAnyDigit.match("1"));
546
547     // Assert Unicode matches
548     EXPECT_TRUE(simpleMatchSixWordChar.match("áwá๑2๓"));
549     //
550     // "Unicode Characters in the 'Number, Decimal Digit' Category"
551     // Thai digits
552     // See https://www.fileformat.info/info/unicode/category/Nd/list.htm
553     EXPECT_TRUE(simpleMatchThreeDigit.match("๑2๓"));
554     EXPECT_TRUE(simpleMatchAnyDigit.match("๑"));
555
556     // Assert wrong content doesn't match
557     EXPECT_FALSE(simpleMatchAnyDigit.match("áwá"));
558 }