1 /***************************************************************************
2 * Copyright (C) 2005-2022 by the Quassel Project *
3 * devel@quassel-irc.org *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) version 3. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
24 #include <QStringList>
26 #include "testglobal.h"
27 #include "expressionmatch.h"
29 TEST(ExpressionMatchTest, emptyPattern)
32 ExpressionMatch emptyMatch =
33 ExpressionMatch("", ExpressionMatch::MatchMode::MatchPhrase, false);
35 // Assert empty is valid
36 ASSERT_TRUE(emptyMatch.isValid());
38 EXPECT_TRUE(emptyMatch.isEmpty());
39 // Assert default match fails (same as setting match empty to false)
40 EXPECT_FALSE(emptyMatch.match("something"));
41 // Assert match empty succeeds
42 EXPECT_TRUE(emptyMatch.match("something", true));
45 TEST(ExpressionMatchTest, matchPhrase)
47 // Simple phrase, case-insensitive
48 ExpressionMatch simpleMatch =
49 ExpressionMatch("test", ExpressionMatch::MatchMode::MatchPhrase, false);
50 // Simple phrase, case-sensitive
51 ExpressionMatch simpleMatchCS =
52 ExpressionMatch("test", ExpressionMatch::MatchMode::MatchPhrase, true);
53 // Phrase with space, case-insensitive
54 ExpressionMatch simpleMatchSpace =
55 ExpressionMatch(" space ", ExpressionMatch::MatchMode::MatchPhrase, true);
57 QString complexMatchFull(R"(^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double"
58 "|escape\;sep|slash\-end\-split\\|quad\\\\\!noninvert|newline\-split"
59 "|newline\-split\-slash\\|slash\-at\-end\\)$)");
60 ExpressionMatch complexMatch =
61 ExpressionMatch(complexMatchFull, ExpressionMatch::MatchMode::MatchPhrase, false);
63 // Assert valid and not empty
64 ASSERT_TRUE(simpleMatch.isValid());
65 EXPECT_FALSE(simpleMatch.isEmpty());
66 ASSERT_TRUE(simpleMatchCS.isValid());
67 EXPECT_FALSE(simpleMatchCS.isEmpty());
68 ASSERT_TRUE(simpleMatchSpace.isValid());
69 EXPECT_FALSE(simpleMatchSpace.isEmpty());
70 ASSERT_TRUE(complexMatch.isValid());
71 EXPECT_FALSE(complexMatch.isEmpty());
73 // Assert match succeeds
74 EXPECT_TRUE(simpleMatch.match("test"));
75 EXPECT_TRUE(simpleMatch.match("other test;"));
76 EXPECT_TRUE(simpleMatchSpace.match(" space "));
77 // Assert partial match fails
78 EXPECT_FALSE(simpleMatch.match("testing"));
79 EXPECT_FALSE(simpleMatchSpace.match("space"));
80 // Assert unrelated fails
81 EXPECT_FALSE(simpleMatch.match("not above"));
83 // Assert case sensitivity followed
84 EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
85 EXPECT_TRUE(simpleMatch.match("TeSt"));
86 EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
87 EXPECT_FALSE(simpleMatchCS.match("TeSt"));
89 // Assert complex phrases are escaped properly
90 EXPECT_TRUE(complexMatch.match(complexMatchFull));
91 EXPECT_FALSE(complexMatch.match("norm"));
95 TEST(ExpressionMatchTest, matchMultiPhrase)
97 // Simple phrases, case-insensitive
98 ExpressionMatch simpleMatch =
99 ExpressionMatch("test\nOther ", ExpressionMatch::MatchMode::MatchMultiPhrase, false);
100 // Simple phrases, case-sensitive
101 ExpressionMatch simpleMatchCS =
102 ExpressionMatch("test\nOther ", ExpressionMatch::MatchMode::MatchMultiPhrase, true);
104 QString complexMatchFullA(R"(^(?:norm|norm\-space|\!norm\-escaped|\\\!slash\-invert|\\\\double)"
105 R"(|escape\;sep|slash\-end\-split\\|quad\\\\\!noninvert)"
106 R"(|newline\-split|newline\-split\-slash\\|slash\-at\-end\\)$)");
107 QString complexMatchFullB(R"(^(?:invert|invert\-space)$)$)");
108 ExpressionMatch complexMatch =
109 ExpressionMatch(complexMatchFullA + "\n" + complexMatchFullB,
110 ExpressionMatch::MatchMode::MatchMultiPhrase, false);
112 // Assert valid and not empty
113 ASSERT_TRUE(simpleMatch.isValid());
114 EXPECT_FALSE(simpleMatch.isEmpty());
115 ASSERT_TRUE(simpleMatchCS.isValid());
116 EXPECT_FALSE(simpleMatchCS.isEmpty());
117 ASSERT_TRUE(complexMatch.isValid());
118 EXPECT_FALSE(complexMatch.isEmpty());
120 // Assert match succeeds
121 EXPECT_TRUE(simpleMatch.match("test"));
122 EXPECT_TRUE(simpleMatch.match("test[suffix]"));
123 EXPECT_TRUE(simpleMatch.match("other test;"));
124 EXPECT_TRUE(simpleMatch.match("Other "));
125 EXPECT_TRUE(simpleMatch.match(".Other !"));
126 // Assert partial match fails
127 EXPECT_FALSE(simpleMatch.match("testing"));
128 EXPECT_FALSE(simpleMatch.match("Other!"));
129 // Assert unrelated fails
130 EXPECT_FALSE(simpleMatch.match("not above"));
132 // Assert case sensitivity followed
133 EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
134 EXPECT_TRUE(simpleMatch.match("TeSt"));
135 EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
136 EXPECT_FALSE(simpleMatchCS.match("TeSt"));
138 // Assert complex phrases are escaped properly
139 EXPECT_TRUE(complexMatch.match(complexMatchFullA));
140 EXPECT_TRUE(complexMatch.match(complexMatchFullB));
141 EXPECT_FALSE(complexMatch.match("norm"));
142 EXPECT_FALSE(complexMatch.match("invert"));
146 TEST(ExpressionMatchTest, matchWildcard)
148 // Simple wildcard, case-insensitive
149 ExpressionMatch simpleMatch =
150 ExpressionMatch("?test*", ExpressionMatch::MatchMode::MatchWildcard, false);
151 // Simple wildcard, case-sensitive
152 ExpressionMatch simpleMatchCS =
153 ExpressionMatch("?test*", ExpressionMatch::MatchMode::MatchWildcard, true);
154 // Escaped wildcard, case-insensitive
155 ExpressionMatch simpleMatchEscape =
156 ExpressionMatch(R"(\?test\*)", ExpressionMatch::MatchMode::MatchWildcard, false);
157 // Inverted wildcard, case-insensitive
158 ExpressionMatch simpleMatchInvert =
159 ExpressionMatch("!test*", ExpressionMatch::MatchMode::MatchWildcard, false);
160 // Not inverted wildcard, case-insensitive
161 ExpressionMatch simpleMatchNoInvert =
162 ExpressionMatch(R"(\!test*)", ExpressionMatch::MatchMode::MatchWildcard, false);
163 // Not inverted wildcard literal slash, case-insensitive
164 ExpressionMatch simpleMatchNoInvertSlash =
165 ExpressionMatch(R"(\\!test*)", ExpressionMatch::MatchMode::MatchWildcard, false);
167 ExpressionMatch complexMatch =
168 ExpressionMatch(R"(never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval)",
169 ExpressionMatch::MatchMode::MatchWildcard, false);
171 // Assert valid and not empty
172 ASSERT_TRUE(simpleMatch.isValid());
173 EXPECT_FALSE(simpleMatch.isEmpty());
174 ASSERT_TRUE(simpleMatchCS.isValid());
175 EXPECT_FALSE(simpleMatchCS.isEmpty());
176 ASSERT_TRUE(simpleMatchEscape.isValid());
177 EXPECT_FALSE(simpleMatchEscape.isEmpty());
178 ASSERT_TRUE(simpleMatchInvert.isValid());
179 EXPECT_FALSE(simpleMatchInvert.isEmpty());
180 ASSERT_TRUE(simpleMatchNoInvert.isValid());
181 EXPECT_FALSE(simpleMatchNoInvert.isEmpty());
182 ASSERT_TRUE(simpleMatchNoInvertSlash.isValid());
183 EXPECT_FALSE(simpleMatchNoInvertSlash.isEmpty());
184 ASSERT_TRUE(complexMatch.isValid());
185 EXPECT_FALSE(complexMatch.isEmpty());
187 // Assert match succeeds
188 EXPECT_TRUE(simpleMatch.match("@test"));
189 EXPECT_TRUE(simpleMatch.match("@testing"));
190 EXPECT_TRUE(simpleMatch.match("!test"));
191 EXPECT_TRUE(simpleMatchEscape.match("?test*"));
192 EXPECT_TRUE(simpleMatchInvert.match("atest"));
193 EXPECT_TRUE(simpleMatchNoInvert.match("!test"));
194 EXPECT_TRUE(simpleMatchNoInvertSlash.match(R"(\!test)"));
195 // Assert partial match fails
196 EXPECT_FALSE(simpleMatch.match("test"));
197 // Assert unrelated fails
198 EXPECT_FALSE(simpleMatch.match("not above"));
199 // Assert escaped wildcard fails
200 EXPECT_FALSE(simpleMatchEscape.match("@testing"));
201 EXPECT_FALSE(simpleMatchNoInvert.match("test"));
202 EXPECT_FALSE(simpleMatchNoInvert.match("anything"));
203 EXPECT_FALSE(simpleMatchNoInvertSlash.match("!test"));
204 EXPECT_FALSE(simpleMatchNoInvertSlash.match("test"));
205 EXPECT_FALSE(simpleMatchNoInvertSlash.match("anything"));
206 // Assert non-inverted fails
207 EXPECT_FALSE(simpleMatchInvert.match("testing"));
209 // Assert case sensitivity followed
210 EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
211 EXPECT_TRUE(simpleMatch.match("@TeSt"));
212 EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
213 EXPECT_FALSE(simpleMatchCS.match("@TeSt"));
215 // Assert complex match
216 EXPECT_TRUE(complexMatch.match(R"(neverAgonnaBBBgive*you?up\test|yyeah\1\\2\1inval)"));
217 // Assert complex not literal match
218 EXPECT_FALSE(complexMatch.match(R"(never?gonna*give\*you\?up\\test|y\yeah\\1\\\\2\\\1inval)"));
219 // Assert complex unrelated not match
220 EXPECT_FALSE(complexMatch.match("other"));
/**
 * Exercises MatchMultiWildcard mode, where one pattern holds several wildcard rules.
 *
 * The expectations below cover rules separated by ";" or by a newline, a leading "!" that
 * inverts a rule, a pattern made only of inverted rules (expected to match everything those
 * rules do not exclude), backslash-escaped wildcards and separators, and case sensitivity.
 *
 * NOTE(review): this listing looks truncated - the initializer lists of complexMatchNormal and
 * complexMatchInvert are never closed, the complexMatch constructor call has no final argument,
 * and no block braces are visible.  Restore the missing lines from the upstream file before
 * building; confirm the exact list entries there rather than guessing them.
 */
224 TEST(ExpressionMatchTest, matchMultiWildcard)
226 // Simple wildcards, case-insensitive
227 ExpressionMatch simpleMatch =
228 ExpressionMatch("?test*;another?",
229 ExpressionMatch::MatchMode::MatchMultiWildcard, false);
230 // Simple wildcards, case-sensitive
231 ExpressionMatch simpleMatchCS =
232 ExpressionMatch("?test*;another?",
233 ExpressionMatch::MatchMode::MatchMultiWildcard, true);
234 // Escaped wildcards, case-insensitive
235 ExpressionMatch simpleMatchEscape =
236 ExpressionMatch(R"(\?test\*\;*thing\*)",
237 ExpressionMatch::MatchMode::MatchMultiWildcard, false);
238 // Inverted wildcards, case-insensitive
239 ExpressionMatch simpleMatchInvert =
240 ExpressionMatch(R"(test*;!testing)",
241 ExpressionMatch::MatchMode::MatchMultiWildcard, false);
242 // Implicit wildcards, case-insensitive
243 ExpressionMatch simpleMatchImplicit =
244 ExpressionMatch(R"(!testing*)",
245 ExpressionMatch::MatchMode::MatchMultiWildcard, false);
// Complex rule list: normal and inverted rules, padded with spaces, separated by ";" and by
// newlines, with escaped "!", escaped ";" and backslashes directly before a separator
247 QString complexMatchFull(R"(norm;!invert; norm-space ; !invert-space ;;!;\!norm-escaped;)"
248 R"(\\!slash-invert;\\\\double; escape\;sep;slash-end-split\\;)"
249 R"(quad\\\\!noninvert;newline-split)""\n"
250 R"(newline-split-slash\\)""\n"
251 R"(slash-at-end\\)");
252 // Match normal components
// Strings that the non-inverted rules of complexMatchFull are expected to match
253 QStringList complexMatchNormal = {
260 R"(slash-end-split\)",
261 R"(quad\\!noninvert)",
263 R"(newline-split-slash\)",
266 // Match negating components
// Strings that the inverted rules of complexMatchFull are expected to reject
267 QStringList complexMatchInvert = {
271 ExpressionMatch complexMatch =
272 ExpressionMatch(complexMatchFull, ExpressionMatch::MatchMode::MatchMultiWildcard,
275 // Assert valid and not empty
276 ASSERT_TRUE(simpleMatch.isValid());
277 EXPECT_FALSE(simpleMatch.isEmpty());
278 ASSERT_TRUE(simpleMatchCS.isValid());
279 EXPECT_FALSE(simpleMatchCS.isEmpty());
280 ASSERT_TRUE(simpleMatchEscape.isValid());
281 EXPECT_FALSE(simpleMatchEscape.isEmpty());
282 ASSERT_TRUE(simpleMatchInvert.isValid());
283 EXPECT_FALSE(simpleMatchInvert.isEmpty());
284 ASSERT_TRUE(simpleMatchImplicit.isValid());
285 EXPECT_FALSE(simpleMatchImplicit.isEmpty());
286 ASSERT_TRUE(complexMatch.isValid());
287 EXPECT_FALSE(complexMatch.isEmpty());
289 // Assert match succeeds
290 EXPECT_TRUE(simpleMatch.match("@test"));
291 EXPECT_TRUE(simpleMatch.match("@testing"));
292 EXPECT_TRUE(simpleMatch.match("!test"));
293 EXPECT_TRUE(simpleMatch.match("anotherA"));
294 EXPECT_TRUE(simpleMatchEscape.match("?test*;thing*"));
295 EXPECT_TRUE(simpleMatchEscape.match("?test*;AAAAAthing*"));
296 EXPECT_TRUE(simpleMatchInvert.match("test"));
297 EXPECT_TRUE(simpleMatchInvert.match("testing things"));
298 // Assert implicit wildcard succeeds
299 EXPECT_TRUE(simpleMatchImplicit.match("AAAAAA"));
300 // Assert partial match fails
301 EXPECT_FALSE(simpleMatch.match("test"));
302 EXPECT_FALSE(simpleMatch.match("another"));
303 EXPECT_FALSE(simpleMatch.match("anotherBB"));
304 // Assert unrelated fails
305 EXPECT_FALSE(simpleMatch.match("not above"));
306 // Assert escaped wildcard fails
307 EXPECT_FALSE(simpleMatchEscape.match("@testing"));
308 // Assert inverted match fails
309 EXPECT_FALSE(simpleMatchInvert.match("testing"));
310 EXPECT_FALSE(simpleMatchImplicit.match("testing"));
312 // Assert case sensitivity followed
313 EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
314 EXPECT_TRUE(simpleMatch.match("@TeSt"));
315 EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
316 EXPECT_FALSE(simpleMatchCS.match("@TeSt"));
318 // Assert complex match
319 for (auto&& normMatch : complexMatchNormal) {
320 // Each normal component should match
321 EXPECT_TRUE(complexMatch.match(normMatch));
324 for (auto&& invertMatch : complexMatchInvert) {
325 // Each invert component should not match
326 EXPECT_FALSE(complexMatch.match(invertMatch));
329 // Assert complex not literal match
330 EXPECT_FALSE(complexMatch.match(complexMatchFull));
331 // Assert complex unrelated not match
332 EXPECT_FALSE(complexMatch.match("other"));
336 TEST(ExpressionMatchTest, matchRegEx)
338 // Simple regex, case-insensitive
339 ExpressionMatch simpleMatch =
340 ExpressionMatch(R"(simple.\*escape-match.*)",
341 ExpressionMatch::MatchMode::MatchRegEx, false);
342 // Simple regex, case-sensitive
343 ExpressionMatch simpleMatchCS =
344 ExpressionMatch(R"(simple.\*escape-match.*)",
345 ExpressionMatch::MatchMode::MatchRegEx, true);
346 // Inverted regex, case-insensitive
347 ExpressionMatch simpleMatchInvert =
348 ExpressionMatch(R"(!invert.\*escape-match.*)",
349 ExpressionMatch::MatchMode::MatchRegEx, false);
350 // Non-inverted regex, case-insensitive
351 ExpressionMatch simpleMatchNoInvert =
352 ExpressionMatch(R"(\!simple.\*escape-match.*)",
353 ExpressionMatch::MatchMode::MatchRegEx, false);
354 // Non-inverted regex literal slash, case-insensitive
355 ExpressionMatch simpleMatchNoInvertSlash =
356 ExpressionMatch(R"(\\!simple.\*escape-match.*)",
357 ExpressionMatch::MatchMode::MatchRegEx, false);
359 // Assert valid and not empty
360 ASSERT_TRUE(simpleMatch.isValid());
361 EXPECT_FALSE(simpleMatch.isEmpty());
362 ASSERT_TRUE(simpleMatchCS.isValid());
363 EXPECT_FALSE(simpleMatchCS.isEmpty());
364 ASSERT_TRUE(simpleMatchInvert.isValid());
365 EXPECT_FALSE(simpleMatchInvert.isEmpty());
366 ASSERT_TRUE(simpleMatchNoInvert.isValid());
367 EXPECT_FALSE(simpleMatchNoInvert.isEmpty());
368 ASSERT_TRUE(simpleMatchNoInvertSlash.isValid());
369 EXPECT_FALSE(simpleMatchNoInvertSlash.isEmpty());
371 // Assert match succeeds
372 EXPECT_TRUE(simpleMatch.match("simpleA*escape-match"));
373 EXPECT_TRUE(simpleMatch.match("simpleA*escape-matchBBBB"));
374 EXPECT_TRUE(simpleMatchInvert.match("not above"));
375 EXPECT_TRUE(simpleMatchNoInvert.match("!simpleA*escape-matchBBBB"));
376 EXPECT_TRUE(simpleMatchNoInvertSlash.match(R"(\!simpleA*escape-matchBBBB)"));
377 // Assert partial match fails
378 EXPECT_FALSE(simpleMatch.match("simpleA*escape-mat"));
379 EXPECT_FALSE(simpleMatch.match("simple*escape-match"));
380 // Assert unrelated fails
381 EXPECT_FALSE(simpleMatch.match("not above"));
382 // Assert escaped wildcard fails
383 EXPECT_FALSE(simpleMatch.match("simpleABBBBescape-matchBBBB"));
384 // Assert inverted fails
385 EXPECT_FALSE(simpleMatchInvert.match("invertA*escape-match"));
386 EXPECT_FALSE(simpleMatchInvert.match("invertA*escape-matchBBBB"));
387 EXPECT_FALSE(simpleMatchNoInvert.match("simpleA*escape-matchBBBB"));
388 EXPECT_FALSE(simpleMatchNoInvert.match("anything"));
389 EXPECT_FALSE(simpleMatchNoInvertSlash.match("!simpleA*escape-matchBBBB"));
390 EXPECT_FALSE(simpleMatchNoInvertSlash.match("anything"));
392 // Assert case sensitivity followed
393 EXPECT_FALSE(simpleMatch.sourceCaseSensitive());
394 EXPECT_TRUE(simpleMatch.match("SiMpLEA*escape-MATCH"));
395 EXPECT_TRUE(simpleMatchCS.sourceCaseSensitive());
396 EXPECT_FALSE(simpleMatchCS.match("SiMpLEA*escape-MATCH"));
/**
 * Table-driven check of ExpressionMatch::trimMultiWildcardWhitespace().
 *
 * Each entry of the table is a two-element vector: the raw rule list (index PATTERN_SOURCE) and
 * the text it is expected to be trimmed into (index PATTERN_RESULT).  For every entry the test
 * also re-trims the result and expects no further change.
 *
 * NOTE(review): this listing looks truncated - some table entries and the closing of the table
 * are not visible, "result" is assigned without a visible declaration, and no block braces are
 * visible.  Restore the missing lines from the upstream file before building.
 */
400 TEST(ExpressionMatchTest, trimMultiWildcardWhitespace)
// Named indices into each {source, expected} entry
403 static constexpr uint PATTERN_SOURCE = 0;
404 static constexpr uint PATTERN_RESULT = 1;
405 std::vector<std::vector<QString>> patterns = {
409 // Simple semicolon cleanup
410 {"simple1 ;simple2; simple3 ",
411 "simple1; simple2; simple3"},
412 // Simple newline cleanup
413 {"simple1 \nsimple2\n simple3 ",
414 "simple1\nsimple2\nsimple3"},
416 {R"(norm; norm-space ; newline-space )""\n"
417 R"( ;escape \; sep ; slash-end-split\\; quad\\\\norm; newline-split-slash\\)""\n"
419 R"(norm; norm-space; newline-space)""\n"
420 R"(escape \; sep; slash-end-split\\; quad\\\\norm; newline-split-slash\\)""\n"
424 // Check every source string...
426 for (auto&& patternPair : patterns) {
427 // Make sure data is valid
// ASSERT (not EXPECT) so a malformed entry stops the test before it is indexed below
428 ASSERT_TRUE(patternPair.size() == 2);
429 // Run transformation
430 result = ExpressionMatch::trimMultiWildcardWhitespace(patternPair[PATTERN_SOURCE]);
431 // Assert that source trims into expected pattern
432 EXPECT_EQ(patternPair[PATTERN_RESULT], result);
433 // Assert that re-trimming expected pattern gives the same result
434 EXPECT_EQ(ExpressionMatch::trimMultiWildcardWhitespace(result), result);
439 TEST(ExpressionMatchTest, testInvalidRegEx)
441 // Invalid regular expression pattern
442 ExpressionMatch invalidRegExMatch =
443 ExpressionMatch("*network", ExpressionMatch::MatchMode::MatchRegEx, false);
446 ASSERT_FALSE(invalidRegExMatch.isValid());
448 EXPECT_FALSE(invalidRegExMatch.isEmpty());
449 // Assert default match fails
450 EXPECT_FALSE(invalidRegExMatch.match(""));
451 // Assert wildcard match fails
452 EXPECT_FALSE(invalidRegExMatch.match("network"));
453 // Assert literal match fails
454 EXPECT_FALSE(invalidRegExMatch.match("*network"));
458 TEST(ExpressionMatchTest, matchPhraseUnicode)
460 // Escape Unicode color emoji as a workaround for bug with libXft, otherwise, color emoji may
461 // crash "git gui" and "gitk"
462 // This has no impact on Quassel or the tests themselves
464 // See https://unix.stackexchange.com/questions/629281/gitk-crashes-when-viewing-commit-containing-emoji-x-error-of-failed-request-ba
465 const QString UnicodeEmojiFire("\xf0\x9f\x94\xa5");
466 const QString UnicodeEmojiFox("\xf0\x9f\xa6\x8a");
468 // Simple phrase, case-insensitive, ASCII
469 ExpressionMatch simpleMatchASCII =
470 ExpressionMatch("V", ExpressionMatch::MatchMode::MatchPhrase, false);
471 // Simple phrase, case-insensitive, ASCII Unicode mix
472 ExpressionMatch simpleMatchUniASCII =
473 ExpressionMatch("räv", ExpressionMatch::MatchMode::MatchPhrase, false);
474 // Simple phrase, case-insensitive, full Unicode
475 ExpressionMatch simpleMatchUnicode =
476 ExpressionMatch("狐", ExpressionMatch::MatchMode::MatchPhrase, false);
477 // Simple phrase, case-insensitive, emoji
478 ExpressionMatch simpleMatchEmoji =
479 ExpressionMatch(UnicodeEmojiFox, ExpressionMatch::MatchMode::MatchPhrase, false);
481 // Assert valid and not empty
482 ASSERT_TRUE(simpleMatchASCII.isValid());
483 EXPECT_FALSE(simpleMatchASCII.isEmpty());
484 ASSERT_TRUE(simpleMatchUniASCII.isValid());
485 EXPECT_FALSE(simpleMatchUniASCII.isEmpty());
486 ASSERT_TRUE(simpleMatchUnicode.isValid());
487 EXPECT_FALSE(simpleMatchUnicode.isEmpty());
488 ASSERT_TRUE(simpleMatchEmoji.isValid());
489 EXPECT_FALSE(simpleMatchEmoji.isEmpty());
491 // Assert basic match succeeds
492 EXPECT_TRUE(simpleMatchASCII.match("V"));
493 EXPECT_TRUE(simpleMatchUniASCII.match("räv"));
494 EXPECT_TRUE(simpleMatchUnicode.match("狐"));
495 EXPECT_TRUE(simpleMatchEmoji.match(UnicodeEmojiFox));
497 // Assert classic word boundaries succeed
498 EXPECT_TRUE(simpleMatchASCII.match("V: hello"));
499 EXPECT_TRUE(simpleMatchUniASCII.match("\"räv\""));
500 EXPECT_TRUE(simpleMatchUnicode.match("狐."));
501 EXPECT_TRUE(simpleMatchEmoji.match("(" + UnicodeEmojiFox + ")"));
503 // Assert non-word-boundary Unicode is NOT treated as a word boundary
504 // > ASCII nickname (most common case with spam)
505 EXPECT_FALSE(simpleMatchASCII.match("TÜV Västra"));
506 // > ASCII/Unicode mix
507 EXPECT_FALSE(simpleMatchUniASCII.match("rävīoli"));
509 EXPECT_FALSE(simpleMatchUnicode.match("九尾の狐"));
511 // Assert emoji are treated as word boundaries
512 EXPECT_TRUE(simpleMatchASCII.match(UnicodeEmojiFire + "V" + UnicodeEmojiFire));
513 EXPECT_TRUE(simpleMatchEmoji.match(UnicodeEmojiFire + UnicodeEmojiFox));
515 // Assert Unicode case folding does NOT happen (ä -> a)
516 EXPECT_FALSE(simpleMatchUniASCII.match("rav"));
/**
 * Exercises MatchRegEx mode with non-ASCII text.
 *
 * The expectations below require the \w and \d character classes to accept non-ASCII letters
 * and non-ASCII decimal digits in addition to their ASCII counterparts.
 *
 * NOTE(review): no block braces are visible in this listing and the end of the test body is
 * not in view; confirm against the upstream file before building.
 */
520 TEST(ExpressionMatchTest, matchRegExUnicode)
522 // Word character (letter and digit) regex, case-insensitive
523 ExpressionMatch simpleMatchSixWordChar =
524 ExpressionMatch(R"(\w{6})",
525 ExpressionMatch::MatchMode::MatchRegEx, false);
526 // Digit regex, case-insensitive
527 ExpressionMatch simpleMatchThreeDigit =
528 ExpressionMatch(R"(\d{3})",
529 ExpressionMatch::MatchMode::MatchRegEx, false);
// One-or-more digit regex, case-insensitive
530 ExpressionMatch simpleMatchAnyDigit =
531 ExpressionMatch(R"(\d+)",
532 ExpressionMatch::MatchMode::MatchRegEx, false);
534 // Assert valid and not empty
535 ASSERT_TRUE(simpleMatchSixWordChar.isValid());
536 EXPECT_FALSE(simpleMatchSixWordChar.isEmpty());
537 ASSERT_TRUE(simpleMatchThreeDigit.isValid());
538 EXPECT_FALSE(simpleMatchThreeDigit.isEmpty());
539 ASSERT_TRUE(simpleMatchAnyDigit.isValid());
540 EXPECT_FALSE(simpleMatchAnyDigit.isEmpty());
542 // Assert ASCII matches
543 EXPECT_TRUE(simpleMatchSixWordChar.match("abc123"));
544 EXPECT_TRUE(simpleMatchThreeDigit.match("123"));
545 EXPECT_TRUE(simpleMatchAnyDigit.match("1"));
547 // Assert Unicode matches
// Mix of accented letters, an ASCII digit and non-ASCII digits: six word characters in total
548 EXPECT_TRUE(simpleMatchSixWordChar.match("áwá๑2๓"));
550 // "Unicode Characters in the 'Number, Decimal Digit' Category"
552 // See https://www.fileformat.info/info/unicode/category/Nd/list.htm
553 EXPECT_TRUE(simpleMatchThreeDigit.match("๑2๓"));
554 EXPECT_TRUE(simpleMatchAnyDigit.match("๑"));
556 // Assert wrong content doesn't match
557 EXPECT_FALSE(simpleMatchAnyDigit.match("áwá"));