1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016 University of Cambridge 11 12 ----------------------------------------------------------------------------- 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 POSSIBILITY OF SUCH DAMAGE. 38 ----------------------------------------------------------------------------- 39 */ 40 41 #ifdef HAVE_CONFIG_H 42 #include "config.h" 43 #endif 44 45 #include <stdio.h> 46 #include <string.h> 47 48 #define PCRE2_CODE_UNIT_WIDTH 0 49 #include "pcre2.h" 50 51 /* 52 Letter characters: 53 \xe6\x92\xad = 0x64ad = 25773 (kanji) 54 Non-letter characters: 55 \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark) 56 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888 57 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character) 58 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character) 59 Newlines: 60 \xc2\x85 = 0x85 = 133 (NExt Line = NEL) 61 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator) 62 Othercase pairs: 63 \xc3\xa9 = 0xe9 = 233 (e') 64 \xc3\x89 = 0xc9 = 201 (E') 65 \xc3\xa1 = 0xe1 = 225 (a') 66 \xc3\x81 = 0xc1 = 193 (A') 67 \x53 = 0x53 = S 68 \x73 = 0x73 = s 69 \xc5\xbf = 0x17f = 383 (long S) 70 \xc8\xba = 0x23a = 570 71 \xe2\xb1\xa5 = 0x2c65 = 11365 72 \xe1\xbd\xb8 = 0x1f78 = 8056 73 \xe1\xbf\xb8 = 0x1ff8 = 8184 74 \xf0\x90\x90\x80 = 0x10400 = 66560 75 \xf0\x90\x90\xa8 = 0x10428 = 66600 76 \xc7\x84 = 0x1c4 = 452 77 \xc7\x85 = 0x1c5 = 453 78 \xc7\x86 = 0x1c6 = 454 79 Caseless sets: 80 ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586} 81 ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1 82 ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a} 83 84 Mark property: 85 \xcc\x8d = 0x30d = 781 86 Special: 87 \xc2\x80 = 0x80 = 128 (lowest 2 byte character) 88 \xdf\xbf = 0x7ff = 
2047 (highest 2 byte character) 89 \xe0\xa0\x80 = 0x800 = 2048 (lowest 2 byte character) 90 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character) 91 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character) 92 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character) 93 */ 94 95 static int regression_tests(void); 96 97 int main(void) 98 { 99 int jit = 0; 100 #if defined SUPPORT_PCRE2_8 101 pcre2_config_8(PCRE2_CONFIG_JIT, &jit); 102 #elif defined SUPPORT_PCRE2_16 103 pcre2_config_16(PCRE2_CONFIG_JIT, &jit); 104 #elif defined SUPPORT_PCRE2_32 105 pcre2_config_32(PCRE2_CONFIG_JIT, &jit); 106 #endif 107 if (!jit) { 108 printf("JIT must be enabled to run pcre_jit_test\n"); 109 return 1; 110 } 111 return regression_tests(); 112 } 113 114 /* --------------------------------------------------------------------------------------- */ 115 116 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32) 117 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined 118 #endif 119 120 #define MU (PCRE2_MULTILINE | PCRE2_UTF) 121 #define MUP (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP) 122 #define CMU (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF) 123 #define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP) 124 #define M (PCRE2_MULTILINE) 125 #define MP (PCRE2_MULTILINE | PCRE2_UCP) 126 #define U (PCRE2_UTF) 127 #define CM (PCRE2_CASELESS | PCRE2_MULTILINE) 128 129 #define BSR(x) ((x) << 16) 130 #define A PCRE2_NEWLINE_ANYCRLF 131 132 #define GET_NEWLINE(x) ((x) & 0xffff) 133 #define GET_BSR(x) ((x) >> 16) 134 135 #define OFFSET_MASK 0x00ffff 136 #define F_NO8 0x010000 137 #define F_NO16 0x020000 138 #define F_NO32 0x020000 139 #define F_NOMATCH 0x040000 140 #define F_DIFF 0x080000 141 #define F_FORCECONV 0x100000 142 #define F_PROPERTY 0x200000 143 144 struct regression_test_case { 145 int compile_options; 146 int newline; 147 int match_options; 148 int start_offset; 149 const char *pattern; 150 
const char *input; 151 }; 152 153 static struct regression_test_case regression_test_cases[] = { 154 /* Constant strings. */ 155 { MU, A, 0, 0, "AbC", "AbAbC" }, 156 { MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" }, 157 { CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" }, 158 { M, A, 0, 0, "[^a]", "aAbB" }, 159 { CM, A, 0, 0, "[^m]", "mMnN" }, 160 { M, A, 0, 0, "a[^b][^#]", "abacd" }, 161 { CM, A, 0, 0, "A[^B][^E]", "abacd" }, 162 { CMU, A, 0, 0, "[^x][^#]", "XxBll" }, 163 { MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" }, 164 { CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" }, 165 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" }, 166 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" }, 167 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" }, 168 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" }, 169 { MU, A, 0, 0, "[axd]", "sAXd" }, 170 { CMU, A, 0, 0, "[axd]", "sAXd" }, 171 { CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" }, 172 { MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" }, 173 { MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" }, 174 { CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." }, 175 { MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." 
}, 176 { MU, A, 0, 0, "[^a]", "\xc2\x80[]" }, 177 { CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" }, 178 { CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" }, 179 { PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" }, 180 { PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" }, 181 { PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" }, 182 { M, A, 0, 0, "\\Ca", "cda" }, 183 { CM, A, 0, 0, "\\Ca", "CDA" }, 184 { M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" }, 185 { CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" }, 186 { CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 187 { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 188 { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 189 { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 190 { M, A, 0, 0, "[3-57-9]", "5" }, 191 192 /* Assertions. */ 193 { MU, A, 0, 0, "\\b[^A]", "A_B#" }, 194 { M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" }, 195 { MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" }, 196 { MP, A, 0, 0, "\\B", "_\xa1" }, 197 { MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," }, 198 { MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" 
}, 199 { MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" }, 200 { MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" }, 201 { MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, 202 { MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" }, 203 { CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" }, 204 { M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" }, 205 { M, A, 0, 1 | F_NOMATCH, "^", "\n" }, 206 { 0, 0, 0, 0, "^ab", "ab" }, 207 { 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" }, 208 { M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" }, 209 { MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" }, 210 { M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" }, 211 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" }, 212 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" }, 213 { 0, 0, 0, 0, "ab$", "ab" }, 214 { 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" }, 215 { PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" }, 216 { M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" }, 217 { M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" }, 218 { MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" }, 219 { MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" }, 220 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" }, 221 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" }, 222 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" }, 223 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" }, 224 { U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" }, 225 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" }, 226 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" }, 227 { U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" }, 228 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" }, 229 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" }, 230 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" }, 231 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" }, 232 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, 
".\\Z", "aaa" }, 233 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" }, 234 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" }, 235 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" }, 236 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" }, 237 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" }, 238 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" }, 239 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" }, 240 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" }, 241 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" }, 242 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" }, 243 { M, A, 0, 0, "\\Aa", "aaa" }, 244 { M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" }, 245 { M, A, 0, 1, "\\Ga", "aaa" }, 246 { M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" }, 247 { M, A, 0, 0, "a\\z", "aaa" }, 248 { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" }, 249 250 /* Brackets and alternatives. */ 251 { MU, A, 0, 0, "(ab|bb|cd)", "bacde" }, 252 { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" }, 253 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" }, 254 { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" }, 255 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" }, 256 { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" }, 257 { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, 258 { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" }, 259 { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, 260 { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" }, 261 262 /* Greedy and non-greedy ? operators. */ 263 { MU, A, 0, 0, "(?:a)?a", "laab" }, 264 { CMU, A, 0, 0, "(A)?A", "llaab" }, 265 { MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. 
*/ 266 { MU, A, 0, 0, "(a)?a", "manm" }, 267 { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" }, 268 { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" }, 269 { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" }, 270 271 /* Greedy and non-greedy + operators */ 272 { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" }, 273 { MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" }, 274 { MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" }, 275 { MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" }, 276 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, 277 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, 278 { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" }, 279 280 /* Greedy and non-greedy * operators */ 281 { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" }, 282 { MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" }, 283 { MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" }, 284 { CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" }, 285 { MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" }, 286 { MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" }, 287 { M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" }, 288 { M, A, 0, 0, "((?:a|)*){0}a", "a" }, 289 290 /* Combining ? + * operators */ 291 { MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" }, 292 { MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" }, 293 { MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" }, 294 { MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" }, 295 { MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" }, 296 297 /* Single character iterators. 
*/ 298 { MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" }, 299 { MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" }, 300 { MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" }, 301 { MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" }, 302 { MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" }, 303 { MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" }, 304 { MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" }, 305 { MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" }, 306 { MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" }, 307 { MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" }, 308 { MU, A, 0, 0, "(a?+[^b])+", "babaacacb" }, 309 { MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" }, 310 { CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" }, 311 { CMU, A, 0, 0, "[c-f]+k", "DemmFke" }, 312 { MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" }, 313 { MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" }, 314 { CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" }, 315 { CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" }, 316 { CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" }, 317 { CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" }, 318 { MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" }, 319 { CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" }, 320 { MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" }, 321 { MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" }, 322 { MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" }, 323 { MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, 324 { CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" }, 325 { CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" }, 326 { CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" }, 327 { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, 328 { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", 
"\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, 329 { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" }, 330 { MU, A, 0, 0, "\\d+123", "987654321,01234" }, 331 { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" }, 332 { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" }, 333 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."}, 334 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."}, 335 { MU, A, 0, 0, ".[ab]*.", "xx" }, 336 { MU, A, 0, 0, ".[ab]*a", "xxa" }, 337 { MU, A, 0, 0, ".[ab]?.", "xx" }, 338 339 /* Bracket repeats with limit. */ 340 { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" }, 341 { MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" }, 342 { MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" }, 343 { MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" }, 344 { MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" }, 345 { MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" }, 346 { MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" }, 347 { MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" }, 348 { MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" }, 349 350 /* Basic character sets. 
*/ 351 { MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " }, 352 { MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" }, 353 { MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" }, 354 { MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" }, 355 { MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" }, 356 { MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" }, 357 { MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" }, 358 { MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" }, 359 { MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" }, 360 { MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" }, 361 { MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" }, 362 { MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" }, 363 { CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" }, 364 { CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" }, 365 { MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" }, 366 { MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" }, 367 { MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" }, 368 { MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" }, 369 { MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" }, 370 { MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" }, 371 { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" }, 372 { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" }, 373 374 /* Unicode properties. 
*/ 375 { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" }, 376 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" }, 377 { MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" }, 378 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" }, 379 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" }, 380 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" }, 381 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" }, 382 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, 383 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, 384 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, 385 { MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, 386 { MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" }, 387 { CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" }, 388 { MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" }, 389 { MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" }, 390 { MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" }, 391 { CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" }, 392 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" }, 393 { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" }, 394 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" }, 395 396 /* Possible empty brackets. 
*/ 397 { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, 398 { MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" }, 399 { MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" }, 400 { MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" }, 401 { MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" }, 402 { MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" }, 403 { MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" }, 404 { MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" }, 405 { MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" }, 406 { MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" }, 407 408 /* Start offset. */ 409 { MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" }, 410 { MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, 411 { MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, 412 { MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" }, 413 414 /* Newline. */ 415 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." }, 416 { M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." }, 417 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." }, 418 { MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" }, 419 { MU, A, 0, 1, "^", "\r\n" }, 420 { M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" }, 421 { M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" }, 422 423 /* Any character except newline or any newline. 
*/ 424 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" }, 425 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" }, 426 { 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, 427 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, 428 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" }, 429 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" }, 430 { 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" }, 431 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" }, 432 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" }, 433 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" }, 434 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" }, 435 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" }, 436 { U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" }, 437 { MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" }, 438 { MU, A, 0, 0, "\\R+", "ab\r\n\r" }, 439 { MU, A, 0, 0, "\\R*", "ab\r\n\r" }, 440 { MU, A, 0, 0, "\\R*", "\r\n\r" }, 441 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" }, 442 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" }, 443 { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" }, 444 { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" }, 445 { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" }, 446 { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" }, 447 { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" }, 448 { MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" }, 449 { MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" }, 450 451 /* Atomic groups (no fallback from "next" direction). 
*/ 452 { MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" }, 453 { MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" }, 454 { MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op", 455 "bababcdedefgheijijklmlmnop" }, 456 { MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" }, 457 { MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" }, 458 { MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" }, 459 { MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" }, 460 { MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" }, 461 { MU, A, 0, 0, "(?>x|)*$", "aaa" }, 462 { MU, A, 0, 0, "(?>(x)|)*$", "aaa" }, 463 { MU, A, 0, 0, "(?>x|())*$", "aaa" }, 464 { MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" }, 465 { MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" }, 466 { MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" }, 467 { MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" }, 468 { MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" }, 469 { MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" }, 470 { MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" }, 471 { MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" }, 472 { MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" }, 473 { MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" }, 474 { MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" }, 475 { MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" }, 476 { MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" }, 477 { MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" }, 478 { CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" }, 479 { MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" }, 480 { MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd 
aacaabdbdcdcaaaadaabcbaadd" }, 481 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" }, 482 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" }, 483 { MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" }, 484 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" }, 485 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" }, 486 { MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" }, 487 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" }, 488 { MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" }, 489 { MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" }, 490 491 /* Possessive quantifiers. */ 492 { MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" }, 493 { MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" }, 494 { MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" }, 495 { MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" }, 496 { MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" }, 497 { MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" }, 498 { MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" }, 499 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" }, 500 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" }, 501 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" }, 502 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" }, 503 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" }, 504 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" }, 505 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" }, 506 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" }, 507 { MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" }, 508 { MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" }, 509 { MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" }, 510 { MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" }, 511 { MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" }, 512 { MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" }, 513 { MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" }, 514 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" }, 515 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" }, 516 { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" }, 517 { MU, A, 0, 0, 
"(?:(b*))*+m", "bxbbxbbbxbbm" }, 518 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" }, 519 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" }, 520 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" }, 521 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" }, 522 { MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" }, 523 { MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" }, 524 { MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" }, 525 { MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, 526 { MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, 527 528 /* Back references. */ 529 { MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" }, 530 { CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" }, 531 { CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" }, 532 { MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" }, 533 { MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" }, 534 { MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, 535 { MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, 536 { MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" }, 537 { MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" }, 538 { CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" }, 539 { MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" }, 540 { CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" }, 541 { MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, 542 { CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, 543 { MU, A, 0, 0, 
"(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" }, 544 { MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" }, 545 { M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" }, 546 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, 547 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." }, 548 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" }, 549 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" }, 550 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, 551 { CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" }, 552 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" }, 553 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" }, 554 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" }, 555 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" }, 556 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" }, 557 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" }, 558 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" }, 559 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" }, 560 { MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" }, 561 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" }, 562 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" }, 563 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, 564 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" }, 565 { CMU | 
PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" }, 566 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" }, 567 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, 568 569 /* Assertions. */ 570 { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, 571 { MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" }, 572 { MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" }, 573 { MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" }, 574 { MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" }, 575 { M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" }, 576 { M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" }, 577 { MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" }, 578 { MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" }, 579 { MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" }, 580 { MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" }, 581 { MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" }, 582 { MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" }, 583 { MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" }, 584 { MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" }, 585 { MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" }, 586 { MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" }, 587 { MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" }, 588 { MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, 589 { MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, 590 { MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" }, 591 { MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" }, 592 { MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" }, 593 { MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" }, 594 { MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" }, 595 { MU, A, 0, 0, 
"a(?!(?C)\\B(?C`x`))bb|ab", "abb" }, 596 { MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" }, 597 { MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" }, 598 { MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" }, 599 { MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" }, 600 { MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" }, 601 { MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" }, 602 { MU, A, 0, 0, "a(?=)b", "ab" }, 603 { MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" }, 604 605 /* Not empty, ACCEPT, FAIL */ 606 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" }, 607 { MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" }, 608 { MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" }, 609 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" }, 610 { MU, A, 0, 0, "a(*ACCEPT)b", "ab" }, 611 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" }, 612 { MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" }, 613 { MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" }, 614 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" }, 615 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" }, 616 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" }, 617 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" }, 618 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" }, 619 { MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" }, 620 { MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" }, 621 { MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" }, 622 { MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" }, 623 { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" }, 624 { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" }, 625 626 /* Conditional blocks. 
*/ 627 { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" }, 628 { MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" }, 629 { MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" }, 630 { MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" }, 631 { MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" }, 632 { MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" }, 633 { MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" }, 634 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" }, 635 { MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" }, 636 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" }, 637 { MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" }, 638 { MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" }, 639 { MU, A, 0, 0, "(?(?=a)ab)", "a" }, 640 { MU, A, 0, 0, "(?(?<!b)c)", "b" }, 641 { MU, A, 0, 0, "(?(DEFINE)a(b))", "a" }, 642 { MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" }, 643 { MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" }, 644 { MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" }, 645 { MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" }, 646 { MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" }, 647 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" }, 648 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" }, 649 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" }, 650 { MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" }, 651 { MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" }, 652 { MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" }, 653 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" }, 654 { MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" }, 655 { MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" }, 656 { MU, A, 0, 0, 
"(?(?!b)a*)+aak", "aaaaab aaaaak" }, 657 { MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" }, 658 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" }, 659 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" }, 660 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" }, 661 { MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" }, 662 { MU, A, 0, 0, "(?(?!)a|b)", "ab" }, 663 { MU, A, 0, 0, "(?(?!)a)", "ab" }, 664 { MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" }, 665 666 /* Set start of match. */ 667 { MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" }, 668 { MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" }, 669 { MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" }, 670 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" }, 671 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" }, 672 673 /* First line. */ 674 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" }, 675 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" }, 676 { MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" }, 677 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" }, 678 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" }, 679 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" }, 680 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" }, 681 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" }, 682 { MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" }, 683 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" }, 684 { M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" }, 685 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" }, 686 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" }, 687 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, 
".", "\r" }, 688 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" }, 689 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" }, 690 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" }, 691 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" }, 692 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" }, 693 { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" }, 694 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" }, 695 { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" }, 696 697 /* Recurse. */ 698 { MU, A, 0, 0, "(a)(?1)", "aa" }, 699 { MU, A, 0, 0, "((a))(?1)", "aa" }, 700 { MU, A, 0, 0, "(b|a)(?1)", "aa" }, 701 { MU, A, 0, 0, "(b|(a))(?1)", "aa" }, 702 { MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" }, 703 { MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" }, 704 { MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" }, 705 { MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" }, 706 { MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" }, 707 { MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" }, 708 { MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" }, 709 { MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" }, 710 { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" }, 711 { MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" }, 712 { MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" }, 713 { MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" }, 714 { MU, A, 0, 0, "b|<(?R)*>", "<<b>" }, 715 { MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" }, 716 { MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" }, 717 { MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" }, 718 { MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" }, 719 { MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" }, 720 { MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" }, 721 { MU, A, 0, 0, 
"((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" }, 722 { MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" }, 723 { MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" }, 724 { MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" }, 725 { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" }, 726 { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" }, 727 728 /* 16 bit specific tests. */ 729 { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" }, 730 { CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 731 { CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" }, 732 { CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 733 { CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" }, 734 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" }, 735 { CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" }, 736 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" }, 737 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" }, 738 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" }, 739 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" }, 740 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" }, 741 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" }, 742 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" }, 743 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" }, 744 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", 
"\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" }, 745 { M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, 746 { M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, 747 { CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" }, 748 { CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" }, 749 { CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" }, 750 { CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" }, 751 { CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" }, 752 { CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" }, 753 { CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" }, 754 { M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" }, 755 { 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" }, 756 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" }, 757 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" }, 758 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" }, 759 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" }, 760 761 /* Partial matching. 
*/ 762 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" }, 763 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" }, 764 { MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" }, 765 { MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" }, 766 { MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" }, 767 { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" }, 768 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" }, 769 { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" }, 770 771 /* (*MARK) verb. */ 772 { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" }, 773 { MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" }, 774 { MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" }, 775 { MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" }, 776 { MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" }, 777 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" }, 778 { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" }, 779 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" }, 780 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, 781 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" }, 782 { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" }, 783 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, 784 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" }, 785 { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" }, 786 { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" }, 787 788 /* (*COMMIT) verb. */ 789 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" }, 790 { MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" }, 791 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" }, 792 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" }, 793 { MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" }, 794 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" }, 795 796 /* (*PRUNE) verb. 
*/ 797 { MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" }, 798 { MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" }, 799 { MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" }, 800 { MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" }, 801 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" }, 802 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" }, 803 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" }, 804 { MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" }, 805 { MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 806 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 807 { MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 808 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 809 { MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" }, 810 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" }, 811 { MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 812 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 813 { MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 814 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 815 { MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" }, 816 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" }, 817 { MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" }, 818 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" }, 819 { MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" }, 820 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" }, 821 { MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" }, 822 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" }, 823 { MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" }, 824 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" }, 825 { MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" }, 
826 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" }, 827 828 /* (*SKIP) verb. */ 829 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" }, 830 { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," }, 831 { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," }, 832 { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" }, 833 834 /* (*THEN) verb. */ 835 { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" }, 836 { MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" }, 837 { MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" }, 838 { MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" }, 839 { MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" }, 840 { MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" }, 841 { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" }, 842 { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" }, 843 { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" }, 844 845 /* Deep recursion. */ 846 { MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " }, 847 { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " }, 848 { MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" }, 849 850 /* Deep recursion: Stack limit reached. 
*/ 851 { M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" }, 852 { M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 853 { M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 854 { M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 855 { M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 856 857 { 0, 0, 0, 0, NULL, NULL } 858 }; 859 860 #ifdef SUPPORT_PCRE2_8 861 static pcre2_jit_stack_8* callback8(void *arg) 862 { 863 return (pcre2_jit_stack_8 *)arg; 864 } 865 #endif 866 867 #ifdef SUPPORT_PCRE2_16 868 static pcre2_jit_stack_16* callback16(void *arg) 869 { 870 return (pcre2_jit_stack_16 *)arg; 871 } 872 #endif 873 874 #ifdef SUPPORT_PCRE2_32 875 static pcre2_jit_stack_32* callback32(void *arg) 876 { 877 return (pcre2_jit_stack_32 *)arg; 878 } 879 #endif 880 881 #ifdef SUPPORT_PCRE2_8 882 static pcre2_jit_stack_8 *stack8; 883 884 static pcre2_jit_stack_8 *getstack8(void) 885 { 886 if (!stack8) 887 stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL); 888 return stack8; 889 } 890 891 static void setstack8(pcre2_match_context_8 *mcontext) 892 { 893 if (!mcontext) { 894 if (stack8) 895 pcre2_jit_stack_free_8(stack8); 896 stack8 = NULL; 897 return; 898 } 899 900 pcre2_jit_stack_assign_8(mcontext, callback8, getstack8()); 901 } 902 #endif /* SUPPORT_PCRE2_8 */ 903 904 #ifdef SUPPORT_PCRE2_16 905 static pcre2_jit_stack_16 *stack16; 906 907 static pcre2_jit_stack_16 *getstack16(void) 908 { 909 if (!stack16) 910 stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL); 911 return stack16; 912 } 913 914 static void setstack16(pcre2_match_context_16 *mcontext) 915 { 916 if (!mcontext) { 917 if (stack16) 918 pcre2_jit_stack_free_16(stack16); 919 stack16 = NULL; 920 return; 921 } 922 923 pcre2_jit_stack_assign_16(mcontext, callback16, getstack16()); 924 } 925 #endif /* SUPPORT_PCRE2_16 */ 926 927 #ifdef SUPPORT_PCRE2_32 928 
static pcre2_jit_stack_32 *stack32; 929 930 static pcre2_jit_stack_32 *getstack32(void) 931 { 932 if (!stack32) 933 stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL); 934 return stack32; 935 } 936 937 static void setstack32(pcre2_match_context_32 *mcontext) 938 { 939 if (!mcontext) { 940 if (stack32) 941 pcre2_jit_stack_free_32(stack32); 942 stack32 = NULL; 943 return; 944 } 945 946 pcre2_jit_stack_assign_32(mcontext, callback32, getstack32()); 947 } 948 #endif /* SUPPORT_PCRE2_32 */ 949 950 #ifdef SUPPORT_PCRE2_16 951 952 static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length) 953 { 954 PCRE2_SPTR8 iptr = input; 955 PCRE2_UCHAR16 *optr = output; 956 unsigned int c; 957 958 if (max_length == 0) 959 return 0; 960 961 while (*iptr && max_length > 1) { 962 c = 0; 963 if (offsetmap) 964 *offsetmap++ = (int)(iptr - (unsigned char*)input); 965 966 if (*iptr < 0xc0) 967 c = *iptr++; 968 else if (!(*iptr & 0x20)) { 969 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f); 970 iptr += 2; 971 } else if (!(*iptr & 0x10)) { 972 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f); 973 iptr += 3; 974 } else if (!(*iptr & 0x08)) { 975 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f); 976 iptr += 4; 977 } 978 979 if (c < 65536) { 980 *optr++ = c; 981 max_length--; 982 } else if (max_length <= 2) { 983 *optr = '\0'; 984 return (int)(optr - output); 985 } else { 986 c -= 0x10000; 987 *optr++ = 0xd800 | ((c >> 10) & 0x3ff); 988 *optr++ = 0xdc00 | (c & 0x3ff); 989 max_length -= 2; 990 if (offsetmap) 991 offsetmap++; 992 } 993 } 994 if (offsetmap) 995 *offsetmap = (int)(iptr - (unsigned char*)input); 996 *optr = '\0'; 997 return (int)(optr - output); 998 } 999 1000 static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length) 1001 { 1002 PCRE2_SPTR8 iptr = input; 1003 PCRE2_UCHAR16 *optr = output; 1004 1005 if (max_length == 0) 1006 
return 0; 1007 1008 while (*iptr && max_length > 1) { 1009 *optr++ = *iptr++; 1010 max_length--; 1011 } 1012 *optr = '\0'; 1013 return (int)(optr - output); 1014 } 1015 1016 #define REGTEST_MAX_LENGTH16 4096 1017 static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16]; 1018 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16]; 1019 1020 #endif /* SUPPORT_PCRE2_16 */ 1021 1022 #ifdef SUPPORT_PCRE2_32 1023 1024 static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length) 1025 { 1026 PCRE2_SPTR8 iptr = input; 1027 PCRE2_UCHAR32 *optr = output; 1028 unsigned int c; 1029 1030 if (max_length == 0) 1031 return 0; 1032 1033 while (*iptr && max_length > 1) { 1034 c = 0; 1035 if (offsetmap) 1036 *offsetmap++ = (int)(iptr - (unsigned char*)input); 1037 1038 if (*iptr < 0xc0) 1039 c = *iptr++; 1040 else if (!(*iptr & 0x20)) { 1041 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f); 1042 iptr += 2; 1043 } else if (!(*iptr & 0x10)) { 1044 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f); 1045 iptr += 3; 1046 } else if (!(*iptr & 0x08)) { 1047 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f); 1048 iptr += 4; 1049 } 1050 1051 *optr++ = c; 1052 max_length--; 1053 } 1054 if (offsetmap) 1055 *offsetmap = (int)(iptr - (unsigned char*)input); 1056 *optr = 0; 1057 return (int)(optr - output); 1058 } 1059 1060 static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length) 1061 { 1062 PCRE2_SPTR8 iptr = input; 1063 PCRE2_UCHAR32 *optr = output; 1064 1065 if (max_length == 0) 1066 return 0; 1067 1068 while (*iptr && max_length > 1) { 1069 *optr++ = *iptr++; 1070 max_length--; 1071 } 1072 *optr = '\0'; 1073 return (int)(optr - output); 1074 } 1075 1076 #define REGTEST_MAX_LENGTH32 4096 1077 static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32]; 1078 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32]; 1079 1080 #endif /* SUPPORT_PCRE2_32 */ 
1081 1082 static int check_ascii(const char *input) 1083 { 1084 const unsigned char *ptr = (unsigned char *)input; 1085 while (*ptr) { 1086 if (*ptr > 127) 1087 return 0; 1088 ptr++; 1089 } 1090 return 1; 1091 } 1092 1093 #define OVECTOR_SIZE 15 1094 1095 static int regression_tests(void) 1096 { 1097 struct regression_test_case *current = regression_test_cases; 1098 int error; 1099 PCRE2_SIZE err_offs; 1100 int is_successful; 1101 int is_ascii; 1102 int total = 0; 1103 int successful = 0; 1104 int successful_row = 0; 1105 int counter = 0; 1106 int jit_compile_mode; 1107 int utf = 0; 1108 int disabled_options = 0; 1109 int i; 1110 #ifdef SUPPORT_PCRE2_8 1111 pcre2_code_8 *re8; 1112 pcre2_compile_context_8 *ccontext8; 1113 pcre2_match_data_8 *mdata8_1; 1114 pcre2_match_data_8 *mdata8_2; 1115 pcre2_match_context_8 *mcontext8; 1116 PCRE2_SIZE *ovector8_1 = NULL; 1117 PCRE2_SIZE *ovector8_2 = NULL; 1118 int return_value8[2]; 1119 #endif 1120 #ifdef SUPPORT_PCRE2_16 1121 pcre2_code_16 *re16; 1122 pcre2_compile_context_16 *ccontext16; 1123 pcre2_match_data_16 *mdata16_1; 1124 pcre2_match_data_16 *mdata16_2; 1125 pcre2_match_context_16 *mcontext16; 1126 PCRE2_SIZE *ovector16_1 = NULL; 1127 PCRE2_SIZE *ovector16_2 = NULL; 1128 int return_value16[2]; 1129 int length16; 1130 #endif 1131 #ifdef SUPPORT_PCRE2_32 1132 pcre2_code_32 *re32; 1133 pcre2_compile_context_32 *ccontext32; 1134 pcre2_match_data_32 *mdata32_1; 1135 pcre2_match_data_32 *mdata32_2; 1136 pcre2_match_context_32 *mcontext32; 1137 PCRE2_SIZE *ovector32_1 = NULL; 1138 PCRE2_SIZE *ovector32_2 = NULL; 1139 int return_value32[2]; 1140 int length32; 1141 #endif 1142 1143 #if defined SUPPORT_PCRE2_8 1144 PCRE2_UCHAR8 cpu_info[128]; 1145 #elif defined SUPPORT_PCRE2_16 1146 PCRE2_UCHAR16 cpu_info[128]; 1147 #elif defined SUPPORT_PCRE2_32 1148 PCRE2_UCHAR32 cpu_info[128]; 1149 #endif 1150 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2) 1151 int 
return_value; 1152 #endif 1153 1154 /* This test compares the behaviour of interpreter and JIT. Although disabling 1155 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is 1156 still considered successful from pcre_jit_test point of view. */ 1157 1158 #if defined SUPPORT_PCRE2_8 1159 pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info); 1160 #elif defined SUPPORT_PCRE2_16 1161 pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info); 1162 #elif defined SUPPORT_PCRE2_32 1163 pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info); 1164 #endif 1165 1166 printf("Running JIT regression tests\n"); 1167 printf(" target CPU of SLJIT compiler: "); 1168 for (i = 0; cpu_info[i]; i++) 1169 printf("%c", (char)(cpu_info[i])); 1170 printf("\n"); 1171 1172 #if defined SUPPORT_PCRE2_8 1173 pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf); 1174 #elif defined SUPPORT_PCRE2_16 1175 pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf); 1176 #elif defined SUPPORT_PCRE2_32 1177 pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf); 1178 #endif 1179 1180 if (!utf) 1181 disabled_options |= PCRE2_UTF; 1182 #ifdef SUPPORT_PCRE2_8 1183 printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled"); 1184 #endif 1185 #ifdef SUPPORT_PCRE2_16 1186 printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled"); 1187 #endif 1188 #ifdef SUPPORT_PCRE2_32 1189 printf(" in 32 bit mode with UTF-32 %s:\n", utf ? 
"enabled" : "disabled"); 1190 #endif 1191 1192 while (current->pattern) { 1193 /* printf("\nPattern: %s :\n", current->pattern); */ 1194 total++; 1195 is_ascii = 0; 1196 if (!(current->start_offset & F_PROPERTY)) 1197 is_ascii = check_ascii(current->pattern) && check_ascii(current->input); 1198 1199 if (current->match_options & PCRE2_PARTIAL_SOFT) 1200 jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT; 1201 else if (current->match_options & PCRE2_PARTIAL_HARD) 1202 jit_compile_mode = PCRE2_JIT_PARTIAL_HARD; 1203 else 1204 jit_compile_mode = PCRE2_JIT_COMPLETE; 1205 error = 0; 1206 #ifdef SUPPORT_PCRE2_8 1207 re8 = NULL; 1208 ccontext8 = pcre2_compile_context_create_8(NULL); 1209 if (ccontext8) { 1210 if (GET_NEWLINE(current->newline)) 1211 pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline)); 1212 if (GET_BSR(current->newline)) 1213 pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline)); 1214 1215 if (!(current->start_offset & F_NO8)) { 1216 re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED, 1217 current->compile_options & ~disabled_options, 1218 &error, &err_offs, ccontext8); 1219 1220 if (!re8 && (utf || is_ascii)) 1221 printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); 1222 } 1223 pcre2_compile_context_free_8(ccontext8); 1224 } 1225 else 1226 printf("\n8 bit: Cannot allocate compile context\n"); 1227 #endif 1228 #ifdef SUPPORT_PCRE2_16 1229 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1230 convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16); 1231 else 1232 copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16); 1233 1234 re16 = NULL; 1235 ccontext16 = pcre2_compile_context_create_16(NULL); 1236 if (ccontext16) { 1237 if (GET_NEWLINE(current->newline)) 1238 pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline)); 1239 if (GET_BSR(current->newline)) 1240 pcre2_set_bsr_16(ccontext16, 
GET_BSR(current->newline)); 1241 1242 if (!(current->start_offset & F_NO16)) { 1243 re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED, 1244 current->compile_options & ~disabled_options, 1245 &error, &err_offs, ccontext16); 1246 1247 if (!re16 && (utf || is_ascii)) 1248 printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); 1249 } 1250 pcre2_compile_context_free_16(ccontext16); 1251 } 1252 else 1253 printf("\n16 bit: Cannot allocate compile context\n"); 1254 #endif 1255 #ifdef SUPPORT_PCRE2_32 1256 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1257 convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32); 1258 else 1259 copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32); 1260 1261 re32 = NULL; 1262 ccontext32 = pcre2_compile_context_create_32(NULL); 1263 if (ccontext32) { 1264 if (GET_NEWLINE(current->newline)) 1265 pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline)); 1266 if (GET_BSR(current->newline)) 1267 pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline)); 1268 1269 if (!(current->start_offset & F_NO32)) { 1270 re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED, 1271 current->compile_options & ~disabled_options, 1272 &error, &err_offs, ccontext32); 1273 1274 if (!re32 && (utf || is_ascii)) 1275 printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); 1276 } 1277 pcre2_compile_context_free_32(ccontext32); 1278 } 1279 else 1280 printf("\n32 bit: Cannot allocate compile context\n"); 1281 #endif 1282 1283 counter++; 1284 if ((counter & 0x3) != 0) { 1285 #ifdef SUPPORT_PCRE2_8 1286 setstack8(NULL); 1287 #endif 1288 #ifdef SUPPORT_PCRE2_16 1289 setstack16(NULL); 1290 #endif 1291 #ifdef SUPPORT_PCRE2_32 1292 setstack32(NULL); 1293 #endif 1294 } 1295 1296 #ifdef SUPPORT_PCRE2_8 1297 return_value8[0] = -1000; 1298 return_value8[1] = -1000; 1299 mdata8_1 = 
pcre2_match_data_create_8(OVECTOR_SIZE, NULL); 1300 mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL); 1301 mcontext8 = pcre2_match_context_create_8(NULL); 1302 if (!mdata8_1 || !mdata8_2 || !mcontext8) { 1303 printf("\n8 bit: Cannot allocate match data\n"); 1304 pcre2_match_data_free_8(mdata8_1); 1305 pcre2_match_data_free_8(mdata8_2); 1306 pcre2_match_context_free_8(mcontext8); 1307 pcre2_code_free_8(re8); 1308 re8 = NULL; 1309 } else { 1310 ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1); 1311 ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2); 1312 for (i = 0; i < OVECTOR_SIZE * 3; ++i) 1313 ovector8_1[i] = -2; 1314 for (i = 0; i < OVECTOR_SIZE * 3; ++i) 1315 ovector8_2[i] = -2; 1316 } 1317 if (re8) { 1318 return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), 1319 current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL); 1320 1321 if (pcre2_jit_compile_8(re8, jit_compile_mode)) { 1322 printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern); 1323 } else if ((counter & 0x1) != 0) { 1324 setstack8(mcontext8); 1325 return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), 1326 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8); 1327 } else { 1328 pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8()); 1329 return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), 1330 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8); 1331 } 1332 } 1333 #endif 1334 1335 #ifdef SUPPORT_PCRE2_16 1336 return_value16[0] = -1000; 1337 return_value16[1] = -1000; 1338 mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL); 1339 mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL); 1340 mcontext16 = pcre2_match_context_create_16(NULL); 1341 if (!mdata16_1 || !mdata16_2 || !mcontext16) { 1342 printf("\n16 bit: Cannot allocate match data\n"); 1343 
pcre2_match_data_free_16(mdata16_1); 1344 pcre2_match_data_free_16(mdata16_2); 1345 pcre2_match_context_free_16(mcontext16); 1346 pcre2_code_free_16(re16); 1347 re16 = NULL; 1348 } else { 1349 ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1); 1350 ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2); 1351 for (i = 0; i < OVECTOR_SIZE * 3; ++i) 1352 ovector16_1[i] = -2; 1353 for (i = 0; i < OVECTOR_SIZE * 3; ++i) 1354 ovector16_2[i] = -2; 1355 } 1356 if (re16) { 1357 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1358 length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16); 1359 else 1360 length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16); 1361 1362 return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16, 1363 current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL); 1364 1365 if (pcre2_jit_compile_16(re16, jit_compile_mode)) { 1366 printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern); 1367 } else if ((counter & 0x1) != 0) { 1368 setstack16(mcontext16); 1369 return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16, 1370 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16); 1371 } else { 1372 pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16()); 1373 return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16, 1374 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16); 1375 } 1376 } 1377 #endif 1378 1379 #ifdef SUPPORT_PCRE2_32 1380 return_value32[0] = -1000; 1381 return_value32[1] = -1000; 1382 mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL); 1383 mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL); 1384 mcontext32 = pcre2_match_context_create_32(NULL); 1385 if (!mdata32_1 || !mdata32_2 || !mcontext32) { 1386 printf("\n32 bit: Cannot allocate match data\n"); 
1387 pcre2_match_data_free_32(mdata32_1); 1388 pcre2_match_data_free_32(mdata32_2); 1389 pcre2_match_context_free_32(mcontext32); 1390 pcre2_code_free_32(re32); 1391 re32 = NULL; 1392 } else { 1393 ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1); 1394 ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2); 1395 for (i = 0; i < OVECTOR_SIZE * 3; ++i) 1396 ovector32_1[i] = -2; 1397 for (i = 0; i < OVECTOR_SIZE * 3; ++i) 1398 ovector32_2[i] = -2; 1399 } 1400 if (re32) { 1401 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1402 length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32); 1403 else 1404 length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32); 1405 1406 return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32, 1407 current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL); 1408 1409 if (pcre2_jit_compile_32(re32, jit_compile_mode)) { 1410 printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern); 1411 } else if ((counter & 0x1) != 0) { 1412 setstack32(mcontext32); 1413 return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32, 1414 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32); 1415 } else { 1416 pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32()); 1417 return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32, 1418 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32); 1419 } 1420 } 1421 #endif 1422 1423 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s", 1424 return_value8[0], return_value16[0], return_value32[0], 1425 (int)ovector8_1[0], (int)ovector8_1[1], 1426 (int)ovector16_1[0], (int)ovector16_1[1], 1427 (int)ovector32_1[0], (int)ovector32_1[1], 1428 (current->compile_options & PCRE2_CASELESS) ? 
"C" : ""); */ 1429 1430 /* If F_DIFF is set, just run the test, but do not compare the results. 1431 Segfaults can still be captured. */ 1432 1433 is_successful = 1; 1434 if (!(current->start_offset & F_DIFF)) { 1435 #if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2) 1436 if (!(current->start_offset & F_FORCECONV)) { 1437 1438 /* All results must be the same. */ 1439 #ifdef SUPPORT_PCRE2_8 1440 if ((return_value = return_value8[0]) != return_value8[1]) { 1441 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n", 1442 return_value8[0], return_value8[1], total, current->pattern, current->input); 1443 is_successful = 0; 1444 } else 1445 #endif 1446 #ifdef SUPPORT_PCRE2_16 1447 if ((return_value = return_value16[0]) != return_value16[1]) { 1448 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n", 1449 return_value16[0], return_value16[1], total, current->pattern, current->input); 1450 is_successful = 0; 1451 } else 1452 #endif 1453 #ifdef SUPPORT_PCRE2_32 1454 if ((return_value = return_value32[0]) != return_value32[1]) { 1455 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n", 1456 return_value32[0], return_value32[1], total, current->pattern, current->input); 1457 is_successful = 0; 1458 } else 1459 #endif 1460 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16 1461 if (return_value8[0] != return_value16[0]) { 1462 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n", 1463 return_value8[0], return_value16[0], 1464 total, current->pattern, current->input); 1465 is_successful = 0; 1466 } else 1467 #endif 1468 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32 1469 if (return_value8[0] != return_value32[0]) { 1470 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n", 1471 return_value8[0], return_value32[0], 1472 total, current->pattern, current->input); 1473 is_successful = 0; 
1474 } else 1475 #endif 1476 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32 1477 if (return_value16[0] != return_value32[0]) { 1478 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n", 1479 return_value16[0], return_value32[0], 1480 total, current->pattern, current->input); 1481 is_successful = 0; 1482 } else 1483 #endif 1484 if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) { 1485 if (return_value == PCRE_ERROR_PARTIAL) { 1486 return_value = 2; 1487 } else { 1488 return_value *= 2; 1489 } 1490 #ifdef SUPPORT_PCRE2_8 1491 return_value8[0] = return_value; 1492 #endif 1493 #ifdef SUPPORT_PCRE2_16 1494 return_value16[0] = return_value; 1495 #endif 1496 #ifdef SUPPORT_PCRE2_32 1497 return_value32[0] = return_value; 1498 #endif 1499 /* Transform back the results. */ 1500 if (current->flags & PCRE_UTF8) { 1501 #ifdef SUPPORT_PCRE2_16 1502 for (i = 0; i < return_value; ++i) { 1503 if (ovector16_1[i] >= 0) 1504 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]]; 1505 if (ovector16_2[i] >= 0) 1506 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]]; 1507 } 1508 #endif 1509 #ifdef SUPPORT_PCRE2_32 1510 for (i = 0; i < return_value; ++i) { 1511 if (ovector32_1[i] >= 0) 1512 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]]; 1513 if (ovector32_2[i] >= 0) 1514 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]]; 1515 } 1516 #endif 1517 } 1518 1519 for (i = 0; i < return_value; ++i) { 1520 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16 1521 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) { 1522 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n", 1523 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i], 1524 total, current->pattern, current->input); 1525 is_successful = 0; 1526 } 1527 #endif 1528 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32 1529 if (ovector8_1[i] != ovector8_2[i] || 
ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) { 1530 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", 1531 i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i], 1532 total, current->pattern, current->input); 1533 is_successful = 0; 1534 } 1535 #endif 1536 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32 1537 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) { 1538 printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", 1539 i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i], 1540 total, current->pattern, current->input); 1541 is_successful = 0; 1542 } 1543 #endif 1544 } 1545 } 1546 } else 1547 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */ 1548 { 1549 #ifdef SUPPORT_PCRE2_8 1550 if (return_value8[0] != return_value8[1]) { 1551 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", 1552 return_value8[0], return_value8[1], total, current->pattern, current->input); 1553 is_successful = 0; 1554 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) { 1555 if (return_value8[0] == PCRE2_ERROR_PARTIAL) 1556 return_value8[0] = 2; 1557 else 1558 return_value8[0] *= 2; 1559 1560 for (i = 0; i < return_value8[0]; ++i) 1561 if (ovector8_1[i] != ovector8_2[i]) { 1562 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1563 i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input); 1564 is_successful = 0; 1565 } 1566 } 1567 #endif 1568 1569 #ifdef SUPPORT_PCRE2_16 1570 if (return_value16[0] != return_value16[1]) { 1571 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", 1572 return_value16[0], return_value16[1], total, current->pattern, current->input); 1573 is_successful = 0; 1574 } else if (return_value16[0] >= 0 || return_value16[0] 
== PCRE2_ERROR_PARTIAL) { 1575 if (return_value16[0] == PCRE2_ERROR_PARTIAL) 1576 return_value16[0] = 2; 1577 else 1578 return_value16[0] *= 2; 1579 1580 for (i = 0; i < return_value16[0]; ++i) 1581 if (ovector16_1[i] != ovector16_2[i]) { 1582 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1583 i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input); 1584 is_successful = 0; 1585 } 1586 } 1587 #endif 1588 1589 #ifdef SUPPORT_PCRE2_32 1590 if (return_value32[0] != return_value32[1]) { 1591 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", 1592 return_value32[0], return_value32[1], total, current->pattern, current->input); 1593 is_successful = 0; 1594 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) { 1595 if (return_value32[0] == PCRE2_ERROR_PARTIAL) 1596 return_value32[0] = 2; 1597 else 1598 return_value32[0] *= 2; 1599 1600 for (i = 0; i < return_value32[0]; ++i) 1601 if (ovector32_1[i] != ovector32_2[i]) { 1602 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1603 i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input); 1604 is_successful = 0; 1605 } 1606 } 1607 #endif 1608 } 1609 } 1610 1611 if (is_successful) { 1612 #ifdef SUPPORT_PCRE2_8 1613 if (!(current->start_offset & F_NO8) && (utf || is_ascii)) { 1614 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) { 1615 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n", 1616 total, current->pattern, current->input); 1617 is_successful = 0; 1618 } 1619 1620 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) { 1621 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n", 1622 total, current->pattern, current->input); 1623 is_successful = 0; 1624 } 1625 } 1626 #endif 1627 #ifdef SUPPORT_PCRE2_16 1628 if (!(current->start_offset & F_NO16) && (utf || is_ascii)) { 1629 if (return_value16[0] < 0 && !(current->start_offset & 
F_NOMATCH)) { 1630 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n", 1631 total, current->pattern, current->input); 1632 is_successful = 0; 1633 } 1634 1635 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) { 1636 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n", 1637 total, current->pattern, current->input); 1638 is_successful = 0; 1639 } 1640 } 1641 #endif 1642 #ifdef SUPPORT_PCRE2_32 1643 if (!(current->start_offset & F_NO32) && (utf || is_ascii)) { 1644 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) { 1645 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n", 1646 total, current->pattern, current->input); 1647 is_successful = 0; 1648 } 1649 1650 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) { 1651 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n", 1652 total, current->pattern, current->input); 1653 is_successful = 0; 1654 } 1655 } 1656 #endif 1657 } 1658 1659 if (is_successful) { 1660 #ifdef SUPPORT_PCRE2_8 1661 if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) { 1662 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1663 total, current->pattern, current->input); 1664 is_successful = 0; 1665 } 1666 #endif 1667 #ifdef SUPPORT_PCRE2_16 1668 if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) { 1669 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1670 total, current->pattern, current->input); 1671 is_successful = 0; 1672 } 1673 #endif 1674 #ifdef SUPPORT_PCRE2_32 1675 if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) { 1676 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1677 total, current->pattern, current->input); 1678 is_successful = 0; 1679 } 1680 #endif 1681 } 1682 1683 #ifdef SUPPORT_PCRE2_8 1684 pcre2_code_free_8(re8); 1685 pcre2_match_data_free_8(mdata8_1); 1686 
pcre2_match_data_free_8(mdata8_2); 1687 pcre2_match_context_free_8(mcontext8); 1688 #endif 1689 #ifdef SUPPORT_PCRE2_16 1690 pcre2_code_free_16(re16); 1691 pcre2_match_data_free_16(mdata16_1); 1692 pcre2_match_data_free_16(mdata16_2); 1693 pcre2_match_context_free_16(mcontext16); 1694 #endif 1695 #ifdef SUPPORT_PCRE2_32 1696 pcre2_code_free_32(re32); 1697 pcre2_match_data_free_32(mdata32_1); 1698 pcre2_match_data_free_32(mdata32_2); 1699 pcre2_match_context_free_32(mcontext32); 1700 #endif 1701 1702 if (is_successful) { 1703 successful++; 1704 successful_row++; 1705 printf("."); 1706 if (successful_row >= 60) { 1707 successful_row = 0; 1708 printf("\n"); 1709 } 1710 } else 1711 successful_row = 0; 1712 1713 fflush(stdout); 1714 current++; 1715 } 1716 #ifdef SUPPORT_PCRE2_8 1717 setstack8(NULL); 1718 #endif 1719 #ifdef SUPPORT_PCRE2_16 1720 setstack16(NULL); 1721 #endif 1722 #ifdef SUPPORT_PCRE2_32 1723 setstack32(NULL); 1724 #endif 1725 1726 if (total == successful) { 1727 printf("\nAll JIT regression tests are successfully passed.\n"); 1728 return 0; 1729 } else { 1730 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); 1731 return 1; 1732 } 1733 } 1734 1735 /* End of pcre2_jit_test.c */ 1736