1 ; 2 ; 3 ; this file contains a script of tests to run through regress.exe 4 ; 5 ; comments start with a semicolon and proceed to the end of the line 6 ; 7 ; changes to regular expression compile flags start with a "-" as the first 8 ; non-whitespace character and consist of a list of the printable names 9 ; of the flags, for example "match_default" 10 ; 11 ; Other lines contain a test to perform using the current flag status 12 ; the first token contains the expression to compile, the second the string 13 ; to match it against. If the second string is "!" then the expression should 14 ; not compile, that is the first string is an invalid regular expression. 15 ; This is then followed by a list of integers that specify what should match, 16 ; each pair represents the starting and ending positions of a subexpression 17 ; starting with the zeroth subexpression (the whole match). 18 ; A value of -1 indicates that the subexpression should not take part in the 19 ; match at all, if the first value is -1 then no part of the expression should 20 ; match the string. 21 ; 22 ; Tests taken from BOOST testsuite and adapted to glibc regex. 
23 ; 24 ; Boost Software License - Version 1.0 - August 17th, 2003 25 ; 26 ; Permission is hereby granted, free of charge, to any person or organization 27 ; obtaining a copy of the software and accompanying documentation covered by 28 ; this license (the "Software") to use, reproduce, display, distribute, 29 ; execute, and transmit the Software, and to prepare derivative works of the 30 ; Software, and to permit third-parties to whom the Software is furnished to 31 ; do so, all subject to the following: 32 ; 33 ; The copyright notices in the Software and this entire statement, including 34 ; the above license grant, this restriction and the following disclaimer, 35 ; must be included in all copies of the Software, in whole or in part, and 36 ; all derivative works of the Software, unless such copies or derivative 37 ; works are solely in the form of machine-executable object code generated by 38 ; a source language processor. 39 ; 40 ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 41 ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 42 ; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 43 ; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 44 ; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 45 ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 46 ; DEALINGS IN THE SOFTWARE. 47 ; 48 49 - match_default normal REG_EXTENDED 50 51 ; 52 ; try some really simple literals: 53 a a 0 1 54 Z Z 0 1 55 Z aaa -1 -1 56 Z xxxxZZxxx 4 5 57 58 ; and some simple brackets: 59 (a) zzzaazz 3 4 3 4 60 () zzz 0 0 0 0 61 () "" 0 0 0 0 62 ( ! 63 ) ) 0 1 64 (aa ! 65 aa) baa)b 1 4 66 a b -1 -1 67 \(\) () 0 2 68 \(a\) (a) 0 3 69 \() () 0 2 70 (\) ! 
71 p(a)rameter ABCparameterXYZ 3 12 4 5 72 [pq](a)rameter ABCparameterXYZ 3 12 4 5 73 74 ; now try escaped brackets: 75 - match_default bk_parens REG_BASIC 76 \(a\) zzzaazz 3 4 3 4 77 \(\) zzz 0 0 0 0 78 \(\) "" 0 0 0 0 79 \( ! 80 \) ! 81 \(aa ! 82 aa\) ! 83 () () 0 2 84 (a) (a) 0 3 85 (\) ! 86 \() ! 87 88 ; now move on to "." wildcards 89 - match_default normal REG_EXTENDED REG_STARTEND 90 . a 0 1 91 . \n 0 1 92 . \r 0 1 93 . \0 0 1 94 95 ; 96 ; now move on to the repetition ops, 97 ; starting with operator * 98 - match_default normal REG_EXTENDED 99 a* b 0 0 100 ab* a 0 1 101 ab* ab 0 2 102 ab* sssabbbbbbsss 3 10 103 ab*c* a 0 1 104 ab*c* abbb 0 4 105 ab*c* accc 0 4 106 ab*c* abbcc 0 5 107 *a ! 108 \<* ! 109 \>* ! 110 \n* \n\n 0 2 111 \** ** 0 2 112 \* * 0 1 113 114 ; now try operator + 115 ab+ a -1 -1 116 ab+ ab 0 2 117 ab+ sssabbbbbbsss 3 10 118 ab+c+ a -1 -1 119 ab+c+ abbb -1 -1 120 ab+c+ accc -1 -1 121 ab+c+ abbcc 0 5 122 +a ! 123 \<+ ! 124 \>+ ! 125 \n+ \n\n 0 2 126 \+ + 0 1 127 \+ ++ 0 1 128 \++ ++ 0 2 129 130 ; now try operator ? 131 - match_default normal REG_EXTENDED 132 a? b 0 0 133 ab? a 0 1 134 ab? ab 0 2 135 ab? sssabbbbbbsss 3 5 136 ab?c? a 0 1 137 ab?c? abbb 0 2 138 ab?c? accc 0 2 139 ab?c? abcc 0 3 140 ?a ! 141 \<? ! 142 \>? ! 143 \n? \n\n 0 1 144 \? ? 0 1 145 \? ?? 0 1 146 \?? ?? 0 1 147 148 ; now try operator {} 149 - match_default normal REG_EXTENDED 150 a{2} a -1 -1 151 a{2} aa 0 2 152 a{2} aaa 0 2 153 a{2,} a -1 -1 154 a{2,} aa 0 2 155 a{2,} aaaaa 0 5 156 a{2,4} a -1 -1 157 a{2,4} aa 0 2 158 a{2,4} aaa 0 3 159 a{2,4} aaaa 0 4 160 a{2,4} aaaaa 0 4 161 a{} ! 162 a{2 ! 
163 a} a} 0 2 164 \{\} {} 0 2 165 166 - match_default normal REG_BASIC 167 a\{2\} a -1 -1 168 a\{2\} aa 0 2 169 a\{2\} aaa 0 2 170 a\{2,\} a -1 -1 171 a\{2,\} aa 0 2 172 a\{2,\} aaaaa 0 5 173 a\{2,4\} a -1 -1 174 a\{2,4\} aa 0 2 175 a\{2,4\} aaa 0 3 176 a\{2,4\} aaaa 0 4 177 a\{2,4\} aaaaa 0 4 178 {} {} 0 2 179 180 ; now test the alternation operator | 181 - match_default normal REG_EXTENDED 182 a|b a 0 1 183 a|b b 0 1 184 a(b|c) ab 0 2 1 2 185 a(b|c) ac 0 2 1 2 186 a(b|c) ad -1 -1 -1 -1 187 a\| a| 0 2 188 189 ; now test the set operator [] 190 - match_default normal REG_EXTENDED 191 ; try some literals first 192 [abc] a 0 1 193 [abc] b 0 1 194 [abc] c 0 1 195 [abc] d -1 -1 196 [^bcd] a 0 1 197 [^bcd] b -1 -1 198 [^bcd] d -1 -1 199 [^bcd] e 0 1 200 a[b]c abc 0 3 201 a[ab]c abc 0 3 202 a[^ab]c adc 0 3 203 a[]b]c a]c 0 3 204 a[[b]c a[c 0 3 205 a[-b]c a-c 0 3 206 a[^]b]c adc 0 3 207 a[^-b]c adc 0 3 208 a[b-]c a-c 0 3 209 a[b ! 210 a[] ! 211 212 ; then some ranges 213 [b-e] a -1 -1 214 [b-e] b 0 1 215 [b-e] e 0 1 216 [b-e] f -1 -1 217 [^b-e] a 0 1 218 [^b-e] b -1 -1 219 [^b-e] e -1 -1 220 [^b-e] f 0 1 221 a[1-3]c a2c 0 3 222 a[3-1]c ! 223 a[1-3-5]c ! 224 a[1- ! 225 226 ; and some classes 227 a[[:alpha:]]c abc 0 3 228 a[[:unknown:]]c ! 229 a[[: ! 230 a[[:alpha ! 231 a[[:alpha:] ! 232 a[[:alpha,:] ! 233 a[[:]:]]b ! 234 a[[:-:]]b ! 235 a[[:alph:]] ! 236 a[[:alphabet:]] ! 237 [[:alnum:]]+ -%@a0X_- 3 6 238 [[:alpha:]]+ -%@aX_0- 3 5 239 [[:blank:]]+ "a \tb" 1 4 240 [[:cntrl:]]+ a\n\tb 1 3 241 [[:digit:]]+ a019b 1 4 242 [[:graph:]]+ " a%b " 1 4 243 [[:lower:]]+ AabC 1 3 244 ; This test fails with STLPort, disable for now as this is a corner case anyway... 
245 ;[[:print:]]+ "\na b\n" 1 4 246 [[:punct:]]+ " %-&\t" 1 4 247 [[:space:]]+ "a \n\t\rb" 1 5 248 [[:upper:]]+ aBCd 1 3 249 [[:xdigit:]]+ p0f3Cx 1 5 250 251 ; now test flag settings: 252 - escape_in_lists REG_NO_POSIX_TEST 253 [\n] \n 0 1 254 - REG_NO_POSIX_TEST 255 256 ; line anchors 257 - match_default normal REG_EXTENDED 258 ^ab ab 0 2 259 ^ab xxabxx -1 -1 260 ab$ ab 0 2 261 ab$ abxx -1 -1 262 - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL 263 ^ab ab -1 -1 264 ^ab xxabxx -1 -1 265 ab$ ab -1 -1 266 ab$ abxx -1 -1 267 268 ; back references 269 - match_default normal REG_PERL 270 a(b)\2c ! 271 a(b\1)c ! 272 a(b*)c\1d abbcbbd 0 7 1 3 273 a(b*)c\1d abbcbd -1 -1 274 a(b*)c\1d abbcbbbd -1 -1 275 ^(.)\1 abc -1 -1 276 a([bc])\1d abcdabbd 4 8 5 6 277 ; strictly speaking this is at best ambiguous, at worst wrong, this is what most 278 ; regex implementations will match though. 279 a(([bc])\2)*d abbccd 0 6 3 5 3 4 280 281 a(([bc])\2)*d abbcbd -1 -1 282 a((b)*\2)*d abbbd 0 5 1 4 2 3 283 ; perl only: 284 (ab*)[ab]*\1 ababaaa 0 7 0 1 285 (a)\1bcd aabcd 0 5 0 1 286 (a)\1bc*d aabcd 0 5 0 1 287 (a)\1bc*d aabd 0 4 0 1 288 (a)\1bc*d aabcccd 0 7 0 1 289 (a)\1bc*[ce]d aabcccd 0 7 0 1 290 ^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5 291 292 ; posix only: 293 - match_default extended REG_EXTENDED 294 (ab*)[ab]*\1 ababaaa 0 7 0 1 295 296 ; 297 ; word operators: 298 \w a 0 1 299 \w z 0 1 300 \w A 0 1 301 \w Z 0 1 302 \w _ 0 1 303 \w } -1 -1 304 \w ` -1 -1 305 \w [ -1 -1 306 \w @ -1 -1 307 ; non-word: 308 \W a -1 -1 309 \W z -1 -1 310 \W A -1 -1 311 \W Z -1 -1 312 \W _ -1 -1 313 \W } 0 1 314 \W ` 0 1 315 \W [ 0 1 316 \W @ 0 1 317 ; word start: 318 \<abcd " abcd" 2 6 319 \<ab cab -1 -1 320 \<ab "\nab" 1 3 321 \<tag ::tag 2 5 322 ;word end: 323 abc\> abc 0 3 324 abc\> abcd -1 -1 325 abc\> abc\n 0 3 326 abc\> abc:: 0 3 327 ; word boundary: 328 \babcd " abcd" 2 6 329 \bab cab -1 -1 330 \bab "\nab" 1 3 331 \btag ::tag 2 5 332 abc\b abc 0 3 333 abc\b abcd -1 -1 334 
abc\b abc\n 0 3 335 abc\b abc:: 0 3 336 ; within word: 337 \B ab 1 1 338 a\Bb ab 0 2 339 a\B ab 0 1 340 a\B a -1 -1 341 a\B "a " -1 -1 342 343 ; 344 ; buffer operators: 345 \`abc abc 0 3 346 \`abc \nabc -1 -1 347 \`abc " abc" -1 -1 348 abc\' abc 0 3 349 abc\' abc\n -1 -1 350 abc\' "abc " -1 -1 351 352 ; 353 ; now follows various complex expressions designed to try and bust the matcher: 354 a(((b)))c abc 0 3 1 2 1 2 1 2 355 a(b|(c))d abd 0 3 1 2 -1 -1 356 a(b|(c))d acd 0 3 1 2 1 2 357 a(b*|c)d abbd 0 4 1 3 358 ; just gotta have one DFA-buster, of course 359 a[ab]{20} aaaaabaaaabaaaabaaaab 0 21 360 ; and an inline expansion in case somebody gets tricky 361 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21 362 ; and in case somebody just slips in an NFA... 363 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31 364 ; one really big one 365 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71 366 ; fish for problems as brackets go past 8 367 [ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8 368 [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9 369 [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10 370 [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10 371 ; and as parenthesis go past 9: 372 (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 373 (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 374 (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 375 (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 376 (a)d|(b)c abc 1 3 -1 -1 1 2 377 _+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19 378 379 ; subtleties of matching 380 ;a(b)?c\1d acd 0 3 -1 
-1 381 ; POSIX is about the following test: 382 a(b)?c\1d acd -1 -1 -1 -1 383 a(b?c)+d accd 0 4 2 3 384 (wee|week)(knights|night) weeknights 0 10 0 3 3 10 385 .* abc 0 3 386 a(b|(c))d abd 0 3 1 2 -1 -1 387 a(b|(c))d acd 0 3 1 2 1 2 388 a(b*|c|e)d abbd 0 4 1 3 389 a(b*|c|e)d acd 0 3 1 2 390 a(b*|c|e)d ad 0 2 1 1 391 a(b?)c abc 0 3 1 2 392 a(b?)c ac 0 2 1 1 393 a(b+)c abc 0 3 1 2 394 a(b+)c abbbc 0 5 1 4 395 a(b*)c ac 0 2 1 1 396 (a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5 397 a([bc]?)c abc 0 3 1 2 398 a([bc]?)c ac 0 2 1 1 399 a([bc]+)c abc 0 3 1 2 400 a([bc]+)c abcc 0 4 1 3 401 a([bc]+)bc abcbc 0 5 1 3 402 a(bb+|b)b abb 0 3 1 2 403 a(bbb+|bb+|b)b abb 0 3 1 2 404 a(bbb+|bb+|b)b abbb 0 4 1 3 405 a(bbb+|bb+|b)bb abbb 0 4 1 2 406 (.*).* abcdef 0 6 0 6 407 (a*)* bc 0 0 0 0 408 xyx*xz xyxxxxyxxxz 5 11 409 410 ; do we get the right subexpression when it is used more than once? 411 a(b|c)*d ad 0 2 -1 -1 412 a(b|c)*d abcd 0 4 2 3 413 a(b|c)+d abd 0 3 1 2 414 a(b|c)+d abcd 0 4 2 3 415 a(b|c?)+d ad 0 2 1 1 416 a(b|c){0,0}d ad 0 2 -1 -1 417 a(b|c){0,1}d ad 0 2 -1 -1 418 a(b|c){0,1}d abd 0 3 1 2 419 a(b|c){0,2}d ad 0 2 -1 -1 420 a(b|c){0,2}d abcd 0 4 2 3 421 a(b|c){0,}d ad 0 2 -1 -1 422 a(b|c){0,}d abcd 0 4 2 3 423 a(b|c){1,1}d abd 0 3 1 2 424 a(b|c){1,2}d abd 0 3 1 2 425 a(b|c){1,2}d abcd 0 4 2 3 426 a(b|c){1,}d abd 0 3 1 2 427 a(b|c){1,}d abcd 0 4 2 3 428 a(b|c){2,2}d acbd 0 4 2 3 429 a(b|c){2,2}d abcd 0 4 2 3 430 a(b|c){2,4}d abcd 0 4 2 3 431 a(b|c){2,4}d abcbd 0 5 3 4 432 a(b|c){2,4}d abcbcd 0 6 4 5 433 a(b|c){2,}d abcd 0 4 2 3 434 a(b|c){2,}d abcbd 0 5 3 4 435 ; perl only: these conflict with the POSIX test below 436 ;a(b|c?)+d abcd 0 4 3 3 437 ;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1 438 ;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3 439 440 ; posix only: 441 - match_default extended REG_EXTENDED REG_STARTEND 442 443 a(b|c?)+d abcd 0 4 2 3 444 a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3 445 a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1 446 a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3 447 a(b|((c)*))+d ad 0 2 
1 1 1 1 -1 -1 448 a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3 449 a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1 450 a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3 451 a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1 452 453 - match_default normal REG_PERL 454 ; try to match C++ syntax elements: 455 ; line comment: 456 //[^\n]* "++i //here is a line comment\n" 4 28 457 ; block comment: 458 /\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27 459 /\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1 460 /\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1 461 /\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1 462 /\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1 463 /\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1 464 ; preprocessor directives: 465 ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1 466 ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1 467 ; perl only: 468 ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42 469 ; literals: 470 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 471 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1 472 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 473 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1 474 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24 475 ; strings: 476 '([^\\']|\\.)*' '\\x3A' 0 6 4 5 477 '([^\\']|\\.)*' '\\'' 0 4 1 3 478 '([^\\']|\\.)*' '\\n' 0 4 1 3 479 480 ; finally try some case insensitive matches: 481 - match_default normal REG_EXTENDED REG_ICASE 482 ; upper and lower have no meaning here so they fail, however these 483 ; may compile with other libraries... 484 ;[[:lower:]] ! 485 ;[[:upper:]] ! 
486 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72 487 488 ; known and suspected bugs: 489 - match_default normal REG_EXTENDED 490 \( ( 0 1 491 \) ) 0 1 492 \$ $ 0 1 493 \^ ^ 0 1 494 \. . 0 1 495 \* * 0 1 496 \+ + 0 1 497 \? ? 0 1 498 \[ [ 0 1 499 \] ] 0 1 500 \| | 0 1 501 \\ \\ 0 1 502 # # 0 1 503 \# # 0 1 504 a- a- 0 2 505 \- - 0 1 506 \{ { 0 1 507 \} } 0 1 508 0 0 0 1 509 1 1 0 1 510 9 9 0 1 511 b b 0 1 512 B B 0 1 513 < < 0 1 514 > > 0 1 515 w w 0 1 516 W W 0 1 517 ` ` 0 1 518 ' ' 0 1 519 \n \n 0 1 520 , , 0 1 521 a a 0 1 522 f f 0 1 523 n n 0 1 524 r r 0 1 525 t t 0 1 526 v v 0 1 527 c c 0 1 528 x x 0 1 529 : : 0 1 530 (\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5 531 532 - match_default normal REG_EXTENDED REG_ICASE 533 a A 0 1 534 A a 0 1 535 [abc]+ abcABC 0 6 536 [ABC]+ abcABC 0 6 537 [a-z]+ abcABC 0 6 538 [A-Z]+ abzANZ 0 6 539 [a-Z]+ abzABZ 0 6 540 [A-z]+ abzABZ 0 6 541 [[:lower:]]+ abyzABYZ 0 8 542 [[:upper:]]+ abzABZ 0 6 543 [[:alpha:]]+ abyzABYZ 0 8 544 [[:alnum:]]+ 09abyzABYZ 0 10 545 546 ; word start: 547 \<abcd " abcd" 2 6 548 \<ab cab -1 -1 549 \<ab "\nab" 1 3 550 \<tag ::tag 2 5 551 ;word end: 552 abc\> abc 0 3 553 abc\> abcd -1 -1 554 abc\> abc\n 0 3 555 abc\> abc:: 0 3 556 557 ; collating elements and rewritten set code: 558 - match_default normal REG_EXTENDED REG_STARTEND 559 ;[[.zero.]] 0 0 1 560 ;[[.one.]] 1 0 1 561 ;[[.two.]] 2 0 1 562 ;[[.three.]] 3 0 1 563 [[.a.]] baa 1 2 564 ;[[.right-curly-bracket.]] } 0 1 565 ;[[.NUL.]] \0 0 1 566 [[:<:]z] ! 567 [a[:>:]] ! 568 [[=a=]] a 0 1 569 ;[[=right-curly-bracket=]] } 0 1 570 - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE 571 [[.A.]] A 0 1 572 [[.A.]] a 0 1 573 [[.A.]-b]+ AaBb 0 4 574 [A-[.b.]]+ AaBb 0 4 575 [[.a.]-B]+ AaBb 0 4 576 [a-[.B.]]+ AaBb 0 4 577 - match_default normal REG_EXTENDED REG_STARTEND 578 [[.a.]-c]+ abcd 0 3 579 [a-[.c.]]+ abcd 0 3 580 [[:alpha:]-a] ! 
581 [a-[:alpha:]] ! 582 583 ; try multi-character ligatures: 584 ;[[.ae.]] ae 0 2 585 ;[[.ae.]] aE -1 -1 586 ;[[.AE.]] AE 0 2 587 ;[[.Ae.]] Ae 0 2 588 ;[[.ae.]-b] a -1 -1 589 ;[[.ae.]-b] b 0 1 590 ;[[.ae.]-b] ae 0 2 591 ;[a-[.ae.]] a 0 1 592 ;[a-[.ae.]] b -1 -1 593 ;[a-[.ae.]] ae 0 2 594 - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE 595 ;[[.ae.]] AE 0 2 596 ;[[.ae.]] Ae 0 2 597 ;[[.AE.]] Ae 0 2 598 ;[[.Ae.]] aE 0 2 599 ;[[.AE.]-B] a -1 -1 600 ;[[.Ae.]-b] b 0 1 601 ;[[.Ae.]-b] B 0 1 602 ;[[.ae.]-b] AE 0 2 603 604 - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST 605 \s+ "ab ab" 2 5 606 \S+ " abc " 2 5 607 608 - match_default normal REG_EXTENDED REG_STARTEND 609 \`abc abc 0 3 610 \`abc aabc -1 -1 611 abc\' abc 0 3 612 abc\' abcd -1 -1 613 abc\' abc\n\n -1 -1 614 abc\' abc 0 3 615 616 ; extended repeat checking to exercise new algorithms: 617 ab.*xy abxy_ 0 4 618 ab.*xy ab_xy_ 0 5 619 ab.*xy abxy 0 4 620 ab.*xy ab_xy 0 5 621 ab.* ab 0 2 622 ab.* ab__ 0 4 623 624 ab.{2,5}xy ab__xy_ 0 6 625 ab.{2,5}xy ab____xy_ 0 8 626 ab.{2,5}xy ab_____xy_ 0 9 627 ab.{2,5}xy ab__xy 0 6 628 ab.{2,5}xy ab_____xy 0 9 629 ab.{2,5} ab__ 0 4 630 ab.{2,5} ab_______ 0 7 631 ab.{2,5}xy ab______xy -1 -1 632 ab.{2,5}xy ab_xy -1 -1 633 634 ab.*?xy abxy_ 0 4 635 ab.*?xy ab_xy_ 0 5 636 ab.*?xy abxy 0 4 637 ab.*?xy ab_xy 0 5 638 ab.*? ab 0 2 639 ab.*? ab__ 0 4 640 641 ab.{2,5}?xy ab__xy_ 0 6 642 ab.{2,5}?xy ab____xy_ 0 8 643 ab.{2,5}?xy ab_____xy_ 0 9 644 ab.{2,5}?xy ab__xy 0 6 645 ab.{2,5}?xy ab_____xy 0 9 646 ab.{2,5}? ab__ 0 4 647 ab.{2,5}? 
ab_______ 0 7 648 ab.{2,5}?xy ab______xy -1 -1 649 ab.{2,5}xy ab_xy -1 -1 650 651 ; again but with slower algorithm variant: 652 - match_default REG_EXTENDED 653 ; now again for single character repeats: 654 655 ab_*xy abxy_ 0 4 656 ab_*xy ab_xy_ 0 5 657 ab_*xy abxy 0 4 658 ab_*xy ab_xy 0 5 659 ab_* ab 0 2 660 ab_* ab__ 0 4 661 662 ab_{2,5}xy ab__xy_ 0 6 663 ab_{2,5}xy ab____xy_ 0 8 664 ab_{2,5}xy ab_____xy_ 0 9 665 ab_{2,5}xy ab__xy 0 6 666 ab_{2,5}xy ab_____xy 0 9 667 ab_{2,5} ab__ 0 4 668 ab_{2,5} ab_______ 0 7 669 ab_{2,5}xy ab______xy -1 -1 670 ab_{2,5}xy ab_xy -1 -1 671 672 ab_*?xy abxy_ 0 4 673 ab_*?xy ab_xy_ 0 5 674 ab_*?xy abxy 0 4 675 ab_*?xy ab_xy 0 5 676 ab_*? ab 0 2 677 ab_*? ab__ 0 4 678 679 ab_{2,5}?xy ab__xy_ 0 6 680 ab_{2,5}?xy ab____xy_ 0 8 681 ab_{2,5}?xy ab_____xy_ 0 9 682 ab_{2,5}?xy ab__xy 0 6 683 ab_{2,5}?xy ab_____xy 0 9 684 ab_{2,5}? ab__ 0 4 685 ab_{2,5}? ab_______ 0 7 686 ab_{2,5}?xy ab______xy -1 -1 687 ab_{2,5}xy ab_xy -1 -1 688 689 ; and again for sets: 690 ab[_,;]*xy abxy_ 0 4 691 ab[_,;]*xy ab_xy_ 0 5 692 ab[_,;]*xy abxy 0 4 693 ab[_,;]*xy ab_xy 0 5 694 ab[_,;]* ab 0 2 695 ab[_,;]* ab__ 0 4 696 697 ab[_,;]{2,5}xy ab__xy_ 0 6 698 ab[_,;]{2,5}xy ab____xy_ 0 8 699 ab[_,;]{2,5}xy ab_____xy_ 0 9 700 ab[_,;]{2,5}xy ab__xy 0 6 701 ab[_,;]{2,5}xy ab_____xy 0 9 702 ab[_,;]{2,5} ab__ 0 4 703 ab[_,;]{2,5} ab_______ 0 7 704 ab[_,;]{2,5}xy ab______xy -1 -1 705 ab[_,;]{2,5}xy ab_xy -1 -1 706 707 ab[_,;]*?xy abxy_ 0 4 708 ab[_,;]*?xy ab_xy_ 0 5 709 ab[_,;]*?xy abxy 0 4 710 ab[_,;]*?xy ab_xy 0 5 711 ab[_,;]*? ab 0 2 712 ab[_,;]*? ab__ 0 4 713 714 ab[_,;]{2,5}?xy ab__xy_ 0 6 715 ab[_,;]{2,5}?xy ab____xy_ 0 8 716 ab[_,;]{2,5}?xy ab_____xy_ 0 9 717 ab[_,;]{2,5}?xy ab__xy 0 6 718 ab[_,;]{2,5}?xy ab_____xy 0 9 719 ab[_,;]{2,5}? ab__ 0 4 720 ab[_,;]{2,5}? 
ab_______ 0 7 721 ab[_,;]{2,5}?xy ab______xy -1 -1 722 ab[_,;]{2,5}xy ab_xy -1 -1 723 724 ; and again for tricky sets with digraphs: 725 ;ab[_[.ae.]]*xy abxy_ 0 4 726 ;ab[_[.ae.]]*xy ab_xy_ 0 5 727 ;ab[_[.ae.]]*xy abxy 0 4 728 ;ab[_[.ae.]]*xy ab_xy 0 5 729 ;ab[_[.ae.]]* ab 0 2 730 ;ab[_[.ae.]]* ab__ 0 4 731 732 ;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6 733 ;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8 734 ;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9 735 ;ab[_[.ae.]]{2,5}xy ab__xy 0 6 736 ;ab[_[.ae.]]{2,5}xy ab_____xy 0 9 737 ;ab[_[.ae.]]{2,5} ab__ 0 4 738 ;ab[_[.ae.]]{2,5} ab_______ 0 7 739 ;ab[_[.ae.]]{2,5}xy ab______xy -1 -1 740 ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 741 742 ;ab[_[.ae.]]*?xy abxy_ 0 4 743 ;ab[_[.ae.]]*?xy ab_xy_ 0 5 744 ;ab[_[.ae.]]*?xy abxy 0 4 745 ;ab[_[.ae.]]*?xy ab_xy 0 5 746 ;ab[_[.ae.]]*? ab 0 2 747 ;ab[_[.ae.]]*? ab__ 0 2 748 749 ;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6 750 ;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8 751 ;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9 752 ;ab[_[.ae.]]{2,5}?xy ab__xy 0 6 753 ;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9 754 ;ab[_[.ae.]]{2,5}? ab__ 0 4 755 ;ab[_[.ae.]]{2,5}? 
ab_______ 0 4 756 ;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1 757 ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 758 759 ; new bugs detected in spring 2003: 760 - normal match_continuous REG_NO_POSIX_TEST 761 b abc 1 2 762 763 () abc 0 0 0 0 764 ^() abc 0 0 0 0 765 ^()+ abc 0 0 0 0 766 ^(){1} abc 0 0 0 0 767 ^(){2} abc 0 0 0 0 768 ^((){2}) abc 0 0 0 0 0 0 769 () "" 0 0 0 0 770 ()\1 "" 0 0 0 0 771 ()\1 a 0 0 0 0 772 a()\1b ab 0 2 1 1 773 a()b\1 ab 0 2 1 1 774 775 ; subtleties of matching with no sub-expressions marked 776 - normal match_nosubs REG_NO_POSIX_TEST 777 a(b?c)+d accd 0 4 778 (wee|week)(knights|night) weeknights 0 10 779 .* abc 0 3 780 a(b|(c))d abd 0 3 781 a(b|(c))d acd 0 3 782 a(b*|c|e)d abbd 0 4 783 a(b*|c|e)d acd 0 3 784 a(b*|c|e)d ad 0 2 785 a(b?)c abc 0 3 786 a(b?)c ac 0 2 787 a(b+)c abc 0 3 788 a(b+)c abbbc 0 5 789 a(b*)c ac 0 2 790 (a|ab)(bc([de]+)f|cde) abcdef 0 6 791 a([bc]?)c abc 0 3 792 a([bc]?)c ac 0 2 793 a([bc]+)c abc 0 3 794 a([bc]+)c abcc 0 4 795 a([bc]+)bc abcbc 0 5 796 a(bb+|b)b abb 0 3 797 a(bbb+|bb+|b)b abb 0 3 798 a(bbb+|bb+|b)b abbb 0 4 799 a(bbb+|bb+|b)bb abbb 0 4 800 (.*).* abcdef 0 6 801 (a*)* bc 0 0 802 803 - normal nosubs REG_NO_POSIX_TEST 804 a(b?c)+d accd 0 4 805 (wee|week)(knights|night) weeknights 0 10 806 .* abc 0 3 807 a(b|(c))d abd 0 3 808 a(b|(c))d acd 0 3 809 a(b*|c|e)d abbd 0 4 810 a(b*|c|e)d acd 0 3 811 a(b*|c|e)d ad 0 2 812 a(b?)c abc 0 3 813 a(b?)c ac 0 2 814 a(b+)c abc 0 3 815 a(b+)c abbbc 0 5 816 a(b*)c ac 0 2 817 (a|ab)(bc([de]+)f|cde) abcdef 0 6 818 a([bc]?)c abc 0 3 819 a([bc]?)c ac 0 2 820 a([bc]+)c abc 0 3 821 a([bc]+)c abcc 0 4 822 a([bc]+)bc abcbc 0 5 823 a(bb+|b)b abb 0 3 824 a(bbb+|bb+|b)b abb 0 3 825 a(bbb+|bb+|b)b abbb 0 4 826 a(bbb+|bb+|b)bb abbb 0 4 827 (.*).* abcdef 0 6 828 (a*)* bc 0 0 829 830