1 # This set of tests checks the API, internals, and non-Perl stuff for UTF 2 # support, including Unicode properties. However, tests that give different 3 # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and 4 # 12). 5 6 # PCRE2 and Perl disagree about the characteristics of certain Unicode 7 # characters. For example, 061C is considered by Perl to be Arabic, though 8 # is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are 9 # graphic and printable according to Perl, though they are actually "isolate" 10 # control characters. That is why the following tests are here rather than in 11 # test 4. 12 13 /^[\p{Arabic}]/utf 14 \= Expect no match 15 \x{061c} 16 17 /^[[:graph:]]+$/utf,ucp 18 \= Expect no match 19 \x{61c} 20 \x{2066} 21 \x{2067} 22 \x{2068} 23 \x{2069} 24 25 /^[[:print:]]+$/utf,ucp 26 \= Expect no match 27 \x{61c} 28 \x{2066} 29 \x{2067} 30 \x{2068} 31 \x{2069} 32 33 /^[[:^graph:]]+$/utf,ucp 34 \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680} 35 \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} 36 37 /^[[:^print:]]+$/utf,ucp 38 \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} 39 \x{2068}\x{2069} 40 41 # Perl does not consider U+180e to be a space character. It is true that it 42 # does not appear in the Unicode PropList.txt file as such, but in many other 43 # sources it is listed as a space, and has been treated as such in PCRE for 44 # a long time. 45 46 /^>[[:blank:]]*/utf,ucp 47 >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 48 49 /^A\s+Z/utf,ucp 50 A\x{85}\x{180e}\x{2005}Z 51 52 /^A[\s]+Z/utf,ucp 53 A\x{2005}Z 54 A\x{85}\x{2005}Z 55 56 /^[[:graph:]]+$/utf,ucp 57 \= Expect no match 58 \x{180e} 59 60 /^[[:print:]]+$/utf,ucp 61 \x{180e} 62 63 /^[[:^graph:]]+$/utf,ucp 64 \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} 65 66 /^[[:^print:]]+$/utf,ucp 67 \= Expect no match 68 \x{180e} 69 70 # End of U+180E tests. 71 72 # --------------------------------------------------------------------- 73 74 /\x{110000}/IB,utf 75 76 /\o{4200000}/IB,utf 77 78 /\x{ffffffff}/utf 79 80 /\o{37777777777}/utf 81 82 /\x{100000000}/utf 83 84 /\o{77777777777}/utf 85 86 /\x{d800}/utf 87 88 /\o{154000}/utf 89 90 /\x{dfff}/utf 91 92 /\o{157777}/utf 93 94 /\x{d7ff}/utf 95 96 /\o{153777}/utf 97 98 /\x{e000}/utf 99 100 /\o{170000}/utf 101 102 /^\x{100}a\x{1234}/utf 103 \x{100}a\x{1234}bcd 104 105 /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf 106 \x{0041}\x{2262}\x{0391}\x{002e} 107 108 /.{3,5}X/IB,utf 109 \x{212ab}\x{212ab}\x{212ab}\x{861}X 110 111 /.{3,5}?/IB,utf 112 \x{212ab}\x{212ab}\x{212ab}\x{861} 113 114 /^[ab]/IB,utf 115 bar 116 \= Expect no match 117 c 118 \x{ff} 119 \x{100} 120 121 /^[^ab]/IB,utf 122 c 123 \x{ff} 124 \x{100} 125 \= Expect no match 126 aaa 127 128 /\x{100}*(\d+|"(?1)")/utf 129 1234 130 "1234" 131 \x{100}1234 132 "\x{100}1234" 133 \x{100}\x{100}12ab 134 \x{100}\x{100}"12" 135 \= Expect no match 136 \x{100}\x{100}abcd 137 138 /\x{100}*/IB,utf 139 140 /a\x{100}*/IB,utf 141 142 /ab\x{100}*/IB,utf 143 144 /[\x{200}-\x{100}]/utf 145 146 /[-]/utf 147 \x{100} 148 \x{104} 149 \= Expect no match 150 \x{105} 151 \x{ff} 152 153 /[\xFF]/IB 154 >\xff< 155 156 /[^\xFF]/IB 157 158 /[-]/utf 159 # Matches without Study 160 \x{d6} 161 162 /[-]/utf 163 <-- Same with Study 164 \x{d6} 165 166 /[\x{c4}-\x{dc}]/utf 167 # Matches without Study 168 \x{d6} 169 170 /[\x{c4}-\x{dc}]/utf 171 <-- Same with Study 172 \x{d6} 173 174 /[^\x{100}]abc(xyz(?1))/IB,utf 175 176 /(\x{100}(b(?2)c))?/IB,utf 177 178 /(\x{100}(b(?2)c)){0,2}/IB,utf 179 180 /(\x{100}(b(?1)c))?/IB,utf 181 182 /(\x{100}(b(?1)c)){0,2}/IB,utf 183 184 /\W/utf 185 A.B 186 A\x{100}B 187 188 /\w/utf 189 \x{100}X 190 191 /^\/IB,utf 192 193 /()()()()()()()()()() 194 ()()()()()()()()()() 195 ()()()()()()()()()() 196 ()()()()()()()()()() 197 A (x) (?41) B/x,utf 198 AxxB 199 200 /^[\x{100}\E-\Q\E\x{150}]/B,utf 201 202 /^[\Q\E-\Q\E]/B,utf 203 204 /^abc./gmx,newline=any,utf 205 abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK 206 207 /abc.$/gmx,newline=any,utf 208 abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 209 210 /^a\Rb/bsr=unicode,utf 211 a\nb 212 a\rb 213 a\r\nb 214 a\x0bb 215 a\x0cb 216 a\x{85}b 217 a\x{2028}b 218 a\x{2029}b 219 \= Expect no match 220 a\n\rb 221 222 /^a\R*b/bsr=unicode,utf 223 ab 224 a\nb 225 a\rb 226 a\r\nb 227 a\x0bb 228 a\x0c\x{2028}\x{2029}b 229 a\x{85}b 230 a\n\rb 231 a\n\r\x{85}\x0cb 232 233 /^a\R+b/bsr=unicode,utf 234 a\nb 235 a\rb 236 a\r\nb 237 a\x0bb 238 a\x0c\x{2028}\x{2029}b 239 a\x{85}b 240 a\n\rb 241 a\n\r\x{85}\x0cb 242 \= Expect no match 243 ab 244 245 /^a\R{1,3}b/bsr=unicode,utf 246 a\nb 247 a\n\rb 248 a\n\r\x{85}b 249 a\r\n\r\nb 250 a\r\n\r\n\r\nb 251 a\n\r\n\rb 252 a\n\n\r\nb 253 \= Expect no match 254 a\n\n\n\rb 255 a\r 256 257 /\H\h\V\v/utf 258 X X\x0a 259 X\x09X\x0b 260 \= Expect no match 261 \x{a0} X\x0a 262 263 /\H*\h+\V?\v{3,4}/utf 264 \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a 265 \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a 266 \x09\x20\x{a0}\x0a\x0b\x0c 267 \= Expect no match 268 \x09\x20\x{a0}\x0a\x0b 269 270 /\H\h\V\v/utf 271 \x{3001}\x{3000}\x{2030}\x{2028} 272 X\x{180e}X\x{85} 273 \= Expect no match 274 \x{2009} X\x0a 275 276 /\H*\h+\V?\v{3,4}/utf 277 \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a 278 \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a 279 \x09\x20\x{202f}\x0a\x0b\x0c 280 \= Expect no match 281 \x09\x{200a}\x{a0}\x{2028}\x0b 282 283 /[\h]/B,utf 284 >\x{1680} 285 286 /[\h]{3,}/B,utf 287 >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< 288 289 /[\v]/B,utf 290 291 /[\H]/B,utf 292 293 /[\V]/B,utf 294 295 /.*$/newline=any,utf 296 \x{1ec5} 297 298 /a\Rb/I,bsr=anycrlf,utf 299 a\rb 300 a\nb 301 a\r\nb 302 \= Expect no match 303 a\x{85}b 304 a\x0bb 305 306 /a\Rb/I,bsr=unicode,utf 307 a\rb 308 a\nb 309 a\r\nb 310 a\x{85}b 311 a\x0bb 312 313 /a\R?b/I,bsr=anycrlf,utf 314 a\rb 315 a\nb 316 a\r\nb 317 \= Expect no match 318 a\x{85}b 319 a\x0bb 320 321 /a\R?b/I,bsr=unicode,utf 322 a\rb 323 a\nb 324 a\r\nb 325 a\x{85}b 326 a\x0bb 327 328 /.*a.*=.b.*/utf,newline=any 329 QQQ\x{2029}ABCaXYZ=!bPQR 330 \= Expect no match 331 a\x{2029}b 332 \x61\xe2\x80\xa9\x62 333 334 /[[:a\x{100}b:]]/utf 335 336 /a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref 337 a\x{1234}b 338 a\nb 339 \= Expect no match 340 ab 341 342 /a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref 343 aXb 344 a\nX\nX\x{1234}b 345 \= Expect no match 346 ab 347 348 /(\x{de})\1/ 349 \x{de}\x{de} 350 351 /X/newline=any,utf,firstline 352 A\x{1ec5}ABCXYZ 353 354 /Xa{2,4}b/utf 355 X\=ps 356 Xa\=ps 357 Xaa\=ps 358 Xaaa\=ps 359 Xaaaa\=ps 360 361 /Xa{2,4}?b/utf 362 X\=ps 363 Xa\=ps 364 Xaa\=ps 365 Xaaa\=ps 366 Xaaaa\=ps 367 368 /Xa{2,4}+b/utf 369 X\=ps 370 Xa\=ps 371 Xaa\=ps 372 Xaaa\=ps 373 Xaaaa\=ps 374 375 /X\x{123}{2,4}b/utf 376 X\=ps 377 X\x{123}\=ps 378 X\x{123}\x{123}\=ps 379 X\x{123}\x{123}\x{123}\=ps 380 X\x{123}\x{123}\x{123}\x{123}\=ps 381 382 /X\x{123}{2,4}?b/utf 383 X\=ps 384 X\x{123}\=ps 385 X\x{123}\x{123}\=ps 386 X\x{123}\x{123}\x{123}\=ps 387 X\x{123}\x{123}\x{123}\x{123}\=ps 388 389 /X\x{123}{2,4}+b/utf 390 X\=ps 391 X\x{123}\=ps 392 X\x{123}\x{123}\=ps 393 X\x{123}\x{123}\x{123}\=ps 394 X\x{123}\x{123}\x{123}\x{123}\=ps 395 396 /X\x{123}{2,4}b/utf 397 \= Expect no match 398 Xx\=ps 399 X\x{123}x\=ps 400 X\x{123}\x{123}x\=ps 401 X\x{123}\x{123}\x{123}x\=ps 402 X\x{123}\x{123}\x{123}\x{123}x\=ps 403 404 /X\x{123}{2,4}?b/utf 405 \= Expect no match 406 Xx\=ps 407 X\x{123}x\=ps 408 X\x{123}\x{123}x\=ps 409 X\x{123}\x{123}\x{123}x\=ps 410 X\x{123}\x{123}\x{123}\x{123}x\=ps 411 412 /X\x{123}{2,4}+b/utf 413 \= Expect no match 414 Xx\=ps 415 X\x{123}x\=ps 416 X\x{123}\x{123}x\=ps 417 X\x{123}\x{123}\x{123}x\=ps 418 X\x{123}\x{123}\x{123}\x{123}x\=ps 419 420 /X\d{2,4}b/utf 421 X\=ps 422 X3\=ps 423 X33\=ps 424 X333\=ps 425 X3333\=ps 426 427 /X\d{2,4}?b/utf 428 X\=ps 429 X3\=ps 430 X33\=ps 431 X333\=ps 432 X3333\=ps 433 434 /X\d{2,4}+b/utf 435 X\=ps 436 X3\=ps 437 X33\=ps 438 X333\=ps 439 X3333\=ps 440 441 /X\D{2,4}b/utf 442 X\=ps 443 Xa\=ps 444 Xaa\=ps 445 Xaaa\=ps 446 Xaaaa\=ps 447 448 /X\D{2,4}?b/utf 449 X\=ps 450 Xa\=ps 451 Xaa\=ps 452 Xaaa\=ps 453 Xaaaa\=ps 454 455 /X\D{2,4}+b/utf 456 X\=ps 457 Xa\=ps 458 Xaa\=ps 459 Xaaa\=ps 460 Xaaaa\=ps 461 462 /X\D{2,4}b/utf 463 X\=ps 464 X\x{123}\=ps 465 X\x{123}\x{123}\=ps 466 X\x{123}\x{123}\x{123}\=ps 467 X\x{123}\x{123}\x{123}\x{123}\=ps 468 469 /X\D{2,4}?b/utf 470 X\=ps 471 X\x{123}\=ps 472 X\x{123}\x{123}\=ps 473 X\x{123}\x{123}\x{123}\=ps 474 X\x{123}\x{123}\x{123}\x{123}\=ps 475 476 /X\D{2,4}+b/utf 477 X\=ps 478 X\x{123}\=ps 479 X\x{123}\x{123}\=ps 480 X\x{123}\x{123}\x{123}\=ps 481 X\x{123}\x{123}\x{123}\x{123}\=ps 482 483 /X[abc]{2,4}b/utf 484 X\=ps 485 Xa\=ps 486 Xaa\=ps 487 Xaaa\=ps 488 Xaaaa\=ps 489 490 /X[abc]{2,4}?b/utf 491 X\=ps 492 Xa\=ps 493 Xaa\=ps 494 Xaaa\=ps 495 Xaaaa\=ps 496 497 /X[abc]{2,4}+b/utf 498 X\=ps 499 Xa\=ps 500 Xaa\=ps 501 Xaaa\=ps 502 Xaaaa\=ps 503 504 /X[abc\x{123}]{2,4}b/utf 505 X\=ps 506 X\x{123}\=ps 507 X\x{123}\x{123}\=ps 508 X\x{123}\x{123}\x{123}\=ps 509 X\x{123}\x{123}\x{123}\x{123}\=ps 510 511 /X[abc\x{123}]{2,4}?b/utf 512 X\=ps 513 X\x{123}\=ps 514 X\x{123}\x{123}\=ps 515 X\x{123}\x{123}\x{123}\=ps 516 X\x{123}\x{123}\x{123}\x{123}\=ps 517 518 /X[abc\x{123}]{2,4}+b/utf 519 X\=ps 520 X\x{123}\=ps 521 X\x{123}\x{123}\=ps 522 X\x{123}\x{123}\x{123}\=ps 523 X\x{123}\x{123}\x{123}\x{123}\=ps 524 525 /X[^a]{2,4}b/utf 526 X\=ps 527 Xz\=ps 528 Xzz\=ps 529 Xzzz\=ps 530 Xzzzz\=ps 531 532 /X[^a]{2,4}?b/utf 533 X\=ps 534 Xz\=ps 535 Xzz\=ps 536 Xzzz\=ps 537 Xzzzz\=ps 538 539 /X[^a]{2,4}+b/utf 540 X\=ps 541 Xz\=ps 542 Xzz\=ps 543 Xzzz\=ps 544 Xzzzz\=ps 545 546 /X[^a]{2,4}b/utf 547 X\=ps 548 X\x{123}\=ps 549 X\x{123}\x{123}\=ps 550 X\x{123}\x{123}\x{123}\=ps 551 X\x{123}\x{123}\x{123}\x{123}\=ps 552 553 /X[^a]{2,4}?b/utf 554 X\=ps 555 X\x{123}\=ps 556 X\x{123}\x{123}\=ps 557 X\x{123}\x{123}\x{123}\=ps 558 X\x{123}\x{123}\x{123}\x{123}\=ps 559 560 /X[^a]{2,4}+b/utf 561 X\=ps 562 X\x{123}\=ps 563 X\x{123}\x{123}\=ps 564 X\x{123}\x{123}\x{123}\=ps 565 X\x{123}\x{123}\x{123}\x{123}\=ps 566 567 /(Y)X\1{2,4}b/utf 568 YX\=ps 569 YXY\=ps 570 YXYY\=ps 571 YXYYY\=ps 572 YXYYYY\=ps 573 574 /(Y)X\1{2,4}?b/utf 575 YX\=ps 576 YXY\=ps 577 YXYY\=ps 578 YXYYY\=ps 579 YXYYYY\=ps 580 581 /(Y)X\1{2,4}+b/utf 582 YX\=ps 583 YXY\=ps 584 YXYY\=ps 585 YXYYY\=ps 586 YXYYYY\=ps 587 588 /(\x{123})X\1{2,4}b/utf 589 \x{123}X\=ps 590 \x{123}X\x{123}\=ps 591 \x{123}X\x{123}\x{123}\=ps 592 \x{123}X\x{123}\x{123}\x{123}\=ps 593 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps 594 595 /(\x{123})X\1{2,4}?b/utf 596 \x{123}X\=ps 597 \x{123}X\x{123}\=ps 598 \x{123}X\x{123}\x{123}\=ps 599 \x{123}X\x{123}\x{123}\x{123}\=ps 600 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps 601 602 /(\x{123})X\1{2,4}+b/utf 603 \x{123}X\=ps 604 \x{123}X\x{123}\=ps 605 \x{123}X\x{123}\x{123}\=ps 606 \x{123}X\x{123}\x{123}\x{123}\=ps 607 \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps 608 609 /\bthe cat\b/utf 610 the cat\=ps 611 the cat\=ph 612 613 /abcd*/utf 614 xxxxabcd\=ps 615 xxxxabcd\=ph 616 617 /abcd*/i,utf 618 xxxxabcd\=ps 619 xxxxabcd\=ph 620 XXXXABCD\=ps 621 XXXXABCD\=ph 622 623 /abc\d*/utf 624 xxxxabc1\=ps 625 xxxxabc1\=ph 626 627 /(a)bc\1*/utf 628 xxxxabca\=ps 629 xxxxabca\=ph 630 631 /abc[de]*/utf 632 xxxxabcde\=ps 633 xxxxabcde\=ph 634 635 /X\W{3}X/utf 636 X\=ps 637 638 /\sxxx\s/utf,tables=2 639 AB\x{85}xxx\x{a0}XYZ 640 AB\x{a0}xxx\x{85}XYZ 641 642 /\S \S/utf,tables=2 643 \x{a2} \x{84} 644 645 'A#'Bx,newline=any,utf 646 647 'A# 648 PQ'Bx,newline=any,utf 649 650 /a+#aa 651 z#XX?/Bx,newline=any,utf 652 653 /a+#aa 654 z#?/Bx,newline=any,utf 655 656 /\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf 658 659 /\g{A}xxx#b(?'A'123) (?'A'456)/Bx,newline=any,utf 661 662 /^\c/utf 663 664 /(\R*)(.)/s,utf 665 \r\n 666 \r\r\n\n\r 667 \r\r\n\n\r\n 668 669 /(\R)*(.)/s,utf 670 \r\n 671 \r\r\n\n\r 672 \r\r\n\n\r\n 673 674 /[^\x{1234}]+/Ii,utf 675 676 /[^\x{1234}]+?/Ii,utf 677 678 /[^\x{1234}]++/Ii,utf 679 680 /[^\x{1234}]{2}/Ii,utf 681 682 /f.*/ 683 for\=ph 684 685 /f.*/s 686 for\=ph 687 688 /f.*/utf 689 for\=ph 690 691 /f.*/s,utf 692 for\=ph 693 694 /\x{d7ff}\x{e000}/utf 695 696 /\x{d800}/utf 697 698 /\x{dfff}/utf 699 700 /\h+/utf 701 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 702 \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 703 704 /[\h\x{e000}]+/B,utf 705 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 706 \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 707 708 /\H+/utf 709 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 710 \x{2000}\x{200a}\x{1fff}\x{200b} 711 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 712 \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 713 714 /[\H\x{d7ff}]+/B,utf 715 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 716 \x{2000}\x{200a}\x{1fff}\x{200b} 717 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 718 \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 719 720 /\v+/utf 721 \x{2027}\x{2030}\x{2028}\x{2029} 722 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 723 724 /[\v\x{e000}]+/B,utf 725 \x{2027}\x{2030}\x{2028}\x{2029} 726 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 727 728 /\V+/utf 729 \x{2028}\x{2029}\x{2027}\x{2030} 730 \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 731 732 /[\V\x{d7ff}]+/B,utf 733 \x{2028}\x{2029}\x{2027}\x{2030} 734 \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 735 736 /\R+/bsr=unicode,utf 737 \x{2027}\x{2030}\x{2028}\x{2029} 738 \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 739 740 /(..)\1/utf 741 ab\=ps 742 aba\=ps 743 abab\=ps 744 745 /(..)\1/i,utf 746 ab\=ps 747 abA\=ps 748 aBAb\=ps 749 750 /(..)\1{2,}/utf 751 ab\=ps 752 aba\=ps 753 abab\=ps 754 ababa\=ps 755 ababab\=ps 756 ababab\=ph 757 abababa\=ps 758 abababa\=ph 759 760 /(..)\1{2,}/i,utf 761 ab\=ps 762 aBa\=ps 763 aBAb\=ps 764 AbaBA\=ps 765 abABAb\=ps 766 aBAbaB\=ph 767 abABabA\=ps 768 abaBABa\=ph 769 770 /(..)\1{2,}?x/i,utf 771 ab\=ps 772 abA\=ps 773 aBAb\=ps 774 abaBA\=ps 775 abAbaB\=ps 776 abaBabA\=ps 777 abAbABaBx\=ps 778 779 /./utf,newline=crlf 780 \r\=ps 781 \r\=ph 782 783 /.{2,3}/utf,newline=crlf 784 \r\=ps 785 \r\=ph 786 \r\r\=ps 787 \r\r\=ph 788 \r\r\r\=ps 789 \r\r\r\=ph 790 791 /.{2,3}?/utf,newline=crlf 792 \r\=ps 793 \r\=ph 794 \r\r\=ps 795 \r\r\=ph 796 \r\r\r\=ps 797 \r\r\r\=ph 798 799 /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf 800 801 /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf 802 803 /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf 804 805 /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf 806 807 /(?<=\x{1234}\x{1234})\bxy/I,utf 808 809 /(?<!^)ETA/utf 810 \= Expect no match 811 ETA 812 813 /\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref 814 815 /[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref 816 817 /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref 818 819 /^a+[a\x{200}]/B,utf 820 aa 821 822 /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf 823 824 /[\p{L}]/IB 825 826 /[\p{^L}]/IB 827 828 /[\P{L}]/IB 829 830 /[\P{^L}]/IB 831 832 /[abc\p{L}\x{0660}]/IB,utf 833 834 /[\p{Nd}]/IB,utf 835 1234 836 837 /[\p{Nd}+-]+/IB,utf 838 1234 839 12-34 840 12+\x{661}-34 841 \= Expect no match 842 abcd 843 844 /(?:[\PPa*]*){8,}/ 845 846 /[\P{Any}]/B 847 848 /[\P{Any}\E]/B 849 850 /(\P{Yi}+\277)/ 851 852 /(\P{Yi}+\277)?/ 853 854 /(?<=\P{Yi}{3}A)X/ 855 856 /\p{Yi}+(\P{Yi}+)(?1)/ 857 858 /(\P{Yi}{2}\277)?/ 859 860 /[\P{Yi}A]/ 861 862 /[\P{Yi}\P{Yi}\P{Yi}A]/ 863 864 /[^\P{Yi}A]/ 865 866 /[^\P{Yi}\P{Yi}\P{Yi}A]/ 867 868 /(\P{Yi}*\277)*/ 869 870 /(\P{Yi}*?\277)*/ 871 872 /(\p{Yi}*+\277)*/ 873 874 /(\P{Yi}?\277)*/ 875 876 /(\P{Yi}??\277)*/ 877 878 /(\p{Yi}?+\277)*/ 879 880 /(\P{Yi}{0,3}\277)*/ 881 882 /(\P{Yi}{0,3}?\277)*/ 883 884 /(\p{Yi}{0,3}+\277)*/ 885 886 /\p{Zl}{2,3}+/B,utf 887 890 \x{2028}\x{2028}\x{2028} 891 892 /\p{Zl}/B,utf 893 894 /\p{Lu}{3}+/B,utf 895 896 /\pL{2}+/B,utf 897 898 /\p{Cc}{2}+/B,utf 899 900 /^\p{Cf}/utf 901 \x{180e} 902 \x{061c} 903 \x{2066} 904 \x{2067} 905 \x{2068} 906 \x{2069} 907 908 /^\p{Cs}/utf 909 \x{dfff}\=no_utf_check 910 \= Expect no match 911 \x{09f} 912 913 /^\p{Mn}/utf 914 \x{1a1b} 915 916 /^\p{Pe}/utf 917 \x{2309} 918 \x{230b} 919 920 /^\p{Ps}/utf 921 \x{2308} 922 \x{230a} 923 924 /^\p{Sc}+/utf 925 $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} 926 \x{9f2} 927 \= Expect no match 928 X 929 \x{2c2} 930 931 /^\p{Zs}/utf 932 \ \ 933 \x{a0} 934 \x{1680} 935 \x{2000} 936 \x{2001} 937 \= Expect no match 938 \x{2028} 939 \x{200d} 940 941 # These are here because Perl has problems with the negative versions of the 942 # properties and has changed how it behaves for caseless matching. 943 944 /\p{^Lu}/i,utf 945 1234 946 \= Expect no match 947 ABC 948 949 /\P{Lu}/i,utf 950 1234 951 \= Expect no match 952 ABC 953 954 /\p{Ll}/i,utf 955 a 956 Az 957 \= Expect no match 958 ABC 959 960 /\p{Lu}/i,utf 961 A 962 a\x{10a0}B 963 \= Expect no match 964 a 965 \x{1d00} 966 967 /\p{Lu}/i,utf 968 A 969 aZ 970 \= Expect no match 971 abc 972 973 /[\x{c0}\x{391}]/i,utf 974 \x{c0} 975 \x{e0} 976 977 # The next two are special cases where the lengths of the different cases of 978 # the same character differ. The first went wrong with heap frame storage; the 979 # second was broken in all cases. 980 981 /^\x{023a}+?(\x{0130}+)/i,utf 982 \x{023a}\x{2c65}\x{0130} 983 984 /^\x{023a}+([^X])/i,utf 985 \x{023a}\x{2c65}X 986 987 /\x{c0}+\x{116}+/i,utf 988 \x{c0}\x{e0}\x{116}\x{117} 989 990 /[\x{c0}\x{116}]+/i,utf 991 \x{c0}\x{e0}\x{116}\x{117} 992 993 /(\x{de})\1/i,utf 994 \x{de}\x{de} 995 \x{de}\x{fe} 996 \x{fe}\x{fe} 997 \x{fe}\x{de} 998 999 /^\x{c0}$/i,utf 1000 \x{c0} 1001 \x{e0} 1002 1003 /^\x{e0}$/i,utf 1004 \x{c0} 1005 \x{e0} 1006 1007 # The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE 1008 # will match it only with UCP support, because without that it has no notion 1009 # of case for anything other than the ASCII letters. 1010 1011 /((?i)[\x{c0}])/utf 1012 \x{c0} 1013 \x{e0} 1014 1015 /(?i:[\x{c0}])/utf 1016 \x{c0} 1017 \x{e0} 1018 1019 # These are PCRE's extra properties to help with Unicodizing \d etc. 1020 1021 /^\p{Xan}/utf 1022 ABCD 1023 1234 1024 \x{6ca} 1025 \x{a6c} 1026 \x{10a7} 1027 \= Expect no match 1028 _ABC 1029 1030 /^\p{Xan}+/utf 1031 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1032 \= Expect no match 1033 _ABC 1034 1035 /^\p{Xan}+?/utf 1036 \x{6ca}\x{a6c}\x{10a7}_ 1037 1038 /^\p{Xan}*/utf 1039 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1040 1041 /^\p{Xan}{2,9}/utf 1042 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1043 1044 /^\p{Xan}{2,9}?/utf 1045 \x{6ca}\x{a6c}\x{10a7}_ 1046 1047 /^[\p{Xan}]/utf 1048 ABCD1234_ 1049 1234abcd_ 1050 \x{6ca} 1051 \x{a6c} 1052 \x{10a7} 1053 \= Expect no match 1054 _ABC 1055 1056 /^[\p{Xan}]+/utf 1057 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1058 \= Expect no match 1059 _ABC 1060 1061 /^>\p{Xsp}/utf 1062 >\x{1680}\x{2028}\x{0b} 1063 >\x{a0} 1064 \= Expect no match 1065 \x{0b} 1066 1067 /^>\p{Xsp}+/utf 1068 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1069 1070 /^>\p{Xsp}+?/utf 1071 >\x{1680}\x{2028}\x{0b} 1072 1073 /^>\p{Xsp}*/utf 1074 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1075 1076 /^>\p{Xsp}{2,9}/utf 1077 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1078 1079 /^>\p{Xsp}{2,9}?/utf 1080 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1081 1082 /^>[\p{Xsp}]/utf 1083 >\x{2028}\x{0b} 1084 1085 /^>[\p{Xsp}]+/utf 1086 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1087 1088 /^>\p{Xps}/utf 1089 >\x{1680}\x{2028}\x{0b} 1090 >\x{a0} 1091 \= Expect no match 1092 \x{0b} 1093 1094 /^>\p{Xps}+/utf 1095 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1096 1097 /^>\p{Xps}+?/utf 1098 >\x{1680}\x{2028}\x{0b} 1099 1100 /^>\p{Xps}*/utf 1101 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1102 1103 /^>\p{Xps}{2,9}/utf 1104 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1105 1106 /^>\p{Xps}{2,9}?/utf 1107 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1108 1109 /^>[\p{Xps}]/utf 1110 >\x{2028}\x{0b} 1111 1112 /^>[\p{Xps}]+/utf 1113 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 1114 1115 /^\p{Xwd}/utf 1116 ABCD 1117 1234 1118 \x{6ca} 1119 \x{a6c} 1120 \x{10a7} 1121 _ABC 1122 \= Expect no match 1123 [] 1124 1125 /^\p{Xwd}+/utf 1126 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1127 1128 /^\p{Xwd}+?/utf 1129 \x{6ca}\x{a6c}\x{10a7}_ 1130 1131 /^\p{Xwd}*/utf 1132 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1133 1134 /^\p{Xwd}{2,9}/utf 1135 A_B12\x{6ca}\x{a6c}\x{10a7} 1136 1137 /^\p{Xwd}{2,9}?/utf 1138 \x{6ca}\x{a6c}\x{10a7}_ 1139 1140 /^[\p{Xwd}]/utf 1141 ABCD1234_ 1142 1234abcd_ 1143 \x{6ca} 1144 \x{a6c} 1145 \x{10a7} 1146 _ABC 1147 \= Expect no match 1148 [] 1149 1150 /^[\p{Xwd}]+/utf 1151 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 1152 1153 # A check not in UTF-8 mode 1154 1155 /^[\p{Xwd}]+/ 1156 ABCD1234_ 1157 1158 # Some negative checks 1159 1160 /^[\P{Xwd}]+/utf 1161 !.+\x{019}\x{35a}AB 1162 1163 /^[\p{^Xwd}]+/utf 1164 !.+\x{019}\x{35a}AB 1165 1166 /[\D]/B,utf,ucp 1167 1\x{3c8}2 1168 1169 /[\d]/B,utf,ucp 1170 >\x{6f4}< 1171 1172 /[\S]/B,utf,ucp 1173 \x{1680}\x{6f4}\x{1680} 1174 1175 /[\s]/B,utf,ucp 1176 >\x{1680}< 1177 1178 /[\W]/B,utf,ucp 1179 A\x{1712}B 1180 1181 /[\w]/B,utf,ucp 1182 >\x{1723}< 1183 1184 /\D/B,utf,ucp 1185 1\x{3c8}2 1186 1187 /\d/B,utf,ucp 1188 >\x{6f4}< 1189 1190 /\S/B,utf,ucp 1191 \x{1680}\x{6f4}\x{1680} 1192 1193 /\s/B,utf,ucp 1194 >\x{1680}> 1195 1196 /\W/B,utf,ucp 1197 A\x{1712}B 1198 1199 /\w/B,utf,ucp 1200 >\x{1723}< 1201 1202 /[[:alpha:]]/B,ucp 1203 1204 /[[:lower:]]/B,ucp 1205 1206 /[[:upper:]]/B,ucp 1207 1208 /[[:alnum:]]/B,ucp 1209 1210 /[[:ascii:]]/B,ucp 1211 1212 /[[:cntrl:]]/B,ucp 1213 1214 /[[:digit:]]/B,ucp 1215 1216 /[[:graph:]]/B,ucp 1217 1218 /[[:print:]]/B,ucp 1219 1220 /[[:punct:]]/B,ucp 1221 1222 /[[:space:]]/B,ucp 1223 1224 /[[:word:]]/B,ucp 1225 1226 /[[:xdigit:]]/B,ucp 1227 1228 # Unicode properties for \b abd \B 1229 1230 /\b...\B/utf,ucp 1231 abc_ 1232 \x{37e}abc\x{376} 1233 \x{37e}\x{376}\x{371}\x{393}\x{394} 1234 !\x{c0}++\x{c1}\x{c2} 1235 !\x{c0}+++++ 1236 1237 # Without PCRE_UCP, non-ASCII always fail, even if < 256 1238 1239 /\b...\B/utf 1240 abc_ 1241 \= Expect no match 1242 \x{37e}abc\x{376} 1243 \x{37e}\x{376}\x{371}\x{393}\x{394} 1244 !\x{c0}++\x{c1}\x{c2} 1245 !\x{c0}+++++ 1246 1247 # With PCRE_UCP, non-UTF8 chars that are < 256 still check properties 1248 1249 /\b...\B/ucp 1250 abc_ 1251 !\x{c0}++\x{c1}\x{c2} 1252 !\x{c0}+++++ 1253 1254 # Some of these are silly, but they check various combinations 1255 1256 /[[:^alpha:][:^cntrl:]]+/B,utf,ucp 1257 123 1258 abc 1259 1260 /[[:^cntrl:][:^alpha:]]+/B,utf,ucp 1261 123 1262 abc 1263 1264 /[[:alpha:]]+/B,utf,ucp 1265 abc 1266 1267 /[[:^alpha:]\S]+/B,utf,ucp 1268 123 1269 abc 1270 1271 /[^\d]+/B,utf,ucp 1272 abc123 1273 abc\x{123} 1274 \x{660}abc 1275 1276 /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B 1277 1278 /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B 1279 1280 /\P{Lu}+9\P{Lu}+B\P{Lu}+b/B 1281 1282 /\p{Han}+X\p{Greek}+\x{370}/B,utf 1283 1284 /\p{Xan}+!\p{Xan}+A/B 1285 1286 /\p{Xsp}+!\p{Xsp}\t/B 1287 1288 /\p{Xps}+!\p{Xps}\t/B 1289 1290 /\p{Xwd}+!\p{Xwd}_/B 1291 1292 /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp 1293 1294 # These behaved oddly in Perl, so they are kept in this test 1295 1296 /(\x{23a}\x{23a}\x{23a})?\1/i,utf 1297 \= Expect no match 1298 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} 1299 1300 /()?\1/i,utf 1301 \= Expect no match 1302 1303 1304 /(\x{23a}\x{23a}\x{23a})?\1/i,utf 1305 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1306 1307 /()?\1/i,utf 1308 1309 1310 /(\x{23a}\x{23a}\x{23a})\1/i,utf 1311 \= Expect no match 1312 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} 1313 1314 /()\1/i,utf 1315 \= Expect no match 1316 1317 1318 /(\x{23a}\x{23a}\x{23a})\1/i,utf 1319 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1320 1321 /()\1/i,utf 1322 1323 1324 /(\x{2c65}\x{2c65})\1/i,utf 1325 \x{2c65}\x{2c65}\x{23a}\x{23a} 1326 1327 /()\1/i,utf 1328 1329 1330 /(\x{23a}\x{23a}\x{23a})\1Y/i,utf 1331 X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ 1332 1333 /(\x{2c65}\x{2c65})\1Y/i,utf 1334 X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ 1335 1336 # These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE 1337 1338 /^[\p{Batak}]/utf 1339 \x{1bc0} 1340 \x{1bff} 1341 \= Expect no match 1342 \x{1bf4} 1343 1344 /^[\p{Brahmi}]/utf 1345 \x{11000} 1346 \x{1106f} 1347 \= Expect no match 1348 \x{1104e} 1349 1350 /^[\p{Mandaic}]/utf 1351 \x{840} 1352 \x{85e} 1353 \= Expect no match 1354 \x{85c} 1355 \x{85d} 1356 1357 /(\X*)(.)/s,utf 1358 A\x{300} 1359 1360 /^S(\X*)e(\X*)$/utf 1361 Stereo 1362 1363 /^\X/utf 1364 reo 1365 1366 /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames 1367 aX41z 1368 \= Expect no match 1369 aAz 1370 1371 /\X/ 1372 a\=ps 1373 a\=ph 1374 1375 /\Xa/ 1376 aa\=ps 1377 aa\=ph 1378 1379 /\X{2}/ 1380 aa\=ps 1381 aa\=ph 1382 1383 /\X+a/ 1384 a\=ps 1385 aa\=ps 1386 aa\=ph 1387 1388 /\X+?a/ 1389 a\=ps 1390 ab\=ps 1391 aa\=ps 1392 aa\=ph 1393 aba\=ps 1394 1395 # These Unicode 6.1.0 scripts are not known to Perl. 1396 1397 /\p{Chakma}\d/utf,ucp 1398 \x{11100}\x{1113c} 1399 1400 /\p{Takri}\d/utf,ucp 1401 \x{11680}\x{116c0} 1402 1403 /^\X/utf 1404 A\=ps 1405 A\=ph 1406 A\x{300}\x{301}\=ps 1407 A\x{300}\x{301}\=ph 1408 A\x{301}\=ps 1409 A\x{301}\=ph 1410 1411 /^\X{2,3}/utf 1412 A\=ps 1413 A\=ph 1414 AA\=ps 1415 AA\=ph 1416 A\x{300}\x{301}\=ps 1417 A\x{300}\x{301}\=ph 1418 A\x{300}\x{301}A\x{300}\x{301}\=ps 1419 A\x{300}\x{301}A\x{300}\x{301}\=ph 1420 1421 /^\X{2}/utf 1422 AA\=ps 1423 AA\=ph 1424 A\x{300}\x{301}A\x{300}\x{301}\=ps 1425 A\x{300}\x{301}A\x{300}\x{301}\=ph 1426 1427 /^\X+/utf 1428 AA\=ps 1429 AA\=ph 1430 1431 /^\X+?Z/utf 1432 AA\=ps 1433 AA\=ph 1434 1435 /A\x{3a3}B/IBi,utf 1436 1437 /[\x{3a3}]/Bi,utf 1438 1439 /[^\x{3a3}]/Bi,utf 1440 1441 /[\x{3a3}]+/Bi,utf 1442 1443 /[^\x{3a3}]+/Bi,utf 1444 1445 /a*\x{3a3}/Bi,utf 1446 1447 /\x{3a3}+a/Bi,utf 1448 1449 /\x{3a3}*\x{3c2}/Bi,utf 1450 1451 /\x{3a3}{3}/i,utf,aftertext 1452 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1453 1454 /\x{3a3}{2,4}/i,utf,aftertext 1455 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1456 1457 /\x{3a3}{2,4}?/i,utf,aftertext 1458 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1459 1460 /\x{3a3}+./i,utf,aftertext 1461 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1462 1463 /\x{3a3}++./i,utf,aftertext 1464 \= Expect no match 1465 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 1466 1467 /\x{3a3}*\x{3c2}/Bi,utf 1468 1469 /[^\x{3a3}]*\x{3c2}/Bi,utf 1470 1471 /[^a]*\x{3c2}/Bi,utf 1472 1473 /ist/Bi,utf 1474 \= Expect no match 1475 ikt 1476 1477 /is+t/i,utf 1478 iSs\x{17f}t 1479 \= Expect no match 1480 ikt 1481 1482 /is+?t/i,utf 1483 \= Expect no match 1484 ikt 1485 1486 /is?t/i,utf 1487 \= Expect no match 1488 ikt 1489 1490 /is{2}t/i,utf 1491 \= Expect no match 1492 iskt 1493 1494 # This property is a PCRE special 1495 1496 /^\p{Xuc}/utf 1497 $abc 1498 @abc 1499 `abc 1500 \x{1234}abc 1501 \= Expect no match 1502 abc 1503 1504 /^\p{Xuc}+/utf 1505 $@`\x{a0}\x{1234}\x{e000}** 1506 \= Expect no match 1507 \x{9f} 1508 1509 /^\p{Xuc}+?/utf 1510 $@`\x{a0}\x{1234}\x{e000}** 1511 \= Expect no match 1512 \x{9f} 1513 1514 /^\p{Xuc}+?\*/utf 1515 $@`\x{a0}\x{1234}\x{e000}** 1516 \= Expect no match 1517 \x{9f} 1518 1519 /^\p{Xuc}++/utf 1520 $@`\x{a0}\x{1234}\x{e000}** 1521 \= Expect no match 1522 \x{9f} 1523 1524 /^\p{Xuc}{3,5}/utf 1525 $@`\x{a0}\x{1234}\x{e000}** 1526 \= Expect no match 1527 \x{9f} 1528 1529 /^\p{Xuc}{3,5}?/utf 1530 $@`\x{a0}\x{1234}\x{e000}** 1531 \= Expect no match 1532 \x{9f} 1533 1534 /^[\p{Xuc}]/utf 1535 $@`\x{a0}\x{1234}\x{e000}** 1536 \= Expect no match 1537 \x{9f} 1538 1539 /^[\p{Xuc}]+/utf 1540 $@`\x{a0}\x{1234}\x{e000}** 1541 \= Expect no match 1542 \x{9f} 1543 1544 /^\P{Xuc}/utf 1545 abc 1546 \= Expect no match 1547 $abc 1548 @abc 1549 `abc 1550 \x{1234}abc 1551 1552 /^[\P{Xuc}]/utf 1553 abc 1554 \= Expect no match 1555 $abc 1556 @abc 1557 `abc 1558 \x{1234}abc 1559 1560 # Some auto-possessification tests 1561 1562 /\pN+\z/B 1563 1564 /\PN+\z/B 1565 1566 /\pN+/B 1567 1568 /\PN+/B 1569 1570 /\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp 1571 1572 /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp 1573 1574 /\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp 1575 1576 /\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp 1577 1578 /\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp 1579 1580 /\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp 1581 1582 /\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp 1583 1584 /\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp 1585 1586 /\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp 1587 1588 /\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp 1589 1590 /\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp 1591 1592 /\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp 1593 1594 /\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp 1595 1596 /\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp 1597 1598 /\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp 1599 1600 /\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp 1601 1602 /\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp 1603 1604 /\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp 1605 1606 /\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp 1607 1608 /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp 1609 1610 # End auto-possessification tests 1611 1612 /\w+/B,utf,ucp,auto_callout 1613 abcd 1614 1615 /[\p{N}]?+/B,no_auto_possess 1616 1617 /[\p{L}ab]{2,3}+/B,no_auto_possess 1618 1619 /\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx 1620 1621 /.+\X/Bsx 1622 1623 /\X+$/Bmx 1624 1625 /\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx 1626 1627 /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp 1628 1629 /[RST]+/Bi,utf,ucp 1630 1631 /[R-T]+/Bi,utf,ucp 1632 1633 /[Q-U]+/Bi,utf,ucp 1634 1635 /^s?c/Iim,utf 1636 scat 1637 1638 /\X?abc/utf,no_start_optimize 1639 \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 1640 1641 /\x{100}\x{200}\K\x{300}/utf,startchar 1642 \x{100}\x{200}\x{300} 1643 1644 # Test UTF characters in a substitution 1645 1646 /bc/utf,replace=XZ 1647 123bc123 1648 1649 /(?<=abc)(|def)/g,utf,replace=<$0> 1650 123abcyzabcdef789abcqr 1651 1652 /[A-`]/iB,utf 1653 abcdefghijklmno 1654 1655 /(?<=\K\x{17f})/g,utf,aftertext 1656 \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 1657 1658 /(?<=\K\x{17f})/altglobal,utf,aftertext 1659 \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 1660 1661 "\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" 1662 1663 /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ 1664 1665 "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" 1666 1667 /[\pS#moq]/ 1668 = 1669 1670 /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark 1671 cxxxz 1672 1673 /abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended 1674 abcd 1675 1676 /a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended 1677 a\x{e0}\x{101}\x{c0}\x{102} 1678 1679 /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> 1680 ab12cde 1681 1682 /[\W\p{Any}]/B 1683 abc 1684 123 1685 1686 /[\W\pL]/B 1687 abc 1688 \= Expect no match 1689 123 1690 1691 /(*UCP)(*UTF)[[:>:]]X/B 1692 1693 /abc/utf,replace=xyz 1694 abc\=zero_terminate 1695 1696 /a[[:punct:]b]/ucp,bincode 1697 1698 /a[[:punct:]b]/utf,ucp,bincode 1699 1700 /a[b[:punct:]]/utf,ucp,bincode 1701 1702 /[[:^ascii:]]/utf,ucp,bincode 1703 1704 /[[:^ascii:]\w]/utf,ucp,bincode 1705 1706 /[\w[:^ascii:]]/utf,ucp,bincode 1707 1708 /[^[:ascii:]\W]/utf,ucp,bincode 1709 \x{de} 1710 \x{200} 1711 \= Expect no match 1712 \x{300} 1713 \x{37e} 1714 1715 /[[:^ascii:]a]/utf,ucp,bincode 1716 1717 /L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout 1718 1719 /L(?#(|++<!(2)?/B,utf,ucp,auto_callout 1720 1721 /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/ 1722 1723 # End of testinput5 1724