1 /-- This set of tests is for UTF-8 support but not Unicode property support, 2 and is relevant only to the 8-bit library. --/ 3 4 < forbid W 5 6 /X(\C{3})/8 7 X\x{1234} 8 0: X\x{1234} 9 1: \x{1234} 10 11 /X(\C{4})/8 12 X\x{1234}YZ 13 0: X\x{1234}Y 14 1: \x{1234}Y 15 16 /X\C*/8 17 XYZabcdce 18 0: XYZabcdce 19 20 /X\C*?/8 21 XYZabcde 22 0: X 23 24 /X\C{3,5}/8 25 Xabcdefg 26 0: Xabcde 27 X\x{1234} 28 0: X\x{1234} 29 X\x{1234}YZ 30 0: X\x{1234}YZ 31 X\x{1234}\x{512} 32 0: X\x{1234}\x{512} 33 X\x{1234}\x{512}YZ 34 0: X\x{1234}\x{512} 35 36 /X\C{3,5}?/8 37 Xabcdefg 38 0: Xabc 39 X\x{1234} 40 0: X\x{1234} 41 X\x{1234}YZ 42 0: X\x{1234} 43 X\x{1234}\x{512} 44 0: X\x{1234} 45 46 /a\Cb/8 47 aXb 48 0: aXb 49 a\nb 50 0: a\x{0a}b 51 52 /a\C\Cb/8 53 a\x{100}b 54 0: a\x{100}b 55 56 /ab\Cde/8 57 abXde 58 0: abXde 59 60 /a\C\Cb/8 61 a\x{100}b 62 0: a\x{100}b 63 ** Failers 64 No match 65 a\x{12257}b 66 No match 67 68 /[]/8 69 Failed: invalid UTF-8 string at offset 1 70 71 //8 72 Failed: invalid UTF-8 string at offset 0 73 74 /xxx/8 75 Failed: invalid UTF-8 string at offset 0 76 77 /xxx/8?DZSSO 78 ------------------------------------------------------------------ 79 Bra 80 \X{c0}\X{c0}\X{c0}xxx 81 Ket 82 End 83 ------------------------------------------------------------------ 84 Capturing subpattern count = 0 85 Options: no_auto_possessify utf no_utf_check 86 First char = \x{c3} 87 Need char = 'x' 88 89 /badutf/8 90 \xdf 91 Error -10 (bad UTF-8 string) offset=0 reason=1 92 \xef 93 Error -10 (bad UTF-8 string) offset=0 reason=2 94 \xef\x80 95 Error -10 (bad UTF-8 string) offset=0 reason=1 96 \xf7 97 Error -10 (bad UTF-8 string) offset=0 reason=3 98 \xf7\x80 99 Error -10 (bad UTF-8 string) offset=0 reason=2 100 \xf7\x80\x80 101 Error -10 (bad UTF-8 string) offset=0 reason=1 102 \xfb 103 Error -10 (bad UTF-8 string) offset=0 reason=4 104 \xfb\x80 105 Error -10 (bad UTF-8 string) offset=0 reason=3 106 \xfb\x80\x80 107 Error -10 (bad UTF-8 string) offset=0 reason=2 108 \xfb\x80\x80\x80 109 Error -10 (bad UTF-8 string) offset=0 reason=1 110 \xfd 111 Error -10 (bad UTF-8 string) offset=0 reason=5 112 \xfd\x80 113 Error -10 (bad UTF-8 string) offset=0 reason=4 114 \xfd\x80\x80 115 Error -10 (bad UTF-8 string) offset=0 reason=3 116 \xfd\x80\x80\x80 117 Error -10 (bad UTF-8 string) offset=0 reason=2 118 \xfd\x80\x80\x80\x80 119 Error -10 (bad UTF-8 string) offset=0 reason=1 120 \xdf\x7f 121 Error -10 (bad UTF-8 string) offset=0 reason=6 122 \xef\x7f\x80 123 Error -10 (bad UTF-8 string) offset=0 reason=6 124 \xef\x80\x7f 125 Error -10 (bad UTF-8 string) offset=0 reason=7 126 \xf7\x7f\x80\x80 127 Error -10 (bad UTF-8 string) offset=0 reason=6 128 \xf7\x80\x7f\x80 129 Error -10 (bad UTF-8 string) offset=0 reason=7 130 \xf7\x80\x80\x7f 131 Error -10 (bad UTF-8 string) offset=0 reason=8 132 \xfb\x7f\x80\x80\x80 133 Error -10 (bad UTF-8 string) offset=0 reason=6 134 \xfb\x80\x7f\x80\x80 135 Error -10 (bad UTF-8 string) offset=0 reason=7 136 \xfb\x80\x80\x7f\x80 137 Error -10 (bad UTF-8 string) offset=0 reason=8 138 \xfb\x80\x80\x80\x7f 139 Error -10 (bad UTF-8 string) offset=0 reason=9 140 \xfd\x7f\x80\x80\x80\x80 141 Error -10 (bad UTF-8 string) offset=0 reason=6 142 \xfd\x80\x7f\x80\x80\x80 143 Error -10 (bad UTF-8 string) offset=0 reason=7 144 \xfd\x80\x80\x7f\x80\x80 145 Error -10 (bad UTF-8 string) offset=0 reason=8 146 \xfd\x80\x80\x80\x7f\x80 147 Error -10 (bad UTF-8 string) offset=0 reason=9 148 \xfd\x80\x80\x80\x80\x7f 149 Error -10 (bad UTF-8 string) offset=0 reason=10 150 \xed\xa0\x80 151 Error -10 (bad UTF-8 string) offset=0 reason=14 152 \xc0\x8f 153 Error -10 (bad UTF-8 string) offset=0 reason=15 154 \xe0\x80\x8f 155 Error -10 (bad UTF-8 string) offset=0 reason=16 156 \xf0\x80\x80\x8f 157 Error -10 (bad UTF-8 string) offset=0 reason=17 158 \xf8\x80\x80\x80\x8f 159 Error -10 (bad UTF-8 string) offset=0 reason=18 160 \xfc\x80\x80\x80\x80\x8f 161 Error -10 (bad UTF-8 string) offset=0 reason=19 162 \x80 163 Error -10 (bad UTF-8 string) offset=0 reason=20 164 \xfe 165 Error -10 (bad UTF-8 string) offset=0 reason=21 166 \xff 167 Error -10 (bad UTF-8 string) offset=0 reason=21 168 169 /badutf/8 170 \xfb\x80\x80\x80\x80 171 Error -10 (bad UTF-8 string) offset=0 reason=11 172 \xfd\x80\x80\x80\x80\x80 173 Error -10 (bad UTF-8 string) offset=0 reason=12 174 \xf7\xbf\xbf\xbf 175 Error -10 (bad UTF-8 string) offset=0 reason=13 176 177 /shortutf/8 178 \P\P\xdf 179 Error -25 (short UTF-8 string) offset=0 reason=1 180 \P\P\xef 181 Error -25 (short UTF-8 string) offset=0 reason=2 182 \P\P\xef\x80 183 Error -25 (short UTF-8 string) offset=0 reason=1 184 \P\P\xf7 185 Error -25 (short UTF-8 string) offset=0 reason=3 186 \P\P\xf7\x80 187 Error -25 (short UTF-8 string) offset=0 reason=2 188 \P\P\xf7\x80\x80 189 Error -25 (short UTF-8 string) offset=0 reason=1 190 \P\P\xfb 191 Error -25 (short UTF-8 string) offset=0 reason=4 192 \P\P\xfb\x80 193 Error -25 (short UTF-8 string) offset=0 reason=3 194 \P\P\xfb\x80\x80 195 Error -25 (short UTF-8 string) offset=0 reason=2 196 \P\P\xfb\x80\x80\x80 197 Error -25 (short UTF-8 string) offset=0 reason=1 198 \P\P\xfd 199 Error -25 (short UTF-8 string) offset=0 reason=5 200 \P\P\xfd\x80 201 Error -25 (short UTF-8 string) offset=0 reason=4 202 \P\P\xfd\x80\x80 203 Error -25 (short UTF-8 string) offset=0 reason=3 204 \P\P\xfd\x80\x80\x80 205 Error -25 (short UTF-8 string) offset=0 reason=2 206 \P\P\xfd\x80\x80\x80\x80 207 Error -25 (short UTF-8 string) offset=0 reason=1 208 209 /anything/8 210 \xc0\x80 211 Error -10 (bad UTF-8 string) offset=0 reason=15 212 \xc1\x8f 213 Error -10 (bad UTF-8 string) offset=0 reason=15 214 \xe0\x9f\x80 215 Error -10 (bad UTF-8 string) offset=0 reason=16 216 \xf0\x8f\x80\x80 217 Error -10 (bad UTF-8 string) offset=0 reason=17 218 \xf8\x87\x80\x80\x80 219 Error -10 (bad UTF-8 string) offset=0 reason=18 220 \xfc\x83\x80\x80\x80\x80 221 Error -10 (bad UTF-8 string) offset=0 reason=19 222 \xfe\x80\x80\x80\x80\x80 223 Error -10 (bad UTF-8 string) offset=0 reason=21 224 \xff\x80\x80\x80\x80\x80 225 Error -10 (bad UTF-8 string) offset=0 reason=21 226 \xc3\x8f 227 No match 228 \xe0\xaf\x80 229 No match 230 \xe1\x80\x80 231 No match 232 \xf0\x9f\x80\x80 233 No match 234 \xf1\x8f\x80\x80 235 No match 236 \xf8\x88\x80\x80\x80 237 Error -10 (bad UTF-8 string) offset=0 reason=11 238 \xf9\x87\x80\x80\x80 239 Error -10 (bad UTF-8 string) offset=0 reason=11 240 \xfc\x84\x80\x80\x80\x80 241 Error -10 (bad UTF-8 string) offset=0 reason=12 242 \xfd\x83\x80\x80\x80\x80 243 Error -10 (bad UTF-8 string) offset=0 reason=12 244 \?\xf8\x88\x80\x80\x80 245 No match 246 \?\xf9\x87\x80\x80\x80 247 No match 248 \?\xfc\x84\x80\x80\x80\x80 249 No match 250 \?\xfd\x83\x80\x80\x80\x80 251 No match 252 253 /\x{100}/8DZ 254 ------------------------------------------------------------------ 255 Bra 256 \x{100} 257 Ket 258 End 259 ------------------------------------------------------------------ 260 Capturing subpattern count = 0 261 Options: utf 262 First char = \x{c4} 263 Need char = \x{80} 264 265 /\x{1000}/8DZ 266 ------------------------------------------------------------------ 267 Bra 268 \x{1000} 269 Ket 270 End 271 ------------------------------------------------------------------ 272 Capturing subpattern count = 0 273 Options: utf 274 First char = \x{e1} 275 Need char = \x{80} 276 277 /\x{10000}/8DZ 278 ------------------------------------------------------------------ 279 Bra 280 \x{10000} 281 Ket 282 End 283 ------------------------------------------------------------------ 284 Capturing subpattern count = 0 285 Options: utf 286 First char = \x{f0} 287 Need char = \x{80} 288 289 /\x{100000}/8DZ 290 ------------------------------------------------------------------ 291 Bra 292 \x{100000} 293 Ket 294 End 295 ------------------------------------------------------------------ 296 Capturing subpattern count = 0 297 Options: utf 298 First char = \x{f4} 299 Need char = \x{80} 300 301 /\x{10ffff}/8DZ 302 ------------------------------------------------------------------ 303 Bra 304 \x{10ffff} 305 Ket 306 End 307 ------------------------------------------------------------------ 308 Capturing subpattern count = 0 309 Options: utf 310 First char = \x{f4} 311 Need char = \x{bf} 312 313 /[\x{ff}]/8DZ 314 ------------------------------------------------------------------ 315 Bra 316 \x{ff} 317 Ket 318 End 319 ------------------------------------------------------------------ 320 Capturing subpattern count = 0 321 Options: utf 322 First char = \x{c3} 323 Need char = \x{bf} 324 325 /[\x{100}]/8DZ 326 ------------------------------------------------------------------ 327 Bra 328 \x{100} 329 Ket 330 End 331 ------------------------------------------------------------------ 332 Capturing subpattern count = 0 333 Options: utf 334 First char = \x{c4} 335 Need char = \x{80} 336 337 /\x80/8DZ 338 ------------------------------------------------------------------ 339 Bra 340 \x{80} 341 Ket 342 End 343 ------------------------------------------------------------------ 344 Capturing subpattern count = 0 345 Options: utf 346 First char = \x{c2} 347 Need char = \x{80} 348 349 /\xff/8DZ 350 ------------------------------------------------------------------ 351 Bra 352 \x{ff} 353 Ket 354 End 355 ------------------------------------------------------------------ 356 Capturing subpattern count = 0 357 Options: utf 358 First char = \x{c3} 359 Need char = \x{bf} 360 361 /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 362 ------------------------------------------------------------------ 363 Bra 364 \x{d55c}\x{ad6d}\x{c5b4} 365 Ket 366 End 367 ------------------------------------------------------------------ 368 Capturing subpattern count = 0 369 Options: utf 370 First char = \x{ed} 371 Need char = \x{b4} 372 \x{D55c}\x{ad6d}\x{C5B4} 373 0: \x{d55c}\x{ad6d}\x{c5b4} 374 375 /\x{65e5}\x{672c}\x{8a9e}/DZ8 376 ------------------------------------------------------------------ 377 Bra 378 \x{65e5}\x{672c}\x{8a9e} 379 Ket 380 End 381 ------------------------------------------------------------------ 382 Capturing subpattern count = 0 383 Options: utf 384 First char = \x{e6} 385 Need char = \x{9e} 386 \x{65e5}\x{672c}\x{8a9e} 387 0: \x{65e5}\x{672c}\x{8a9e} 388 389 /\x{80}/DZ8 390 ------------------------------------------------------------------ 391 Bra 392 \x{80} 393 Ket 394 End 395 ------------------------------------------------------------------ 396 Capturing subpattern count = 0 397 Options: utf 398 First char = \x{c2} 399 Need char = \x{80} 400 401 /\x{084}/DZ8 402 ------------------------------------------------------------------ 403 Bra 404 \x{84} 405 Ket 406 End 407 ------------------------------------------------------------------ 408 Capturing subpattern count = 0 409 Options: utf 410 First char = \x{c2} 411 Need char = \x{84} 412 413 /\x{104}/DZ8 414 ------------------------------------------------------------------ 415 Bra 416 \x{104} 417 Ket 418 End 419 ------------------------------------------------------------------ 420 Capturing subpattern count = 0 421 Options: utf 422 First char = \x{c4} 423 Need char = \x{84} 424 425 /\x{861}/DZ8 426 ------------------------------------------------------------------ 427 Bra 428 \x{861} 429 Ket 430 End 431 ------------------------------------------------------------------ 432 Capturing subpattern count = 0 433 Options: utf 434 First char = \x{e0} 435 Need char = \x{a1} 436 437 /\x{212ab}/DZ8 438 ------------------------------------------------------------------ 439 Bra 440 \x{212ab} 441 Ket 442 End 443 ------------------------------------------------------------------ 444 Capturing subpattern count = 0 445 Options: utf 446 First char = \x{f0} 447 Need char = \x{ab} 448 449 /-- This one is here not because it's different to Perl, but because the way 450 the captured single-byte is displayed. (In Perl it becomes a character, and you 451 can't tell the difference.) --/ 452 453 /X(\C)(.*)/8 454 X\x{1234} 455 0: X\x{1234} 456 1: \x{e1} 457 2: \x{88}\x{b4} 458 X\nabc 459 0: X\x{0a}abc 460 1: \x{0a} 461 2: abc 462 463 /-- This one is here because Perl gives out a grumbly error message (quite 464 correctly, but that messes up comparisons). --/ 465 466 /a\Cb/8 467 *** Failers 468 No match 469 a\x{100}b 470 No match 471 472 /[^ab\xC0-\xF0]/8SDZ 473 ------------------------------------------------------------------ 474 Bra 475 [\x00-`c-\xbf\xf1-\xff] (neg) 476 Ket 477 End 478 ------------------------------------------------------------------ 479 Capturing subpattern count = 0 480 Options: utf 481 No first char 482 No need char 483 Subject length lower bound = 1 484 Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 485 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 486 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 487 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 488 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 489 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 490 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 491 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 492 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 493 \xfe \xff 494 \x{f1} 495 0: \x{f1} 496 \x{bf} 497 0: \x{bf} 498 \x{100} 499 0: \x{100} 500 \x{1000} 501 0: \x{1000} 502 *** Failers 503 0: * 504 \x{c0} 505 No match 506 \x{f0} 507 No match 508 509 /{3,4}/8SDZ 510 ------------------------------------------------------------------ 511 Bra 512 \x{100}{3} 513 \x{100}?+ 514 Ket 515 End 516 ------------------------------------------------------------------ 517 Capturing subpattern count = 0 518 Options: utf 519 First char = \x{c4} 520 Need char = \x{80} 521 Subject length lower bound = 3 522 No starting char list 523 \x{100}\x{100}\x{100}\x{100\x{100} 524 0: \x{100}\x{100}\x{100} 525 526 /(\x{100}+|x)/8SDZ 527 ------------------------------------------------------------------ 528 Bra 529 CBra 1 530 \x{100}++ 531 Alt 532 x 533 Ket 534 Ket 535 End 536 ------------------------------------------------------------------ 537 Capturing subpattern count = 1 538 Options: utf 539 No first char 540 No need char 541 Subject length lower bound = 1 542 Starting chars: x \xc4 543 544 /(\x{100}*a|x)/8SDZ 545 ------------------------------------------------------------------ 546 Bra 547 CBra 1 548 \x{100}*+ 549 a 550 Alt 551 x 552 Ket 553 Ket 554 End 555 ------------------------------------------------------------------ 556 Capturing subpattern count = 1 557 Options: utf 558 No first char 559 No need char 560 Subject length lower bound = 1 561 Starting chars: a x \xc4 562 563 /(\x{100}{0,2}a|x)/8SDZ 564 ------------------------------------------------------------------ 565 Bra 566 CBra 1 567 \x{100}{0,2}+ 568 a 569 Alt 570 x 571 Ket 572 Ket 573 End 574 ------------------------------------------------------------------ 575 Capturing subpattern count = 1 576 Options: utf 577 No first char 578 No need char 579 Subject length lower bound = 1 580 Starting chars: a x \xc4 581 582 /(\x{100}{1,2}a|x)/8SDZ 583 ------------------------------------------------------------------ 584 Bra 585 CBra 1 586 \x{100} 587 \x{100}{0,1}+ 588 a 589 Alt 590 x 591 Ket 592 Ket 593 End 594 ------------------------------------------------------------------ 595 Capturing subpattern count = 1 596 Options: utf 597 No first char 598 No need char 599 Subject length lower bound = 1 600 Starting chars: x \xc4 601 602 /\x{100}/8DZ 603 ------------------------------------------------------------------ 604 Bra 605 \x{100} 606 Ket 607 End 608 ------------------------------------------------------------------ 609 Capturing subpattern count = 0 610 Options: utf 611 First char = \x{c4} 612 Need char = \x{80} 613 614 /a\x{100}\x{101}*/8DZ 615 ------------------------------------------------------------------ 616 Bra 617 a\x{100} 618 \x{101}*+ 619 Ket 620 End 621 ------------------------------------------------------------------ 622 Capturing subpattern count = 0 623 Options: utf 624 First char = 'a' 625 Need char = \x{80} 626 627 /a\x{100}\x{101}+/8DZ 628 ------------------------------------------------------------------ 629 Bra 630 a\x{100} 631 \x{101}++ 632 Ket 633 End 634 ------------------------------------------------------------------ 635 Capturing subpattern count = 0 636 Options: utf 637 First char = 'a' 638 Need char = \x{81} 639 640 /[^\x{c4}]/DZ 641 ------------------------------------------------------------------ 642 Bra 643 [^\x{c4}] 644 Ket 645 End 646 ------------------------------------------------------------------ 647 Capturing subpattern count = 0 648 No options 649 No first char 650 No need char 651 652 /[\x{100}]/8DZ 653 ------------------------------------------------------------------ 654 Bra 655 \x{100} 656 Ket 657 End 658 ------------------------------------------------------------------ 659 Capturing subpattern count = 0 660 Options: utf 661 First char = \x{c4} 662 Need char = \x{80} 663 \x{100} 664 0: \x{100} 665 Z\x{100} 666 0: \x{100} 667 \x{100}Z 668 0: \x{100} 669 *** Failers 670 No match 671 672 /[\xff]/DZ8 673 ------------------------------------------------------------------ 674 Bra 675 \x{ff} 676 Ket 677 End 678 ------------------------------------------------------------------ 679 Capturing subpattern count = 0 680 Options: utf 681 First char = \x{c3} 682 Need char = \x{bf} 683 >\x{ff}< 684 0: \x{ff} 685 686 /[^\xff]/8DZ 687 ------------------------------------------------------------------ 688 Bra 689 [^\x{ff}] 690 Ket 691 End 692 ------------------------------------------------------------------ 693 Capturing subpattern count = 0 694 Options: utf 695 No first char 696 No need char 697 698 /\x{100}abc(xyz(?1))/8DZ 699 ------------------------------------------------------------------ 700 Bra 701 \x{100}abc 702 CBra 1 703 xyz 704 Recurse 705 Ket 706 Ket 707 End 708 ------------------------------------------------------------------ 709 Capturing subpattern count = 1 710 Options: utf 711 First char = \x{c4} 712 Need char = 'z' 713 714 /a\x{1234}b/P8 715 a\x{1234}b 716 0: a\x{1234}b 717 718 /\777/8I 719 Capturing subpattern count = 0 720 Options: utf 721 First char = \x{c7} 722 Need char = \x{bf} 723 \x{1ff} 724 0: \x{1ff} 725 \777 726 0: \x{1ff} 727 728 /\x{100}+\x{200}/8DZ 729 ------------------------------------------------------------------ 730 Bra 731 \x{100}++ 732 \x{200} 733 Ket 734 End 735 ------------------------------------------------------------------ 736 Capturing subpattern count = 0 737 Options: utf 738 First char = \x{c4} 739 Need char = \x{80} 740 741 /\x{100}+X/8DZ 742 ------------------------------------------------------------------ 743 Bra 744 \x{100}++ 745 X 746 Ket 747 End 748 ------------------------------------------------------------------ 749 Capturing subpattern count = 0 750 Options: utf 751 First char = \x{c4} 752 Need char = 'X' 753 754 /^[\Q\E-\Q\E/BZ8 755 Failed: missing terminating ] for character class at offset 15 756 757 /-- This tests the stricter UTF-8 check according to RFC 3629. --/ 758 759 /X/8 760 \x{d800} 761 Error -10 (bad UTF-8 string) offset=0 reason=14 762 \x{d800}\? 763 No match 764 \x{da00} 765 Error -10 (bad UTF-8 string) offset=0 reason=14 766 \x{da00}\? 767 No match 768 \x{dfff} 769 Error -10 (bad UTF-8 string) offset=0 reason=14 770 \x{dfff}\? 771 No match 772 \x{110000} 773 Error -10 (bad UTF-8 string) offset=0 reason=13 774 \x{110000}\? 775 No match 776 \x{2000000} 777 Error -10 (bad UTF-8 string) offset=0 reason=11 778 \x{2000000}\? 779 No match 780 \x{7fffffff} 781 Error -10 (bad UTF-8 string) offset=0 reason=12 782 \x{7fffffff}\? 783 No match 784 785 /(*UTF8)\x{1234}/ 786 abcd\x{1234}pqr 787 0: \x{1234} 788 789 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I 790 Capturing subpattern count = 0 791 Options: bsr_unicode utf 792 Forced newline sequence: CRLF 793 First char = 'a' 794 Need char = 'b' 795 796 /\h/SI8 797 Capturing subpattern count = 0 798 Options: utf 799 No first char 800 No need char 801 Subject length lower bound = 1 802 Starting chars: \x09 \x20 \xc2 \xe1 \xe2 \xe3 803 ABC\x{09} 804 0: \x{09} 805 ABC\x{20} 806 0: 807 ABC\x{a0} 808 0: \x{a0} 809 ABC\x{1680} 810 0: \x{1680} 811 ABC\x{180e} 812 0: \x{180e} 813 ABC\x{2000} 814 0: \x{2000} 815 ABC\x{202f} 816 0: \x{202f} 817 ABC\x{205f} 818 0: \x{205f} 819 ABC\x{3000} 820 0: \x{3000} 821 822 /\v/SI8 823 Capturing subpattern count = 0 824 Options: utf 825 No first char 826 No need char 827 Subject length lower bound = 1 828 Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 829 ABC\x{0a} 830 0: \x{0a} 831 ABC\x{0b} 832 0: \x{0b} 833 ABC\x{0c} 834 0: \x{0c} 835 ABC\x{0d} 836 0: \x{0d} 837 ABC\x{85} 838 0: \x{85} 839 ABC\x{2028} 840 0: \x{2028} 841 842 /\h*A/SI8 843 Capturing subpattern count = 0 844 Options: utf 845 No first char 846 Need char = 'A' 847 Subject length lower bound = 1 848 Starting chars: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 849 CDBABC 850 0: A 851 852 /\v+A/SI8 853 Capturing subpattern count = 0 854 Options: utf 855 No first char 856 Need char = 'A' 857 Subject length lower bound = 2 858 Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 859 860 /\s?xxx\s/8SI 861 Capturing subpattern count = 0 862 Options: utf 863 No first char 864 Need char = 'x' 865 Subject length lower bound = 4 866 Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 x 867 868 /\sxxx\s/I8ST1 869 Capturing subpattern count = 0 870 Options: utf 871 No first char 872 Need char = 'x' 873 Subject length lower bound = 5 874 Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 875 AB\x{85}xxx\x{a0}XYZ 876 0: \x{85}xxx\x{a0} 877 AB\x{a0}xxx\x{85}XYZ 878 0: \x{a0}xxx\x{85} 879 880 /\S \S/I8ST1 881 Capturing subpattern count = 0 882 Options: utf 883 No first char 884 Need char = ' ' 885 Subject length lower bound = 3 886 Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 887 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 888 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 889 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 890 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 891 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 892 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 893 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 894 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 895 \x{a2} \x{84} 896 0: \x{a2} \x{84} 897 A Z 898 0: A Z 899 900 /a+/8 901 a\x{123}aa\>1 902 0: aa 903 a\x{123}aa\>2 904 Error -11 (bad UTF-8 offset) 905 a\x{123}aa\>3 906 0: aa 907 a\x{123}aa\>4 908 0: a 909 a\x{123}aa\>5 910 No match 911 a\x{123}aa\>6 912 Error -24 (bad offset value) 913 914 /\x{1234}+/iS8I 915 Capturing subpattern count = 0 916 Options: caseless utf 917 No first char 918 No need char 919 Subject length lower bound = 1 920 Starting chars: \xe1 921 922 /\x{1234}+?/iS8I 923 Capturing subpattern count = 0 924 Options: caseless utf 925 No first char 926 No need char 927 Subject length lower bound = 1 928 Starting chars: \xe1 929 930 /\x{1234}++/iS8I 931 Capturing subpattern count = 0 932 Options: caseless utf 933 No first char 934 No need char 935 Subject length lower bound = 1 936 Starting chars: \xe1 937 938 /\x{1234}{2}/iS8I 939 Capturing subpattern count = 0 940 Options: caseless utf 941 No first char 942 No need char 943 Subject length lower bound = 2 944 Starting chars: \xe1 945 946 /[^\x{c4}]/8DZ 947 ------------------------------------------------------------------ 948 Bra 949 [^\x{c4}] 950 Ket 951 End 952 ------------------------------------------------------------------ 953 Capturing subpattern count = 0 954 Options: utf 955 No first char 956 No need char 957 958 /X+\x{200}/8DZ 959 ------------------------------------------------------------------ 960 Bra 961 X++ 962 \x{200} 963 Ket 964 End 965 ------------------------------------------------------------------ 966 Capturing subpattern count = 0 967 Options: utf 968 First char = 'X' 969 Need char = \x{80} 970 971 /\R/SI8 972 Capturing subpattern count = 0 973 Options: utf 974 No first char 975 No need char 976 Subject length lower bound = 1 977 Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 978 979 /\777/8DZ 980 ------------------------------------------------------------------ 981 Bra 982 \x{1ff} 983 Ket 984 End 985 ------------------------------------------------------------------ 986 Capturing subpattern count = 0 987 Options: utf 988 First char = \x{c7} 989 Need char = \x{bf} 990 991 /\w+\x{C4}/8BZ 992 ------------------------------------------------------------------ 993 Bra 994 \w++ 995 \x{c4} 996 Ket 997 End 998 ------------------------------------------------------------------ 999 a\x{C4}\x{C4} 1000 0: a\x{c4} 1001 1002 /\w+\x{C4}/8BZT1 1003 ------------------------------------------------------------------ 1004 Bra 1005 \w+ 1006 \x{c4} 1007 Ket 1008 End 1009 ------------------------------------------------------------------ 1010 a\x{C4}\x{C4} 1011 0: a\x{c4}\x{c4} 1012 1013 /\W+\x{C4}/8BZ 1014 ------------------------------------------------------------------ 1015 Bra 1016 \W+ 1017 \x{c4} 1018 Ket 1019 End 1020 ------------------------------------------------------------------ 1021 !\x{C4} 1022 0: !\x{c4} 1023 1024 /\W+\x{C4}/8BZT1 1025 ------------------------------------------------------------------ 1026 Bra 1027 \W++ 1028 \x{c4} 1029 Ket 1030 End 1031 ------------------------------------------------------------------ 1032 !\x{C4} 1033 0: !\x{c4} 1034 1035 /\W+\x{A1}/8BZ 1036 ------------------------------------------------------------------ 1037 Bra 1038 \W+ 1039 \x{a1} 1040 Ket 1041 End 1042 ------------------------------------------------------------------ 1043 !\x{A1} 1044 0: !\x{a1} 1045 1046 /\W+\x{A1}/8BZT1 1047 ------------------------------------------------------------------ 1048 Bra 1049 \W+ 1050 \x{a1} 1051 Ket 1052 End 1053 ------------------------------------------------------------------ 1054 !\x{A1} 1055 0: !\x{a1} 1056 1057 /X\s+\x{A0}/8BZ 1058 ------------------------------------------------------------------ 1059 Bra 1060 X 1061 \s++ 1062 \x{a0} 1063 Ket 1064 End 1065 ------------------------------------------------------------------ 1066 X\x20\x{A0}\x{A0} 1067 0: X \x{a0} 1068 1069 /X\s+\x{A0}/8BZT1 1070 ------------------------------------------------------------------ 1071 Bra 1072 X 1073 \s+ 1074 \x{a0} 1075 Ket 1076 End 1077 ------------------------------------------------------------------ 1078 X\x20\x{A0}\x{A0} 1079 0: X \x{a0}\x{a0} 1080 1081 /\S+\x{A0}/8BZ 1082 ------------------------------------------------------------------ 1083 Bra 1084 \S+ 1085 \x{a0} 1086 Ket 1087 End 1088 ------------------------------------------------------------------ 1089 X\x{A0}\x{A0} 1090 0: X\x{a0}\x{a0} 1091 1092 /\S+\x{A0}/8BZT1 1093 ------------------------------------------------------------------ 1094 Bra 1095 \S++ 1096 \x{a0} 1097 Ket 1098 End 1099 ------------------------------------------------------------------ 1100 X\x{A0}\x{A0} 1101 0: X\x{a0} 1102 1103 /\x{a0}+\s!/8BZ 1104 ------------------------------------------------------------------ 1105 Bra 1106 \x{a0}++ 1107 \s 1108 ! 1109 Ket 1110 End 1111 ------------------------------------------------------------------ 1112 \x{a0}\x20! 1113 0: \x{a0} ! 1114 1115 /\x{a0}+\s!/8BZT1 1116 ------------------------------------------------------------------ 1117 Bra 1118 \x{a0}+ 1119 \s 1120 ! 1121 Ket 1122 End 1123 ------------------------------------------------------------------ 1124 \x{a0}\x20! 1125 0: \x{a0} ! 1126 1127 /A/8 1128 \x{ff000041} 1129 ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8 1130 \x{7f000041} 1131 Error -10 (bad UTF-8 string) offset=0 reason=12 1132 1133 /(*UTF8)abc/9 1134 Failed: setting UTF is disabled by the application at offset 0 1135 1136 /abc/89 1137 Failed: setting UTF is disabled by the application at offset 0 1138 1139 /-- End of testinput15 --/ 1140