Home | History | Annotate | Download | only in testdata
      1 /-- This set of tests is for UTF-8 support but not Unicode property support,
      2     and is relevant only to the 8-bit library. --/
      3 
      4 < forbid W
      5 
      6 /X(\C{3})/8
      7     X\x{1234}
      8  0: X\x{1234}
      9  1: \x{1234}
     10 
     11 /X(\C{4})/8
     12     X\x{1234}YZ
     13  0: X\x{1234}Y
     14  1: \x{1234}Y
     15     
     16 /X\C*/8
     17     XYZabcdce
     18  0: XYZabcdce
     19     
     20 /X\C*?/8
     21     XYZabcde
     22  0: X
     23     
     24 /X\C{3,5}/8
     25     Xabcdefg   
     26  0: Xabcde
     27     X\x{1234} 
     28  0: X\x{1234}
     29     X\x{1234}YZ
     30  0: X\x{1234}YZ
     31     X\x{1234}\x{512}  
     32  0: X\x{1234}\x{512}
     33     X\x{1234}\x{512}YZ
     34  0: X\x{1234}\x{512}
     35 
     36 /X\C{3,5}?/8
     37     Xabcdefg   
     38  0: Xabc
     39     X\x{1234} 
     40  0: X\x{1234}
     41     X\x{1234}YZ
     42  0: X\x{1234}
     43     X\x{1234}\x{512}  
     44  0: X\x{1234}
     45 
     46 /a\Cb/8
     47     aXb
     48  0: aXb
     49     a\nb
     50  0: a\x{0a}b
     51     
     52 /a\C\Cb/8 
     53     a\x{100}b 
     54  0: a\x{100}b
     55 
     56 /ab\Cde/8
     57     abXde
     58  0: abXde
     59 
     60 /a\C\Cb/8 
     61     a\x{100}b
     62  0: a\x{100}b
     63     ** Failers 
     64 No match
     65     a\x{12257}b
     66 No match
     67 
     68 /[]/8
     69 Failed: invalid UTF-8 string at offset 1
     70 
     71 //8
     72 Failed: invalid UTF-8 string at offset 0
     73 
     74 /xxx/8
     75 Failed: invalid UTF-8 string at offset 0
     76 
     77 /xxx/8?DZSSO
     78 ------------------------------------------------------------------
     79         Bra
     80         \X{c0}\X{c0}\X{c0}xxx
     81         Ket
     82         End
     83 ------------------------------------------------------------------
     84 Capturing subpattern count = 0
     85 Options: no_auto_possessify utf no_utf_check
     86 First char = \x{c3}
     87 Need char = 'x'
     88 
     89 /badutf/8
     90     \xdf
     91 Error -10 (bad UTF-8 string) offset=0 reason=1
     92     \xef
     93 Error -10 (bad UTF-8 string) offset=0 reason=2
     94     \xef\x80
     95 Error -10 (bad UTF-8 string) offset=0 reason=1
     96     \xf7
     97 Error -10 (bad UTF-8 string) offset=0 reason=3
     98     \xf7\x80
     99 Error -10 (bad UTF-8 string) offset=0 reason=2
    100     \xf7\x80\x80
    101 Error -10 (bad UTF-8 string) offset=0 reason=1
    102     \xfb
    103 Error -10 (bad UTF-8 string) offset=0 reason=4
    104     \xfb\x80
    105 Error -10 (bad UTF-8 string) offset=0 reason=3
    106     \xfb\x80\x80
    107 Error -10 (bad UTF-8 string) offset=0 reason=2
    108     \xfb\x80\x80\x80
    109 Error -10 (bad UTF-8 string) offset=0 reason=1
    110     \xfd
    111 Error -10 (bad UTF-8 string) offset=0 reason=5
    112     \xfd\x80
    113 Error -10 (bad UTF-8 string) offset=0 reason=4
    114     \xfd\x80\x80
    115 Error -10 (bad UTF-8 string) offset=0 reason=3
    116     \xfd\x80\x80\x80
    117 Error -10 (bad UTF-8 string) offset=0 reason=2
    118     \xfd\x80\x80\x80\x80
    119 Error -10 (bad UTF-8 string) offset=0 reason=1
    120     \xdf\x7f
    121 Error -10 (bad UTF-8 string) offset=0 reason=6
    122     \xef\x7f\x80
    123 Error -10 (bad UTF-8 string) offset=0 reason=6
    124     \xef\x80\x7f
    125 Error -10 (bad UTF-8 string) offset=0 reason=7
    126     \xf7\x7f\x80\x80
    127 Error -10 (bad UTF-8 string) offset=0 reason=6
    128     \xf7\x80\x7f\x80
    129 Error -10 (bad UTF-8 string) offset=0 reason=7
    130     \xf7\x80\x80\x7f
    131 Error -10 (bad UTF-8 string) offset=0 reason=8
    132     \xfb\x7f\x80\x80\x80
    133 Error -10 (bad UTF-8 string) offset=0 reason=6
    134     \xfb\x80\x7f\x80\x80
    135 Error -10 (bad UTF-8 string) offset=0 reason=7
    136     \xfb\x80\x80\x7f\x80
    137 Error -10 (bad UTF-8 string) offset=0 reason=8
    138     \xfb\x80\x80\x80\x7f
    139 Error -10 (bad UTF-8 string) offset=0 reason=9
    140     \xfd\x7f\x80\x80\x80\x80
    141 Error -10 (bad UTF-8 string) offset=0 reason=6
    142     \xfd\x80\x7f\x80\x80\x80
    143 Error -10 (bad UTF-8 string) offset=0 reason=7
    144     \xfd\x80\x80\x7f\x80\x80
    145 Error -10 (bad UTF-8 string) offset=0 reason=8
    146     \xfd\x80\x80\x80\x7f\x80
    147 Error -10 (bad UTF-8 string) offset=0 reason=9
    148     \xfd\x80\x80\x80\x80\x7f
    149 Error -10 (bad UTF-8 string) offset=0 reason=10
    150     \xed\xa0\x80
    151 Error -10 (bad UTF-8 string) offset=0 reason=14
    152     \xc0\x8f
    153 Error -10 (bad UTF-8 string) offset=0 reason=15
    154     \xe0\x80\x8f
    155 Error -10 (bad UTF-8 string) offset=0 reason=16
    156     \xf0\x80\x80\x8f
    157 Error -10 (bad UTF-8 string) offset=0 reason=17
    158     \xf8\x80\x80\x80\x8f
    159 Error -10 (bad UTF-8 string) offset=0 reason=18
    160     \xfc\x80\x80\x80\x80\x8f
    161 Error -10 (bad UTF-8 string) offset=0 reason=19
    162     \x80
    163 Error -10 (bad UTF-8 string) offset=0 reason=20
    164     \xfe
    165 Error -10 (bad UTF-8 string) offset=0 reason=21
    166     \xff
    167 Error -10 (bad UTF-8 string) offset=0 reason=21
    168 
    169 /badutf/8
    170     \xfb\x80\x80\x80\x80
    171 Error -10 (bad UTF-8 string) offset=0 reason=11
    172     \xfd\x80\x80\x80\x80\x80
    173 Error -10 (bad UTF-8 string) offset=0 reason=12
    174     \xf7\xbf\xbf\xbf
    175 Error -10 (bad UTF-8 string) offset=0 reason=13
    176 
    177 /shortutf/8
    178     \P\P\xdf
    179 Error -25 (short UTF-8 string) offset=0 reason=1
    180     \P\P\xef
    181 Error -25 (short UTF-8 string) offset=0 reason=2
    182     \P\P\xef\x80
    183 Error -25 (short UTF-8 string) offset=0 reason=1
    184     \P\P\xf7
    185 Error -25 (short UTF-8 string) offset=0 reason=3
    186     \P\P\xf7\x80
    187 Error -25 (short UTF-8 string) offset=0 reason=2
    188     \P\P\xf7\x80\x80
    189 Error -25 (short UTF-8 string) offset=0 reason=1
    190     \P\P\xfb
    191 Error -25 (short UTF-8 string) offset=0 reason=4
    192     \P\P\xfb\x80
    193 Error -25 (short UTF-8 string) offset=0 reason=3
    194     \P\P\xfb\x80\x80
    195 Error -25 (short UTF-8 string) offset=0 reason=2
    196     \P\P\xfb\x80\x80\x80
    197 Error -25 (short UTF-8 string) offset=0 reason=1
    198     \P\P\xfd
    199 Error -25 (short UTF-8 string) offset=0 reason=5
    200     \P\P\xfd\x80
    201 Error -25 (short UTF-8 string) offset=0 reason=4
    202     \P\P\xfd\x80\x80
    203 Error -25 (short UTF-8 string) offset=0 reason=3
    204     \P\P\xfd\x80\x80\x80
    205 Error -25 (short UTF-8 string) offset=0 reason=2
    206     \P\P\xfd\x80\x80\x80\x80
    207 Error -25 (short UTF-8 string) offset=0 reason=1
    208 
    209 /anything/8
    210     \xc0\x80
    211 Error -10 (bad UTF-8 string) offset=0 reason=15
    212     \xc1\x8f 
    213 Error -10 (bad UTF-8 string) offset=0 reason=15
    214     \xe0\x9f\x80
    215 Error -10 (bad UTF-8 string) offset=0 reason=16
    216     \xf0\x8f\x80\x80 
    217 Error -10 (bad UTF-8 string) offset=0 reason=17
    218     \xf8\x87\x80\x80\x80  
    219 Error -10 (bad UTF-8 string) offset=0 reason=18
    220     \xfc\x83\x80\x80\x80\x80
    221 Error -10 (bad UTF-8 string) offset=0 reason=19
    222     \xfe\x80\x80\x80\x80\x80  
    223 Error -10 (bad UTF-8 string) offset=0 reason=21
    224     \xff\x80\x80\x80\x80\x80  
    225 Error -10 (bad UTF-8 string) offset=0 reason=21
    226     \xc3\x8f
    227 No match
    228     \xe0\xaf\x80
    229 No match
    230     \xe1\x80\x80
    231 No match
    232     \xf0\x9f\x80\x80 
    233 No match
    234     \xf1\x8f\x80\x80 
    235 No match
    236     \xf8\x88\x80\x80\x80  
    237 Error -10 (bad UTF-8 string) offset=0 reason=11
    238     \xf9\x87\x80\x80\x80  
    239 Error -10 (bad UTF-8 string) offset=0 reason=11
    240     \xfc\x84\x80\x80\x80\x80
    241 Error -10 (bad UTF-8 string) offset=0 reason=12
    242     \xfd\x83\x80\x80\x80\x80
    243 Error -10 (bad UTF-8 string) offset=0 reason=12
    244     \?\xf8\x88\x80\x80\x80  
    245 No match
    246     \?\xf9\x87\x80\x80\x80  
    247 No match
    248     \?\xfc\x84\x80\x80\x80\x80
    249 No match
    250     \?\xfd\x83\x80\x80\x80\x80
    251 No match
    252 
    253 /\x{100}/8DZ
    254 ------------------------------------------------------------------
    255         Bra
    256         \x{100}
    257         Ket
    258         End
    259 ------------------------------------------------------------------
    260 Capturing subpattern count = 0
    261 Options: utf
    262 First char = \x{c4}
    263 Need char = \x{80}
    264 
    265 /\x{1000}/8DZ
    266 ------------------------------------------------------------------
    267         Bra
    268         \x{1000}
    269         Ket
    270         End
    271 ------------------------------------------------------------------
    272 Capturing subpattern count = 0
    273 Options: utf
    274 First char = \x{e1}
    275 Need char = \x{80}
    276 
    277 /\x{10000}/8DZ
    278 ------------------------------------------------------------------
    279         Bra
    280         \x{10000}
    281         Ket
    282         End
    283 ------------------------------------------------------------------
    284 Capturing subpattern count = 0
    285 Options: utf
    286 First char = \x{f0}
    287 Need char = \x{80}
    288 
    289 /\x{100000}/8DZ
    290 ------------------------------------------------------------------
    291         Bra
    292         \x{100000}
    293         Ket
    294         End
    295 ------------------------------------------------------------------
    296 Capturing subpattern count = 0
    297 Options: utf
    298 First char = \x{f4}
    299 Need char = \x{80}
    300 
    301 /\x{10ffff}/8DZ
    302 ------------------------------------------------------------------
    303         Bra
    304         \x{10ffff}
    305         Ket
    306         End
    307 ------------------------------------------------------------------
    308 Capturing subpattern count = 0
    309 Options: utf
    310 First char = \x{f4}
    311 Need char = \x{bf}
    312 
    313 /[\x{ff}]/8DZ
    314 ------------------------------------------------------------------
    315         Bra
    316         \x{ff}
    317         Ket
    318         End
    319 ------------------------------------------------------------------
    320 Capturing subpattern count = 0
    321 Options: utf
    322 First char = \x{c3}
    323 Need char = \x{bf}
    324 
    325 /[\x{100}]/8DZ
    326 ------------------------------------------------------------------
    327         Bra
    328         \x{100}
    329         Ket
    330         End
    331 ------------------------------------------------------------------
    332 Capturing subpattern count = 0
    333 Options: utf
    334 First char = \x{c4}
    335 Need char = \x{80}
    336 
    337 /\x80/8DZ
    338 ------------------------------------------------------------------
    339         Bra
    340         \x{80}
    341         Ket
    342         End
    343 ------------------------------------------------------------------
    344 Capturing subpattern count = 0
    345 Options: utf
    346 First char = \x{c2}
    347 Need char = \x{80}
    348 
    349 /\xff/8DZ
    350 ------------------------------------------------------------------
    351         Bra
    352         \x{ff}
    353         Ket
    354         End
    355 ------------------------------------------------------------------
    356 Capturing subpattern count = 0
    357 Options: utf
    358 First char = \x{c3}
    359 Need char = \x{bf}
    360 
    361 /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
    362 ------------------------------------------------------------------
    363         Bra
    364         \x{d55c}\x{ad6d}\x{c5b4}
    365         Ket
    366         End
    367 ------------------------------------------------------------------
    368 Capturing subpattern count = 0
    369 Options: utf
    370 First char = \x{ed}
    371 Need char = \x{b4}
    372     \x{D55c}\x{ad6d}\x{C5B4} 
    373  0: \x{d55c}\x{ad6d}\x{c5b4}
    374 
    375 /\x{65e5}\x{672c}\x{8a9e}/DZ8
    376 ------------------------------------------------------------------
    377         Bra
    378         \x{65e5}\x{672c}\x{8a9e}
    379         Ket
    380         End
    381 ------------------------------------------------------------------
    382 Capturing subpattern count = 0
    383 Options: utf
    384 First char = \x{e6}
    385 Need char = \x{9e}
    386     \x{65e5}\x{672c}\x{8a9e}
    387  0: \x{65e5}\x{672c}\x{8a9e}
    388 
    389 /\x{80}/DZ8
    390 ------------------------------------------------------------------
    391         Bra
    392         \x{80}
    393         Ket
    394         End
    395 ------------------------------------------------------------------
    396 Capturing subpattern count = 0
    397 Options: utf
    398 First char = \x{c2}
    399 Need char = \x{80}
    400 
    401 /\x{084}/DZ8
    402 ------------------------------------------------------------------
    403         Bra
    404         \x{84}
    405         Ket
    406         End
    407 ------------------------------------------------------------------
    408 Capturing subpattern count = 0
    409 Options: utf
    410 First char = \x{c2}
    411 Need char = \x{84}
    412 
    413 /\x{104}/DZ8
    414 ------------------------------------------------------------------
    415         Bra
    416         \x{104}
    417         Ket
    418         End
    419 ------------------------------------------------------------------
    420 Capturing subpattern count = 0
    421 Options: utf
    422 First char = \x{c4}
    423 Need char = \x{84}
    424 
    425 /\x{861}/DZ8
    426 ------------------------------------------------------------------
    427         Bra
    428         \x{861}
    429         Ket
    430         End
    431 ------------------------------------------------------------------
    432 Capturing subpattern count = 0
    433 Options: utf
    434 First char = \x{e0}
    435 Need char = \x{a1}
    436 
    437 /\x{212ab}/DZ8
    438 ------------------------------------------------------------------
    439         Bra
    440         \x{212ab}
    441         Ket
    442         End
    443 ------------------------------------------------------------------
    444 Capturing subpattern count = 0
    445 Options: utf
    446 First char = \x{f0}
    447 Need char = \x{ab}
    448 
    449 /-- This one is here not because it's different to Perl, but because the way
    450 the captured single-byte is displayed. (In Perl it becomes a character, and you
    451 can't tell the difference.) --/
    452     
    453 /X(\C)(.*)/8
    454     X\x{1234}
    455  0: X\x{1234}
    456  1: \x{e1}
    457  2: \x{88}\x{b4}
    458     X\nabc 
    459  0: X\x{0a}abc
    460  1: \x{0a}
    461  2: abc
    462 
    463 /-- This one is here because Perl gives out a grumbly error message (quite 
    464 correctly, but that messes up comparisons). --/
    465     
    466 /a\Cb/8
    467     *** Failers 
    468 No match
    469     a\x{100}b 
    470 No match
    471     
    472 /[^ab\xC0-\xF0]/8SDZ
    473 ------------------------------------------------------------------
    474         Bra
    475         [\x00-`c-\xbf\xf1-\xff] (neg)
    476         Ket
    477         End
    478 ------------------------------------------------------------------
    479 Capturing subpattern count = 0
    480 Options: utf
    481 No first char
    482 No need char
    483 Subject length lower bound = 1
    484 Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
    485   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
    486   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
    487   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
    488   Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 
    489   \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 
    490   \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 
    491   \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 
    492   \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 
    493   \xfe \xff 
    494     \x{f1}
    495  0: \x{f1}
    496     \x{bf}
    497  0: \x{bf}
    498     \x{100}
    499  0: \x{100}
    500     \x{1000}   
    501  0: \x{1000}
    502     *** Failers
    503  0: *
    504     \x{c0} 
    505 No match
    506     \x{f0} 
    507 No match
    508 
     509 /Ā{3,4}/8SDZ
    510 ------------------------------------------------------------------
    511         Bra
    512         \x{100}{3}
    513         \x{100}?+
    514         Ket
    515         End
    516 ------------------------------------------------------------------
    517 Capturing subpattern count = 0
    518 Options: utf
    519 First char = \x{c4}
    520 Need char = \x{80}
    521 Subject length lower bound = 3
    522 No starting char list
    523   \x{100}\x{100}\x{100}\x{100\x{100}
    524  0: \x{100}\x{100}\x{100}
    525 
    526 /(\x{100}+|x)/8SDZ
    527 ------------------------------------------------------------------
    528         Bra
    529         CBra 1
    530         \x{100}++
    531         Alt
    532         x
    533         Ket
    534         Ket
    535         End
    536 ------------------------------------------------------------------
    537 Capturing subpattern count = 1
    538 Options: utf
    539 No first char
    540 No need char
    541 Subject length lower bound = 1
    542 Starting chars: x \xc4 
    543 
    544 /(\x{100}*a|x)/8SDZ
    545 ------------------------------------------------------------------
    546         Bra
    547         CBra 1
    548         \x{100}*+
    549         a
    550         Alt
    551         x
    552         Ket
    553         Ket
    554         End
    555 ------------------------------------------------------------------
    556 Capturing subpattern count = 1
    557 Options: utf
    558 No first char
    559 No need char
    560 Subject length lower bound = 1
    561 Starting chars: a x \xc4 
    562 
    563 /(\x{100}{0,2}a|x)/8SDZ
    564 ------------------------------------------------------------------
    565         Bra
    566         CBra 1
    567         \x{100}{0,2}+
    568         a
    569         Alt
    570         x
    571         Ket
    572         Ket
    573         End
    574 ------------------------------------------------------------------
    575 Capturing subpattern count = 1
    576 Options: utf
    577 No first char
    578 No need char
    579 Subject length lower bound = 1
    580 Starting chars: a x \xc4 
    581 
    582 /(\x{100}{1,2}a|x)/8SDZ
    583 ------------------------------------------------------------------
    584         Bra
    585         CBra 1
    586         \x{100}
    587         \x{100}{0,1}+
    588         a
    589         Alt
    590         x
    591         Ket
    592         Ket
    593         End
    594 ------------------------------------------------------------------
    595 Capturing subpattern count = 1
    596 Options: utf
    597 No first char
    598 No need char
    599 Subject length lower bound = 1
    600 Starting chars: x \xc4 
    601 
    602 /\x{100}/8DZ
    603 ------------------------------------------------------------------
    604         Bra
    605         \x{100}
    606         Ket
    607         End
    608 ------------------------------------------------------------------
    609 Capturing subpattern count = 0
    610 Options: utf
    611 First char = \x{c4}
    612 Need char = \x{80}
    613 
    614 /a\x{100}\x{101}*/8DZ
    615 ------------------------------------------------------------------
    616         Bra
    617         a\x{100}
    618         \x{101}*+
    619         Ket
    620         End
    621 ------------------------------------------------------------------
    622 Capturing subpattern count = 0
    623 Options: utf
    624 First char = 'a'
    625 Need char = \x{80}
    626 
    627 /a\x{100}\x{101}+/8DZ
    628 ------------------------------------------------------------------
    629         Bra
    630         a\x{100}
    631         \x{101}++
    632         Ket
    633         End
    634 ------------------------------------------------------------------
    635 Capturing subpattern count = 0
    636 Options: utf
    637 First char = 'a'
    638 Need char = \x{81}
    639 
    640 /[^\x{c4}]/DZ
    641 ------------------------------------------------------------------
    642         Bra
    643         [^\x{c4}]
    644         Ket
    645         End
    646 ------------------------------------------------------------------
    647 Capturing subpattern count = 0
    648 No options
    649 No first char
    650 No need char
    651 
    652 /[\x{100}]/8DZ
    653 ------------------------------------------------------------------
    654         Bra
    655         \x{100}
    656         Ket
    657         End
    658 ------------------------------------------------------------------
    659 Capturing subpattern count = 0
    660 Options: utf
    661 First char = \x{c4}
    662 Need char = \x{80}
    663     \x{100}
    664  0: \x{100}
    665     Z\x{100}
    666  0: \x{100}
    667     \x{100}Z
    668  0: \x{100}
    669     *** Failers 
    670 No match
    671 
    672 /[\xff]/DZ8
    673 ------------------------------------------------------------------
    674         Bra
    675         \x{ff}
    676         Ket
    677         End
    678 ------------------------------------------------------------------
    679 Capturing subpattern count = 0
    680 Options: utf
    681 First char = \x{c3}
    682 Need char = \x{bf}
    683     >\x{ff}<
    684  0: \x{ff}
    685 
    686 /[^\xff]/8DZ
    687 ------------------------------------------------------------------
    688         Bra
    689         [^\x{ff}]
    690         Ket
    691         End
    692 ------------------------------------------------------------------
    693 Capturing subpattern count = 0
    694 Options: utf
    695 No first char
    696 No need char
    697 
    698 /\x{100}abc(xyz(?1))/8DZ
    699 ------------------------------------------------------------------
    700         Bra
    701         \x{100}abc
    702         CBra 1
    703         xyz
    704         Recurse
    705         Ket
    706         Ket
    707         End
    708 ------------------------------------------------------------------
    709 Capturing subpattern count = 1
    710 Options: utf
    711 First char = \x{c4}
    712 Need char = 'z'
    713 
    714 /a\x{1234}b/P8
    715     a\x{1234}b
    716  0: a\x{1234}b
    717 
    718 /\777/8I
    719 Capturing subpattern count = 0
    720 Options: utf
    721 First char = \x{c7}
    722 Need char = \x{bf}
    723   \x{1ff}
    724  0: \x{1ff}
    725   \777 
    726  0: \x{1ff}
    727   
    728 /\x{100}+\x{200}/8DZ
    729 ------------------------------------------------------------------
    730         Bra
    731         \x{100}++
    732         \x{200}
    733         Ket
    734         End
    735 ------------------------------------------------------------------
    736 Capturing subpattern count = 0
    737 Options: utf
    738 First char = \x{c4}
    739 Need char = \x{80}
    740 
    741 /\x{100}+X/8DZ
    742 ------------------------------------------------------------------
    743         Bra
    744         \x{100}++
    745         X
    746         Ket
    747         End
    748 ------------------------------------------------------------------
    749 Capturing subpattern count = 0
    750 Options: utf
    751 First char = \x{c4}
    752 Need char = 'X'
    753 
     754 /^[\QĀ\E-\QŐ\E/BZ8
    755 Failed: missing terminating ] for character class at offset 15
    756 
    757 /-- This tests the stricter UTF-8 check according to RFC 3629. --/ 
    758     
    759 /X/8
    760     \x{d800}
    761 Error -10 (bad UTF-8 string) offset=0 reason=14
    762     \x{d800}\?
    763 No match
    764     \x{da00}
    765 Error -10 (bad UTF-8 string) offset=0 reason=14
    766     \x{da00}\?
    767 No match
    768     \x{dfff}
    769 Error -10 (bad UTF-8 string) offset=0 reason=14
    770     \x{dfff}\?
    771 No match
    772     \x{110000}    
    773 Error -10 (bad UTF-8 string) offset=0 reason=13
    774     \x{110000}\?    
    775 No match
    776     \x{2000000} 
    777 Error -10 (bad UTF-8 string) offset=0 reason=11
    778     \x{2000000}\? 
    779 No match
    780     \x{7fffffff} 
    781 Error -10 (bad UTF-8 string) offset=0 reason=12
    782     \x{7fffffff}\? 
    783 No match
    784 
    785 /(*UTF8)\x{1234}/
    786   abcd\x{1234}pqr
    787  0: \x{1234}
    788 
    789 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
    790 Capturing subpattern count = 0
    791 Options: bsr_unicode utf
    792 Forced newline sequence: CRLF
    793 First char = 'a'
    794 Need char = 'b'
    795 
    796 /\h/SI8
    797 Capturing subpattern count = 0
    798 Options: utf
    799 No first char
    800 No need char
    801 Subject length lower bound = 1
    802 Starting chars: \x09 \x20 \xc2 \xe1 \xe2 \xe3 
    803     ABC\x{09}
    804  0: \x{09}
    805     ABC\x{20}
    806  0:  
    807     ABC\x{a0}
    808  0: \x{a0}
    809     ABC\x{1680}
    810  0: \x{1680}
    811     ABC\x{180e}
    812  0: \x{180e}
    813     ABC\x{2000}
    814  0: \x{2000}
    815     ABC\x{202f} 
    816  0: \x{202f}
    817     ABC\x{205f} 
    818  0: \x{205f}
    819     ABC\x{3000} 
    820  0: \x{3000}
    821 
    822 /\v/SI8
    823 Capturing subpattern count = 0
    824 Options: utf
    825 No first char
    826 No need char
    827 Subject length lower bound = 1
    828 Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 
    829     ABC\x{0a}
    830  0: \x{0a}
    831     ABC\x{0b}
    832  0: \x{0b}
    833     ABC\x{0c}
    834  0: \x{0c}
    835     ABC\x{0d}
    836  0: \x{0d}
    837     ABC\x{85}
    838  0: \x{85}
    839     ABC\x{2028}
    840  0: \x{2028}
    841 
    842 /\h*A/SI8
    843 Capturing subpattern count = 0
    844 Options: utf
    845 No first char
    846 Need char = 'A'
    847 Subject length lower bound = 1
    848 Starting chars: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 
    849     CDBABC
    850  0: A
    851     
    852 /\v+A/SI8
    853 Capturing subpattern count = 0
    854 Options: utf
    855 No first char
    856 Need char = 'A'
    857 Subject length lower bound = 2
    858 Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 
    859 
    860 /\s?xxx\s/8SI
    861 Capturing subpattern count = 0
    862 Options: utf
    863 No first char
    864 Need char = 'x'
    865 Subject length lower bound = 4
    866 Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 x 
    867 
    868 /\sxxx\s/I8ST1
    869 Capturing subpattern count = 0
    870 Options: utf
    871 No first char
    872 Need char = 'x'
    873 Subject length lower bound = 5
    874 Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 
    875     AB\x{85}xxx\x{a0}XYZ
    876  0: \x{85}xxx\x{a0}
    877     AB\x{a0}xxx\x{85}XYZ
    878  0: \x{a0}xxx\x{85}
    879 
    880 /\S \S/I8ST1
    881 Capturing subpattern count = 0
    882 Options: utf
    883 No first char
    884 Need char = ' '
    885 Subject length lower bound = 3
    886 Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 
    887   \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 
    888   \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 
    889   D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 
    890   i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 
    891   \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 
    892   \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 
    893   \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 
    894   \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
    895     \x{a2} \x{84} 
    896  0: \x{a2} \x{84}
    897     A Z 
    898  0: A Z
    899 
    900 /a+/8
    901     a\x{123}aa\>1
    902  0: aa
    903     a\x{123}aa\>2
    904 Error -11 (bad UTF-8 offset)
    905     a\x{123}aa\>3
    906  0: aa
    907     a\x{123}aa\>4
    908  0: a
    909     a\x{123}aa\>5
    910 No match
    911     a\x{123}aa\>6
    912 Error -24 (bad offset value)
    913 
    914 /\x{1234}+/iS8I
    915 Capturing subpattern count = 0
    916 Options: caseless utf
    917 No first char
    918 No need char
    919 Subject length lower bound = 1
    920 Starting chars: \xe1 
    921 
    922 /\x{1234}+?/iS8I
    923 Capturing subpattern count = 0
    924 Options: caseless utf
    925 No first char
    926 No need char
    927 Subject length lower bound = 1
    928 Starting chars: \xe1 
    929 
    930 /\x{1234}++/iS8I
    931 Capturing subpattern count = 0
    932 Options: caseless utf
    933 No first char
    934 No need char
    935 Subject length lower bound = 1
    936 Starting chars: \xe1 
    937 
    938 /\x{1234}{2}/iS8I
    939 Capturing subpattern count = 0
    940 Options: caseless utf
    941 No first char
    942 No need char
    943 Subject length lower bound = 2
    944 Starting chars: \xe1 
    945 
    946 /[^\x{c4}]/8DZ
    947 ------------------------------------------------------------------
    948         Bra
    949         [^\x{c4}]
    950         Ket
    951         End
    952 ------------------------------------------------------------------
    953 Capturing subpattern count = 0
    954 Options: utf
    955 No first char
    956 No need char
    957 
    958 /X+\x{200}/8DZ
    959 ------------------------------------------------------------------
    960         Bra
    961         X++
    962         \x{200}
    963         Ket
    964         End
    965 ------------------------------------------------------------------
    966 Capturing subpattern count = 0
    967 Options: utf
    968 First char = 'X'
    969 Need char = \x{80}
    970 
    971 /\R/SI8
    972 Capturing subpattern count = 0
    973 Options: utf
    974 No first char
    975 No need char
    976 Subject length lower bound = 1
    977 Starting chars: \x0a \x0b \x0c \x0d \xc2 \xe2 
    978 
    979 /\777/8DZ
    980 ------------------------------------------------------------------
    981         Bra
    982         \x{1ff}
    983         Ket
    984         End
    985 ------------------------------------------------------------------
    986 Capturing subpattern count = 0
    987 Options: utf
    988 First char = \x{c7}
    989 Need char = \x{bf}
    990 
    991 /\w+\x{C4}/8BZ
    992 ------------------------------------------------------------------
    993         Bra
    994         \w++
    995         \x{c4}
    996         Ket
    997         End
    998 ------------------------------------------------------------------
    999     a\x{C4}\x{C4}
   1000  0: a\x{c4}
   1001 
   1002 /\w+\x{C4}/8BZT1
   1003 ------------------------------------------------------------------
   1004         Bra
   1005         \w+
   1006         \x{c4}
   1007         Ket
   1008         End
   1009 ------------------------------------------------------------------
   1010     a\x{C4}\x{C4}
   1011  0: a\x{c4}\x{c4}
   1012     
   1013 /\W+\x{C4}/8BZ
   1014 ------------------------------------------------------------------
   1015         Bra
   1016         \W+
   1017         \x{c4}
   1018         Ket
   1019         End
   1020 ------------------------------------------------------------------
   1021     !\x{C4}
   1022  0: !\x{c4}
   1023  
   1024 /\W+\x{C4}/8BZT1
   1025 ------------------------------------------------------------------
   1026         Bra
   1027         \W++
   1028         \x{c4}
   1029         Ket
   1030         End
   1031 ------------------------------------------------------------------
   1032     !\x{C4}
   1033  0: !\x{c4}
   1034 
   1035 /\W+\x{A1}/8BZ
   1036 ------------------------------------------------------------------
   1037         Bra
   1038         \W+
   1039         \x{a1}
   1040         Ket
   1041         End
   1042 ------------------------------------------------------------------
   1043     !\x{A1}
   1044  0: !\x{a1}
   1045  
   1046 /\W+\x{A1}/8BZT1
   1047 ------------------------------------------------------------------
   1048         Bra
   1049         \W+
   1050         \x{a1}
   1051         Ket
   1052         End
   1053 ------------------------------------------------------------------
   1054     !\x{A1}
   1055  0: !\x{a1}
   1056 
   1057 /X\s+\x{A0}/8BZ
   1058 ------------------------------------------------------------------
   1059         Bra
   1060         X
   1061         \s++
   1062         \x{a0}
   1063         Ket
   1064         End
   1065 ------------------------------------------------------------------
   1066     X\x20\x{A0}\x{A0}
   1067  0: X \x{a0}
   1068 
   1069 /X\s+\x{A0}/8BZT1
   1070 ------------------------------------------------------------------
   1071         Bra
   1072         X
   1073         \s+
   1074         \x{a0}
   1075         Ket
   1076         End
   1077 ------------------------------------------------------------------
   1078     X\x20\x{A0}\x{A0}
   1079  0: X \x{a0}\x{a0}
   1080 
   1081 /\S+\x{A0}/8BZ
   1082 ------------------------------------------------------------------
   1083         Bra
   1084         \S+
   1085         \x{a0}
   1086         Ket
   1087         End
   1088 ------------------------------------------------------------------
   1089     X\x{A0}\x{A0}
   1090  0: X\x{a0}\x{a0}
   1091 
   1092 /\S+\x{A0}/8BZT1
   1093 ------------------------------------------------------------------
   1094         Bra
   1095         \S++
   1096         \x{a0}
   1097         Ket
   1098         End
   1099 ------------------------------------------------------------------
   1100     X\x{A0}\x{A0}
   1101  0: X\x{a0}
   1102 
   1103 /\x{a0}+\s!/8BZ
   1104 ------------------------------------------------------------------
   1105         Bra
   1106         \x{a0}++
   1107         \s
   1108         !
   1109         Ket
   1110         End
   1111 ------------------------------------------------------------------
   1112     \x{a0}\x20!
   1113  0: \x{a0} !
   1114 
   1115 /\x{a0}+\s!/8BZT1
   1116 ------------------------------------------------------------------
   1117         Bra
   1118         \x{a0}+
   1119         \s
   1120         !
   1121         Ket
   1122         End
   1123 ------------------------------------------------------------------
   1124     \x{a0}\x20!
   1125  0: \x{a0} !
   1126 
   1127 /A/8
   1128   \x{ff000041}
   1129 ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
   1130   \x{7f000041} 
   1131 Error -10 (bad UTF-8 string) offset=0 reason=12
   1132 
   1133 /(*UTF8)abc/9
   1134 Failed: setting UTF is disabled by the application at offset 0
   1135 
   1136 /abc/89
   1137 Failed: setting UTF is disabled by the application at offset 0
   1138 
   1139 /-- End of testinput15 --/
   1140