Home | History | Annotate | Download | only in testdata
      1 # This set of tests is for UTF-8 support and Unicode property support, with
      2 # relevance only for the 8-bit library.
      3 
      4 # The next 4 patterns have UTF-8 errors
      5 
      6 /[]/utf
      7 Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
      8 
      9 //utf
     10 Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
     11 
     12 /xxx/utf
     13 Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
     14 
     15 //utf
     16 Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
     17 
     18 # Now test subjects
     19 
     20 /badutf/utf
     21 \= Expect UTF-8 errors
     22     X\xdf
     23 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
     24     XX\xef
     25 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
     26     XXX\xef\x80
     27 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
     28     X\xf7
     29 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
     30     XX\xf7\x80
     31 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
     32     XXX\xf7\x80\x80
     33 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
     34     \xfb
     35 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
     36     \xfb\x80
     37 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
     38     \xfb\x80\x80
     39 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
     40     \xfb\x80\x80\x80
     41 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
     42     \xfd
     43 Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
     44     \xfd\x80
     45 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
     46     \xfd\x80\x80
     47 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
     48     \xfd\x80\x80\x80
     49 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
     50     \xfd\x80\x80\x80\x80
     51 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
     52     \xdf\x7f
     53 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
     54     \xef\x7f\x80
     55 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
     56     \xef\x80\x7f
     57 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
     58     \xf7\x7f\x80\x80
     59 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
     60     \xf7\x80\x7f\x80
     61 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
     62     \xf7\x80\x80\x7f
     63 Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
     64     \xfb\x7f\x80\x80\x80
     65 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
     66     \xfb\x80\x7f\x80\x80
     67 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
     68     \xfb\x80\x80\x7f\x80
     69 Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
     70     \xfb\x80\x80\x80\x7f
     71 Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
     72     \xfd\x7f\x80\x80\x80\x80
     73 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
     74     \xfd\x80\x7f\x80\x80\x80
     75 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
     76     \xfd\x80\x80\x7f\x80\x80
     77 Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
     78     \xfd\x80\x80\x80\x7f\x80
     79 Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
     80     \xfd\x80\x80\x80\x80\x7f
     81 Failed: error -12: UTF-8 error: byte 6 top bits not 0x80 at offset 0
     82     \xed\xa0\x80
     83 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
     84     \xc0\x8f
     85 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 0
     86     \xe0\x80\x8f
     87 Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 0
     88     \xf0\x80\x80\x8f
     89 Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
     90     \xf8\x80\x80\x80\x8f
     91 Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
     92     \xfc\x80\x80\x80\x80\x8f
     93 Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
     94     \x80
     95 Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
     96     \xfe
     97 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
     98     \xff
     99 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
    100 
    101 /badutf/utf
    102 \= Expect UTF-8 errors
    103     XX\xfb\x80\x80\x80\x80
    104 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
    105     XX\xfd\x80\x80\x80\x80\x80
    106 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 2
    107     XX\xf7\xbf\xbf\xbf
    108 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
    109 
    110 /shortutf/utf
    111 \= Expect UTF-8 errors
    112     XX\xdf\=ph
    113 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
    114     XX\xef\=ph
    115 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
    116     XX\xef\x80\=ph
    117 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
    118     \xf7\=ph
    119 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
    120     \xf7\x80\=ph
    121 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
    122     \xf7\x80\x80\=ph
    123 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
    124     \xfb\=ph
    125 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
    126     \xfb\x80\=ph
    127 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
    128     \xfb\x80\x80\=ph
    129 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
    130     \xfb\x80\x80\x80\=ph
    131 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
    132     \xfd\=ph
    133 Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
    134     \xfd\x80\=ph
    135 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
    136     \xfd\x80\x80\=ph
    137 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
    138     \xfd\x80\x80\x80\=ph
    139 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
    140     \xfd\x80\x80\x80\x80\=ph
    141 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
    142 
    143 /anything/utf
    144 \= Expect UTF-8 errors
    145     X\xc0\x80
    146 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
    147     XX\xc1\x8f
    148 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 2
    149     XXX\xe0\x9f\x80
    150 Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 3
    151     \xf0\x8f\x80\x80
    152 Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
    153     \xf8\x87\x80\x80\x80
    154 Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
    155     \xfc\x83\x80\x80\x80\x80
    156 Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
    157     \xfe\x80\x80\x80\x80\x80
    158 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
    159     \xff\x80\x80\x80\x80\x80
    160 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
    161     \xf8\x88\x80\x80\x80
    162 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
    163     \xf9\x87\x80\x80\x80
    164 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
    165     \xfc\x84\x80\x80\x80\x80
    166 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
    167     \xfd\x83\x80\x80\x80\x80
    168 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
    169 \= Expect no match
    170     \xc3\x8f
    171 No match
    172     \xe0\xaf\x80
    173 No match
    174     \xe1\x80\x80
    175 No match
    176     \xf0\x9f\x80\x80
    177 No match
    178     \xf1\x8f\x80\x80
    179 No match
    180     \xf8\x88\x80\x80\x80\=no_utf_check
    181 No match
    182     \xf9\x87\x80\x80\x80\=no_utf_check
    183 No match
    184     \xfc\x84\x80\x80\x80\x80\=no_utf_check
    185 No match
    186     \xfd\x83\x80\x80\x80\x80\=no_utf_check
    187 No match
    188     
    189 # Similar tests with offsets
    190 
    191 /badutf/utf
    192 \= Expect UTF-8 errors
    193     X\xdfabcd
    194 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    195     X\xdfabcd\=offset=1
    196 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    197 \= Expect no match
    198     X\xdfabcd\=offset=2
    199 No match
    200 
    201 /(?<=x)badutf/utf
    202 \= Expect UTF-8 errors
    203     X\xdfabcd
    204 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    205     X\xdfabcd\=offset=1
    206 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    207     X\xdfabcd\=offset=2
    208 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    209     X\xdfabcd\xdf\=offset=3
    210 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
    211 \= Expect no match
    212     X\xdfabcd\=offset=3
    213 No match
    214 
    215 /(?<=xx)badutf/utf
    216 \= Expect UTF-8 errors
    217     X\xdfabcd
    218 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    219     X\xdfabcd\=offset=1
    220 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    221     X\xdfabcd\=offset=2
    222 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    223     X\xdfabcd\=offset=3
    224 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    225 
    226 /(?<=xxxx)badutf/utf
    227 \= Expect UTF-8 errors
    228     X\xdfabcd
    229 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    230     X\xdfabcd\=offset=1
    231 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    232     X\xdfabcd\=offset=2
    233 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    234     X\xdfabcd\=offset=3
    235 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
    236     X\xdfabc\xdf\=offset=6
    237 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
    238     X\xdfabc\xdf\=offset=7
    239 Failed: error -33: bad offset value
    240 \= Expect no match
    241     X\xdfabcd\=offset=6
    242 No match
    243  
    244 /\x{100}/IB,utf
    245 ------------------------------------------------------------------
    246         Bra
    247         \x{100}
    248         Ket
    249         End
    250 ------------------------------------------------------------------
    251 Capturing subpattern count = 0
    252 Options: utf
    253 First code unit = \xc4
    254 Last code unit = \x80
    255 Subject length lower bound = 1
    256 
    257 /\x{1000}/IB,utf
    258 ------------------------------------------------------------------
    259         Bra
    260         \x{1000}
    261         Ket
    262         End
    263 ------------------------------------------------------------------
    264 Capturing subpattern count = 0
    265 Options: utf
    266 First code unit = \xe1
    267 Last code unit = \x80
    268 Subject length lower bound = 1
    269 
    270 /\x{10000}/IB,utf
    271 ------------------------------------------------------------------
    272         Bra
    273         \x{10000}
    274         Ket
    275         End
    276 ------------------------------------------------------------------
    277 Capturing subpattern count = 0
    278 Options: utf
    279 First code unit = \xf0
    280 Last code unit = \x80
    281 Subject length lower bound = 1
    282 
    283 /\x{100000}/IB,utf
    284 ------------------------------------------------------------------
    285         Bra
    286         \x{100000}
    287         Ket
    288         End
    289 ------------------------------------------------------------------
    290 Capturing subpattern count = 0
    291 Options: utf
    292 First code unit = \xf4
    293 Last code unit = \x80
    294 Subject length lower bound = 1
    295 
    296 /\x{10ffff}/IB,utf
    297 ------------------------------------------------------------------
    298         Bra
    299         \x{10ffff}
    300         Ket
    301         End
    302 ------------------------------------------------------------------
    303 Capturing subpattern count = 0
    304 Options: utf
    305 First code unit = \xf4
    306 Last code unit = \xbf
    307 Subject length lower bound = 1
    308 
    309 /[\x{ff}]/IB,utf
    310 ------------------------------------------------------------------
    311         Bra
    312         \x{ff}
    313         Ket
    314         End
    315 ------------------------------------------------------------------
    316 Capturing subpattern count = 0
    317 Options: utf
    318 First code unit = \xc3
    319 Last code unit = \xbf
    320 Subject length lower bound = 1
    321 
    322 /[\x{100}]/IB,utf
    323 ------------------------------------------------------------------
    324         Bra
    325         \x{100}
    326         Ket
    327         End
    328 ------------------------------------------------------------------
    329 Capturing subpattern count = 0
    330 Options: utf
    331 First code unit = \xc4
    332 Last code unit = \x80
    333 Subject length lower bound = 1
    334 
    335 /\x80/IB,utf
    336 ------------------------------------------------------------------
    337         Bra
    338         \x{80}
    339         Ket
    340         End
    341 ------------------------------------------------------------------
    342 Capturing subpattern count = 0
    343 Options: utf
    344 First code unit = \xc2
    345 Last code unit = \x80
    346 Subject length lower bound = 1
    347 
    348 /\xff/IB,utf
    349 ------------------------------------------------------------------
    350         Bra
    351         \x{ff}
    352         Ket
    353         End
    354 ------------------------------------------------------------------
    355 Capturing subpattern count = 0
    356 Options: utf
    357 First code unit = \xc3
    358 Last code unit = \xbf
    359 Subject length lower bound = 1
    360 
    361 /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
    362 ------------------------------------------------------------------
    363         Bra
    364         \x{d55c}\x{ad6d}\x{c5b4}
    365         Ket
    366         End
    367 ------------------------------------------------------------------
    368 Capturing subpattern count = 0
    369 Options: utf
    370 First code unit = \xed
    371 Last code unit = \xb4
    372 Subject length lower bound = 3
    373     \x{D55c}\x{ad6d}\x{C5B4}
    374  0: \x{d55c}\x{ad6d}\x{c5b4}
    375 
    376 /\x{65e5}\x{672c}\x{8a9e}/IB,utf
    377 ------------------------------------------------------------------
    378         Bra
    379         \x{65e5}\x{672c}\x{8a9e}
    380         Ket
    381         End
    382 ------------------------------------------------------------------
    383 Capturing subpattern count = 0
    384 Options: utf
    385 First code unit = \xe6
    386 Last code unit = \x9e
    387 Subject length lower bound = 3
    388     \x{65e5}\x{672c}\x{8a9e}
    389  0: \x{65e5}\x{672c}\x{8a9e}
    390 
    391 /\x{80}/IB,utf
    392 ------------------------------------------------------------------
    393         Bra
    394         \x{80}
    395         Ket
    396         End
    397 ------------------------------------------------------------------
    398 Capturing subpattern count = 0
    399 Options: utf
    400 First code unit = \xc2
    401 Last code unit = \x80
    402 Subject length lower bound = 1
    403 
    404 /\x{084}/IB,utf
    405 ------------------------------------------------------------------
    406         Bra
    407         \x{84}
    408         Ket
    409         End
    410 ------------------------------------------------------------------
    411 Capturing subpattern count = 0
    412 Options: utf
    413 First code unit = \xc2
    414 Last code unit = \x84
    415 Subject length lower bound = 1
    416 
    417 /\x{104}/IB,utf
    418 ------------------------------------------------------------------
    419         Bra
    420         \x{104}
    421         Ket
    422         End
    423 ------------------------------------------------------------------
    424 Capturing subpattern count = 0
    425 Options: utf
    426 First code unit = \xc4
    427 Last code unit = \x84
    428 Subject length lower bound = 1
    429 
    430 /\x{861}/IB,utf
    431 ------------------------------------------------------------------
    432         Bra
    433         \x{861}
    434         Ket
    435         End
    436 ------------------------------------------------------------------
    437 Capturing subpattern count = 0
    438 Options: utf
    439 First code unit = \xe0
    440 Last code unit = \xa1
    441 Subject length lower bound = 1
    442 
    443 /\x{212ab}/IB,utf
    444 ------------------------------------------------------------------
    445         Bra
    446         \x{212ab}
    447         Ket
    448         End
    449 ------------------------------------------------------------------
    450 Capturing subpattern count = 0
    451 Options: utf
    452 First code unit = \xf0
    453 Last code unit = \xab
    454 Subject length lower bound = 1
    455 
    456 /[^ab\xC0-\xF0]/IB,utf
    457 ------------------------------------------------------------------
    458         Bra
    459         [\x00-`c-\xbf\xf1-\xff] (neg)
    460         Ket
    461         End
    462 ------------------------------------------------------------------
    463 Capturing subpattern count = 0
    464 Options: utf
    465 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
    466   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
    467   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
    468   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
    469   Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 
    470   \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 
    471   \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 
    472   \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 
    473   \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 
    474   \xfe \xff 
    475 Subject length lower bound = 1
    476     \x{f1}
    477  0: \x{f1}
    478     \x{bf}
    479  0: \x{bf}
    480     \x{100}
    481  0: \x{100}
    482     \x{1000}
    483  0: \x{1000}
    484 \= Expect no match
    485     \x{c0}
    486 No match
    487     \x{f0}
    488 No match
    489 
    490 /Ā{3,4}/IB,utf
    491 ------------------------------------------------------------------
    492         Bra
    493         \x{100}{3}
    494         \x{100}?+
    495         Ket
    496         End
    497 ------------------------------------------------------------------
    498 Capturing subpattern count = 0
    499 Options: utf
    500 First code unit = \xc4
    501 Last code unit = \x80
    502 Subject length lower bound = 3
    503   \x{100}\x{100}\x{100}\x{100\x{100}
    504  0: \x{100}\x{100}\x{100}
    505 
    506 /(\x{100}+|x)/IB,utf
    507 ------------------------------------------------------------------
    508         Bra
    509         CBra 1
    510         \x{100}++
    511         Alt
    512         x
    513         Ket
    514         Ket
    515         End
    516 ------------------------------------------------------------------
    517 Capturing subpattern count = 1
    518 Options: utf
    519 Starting code units: x \xc4 
    520 Subject length lower bound = 1
    521 
    522 /(\x{100}*a|x)/IB,utf
    523 ------------------------------------------------------------------
    524         Bra
    525         CBra 1
    526         \x{100}*+
    527         a
    528         Alt
    529         x
    530         Ket
    531         Ket
    532         End
    533 ------------------------------------------------------------------
    534 Capturing subpattern count = 1
    535 Options: utf
    536 Starting code units: a x \xc4 
    537 Subject length lower bound = 1
    538 
    539 /(\x{100}{0,2}a|x)/IB,utf
    540 ------------------------------------------------------------------
    541         Bra
    542         CBra 1
    543         \x{100}{0,2}+
    544         a
    545         Alt
    546         x
    547         Ket
    548         Ket
    549         End
    550 ------------------------------------------------------------------
    551 Capturing subpattern count = 1
    552 Options: utf
    553 Starting code units: a x \xc4 
    554 Subject length lower bound = 1
    555 
    556 /(\x{100}{1,2}a|x)/IB,utf
    557 ------------------------------------------------------------------
    558         Bra
    559         CBra 1
    560         \x{100}
    561         \x{100}{0,1}+
    562         a
    563         Alt
    564         x
    565         Ket
    566         Ket
    567         End
    568 ------------------------------------------------------------------
    569 Capturing subpattern count = 1
    570 Options: utf
    571 Starting code units: x \xc4 
    572 Subject length lower bound = 1
    573 
    574 /\x{100}/IB,utf
    575 ------------------------------------------------------------------
    576         Bra
    577         \x{100}
    578         Ket
    579         End
    580 ------------------------------------------------------------------
    581 Capturing subpattern count = 0
    582 Options: utf
    583 First code unit = \xc4
    584 Last code unit = \x80
    585 Subject length lower bound = 1
    586 
    587 /a\x{100}\x{101}*/IB,utf
    588 ------------------------------------------------------------------
    589         Bra
    590         a\x{100}
    591         \x{101}*+
    592         Ket
    593         End
    594 ------------------------------------------------------------------
    595 Capturing subpattern count = 0
    596 Options: utf
    597 First code unit = 'a'
    598 Last code unit = \x80
    599 Subject length lower bound = 2
    600 
    601 /a\x{100}\x{101}+/IB,utf
    602 ------------------------------------------------------------------
    603         Bra
    604         a\x{100}
    605         \x{101}++
    606         Ket
    607         End
    608 ------------------------------------------------------------------
    609 Capturing subpattern count = 0
    610 Options: utf
    611 First code unit = 'a'
    612 Last code unit = \x81
    613 Subject length lower bound = 3
    614 
    615 /[^\x{c4}]/IB
    616 ------------------------------------------------------------------
    617         Bra
    618         [^\x{c4}]
    619         Ket
    620         End
    621 ------------------------------------------------------------------
    622 Capturing subpattern count = 0
    623 Subject length lower bound = 1
    624 
    625 /[\x{100}]/IB,utf
    626 ------------------------------------------------------------------
    627         Bra
    628         \x{100}
    629         Ket
    630         End
    631 ------------------------------------------------------------------
    632 Capturing subpattern count = 0
    633 Options: utf
    634 First code unit = \xc4
    635 Last code unit = \x80
    636 Subject length lower bound = 1
    637     \x{100}
    638  0: \x{100}
    639     Z\x{100}
    640  0: \x{100}
    641     \x{100}Z
    642  0: \x{100}
    643 
    644 /[\xff]/IB,utf
    645 ------------------------------------------------------------------
    646         Bra
    647         \x{ff}
    648         Ket
    649         End
    650 ------------------------------------------------------------------
    651 Capturing subpattern count = 0
    652 Options: utf
    653 First code unit = \xc3
    654 Last code unit = \xbf
    655 Subject length lower bound = 1
    656     >\x{ff}<
    657  0: \x{ff}
    658 
    659 /[^\xff]/IB,utf
    660 ------------------------------------------------------------------
    661         Bra
    662         [^\x{ff}]
    663         Ket
    664         End
    665 ------------------------------------------------------------------
    666 Capturing subpattern count = 0
    667 Options: utf
    668 Subject length lower bound = 1
    669 
    670 /\x{100}abc(xyz(?1))/IB,utf
    671 ------------------------------------------------------------------
    672         Bra
    673         \x{100}abc
    674         CBra 1
    675         xyz
    676         Recurse
    677         Ket
    678         Ket
    679         End
    680 ------------------------------------------------------------------
    681 Capturing subpattern count = 1
    682 Options: utf
    683 First code unit = \xc4
    684 Last code unit = 'z'
    685 Subject length lower bound = 7
    686 
    687 /\777/I,utf
    688 Capturing subpattern count = 0
    689 Options: utf
    690 First code unit = \xc7
    691 Last code unit = \xbf
    692 Subject length lower bound = 1
    693   \x{1ff}
    694  0: \x{1ff}
    695   \777
    696  0: \x{1ff}
    697 
    698 /\x{100}+\x{200}/IB,utf
    699 ------------------------------------------------------------------
    700         Bra
    701         \x{100}++
    702         \x{200}
    703         Ket
    704         End
    705 ------------------------------------------------------------------
    706 Capturing subpattern count = 0
    707 Options: utf
    708 First code unit = \xc4
    709 Last code unit = \x80
    710 Subject length lower bound = 2
    711 
    712 /\x{100}+X/IB,utf
    713 ------------------------------------------------------------------
    714         Bra
    715         \x{100}++
    716         X
    717         Ket
    718         End
    719 ------------------------------------------------------------------
    720 Capturing subpattern count = 0
    721 Options: utf
    722 First code unit = \xc4
    723 Last code unit = 'X'
    724 Subject length lower bound = 2
    725 
    726 /^[\QĀ\E-\QŐ\E/B,utf
    727 Failed: error 106 at offset 15: missing terminating ] for character class
    728 
    729 # This tests the stricter UTF-8 check according to RFC 3629.
    730 
    731 /X/utf
    732 \= Expect UTF-8 errors
    733     \x{d800}
    734 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
    735     \x{da00}
    736 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
    737     \x{dfff}
    738 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
    739     \x{110000}
    740 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
    741     \x{2000000}
    742 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
    743     \x{7fffffff}
    744 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
    745 \= Expect no match
    746     \x{d800}\=no_utf_check
    747 No match
    748     \x{da00}\=no_utf_check
    749 No match
    750     \x{dfff}\=no_utf_check
    751 No match
    752     \x{110000}\=no_utf_check
    753 No match
    754     \x{2000000}\=no_utf_check
    755 No match
    756     \x{7fffffff}\=no_utf_check
    757 No match
    758 
    759 /(*UTF8)\x{1234}/
    760     abcd\x{1234}pqr
    761  0: \x{1234}
    762 
    763 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
    764 Capturing subpattern count = 0
    765 Compile options: <none>
    766 Overall options: utf
    767 \R matches any Unicode newline
    768 Forced newline is CRLF
    769 First code unit = 'a'
    770 Last code unit = 'b'
    771 Subject length lower bound = 3
    772 
    773 /\h/I,utf
    774 Capturing subpattern count = 0
    775 Options: utf
    776 Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3 
    777 Subject length lower bound = 1
    778     ABC\x{09}
    779  0: \x{09}
    780     ABC\x{20}
    781  0:  
    782     ABC\x{a0}
    783  0: \x{a0}
    784     ABC\x{1680}
    785  0: \x{1680}
    786     ABC\x{180e}
    787  0: \x{180e}
    788     ABC\x{2000}
    789  0: \x{2000}
    790     ABC\x{202f}
    791  0: \x{202f}
    792     ABC\x{205f}
    793  0: \x{205f}
    794     ABC\x{3000}
    795  0: \x{3000}
    796 
    797 /\v/I,utf
    798 Capturing subpattern count = 0
    799 Options: utf
    800 Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 
    801 Subject length lower bound = 1
    802     ABC\x{0a}
    803  0: \x{0a}
    804     ABC\x{0b}
    805  0: \x{0b}
    806     ABC\x{0c}
    807  0: \x{0c}
    808     ABC\x{0d}
    809  0: \x{0d}
    810     ABC\x{85}
    811  0: \x{85}
    812     ABC\x{2028}
    813  0: \x{2028}
    814 
    815 /\h*A/I,utf
    816 Capturing subpattern count = 0
    817 Options: utf
    818 Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 
    819 Last code unit = 'A'
    820 Subject length lower bound = 1
    821     CDBABC
    822  0: A
    823 
    824 /\v+A/I,utf
    825 Capturing subpattern count = 0
    826 Options: utf
    827 Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 
    828 Last code unit = 'A'
    829 Subject length lower bound = 2
    830 
    831 /\s?xxx\s/I,utf
    832 Capturing subpattern count = 0
    833 Options: utf
    834 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x 
    835 Last code unit = 'x'
    836 Subject length lower bound = 4
    837 
    838 /\sxxx\s/I,utf,tables=2
    839 Capturing subpattern count = 0
    840 Options: utf
    841 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 
    842 Last code unit = 'x'
    843 Subject length lower bound = 5
    844     AB\x{85}xxx\x{a0}XYZ
    845  0: \x{85}xxx\x{a0}
    846     AB\x{a0}xxx\x{85}XYZ
    847  0: \x{a0}xxx\x{85}
    848 
    849 /\S \S/I,utf,tables=2
    850 Capturing subpattern count = 0
    851 Options: utf
    852 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 
    853   \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 
    854   \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 
    855   D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 
    856   i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 
    857   \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 
    858   \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 
    859   \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 
    860   \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
    861 Last code unit = ' '
    862 Subject length lower bound = 3
    863     \x{a2} \x{84}
    864  0: \x{a2} \x{84}
    865     A Z
    866  0: A Z
    867 
    868 /a+/utf
    869     a\x{123}aa\=offset=1
    870  0: aa
    871     a\x{123}aa\=offset=3
    872  0: aa
    873     a\x{123}aa\=offset=4
    874  0: a
    875 \= Expect bad offset value
    876     a\x{123}aa\=offset=6
    877 Failed: error -33: bad offset value
    878 \= Expect bad UTF-8 offset     
    879     a\x{123}aa\=offset=2
    880 Error -36 (bad UTF-8 offset)
    881 \= Expect no match
    882     a\x{123}aa\=offset=5
    883 No match
    884 
    885 /\x{1234}+/Ii,utf
    886 Capturing subpattern count = 0
    887 Options: caseless utf
    888 Starting code units: \xe1 
    889 Subject length lower bound = 1
    890 
    891 /\x{1234}+?/Ii,utf
    892 Capturing subpattern count = 0
    893 Options: caseless utf
    894 Starting code units: \xe1 
    895 Subject length lower bound = 1
    896 
    897 /\x{1234}++/Ii,utf
    898 Capturing subpattern count = 0
    899 Options: caseless utf
    900 Starting code units: \xe1 
    901 Subject length lower bound = 1
    902 
    903 /\x{1234}{2}/Ii,utf
    904 Capturing subpattern count = 0
    905 Options: caseless utf
    906 Starting code units: \xe1 
    907 Subject length lower bound = 2
    908 
    909 /[^\x{c4}]/IB,utf
    910 ------------------------------------------------------------------
    911         Bra
    912         [^\x{c4}]
    913         Ket
    914         End
    915 ------------------------------------------------------------------
    916 Capturing subpattern count = 0
    917 Options: utf
    918 Subject length lower bound = 1
    919 
    920 /X+\x{200}/IB,utf
    921 ------------------------------------------------------------------
    922         Bra
    923         X++
    924         \x{200}
    925         Ket
    926         End
    927 ------------------------------------------------------------------
    928 Capturing subpattern count = 0
    929 Options: utf
    930 First code unit = 'X'
    931 Last code unit = \x80
    932 Subject length lower bound = 2
    933 
    934 /\R/I,utf
    935 Capturing subpattern count = 0
    936 Options: utf
    937 Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2 
    938 Subject length lower bound = 1
    939 
    940 /\777/IB,utf
    941 ------------------------------------------------------------------
    942         Bra
    943         \x{1ff}
    944         Ket
    945         End
    946 ------------------------------------------------------------------
    947 Capturing subpattern count = 0
    948 Options: utf
    949 First code unit = \xc7
    950 Last code unit = \xbf
    951 Subject length lower bound = 1
    952 
    953 /\w+\x{C4}/B,utf
    954 ------------------------------------------------------------------
    955         Bra
    956         \w++
    957         \x{c4}
    958         Ket
    959         End
    960 ------------------------------------------------------------------
    961     a\x{C4}\x{C4}
    962  0: a\x{c4}
    963 
    964 /\w+\x{C4}/B,utf,tables=2
    965 ------------------------------------------------------------------
    966         Bra
    967         \w+
    968         \x{c4}
    969         Ket
    970         End
    971 ------------------------------------------------------------------
    972     a\x{C4}\x{C4}
    973  0: a\x{c4}\x{c4}
    974 
    975 /\W+\x{C4}/B,utf
    976 ------------------------------------------------------------------
    977         Bra
    978         \W+
    979         \x{c4}
    980         Ket
    981         End
    982 ------------------------------------------------------------------
    983     !\x{C4}
    984  0: !\x{c4}
    985 
    986 /\W+\x{C4}/B,utf,tables=2
    987 ------------------------------------------------------------------
    988         Bra
    989         \W++
    990         \x{c4}
    991         Ket
    992         End
    993 ------------------------------------------------------------------
    994     !\x{C4}
    995  0: !\x{c4}
    996 
    997 /\W+\x{A1}/B,utf
    998 ------------------------------------------------------------------
    999         Bra
   1000         \W+
   1001         \x{a1}
   1002         Ket
   1003         End
   1004 ------------------------------------------------------------------
   1005     !\x{A1}
   1006  0: !\x{a1}
   1007 
   1008 /\W+\x{A1}/B,utf,tables=2
   1009 ------------------------------------------------------------------
   1010         Bra
   1011         \W+
   1012         \x{a1}
   1013         Ket
   1014         End
   1015 ------------------------------------------------------------------
   1016     !\x{A1}
   1017  0: !\x{a1}
   1018 
   1019 /X\s+\x{A0}/B,utf
   1020 ------------------------------------------------------------------
   1021         Bra
   1022         X
   1023         \s++
   1024         \x{a0}
   1025         Ket
   1026         End
   1027 ------------------------------------------------------------------
   1028     X\x20\x{A0}\x{A0}
   1029  0: X \x{a0}
   1030 
   1031 /X\s+\x{A0}/B,utf,tables=2
   1032 ------------------------------------------------------------------
   1033         Bra
   1034         X
   1035         \s+
   1036         \x{a0}
   1037         Ket
   1038         End
   1039 ------------------------------------------------------------------
   1040     X\x20\x{A0}\x{A0}
   1041  0: X \x{a0}\x{a0}
   1042 
   1043 /\S+\x{A0}/B,utf
   1044 ------------------------------------------------------------------
   1045         Bra
   1046         \S+
   1047         \x{a0}
   1048         Ket
   1049         End
   1050 ------------------------------------------------------------------
   1051     X\x{A0}\x{A0}
   1052  0: X\x{a0}\x{a0}
   1053 
   1054 /\S+\x{A0}/B,utf,tables=2
   1055 ------------------------------------------------------------------
   1056         Bra
   1057         \S++
   1058         \x{a0}
   1059         Ket
   1060         End
   1061 ------------------------------------------------------------------
   1062     X\x{A0}\x{A0}
   1063  0: X\x{a0}
   1064 
   1065 /\x{a0}+\s!/B,utf
   1066 ------------------------------------------------------------------
   1067         Bra
   1068         \x{a0}++
   1069         \s
   1070         !
   1071         Ket
   1072         End
   1073 ------------------------------------------------------------------
   1074     \x{a0}\x20!
   1075  0: \x{a0} !
   1076 
   1077 /\x{a0}+\s!/B,utf,tables=2
   1078 ------------------------------------------------------------------
   1079         Bra
   1080         \x{a0}+
   1081         \s
   1082         !
   1083         Ket
   1084         End
   1085 ------------------------------------------------------------------
   1086     \x{a0}\x20!
   1087  0: \x{a0} !
   1088 
   1089 /A/utf
   1090   \x{ff000041}
   1091 ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
   1092   \x{7f000041}
   1093 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
   1094 
   1095 /(*UTF8)abc/never_utf
   1096 Failed: error 174 at offset 7: using UTF is disabled by the application
   1097 
   1098 /abc/utf,never_utf
   1099 Failed: error 174 at offset 0: using UTF is disabled by the application
   1100 
   1101 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
   1102 ------------------------------------------------------------------
   1103         Bra
   1104      /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
   1105         Ket
   1106         End
   1107 ------------------------------------------------------------------
   1108 Capturing subpattern count = 0
   1109 Options: caseless utf
   1110 First code unit = 'A' (caseless)
   1111 Subject length lower bound = 5
   1112 
   1113 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
   1114 ------------------------------------------------------------------
   1115         Bra
   1116         A\x{391}\x{10427}\x{ff3a}\x{1fb0}
   1117         Ket
   1118         End
   1119 ------------------------------------------------------------------
   1120 Capturing subpattern count = 0
   1121 Options: utf
   1122 First code unit = 'A'
   1123 Last code unit = \xb0
   1124 Subject length lower bound = 5
   1125 
   1126 /AB\x{1fb0}/IB,utf
   1127 ------------------------------------------------------------------
   1128         Bra
   1129         AB\x{1fb0}
   1130         Ket
   1131         End
   1132 ------------------------------------------------------------------
   1133 Capturing subpattern count = 0
   1134 Options: utf
   1135 First code unit = 'A'
   1136 Last code unit = \xb0
   1137 Subject length lower bound = 3
   1138 
   1139 /AB\x{1fb0}/IBi,utf
   1140 ------------------------------------------------------------------
   1141         Bra
   1142      /i AB\x{1fb0}
   1143         Ket
   1144         End
   1145 ------------------------------------------------------------------
   1146 Capturing subpattern count = 0
   1147 Options: caseless utf
   1148 First code unit = 'A' (caseless)
   1149 Last code unit = 'B' (caseless)
   1150 Subject length lower bound = 3
   1151 
   1152 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
   1153 Capturing subpattern count = 0
   1154 Options: caseless utf
   1155 Starting code units: \xd0 \xd1 
   1156 Subject length lower bound = 17
   1157     \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
   1158  0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
   1159     \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
   1160  0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
   1161 
   1162 /[]/Bi,utf
   1163 ------------------------------------------------------------------
   1164         Bra
   1165      /i \x{2c65}
   1166         Ket
   1167         End
   1168 ------------------------------------------------------------------
   1169 
   1170 /[^]/Bi,utf
   1171 ------------------------------------------------------------------
   1172         Bra
   1173      /i [^\x{2c65}]
   1174         Ket
   1175         End
   1176 ------------------------------------------------------------------
   1177 
   1178 /\h/I
   1179 Capturing subpattern count = 0
   1180 Starting code units: \x09 \x20 \xa0 
   1181 Subject length lower bound = 1
   1182 
   1183 /\v/I
   1184 Capturing subpattern count = 0
   1185 Starting code units: \x0a \x0b \x0c \x0d \x85 
   1186 Subject length lower bound = 1
   1187 
   1188 /\R/I
   1189 Capturing subpattern count = 0
   1190 Starting code units: \x0a \x0b \x0c \x0d \x85 
   1191 Subject length lower bound = 1
   1192 
   1193 /[[:blank:]]/B,ucp
   1194 ------------------------------------------------------------------
   1195         Bra
   1196         [\x09 \xa0]
   1197         Ket
   1198         End
   1199 ------------------------------------------------------------------
   1200 
   1201 /\x{212a}+/Ii,utf
   1202 Capturing subpattern count = 0
   1203 Options: caseless utf
   1204 Starting code units: K k \xe2 
   1205 Subject length lower bound = 1
   1206     KKkk\x{212a}
   1207  0: KKkk\x{212a}
   1208 
   1209 /s+/Ii,utf
   1210 Capturing subpattern count = 0
   1211 Options: caseless utf
   1212 Starting code units: S s \xc5 
   1213 Subject length lower bound = 1
   1214     SSss\x{17f}
   1215  0: SSss\x{17f}
   1216 
   1217 /\x{100}*A/IB,utf
   1218 ------------------------------------------------------------------
   1219         Bra
   1220         \x{100}*+
   1221         A
   1222         Ket
   1223         End
   1224 ------------------------------------------------------------------
   1225 Capturing subpattern count = 0
   1226 Options: utf
   1227 Starting code units: A \xc4 
   1228 Last code unit = 'A'
   1229 Subject length lower bound = 1
   1230     A
   1231  0: A
   1232 
   1233 /\x{100}*\d(?R)/IB,utf
   1234 ------------------------------------------------------------------
   1235         Bra
   1236         \x{100}*+
   1237         \d
   1238         Recurse
   1239         Ket
   1240         End
   1241 ------------------------------------------------------------------
   1242 Capturing subpattern count = 0
   1243 Options: utf
   1244 Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 
   1245 Subject length lower bound = 1
   1246 
   1247 /[Z\x{100}]/IB,utf
   1248 ------------------------------------------------------------------
   1249         Bra
   1250         [Z\x{100}]
   1251         Ket
   1252         End
   1253 ------------------------------------------------------------------
   1254 Capturing subpattern count = 0
   1255 Options: utf
   1256 Starting code units: Z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd 
   1257   \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc 
   1258   \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb 
   1259   \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa 
   1260   \xfb \xfc \xfd \xfe \xff 
   1261 Subject length lower bound = 1
   1262     Z\x{100}
   1263  0: Z
   1264     \x{100}
   1265  0: \x{100}
   1266     \x{100}Z
   1267  0: \x{100}
   1268 
   1269 /[z-\x{100}]/IB,utf
   1270 ------------------------------------------------------------------
   1271         Bra
   1272         [z-\xff\x{100}]
   1273         Ket
   1274         End
   1275 ------------------------------------------------------------------
   1276 Capturing subpattern count = 0
   1277 Options: utf
   1278 Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 
   1279   \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 
   1280   \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 
   1281   \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 
   1282   \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
   1283 Subject length lower bound = 1
   1284 
   1285 /[z\Qa-d]\E]/IB,utf
   1286 ------------------------------------------------------------------
   1287         Bra
   1288         [\-\]adz\x{100}]
   1289         Ket
   1290         End
   1291 ------------------------------------------------------------------
   1292 Capturing subpattern count = 0
   1293 Options: utf
   1294 Starting code units: - ] a d z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc 
   1295   \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb 
   1296   \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea 
   1297   \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 
   1298   \xfa \xfb \xfc \xfd \xfe \xff 
   1299 Subject length lower bound = 1
   1300     \x{100}
   1301  0: \x{100}
   1302      
   1303  0: \x{100}
   1304 
   1305 /[ab\x{100}]abc(xyz(?1))/IB,utf
   1306 ------------------------------------------------------------------
   1307         Bra
   1308         [ab\x{100}]
   1309         abc
   1310         CBra 1
   1311         xyz
   1312         Recurse
   1313         Ket
   1314         Ket
   1315         End
   1316 ------------------------------------------------------------------
   1317 Capturing subpattern count = 1
   1318 Options: utf
   1319 Starting code units: a b \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd 
   1320   \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc 
   1321   \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb 
   1322   \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa 
   1323   \xfb \xfc \xfd \xfe \xff 
   1324 Last code unit = 'z'
   1325 Subject length lower bound = 7
   1326 
   1327 /\x{100}*\s/IB,utf
   1328 ------------------------------------------------------------------
   1329         Bra
   1330         \x{100}*+
   1331         \s
   1332         Ket
   1333         End
   1334 ------------------------------------------------------------------
   1335 Capturing subpattern count = 0
   1336 Options: utf
   1337 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4 
   1338 Subject length lower bound = 1
   1339 
   1340 /\x{100}*\d/IB,utf
   1341 ------------------------------------------------------------------
   1342         Bra
   1343         \x{100}*+
   1344         \d
   1345         Ket
   1346         End
   1347 ------------------------------------------------------------------
   1348 Capturing subpattern count = 0
   1349 Options: utf
   1350 Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 
   1351 Subject length lower bound = 1
   1352 
   1353 /\x{100}*\w/IB,utf
   1354 ------------------------------------------------------------------
   1355         Bra
   1356         \x{100}*+
   1357         \w
   1358         Ket
   1359         End
   1360 ------------------------------------------------------------------
   1361 Capturing subpattern count = 0
   1362 Options: utf
   1363 Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P 
   1364   Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z 
   1365   \xc4 
   1366 Subject length lower bound = 1
   1367 
   1368 /\x{100}*\D/IB,utf
   1369 ------------------------------------------------------------------
   1370         Bra
   1371         \x{100}*
   1372         \D
   1373         Ket
   1374         End
   1375 ------------------------------------------------------------------
   1376 Capturing subpattern count = 0
   1377 Options: utf
   1378 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   1379   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   1380   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 
   1381   ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c 
   1382   d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 
   1383   \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 
   1384   \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 
   1385   \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef 
   1386   \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe 
   1387   \xff 
   1388 Subject length lower bound = 1
   1389 
   1390 /\x{100}*\S/IB,utf
   1391 ------------------------------------------------------------------
   1392         Bra
   1393         \x{100}*
   1394         \S
   1395         Ket
   1396         End
   1397 ------------------------------------------------------------------
   1398 Capturing subpattern count = 0
   1399 Options: utf
   1400 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f 
   1401   \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e 
   1402   \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C 
   1403   D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h 
   1404   i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 
   1405   \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 
   1406   \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 
   1407   \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 
   1408   \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
   1409 Subject length lower bound = 1
   1410 
   1411 /\x{100}*\W/IB,utf
   1412 ------------------------------------------------------------------
   1413         Bra
   1414         \x{100}*
   1415         \W
   1416         Ket
   1417         End
   1418 ------------------------------------------------------------------
   1419 Capturing subpattern count = 0
   1420 Options: utf
   1421 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   1422   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   1423   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > 
   1424   ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 
   1425   \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 
   1426   \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 
   1427   \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 
   1428   \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
   1429 Subject length lower bound = 1
   1430 
   1431 /[\x{105}-\x{109}]/IBi,utf
   1432 ------------------------------------------------------------------
   1433         Bra
   1434         [\x{104}-\x{109}]
   1435         Ket
   1436         End
   1437 ------------------------------------------------------------------
   1438 Capturing subpattern count = 0
   1439 Options: caseless utf
   1440 Starting code units: \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce 
   1441   \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd 
   1442   \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec 
   1443   \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb 
   1444   \xfc \xfd \xfe \xff 
   1445 Subject length lower bound = 1
   1446     \x{104}
   1447  0: \x{104}
   1448     \x{105}
   1449  0: \x{105}
   1450     \x{109}  
   1451  0: \x{109}
   1452 \= Expect no match
   1453     \x{100}
   1454 No match
   1455     \x{10a} 
   1456 No match
   1457     
   1458 /[z-\x{100}]/IBi,utf
   1459 ------------------------------------------------------------------
   1460         Bra
   1461         [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
   1462         Ket
   1463         End
   1464 ------------------------------------------------------------------
   1465 Capturing subpattern count = 0
   1466 Options: caseless utf
   1467 Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 
   1468   \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 
   1469   \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 
   1470   \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 
   1471   \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
   1472 Subject length lower bound = 1
   1473     Z
   1474  0: Z
   1475     z
   1476  0: z
   1477     \x{39c}
   1478  0: \x{39c}
   1479     \x{178}
   1480  0: \x{178}
   1481     |
   1482  0: |
   1483     \x{80}
   1484  0: \x{80}
   1485     \x{ff}
   1486  0: \x{ff}
   1487     \x{100}
   1488  0: \x{100}
   1489     \x{101} 
   1490  0: \x{101}
   1491 \= Expect no match
   1492     \x{102}
   1493 No match
   1494     Y
   1495 No match
   1496     y           
   1497 No match
   1498 
   1499 /[z-\x{100}]/IBi,utf
   1500 ------------------------------------------------------------------
   1501         Bra
   1502         [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
   1503         Ket
   1504         End
   1505 ------------------------------------------------------------------
   1506 Capturing subpattern count = 0
   1507 Options: caseless utf
   1508 Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 
   1509   \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 
   1510   \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 
   1511   \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 
   1512   \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff 
   1513 Subject length lower bound = 1
   1514 
   1515 /\x{3a3}B/IBi,utf
   1516 ------------------------------------------------------------------
   1517         Bra
   1518         clist 03a3 03c2 03c3
   1519      /i B
   1520         Ket
   1521         End
   1522 ------------------------------------------------------------------
   1523 Capturing subpattern count = 0
   1524 Options: caseless utf
   1525 Starting code units: \xce \xcf 
   1526 Last code unit = 'B' (caseless)
   1527 Subject length lower bound = 2
   1528 
   1529 /abc/utf,replace=
   1530     abc
   1531 Failed: error -3: UTF-8 error: 1 byte missing at end
   1532 
   1533 /(?<=(a)(?-1))x/I,utf
   1534 Capturing subpattern count = 1
   1535 Max lookbehind = 2
   1536 Options: utf
   1537 First code unit = 'x'
   1538 Subject length lower bound = 1
   1539     a\x80zx\=offset=3
   1540 Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
   1541 
   1542 /[\W\p{Any}]/B
   1543 ------------------------------------------------------------------
   1544         Bra
   1545         [\x00-/:-@[-^`{-\xff\p{Any}]
   1546         Ket
   1547         End
   1548 ------------------------------------------------------------------
   1549     abc
   1550  0: a
   1551     123 
   1552  0: 1
   1553 
   1554 /[\W\pL]/B
   1555 ------------------------------------------------------------------
   1556         Bra
   1557         [\x00-/:-@[-^`{-\xff\p{L}]
   1558         Ket
   1559         End
   1560 ------------------------------------------------------------------
   1561     abc
   1562  0: a
   1563 \= Expect no match
   1564     123     
   1565 No match
   1566 
   1567 /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':)/utf
   1568 Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
   1569 
   1570 /[\s[:^ascii:]]/B,ucp
   1571 ------------------------------------------------------------------
   1572         Bra
   1573         [\x80-\xff\p{Xsp}]
   1574         Ket
   1575         End
   1576 ------------------------------------------------------------------
   1577 
   1578 # A special extra option allows excaped surrogate code points in 8-bit mode,
   1579 # but subjects containing them must not be UTF-checked.
   1580 
   1581 /\x{d800}/I,utf,allow_surrogate_escapes
   1582 Capturing subpattern count = 0
   1583 Options: utf
   1584 Extra options: allow_surrogate_escapes
   1585 First code unit = \xed
   1586 Last code unit = \x80
   1587 Subject length lower bound = 1
   1588     \x{d800}\=no_utf_check
   1589  0: \x{d800}
   1590 
   1591 /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
   1592     \x{dfff}\x{df01}\=no_utf_check
   1593  0: \x{dfff}\x{df01}
   1594     
   1595 # This has different starting code units in 8-bit mode. 
   1596 
   1597 /^[^ab]/IB,utf
   1598 ------------------------------------------------------------------
   1599         Bra
   1600         ^
   1601         [\x00-`c-\xff] (neg)
   1602         Ket
   1603         End
   1604 ------------------------------------------------------------------
   1605 Capturing subpattern count = 0
   1606 Compile options: utf
   1607 Overall options: anchored utf
   1608 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a 
   1609   \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 
   1610   \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 
   1611   5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y 
   1612   Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 
   1613   \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 
   1614   \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf 
   1615   \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 
   1616   \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 
   1617   \xfe \xff 
   1618 Subject length lower bound = 1
   1619     c
   1620  0: c
   1621     \x{ff}
   1622  0: \x{ff}
   1623     \x{100}
   1624  0: \x{100}
   1625 \= Expect no match
   1626     aaa
   1627 No match
   1628 
   1629 # End of testinput10
   1630