Home | History | Annotate | Download | only in testdata
      1 /-- This set of tests is for UTF-8 support but not Unicode property support,
      2     and is relevant only to the 8-bit library. --/
      3 
      4 < forbid W
      5 
      6 /X(\C{3})/8
      7     X\x{1234}
      8 
      9 /X(\C{4})/8
     10     X\x{1234}YZ
     11     
     12 /X\C*/8
     13     XYZabcdce
     14     
     15 /X\C*?/8
     16     XYZabcde
     17     
     18 /X\C{3,5}/8
     19     Xabcdefg   
     20     X\x{1234} 
     21     X\x{1234}YZ
     22     X\x{1234}\x{512}  
     23     X\x{1234}\x{512}YZ
     24 
     25 /X\C{3,5}?/8
     26     Xabcdefg   
     27     X\x{1234} 
     28     X\x{1234}YZ
     29     X\x{1234}\x{512}  
     30 
     31 /a\Cb/8
     32     aXb
     33     a\nb
     34     
     35 /a\C\Cb/8 
     36     a\x{100}b 
     37 
     38 /ab\Cde/8
     39     abXde
     40 
     41 /a\C\Cb/8 
     42     a\x{100}b
     43     ** Failers 
     44     a\x{12257}b
     45 
     46 /[]/8
     47 
     48 //8
     49 
     50 /xxx/8
     51 
     52 /xxx/8?DZSSO
     53 
     54 /badutf/8
     55     \xdf
     56     \xef
     57     \xef\x80
     58     \xf7
     59     \xf7\x80
     60     \xf7\x80\x80
     61     \xfb
     62     \xfb\x80
     63     \xfb\x80\x80
     64     \xfb\x80\x80\x80
     65     \xfd
     66     \xfd\x80
     67     \xfd\x80\x80
     68     \xfd\x80\x80\x80
     69     \xfd\x80\x80\x80\x80
     70     \xdf\x7f
     71     \xef\x7f\x80
     72     \xef\x80\x7f
     73     \xf7\x7f\x80\x80
     74     \xf7\x80\x7f\x80
     75     \xf7\x80\x80\x7f
     76     \xfb\x7f\x80\x80\x80
     77     \xfb\x80\x7f\x80\x80
     78     \xfb\x80\x80\x7f\x80
     79     \xfb\x80\x80\x80\x7f
     80     \xfd\x7f\x80\x80\x80\x80
     81     \xfd\x80\x7f\x80\x80\x80
     82     \xfd\x80\x80\x7f\x80\x80
     83     \xfd\x80\x80\x80\x7f\x80
     84     \xfd\x80\x80\x80\x80\x7f
     85     \xed\xa0\x80
     86     \xc0\x8f
     87     \xe0\x80\x8f
     88     \xf0\x80\x80\x8f
     89     \xf8\x80\x80\x80\x8f
     90     \xfc\x80\x80\x80\x80\x8f
     91     \x80
     92     \xfe
     93     \xff
     94 
     95 /badutf/8
     96     \xfb\x80\x80\x80\x80
     97     \xfd\x80\x80\x80\x80\x80
     98     \xf7\xbf\xbf\xbf
     99 
    100 /shortutf/8
    101     \P\P\xdf
    102     \P\P\xef
    103     \P\P\xef\x80
    104     \P\P\xf7
    105     \P\P\xf7\x80
    106     \P\P\xf7\x80\x80
    107     \P\P\xfb
    108     \P\P\xfb\x80
    109     \P\P\xfb\x80\x80
    110     \P\P\xfb\x80\x80\x80
    111     \P\P\xfd
    112     \P\P\xfd\x80
    113     \P\P\xfd\x80\x80
    114     \P\P\xfd\x80\x80\x80
    115     \P\P\xfd\x80\x80\x80\x80
    116 
    117 /anything/8
    118     \xc0\x80
    119     \xc1\x8f 
    120     \xe0\x9f\x80
    121     \xf0\x8f\x80\x80 
    122     \xf8\x87\x80\x80\x80  
    123     \xfc\x83\x80\x80\x80\x80
    124     \xfe\x80\x80\x80\x80\x80  
    125     \xff\x80\x80\x80\x80\x80  
    126     \xc3\x8f
    127     \xe0\xaf\x80
    128     \xe1\x80\x80
    129     \xf0\x9f\x80\x80 
    130     \xf1\x8f\x80\x80 
    131     \xf8\x88\x80\x80\x80  
    132     \xf9\x87\x80\x80\x80  
    133     \xfc\x84\x80\x80\x80\x80
    134     \xfd\x83\x80\x80\x80\x80
    135     \?\xf8\x88\x80\x80\x80  
    136     \?\xf9\x87\x80\x80\x80  
    137     \?\xfc\x84\x80\x80\x80\x80
    138     \?\xfd\x83\x80\x80\x80\x80
    139 
    140 /\x{100}/8DZ
    141 
    142 /\x{1000}/8DZ
    143 
    144 /\x{10000}/8DZ
    145 
    146 /\x{100000}/8DZ
    147 
    148 /\x{10ffff}/8DZ
    149 
    150 /[\x{ff}]/8DZ
    151 
    152 /[\x{100}]/8DZ
    153 
    154 /\x80/8DZ
    155 
    156 /\xff/8DZ
    157 
    158 /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
    159     \x{D55c}\x{ad6d}\x{C5B4} 
    160 
    161 /\x{65e5}\x{672c}\x{8a9e}/DZ8
    162     \x{65e5}\x{672c}\x{8a9e}
    163 
    164 /\x{80}/DZ8
    165 
    166 /\x{084}/DZ8
    167 
    168 /\x{104}/DZ8
    169 
    170 /\x{861}/DZ8
    171 
    172 /\x{212ab}/DZ8
    173 
    174 /-- This one is here not because it's different to Perl, but because the way
    175 the captured single-byte is displayed. (In Perl it becomes a character, and you
    176 can't tell the difference.) --/
    177     
    178 /X(\C)(.*)/8
    179     X\x{1234}
    180     X\nabc 
    181 
    182 /-- This one is here because Perl gives out a grumbly error message (quite 
    183 correctly, but that messes up comparisons). --/
    184     
    185 /a\Cb/8
    186     *** Failers 
    187     a\x{100}b 
    188     
    189 /[^ab\xC0-\xF0]/8SDZ
    190     \x{f1}
    191     \x{bf}
    192     \x{100}
    193     \x{1000}   
    194     *** Failers
    195     \x{c0} 
    196     \x{f0} 
    197 
    198 /{3,4}/8SDZ
    199   \x{100}\x{100}\x{100}\x{100\x{100}
    200 
    201 /(\x{100}+|x)/8SDZ
    202 
    203 /(\x{100}*a|x)/8SDZ
    204 
    205 /(\x{100}{0,2}a|x)/8SDZ
    206 
    207 /(\x{100}{1,2}a|x)/8SDZ
    208 
    209 /\x{100}/8DZ
    210 
    211 /a\x{100}\x{101}*/8DZ
    212 
    213 /a\x{100}\x{101}+/8DZ
    214 
    215 /[^\x{c4}]/DZ
    216 
    217 /[\x{100}]/8DZ
    218     \x{100}
    219     Z\x{100}
    220     \x{100}Z
    221     *** Failers 
    222 
    223 /[\xff]/DZ8
    224     >\x{ff}<
    225 
    226 /[^\xff]/8DZ
    227 
    228 /\x{100}abc(xyz(?1))/8DZ
    229 
    230 /a\x{1234}b/P8
    231     a\x{1234}b
    232 
    233 /\777/8I
    234   \x{1ff}
    235   \777 
    236   
    237 /\x{100}+\x{200}/8DZ
    238 
    239 /\x{100}+X/8DZ
    240 
    241 /^[\Q\E-\Q\E/BZ8
    242 
    243 /-- This tests the stricter UTF-8 check according to RFC 3629. --/ 
    244     
    245 /X/8
    246     \x{d800}
    247     \x{d800}\?
    248     \x{da00}
    249     \x{da00}\?
    250     \x{dfff}
    251     \x{dfff}\?
    252     \x{110000}    
    253     \x{110000}\?    
    254     \x{2000000} 
    255     \x{2000000}\? 
    256     \x{7fffffff} 
    257     \x{7fffffff}\? 
    258 
    259 /(*UTF8)\x{1234}/
    260   abcd\x{1234}pqr
    261 
    262 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
    263 
    264 /\h/SI8
    265     ABC\x{09}
    266     ABC\x{20}
    267     ABC\x{a0}
    268     ABC\x{1680}
    269     ABC\x{180e}
    270     ABC\x{2000}
    271     ABC\x{202f} 
    272     ABC\x{205f} 
    273     ABC\x{3000} 
    274 
    275 /\v/SI8
    276     ABC\x{0a}
    277     ABC\x{0b}
    278     ABC\x{0c}
    279     ABC\x{0d}
    280     ABC\x{85}
    281     ABC\x{2028}
    282 
    283 /\h*A/SI8
    284     CDBABC
    285     
    286 /\v+A/SI8
    287 
    288 /\s?xxx\s/8SI
    289 
    290 /\sxxx\s/I8ST1
    291     AB\x{85}xxx\x{a0}XYZ
    292     AB\x{a0}xxx\x{85}XYZ
    293 
    294 /\S \S/I8ST1
    295     \x{a2} \x{84} 
    296     A Z 
    297 
    298 /a+/8
    299     a\x{123}aa\>1
    300     a\x{123}aa\>2
    301     a\x{123}aa\>3
    302     a\x{123}aa\>4
    303     a\x{123}aa\>5
    304     a\x{123}aa\>6
    305 
    306 /\x{1234}+/iS8I
    307 
    308 /\x{1234}+?/iS8I
    309 
    310 /\x{1234}++/iS8I
    311 
    312 /\x{1234}{2}/iS8I
    313 
    314 /[^\x{c4}]/8DZ
    315 
    316 /X+\x{200}/8DZ
    317 
    318 /\R/SI8
    319 
    320 /\777/8DZ
    321 
    322 /\w+\x{C4}/8BZ
    323     a\x{C4}\x{C4}
    324 
    325 /\w+\x{C4}/8BZT1
    326     a\x{C4}\x{C4}
    327     
    328 /\W+\x{C4}/8BZ
    329     !\x{C4}
    330  
    331 /\W+\x{C4}/8BZT1
    332     !\x{C4}
    333 
    334 /\W+\x{A1}/8BZ
    335     !\x{A1}
    336  
    337 /\W+\x{A1}/8BZT1
    338     !\x{A1}
    339 
    340 /X\s+\x{A0}/8BZ
    341     X\x20\x{A0}\x{A0}
    342 
    343 /X\s+\x{A0}/8BZT1
    344     X\x20\x{A0}\x{A0}
    345 
    346 /\S+\x{A0}/8BZ
    347     X\x{A0}\x{A0}
    348 
    349 /\S+\x{A0}/8BZT1
    350     X\x{A0}\x{A0}
    351 
    352 /\x{a0}+\s!/8BZ
    353     \x{a0}\x20!
    354 
    355 /\x{a0}+\s!/8BZT1
    356     \x{a0}\x20!
    357 
    358 /A/8
    359   \x{ff000041}
    360   \x{7f000041} 
    361 
    362 /(*UTF8)abc/9
    363 
    364 /abc/89
    365 
    366 /-- End of testinput15 --/
    367