Home | History | Annotate | Download | only in testdata
      1 # This set of tests is for UTF-16 and UTF-32 support, including Unicode
      2 # properties. It is relevant only to the 16-bit and 32-bit libraries. The
      3 # output is different for each library, so there are separate output files.
      4 
      5 /xxx/IB,utf,no_utf_check
      6 
      7 /abc/utf
      8     ]
      9 
     10 # Check maximum character size 
     11 
     12 /\x{ffff}/IB,utf
     13 
     14 /\x{10000}/IB,utf
     15 
     16 /\x{100}/IB,utf
     17 
     18 /\x{1000}/IB,utf
     19 
     20 /\x{10000}/IB,utf
     21 
     22 /\x{100000}/IB,utf
     23 
     24 /\x{10ffff}/IB,utf
     25 
     26 /[\x{ff}]/IB,utf
     27 
     28 /[\x{100}]/IB,utf
     29 
     30 /\x80/IB,utf
     31 
     32 /\xff/IB,utf
     33 
     34 /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
     35     \x{D55c}\x{ad6d}\x{C5B4}
     36 
     37 /\x{65e5}\x{672c}\x{8a9e}/IB,utf
     38     \x{65e5}\x{672c}\x{8a9e}
     39 
     40 /\x{80}/IB,utf
     41 
     42 /\x{084}/IB,utf
     43 
     44 /\x{104}/IB,utf
     45 
     46 /\x{861}/IB,utf
     47 
     48 /\x{212ab}/IB,utf
     49 
     50 /[^ab\xC0-\xF0]/IB,utf
     51     \x{f1}
     52     \x{bf}
     53     \x{100}
     54     \x{1000}
     55 \= Expect no match
     56     \x{c0}
     57     \x{f0}
     58 
     59 /Ā{3,4}/IB,utf
     60   \x{100}\x{100}\x{100}\x{100\x{100}
     61 
     62 /(\x{100}+|x)/IB,utf
     63 
     64 /(\x{100}*a|x)/IB,utf
     65 
     66 /(\x{100}{0,2}a|x)/IB,utf
     67 
     68 /(\x{100}{1,2}a|x)/IB,utf
     69 
     70 /\x{100}/IB,utf
     71 
     72 /a\x{100}\x{101}*/IB,utf
     73 
     74 /a\x{100}\x{101}+/IB,utf
     75 
     76 /[^\x{c4}]/IB
     77 
     78 /[\x{100}]/IB,utf
     79     \x{100}
     80     Z\x{100}
     81     \x{100}Z
     82 
     83 /[\xff]/IB,utf
     84     >\x{ff}<
     85 
     86 /[^\xff]/IB,utf
     87 
     88 /\x{100}abc(xyz(?1))/IB,utf
     89 
     90 /\777/I,utf
     91   \x{1ff}
     92   \777
     93 
     94 /\x{100}+\x{200}/IB,utf
     95 
     96 /\x{100}+X/IB,utf
     97 
     98 /^[\QĀ\E-\QŐ\E/B,utf
     99 
    100 /X/utf
    101     XX\x{d800}\=no_utf_check
    102     XX\x{da00}\=no_utf_check
    103     XX\x{dc00}\=no_utf_check
    104     XX\x{de00}\=no_utf_check
    105     XX\x{dfff}\=no_utf_check
    106 \= Expect UTF error
    107     XX\x{d800}
    108     XX\x{da00}
    109     XX\x{dc00}
    110     XX\x{de00}
    111     XX\x{dfff}
    112     XX\x{110000}
    113     XX\x{d800}\x{1234}
    114 \= Expect no match
    115     XX\x{d800}\=offset=3
    116     
    117 /(?<=.)X/utf
    118     XX\x{d800}\=offset=3
    119 
    120 /(*UTF16)\x{11234}/
    121   abcd\x{11234}pqr
    122 
    123 /(*UTF)\x{11234}/I
    124   abcd\x{11234}pqr
    125 
    126 /(*UTF-32)\x{11234}/
    127   abcd\x{11234}pqr
    128 
    129 /(*UTF-32)\x{112}/
    130   abcd\x{11234}pqr
    131 
    132 /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
    133 
    134 /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
    135 
    136 /\h/I,utf
    137     ABC\x{09}
    138     ABC\x{20}
    139     ABC\x{a0}
    140     ABC\x{1680}
    141     ABC\x{180e}
    142     ABC\x{2000}
    143     ABC\x{202f}
    144     ABC\x{205f}
    145     ABC\x{3000}
    146 
    147 /\v/I,utf
    148     ABC\x{0a}
    149     ABC\x{0b}
    150     ABC\x{0c}
    151     ABC\x{0d}
    152     ABC\x{85}
    153     ABC\x{2028}
    154 
    155 /\h*A/I,utf
    156     CDBABC
    157     \x{2000}ABC
    158 
    159 /\R*A/I,bsr=unicode,utf
    160     CDBABC
    161     \x{2028}A
    162 
    163 /\v+A/I,utf
    164 
    165 /\s?xxx\s/I,utf
    166 
    167 /\sxxx\s/I,utf,tables=2
    168     AB\x{85}xxx\x{a0}XYZ
    169     AB\x{a0}xxx\x{85}XYZ
    170 
    171 /\S \S/I,utf,tables=2
    172     \x{a2} \x{84}
    173     A Z
    174 
    175 /a+/utf
    176     a\x{123}aa\=offset=1
    177     a\x{123}aa\=offset=2
    178     a\x{123}aa\=offset=3
    179 \= Expect no match
    180     a\x{123}aa\=offset=4
    181 \= Expect bad offset error     
    182     a\x{123}aa\=offset=5
    183     a\x{123}aa\=offset=6
    184 
    185 /\x{1234}+/Ii,utf
    186 
    187 /\x{1234}+?/Ii,utf
    188 
    189 /\x{1234}++/Ii,utf
    190 
    191 /\x{1234}{2}/Ii,utf
    192 
    193 /[^\x{c4}]/IB,utf
    194 
    195 /X+\x{200}/IB,utf
    196 
    197 /\R/I,utf
    198 
    199 # Check bad offset 
    200 
    201 /a/utf
    202 \= Expect bad UTF-16 offset, or no match in 32-bit
    203     \x{10000}\=offset=1
    204     \x{10000}ab\=offset=1
    205 \= Expect 16-bit match, 32-bit no match
    206     \x{10000}ab\=offset=2
    207 \= Expect no match     
    208     \x{10000}ab\=offset=3
    209 \= Expect no match in 16-bit, bad offset in 32-bit    
    210     \x{10000}ab\=offset=4
    211 \= Expect bad offset     
    212     \x{10000}ab\=offset=5
    213 
    214 //utf
    215 
    216 /\w+\x{C4}/B,utf
    217     a\x{C4}\x{C4}
    218 
    219 /\w+\x{C4}/B,utf,tables=2
    220     a\x{C4}\x{C4}
    221     
    222 /\W+\x{C4}/B,utf
    223     !\x{C4}
    224  
    225 /\W+\x{C4}/B,utf,tables=2
    226     !\x{C4}
    227 
    228 /\W+\x{A1}/B,utf
    229     !\x{A1}
    230  
    231 /\W+\x{A1}/B,utf,tables=2
    232     !\x{A1}
    233 
    234 /X\s+\x{A0}/B,utf
    235     X\x20\x{A0}\x{A0}
    236 
    237 /X\s+\x{A0}/B,utf,tables=2
    238     X\x20\x{A0}\x{A0}
    239 
    240 /\S+\x{A0}/B,utf
    241     X\x{A0}\x{A0}
    242 
    243 /\S+\x{A0}/B,utf,tables=2
    244     X\x{A0}\x{A0}
    245 
    246 /\x{a0}+\s!/B,utf
    247     \x{a0}\x20!
    248 
    249 /\x{a0}+\s!/B,utf,tables=2
    250     \x{a0}\x20!
    251 
    252 /(*UTF)abc/never_utf
    253 
    254 /abc/utf,never_utf
    255 
    256 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
    257 
    258 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
    259 
    260 /AB\x{1fb0}/IB,utf
    261 
    262 /AB\x{1fb0}/IBi,utf
    263 
    264 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
    265     \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
    266     \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
    267 
    268 /[ⱥ]/Bi,utf
    269 
    270 /[^ⱥ]/Bi,utf
    271 
    272 /[[:blank:]]/B,ucp
    273 
    274 /\x{212a}+/Ii,utf
    275     KKkk\x{212a}
    276 
    277 /s+/Ii,utf
    278     SSss\x{17f}
    279 
    280 # Non-UTF characters should give errors in both 16-bit and 32-bit modes.
    281 
    282 /\x{110000}/utf
    283 
    284 /\o{4200000}/utf
    285 
    286 /\x{100}*A/IB,utf
    287     A
    288 
    289 /\x{100}*\d(?R)/IB,utf
    290 
    291 /[Z\x{100}]/IB,utf
    292     Z\x{100}
    293     \x{100}
    294     \x{100}Z
    295 
    296 /[z-\x{100}]/IB,utf
    297 
    298 /[z\Qa-d]\E]/IB,utf
    299     \x{100}
    300      
    301 
    302 /[ab\x{100}]abc(xyz(?1))/IB,utf
    303 
    304 /\x{100}*\s/IB,utf
    305 
    306 /\x{100}*\d/IB,utf
    307 
    308 /\x{100}*\w/IB,utf
    309 
    310 /\x{100}*\D/IB,utf
    311 
    312 /\x{100}*\S/IB,utf
    313 
    314 /\x{100}*\W/IB,utf
    315 
    316 /[\x{105}-\x{109}]/IBi,utf
    317     \x{104}
    318     \x{105}
    319     \x{109}  
    320 \= Expect no match
    321     \x{100}
    322     \x{10a} 
    323     
    324 /[z-\x{100}]/IBi,utf
    325     Z
    326     z
    327     \x{39c}
    328     \x{178}
    329     |
    330     \x{80}
    331     \x{ff}
    332     \x{100}
    333     \x{101} 
    334 \= Expect no match
    335     \x{102}
    336     Y
    337     y           
    338 
    339 /[z-\x{100}]/IBi,utf
    340 
    341 /\x{3a3}B/IBi,utf
    342 
    343 /./utf
    344     \x{110000}
    345 
    346 # End of testinput12
    347