Home | History | Annotate | Download | only in testdata
      1 /-- These tests for Unicode property support test PCRE's API and show some of
      2     the compiled code. They are not Perl-compatible. --/
      3 
      4 /[\p{L}]/DZ
      5 
      6 /[\p{^L}]/DZ
      7 
      8 /[\P{L}]/DZ
      9 
     10 /[\P{^L}]/DZ
     11 
     12 /[abc\p{L}\x{0660}]/8DZ
     13 
     14 /[\p{Nd}]/8DZ
     15     1234
     16 
     17 /[\p{Nd}+-]+/8DZ
     18     1234
     19     12-34
     20     12+\x{661}-34  
     21     ** Failers
     22     abcd  
     23 
     24 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
     25 
     26 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ
     27 
     28 /AB\x{1fb0}/8DZ
     29 
     30 /AB\x{1fb0}/8DZi
     31 
     32 /[\x{105}-\x{109}]/8iDZ
     33     \x{104}
     34     \x{105}
     35     \x{109}  
     36     ** Failers
     37     \x{100}
     38     \x{10a} 
     39     
     40 /[z-\x{100}]/8iDZ 
     41     Z
     42     z
     43     \x{39c}
     44     \x{178}
     45     |
     46     \x{80}
     47     \x{ff}
     48     \x{100}
     49     \x{101} 
     50     ** Failers
     51     \x{102}
     52     Y
     53     y           
     54 
     55 /[z-\x{100}]/8DZi
     56 
     57 /(?:[\PPa*]*){8,}/
     58 
     59 /[\P{Any}]/BZ
     60 
     61 /[\P{Any}\E]/BZ
     62 
     63 /(\P{Yi}+\277)/
     64 
     65 /(\P{Yi}+\277)?/
     66 
     67 /(?<=\P{Yi}{3}A)X/
     68 
     69 /\p{Yi}+(\P{Yi}+)(?1)/
     70 
     71 /(\P{Yi}{2}\277)?/
     72 
     73 /[\P{Yi}A]/
     74 
     75 /[\P{Yi}\P{Yi}\P{Yi}A]/
     76 
     77 /[^\P{Yi}A]/
     78 
     79 /[^\P{Yi}\P{Yi}\P{Yi}A]/
     80 
     81 /(\P{Yi}*\277)*/
     82 
     83 /(\P{Yi}*?\277)*/
     84 
     85 /(\p{Yi}*+\277)*/
     86 
     87 /(\P{Yi}?\277)*/
     88 
     89 /(\P{Yi}??\277)*/
     90 
     91 /(\p{Yi}?+\277)*/
     92 
     93 /(\P{Yi}{0,3}\277)*/
     94 
     95 /(\P{Yi}{0,3}?\277)*/
     96 
     97 /(\p{Yi}{0,3}+\277)*/
     98 
     99 /\p{Zl}{2,3}+/8BZ
    100     \xe2\x80\xa8\xe2\x80\xa8
    101     \x{2028}\x{2028}\x{2028}
    102     
    103 /\p{Zl}/8BZ
    104 
    105 /\p{Lu}{3}+/8BZ
    106 
    107 /\pL{2}+/8BZ
    108 
    109 /\p{Cc}{2}+/8BZ
    110 
    111 /^\p{Cs}/8
    112     \?\x{dfff}
    113     ** Failers
    114     \x{09f} 
    115   
    116 /^\p{Sc}+/8
    117     $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
    118     \x{9f2}
    119     ** Failers
    120     X
    121     \x{2c2}
    122   
    123 /^\p{Zs}/8
    124     \ \
    125     \x{a0}
    126     \x{1680}
    127     \x{180e}
    128     \x{2000}
    129     \x{2001}     
    130     ** Failers
    131     \x{2028}
    132     \x{200d} 
    133   
    134 /-- These four are here rather than in test 6 because Perl has problems with
    135     the negative versions of the properties. --/
    136       
    137 /\p{^Lu}/8i
    138     1234
    139     ** Failers
    140     ABC 
    141 
    142 /\P{Lu}/8i
    143     1234
    144     ** Failers
    145     ABC 
    146 
    147 /\p{Ll}/8i 
    148     a
    149     Az
    150     ** Failers
    151     ABC   
    152 
    153 /\p{Lu}/8i
    154     A
    155     a\x{10a0}B 
    156     ** Failers 
    157     a
    158     \x{1d00}  
    159 
    160 /[\x{c0}\x{391}]/8i
    161     \x{c0}
    162     \x{e0} 
    163 
    164 /-- The next two are special cases where the lengths of the different cases of
    165 the same character differ. The first went wrong with heap frame storage; the
    166 second was broken in all cases. --/
    167 
    168 /^\x{023a}+?(\x{0130}+)/8i
    169   \x{023a}\x{2c65}\x{0130}
    170   
    171 /^\x{023a}+([^X])/8i
    172   \x{023a}\x{2c65}X
    173 
    174 /\x{c0}+\x{116}+/8i
    175     \x{c0}\x{e0}\x{116}\x{117}
    176 
    177 /[\x{c0}\x{116}]+/8i
    178     \x{c0}\x{e0}\x{116}\x{117}
    179 
    180 /(\x{de})\1/8i
    181     \x{de}\x{de}
    182     \x{de}\x{fe}
    183     \x{fe}\x{fe}
    184     \x{fe}\x{de}
    185 
    186 /^\x{c0}$/8i
    187     \x{c0}
    188     \x{e0} 
    189 
    190 /^\x{e0}$/8i
    191     \x{c0}
    192     \x{e0} 
    193 
    194 /-- The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
    195 will match it only with UCP support, because without that it has no notion
    196 of case for anything other than the ASCII letters. --/ 
    197 
    198 /((?i)[\x{c0}])/8
    199     \x{c0}
    200     \x{e0} 
    201 
    202 /(?i:[\x{c0}])/8
    203     \x{c0}
    204     \x{e0} 
    205 
    206 /-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8
    207     
    208 /^\X/8
    209     A
    210     A\x{300}BC 
    211     A\x{300}\x{301}\x{302}BC 
    212     *** Failers
    213     \x{300}  
    214     
    215 /-- These are PCRE's extra properties to help with Unicodizing \d etc. --/
    216 
    217 /^\p{Xan}/8
    218     ABCD
    219     1234
    220     \x{6ca}
    221     \x{a6c}
    222     \x{10a7}   
    223     ** Failers
    224     _ABC   
    225 
    226 /^\p{Xan}+/8
    227     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    228     ** Failers
    229     _ABC   
    230 
    231 /^\p{Xan}+?/8
    232     \x{6ca}\x{a6c}\x{10a7}_
    233 
    234 /^\p{Xan}*/8
    235     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    236     
    237 /^\p{Xan}{2,9}/8
    238     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    239     
    240 /^\p{Xan}{2,9}?/8
    241     \x{6ca}\x{a6c}\x{10a7}_
    242     
    243 /^[\p{Xan}]/8
    244     ABCD1234_
    245     1234abcd_
    246     \x{6ca}
    247     \x{a6c}
    248     \x{10a7}   
    249     ** Failers
    250     _ABC   
    251  
    252 /^[\p{Xan}]+/8
    253     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    254     ** Failers
    255     _ABC   
    256 
    257 /^>\p{Xsp}/8
    258     >\x{1680}\x{2028}\x{0b}
    259     >\x{a0} 
    260     ** Failers
    261     \x{0b} 
    262 
    263 /^>\p{Xsp}+/8
    264     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    265 
    266 /^>\p{Xsp}+?/8
    267     >\x{1680}\x{2028}\x{0b}
    268 
    269 /^>\p{Xsp}*/8
    270     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    271     
    272 /^>\p{Xsp}{2,9}/8
    273     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    274     
    275 /^>\p{Xsp}{2,9}?/8
    276     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    277     
    278 /^>[\p{Xsp}]/8
    279     >\x{2028}\x{0b}
    280  
    281 /^>[\p{Xsp}]+/8
    282     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    283 
    284 /^>\p{Xps}/8
    285     >\x{1680}\x{2028}\x{0b}
    286     >\x{a0} 
    287     ** Failers
    288     \x{0b} 
    289 
    290 /^>\p{Xps}+/8
    291     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    292 
    293 /^>\p{Xps}+?/8
    294     >\x{1680}\x{2028}\x{0b}
    295 
    296 /^>\p{Xps}*/8
    297     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    298     
    299 /^>\p{Xps}{2,9}/8
    300     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    301     
    302 /^>\p{Xps}{2,9}?/8
    303     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    304     
    305 /^>[\p{Xps}]/8
    306     >\x{2028}\x{0b}
    307  
    308 /^>[\p{Xps}]+/8
    309     > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
    310 
    311 /^\p{Xwd}/8
    312     ABCD
    313     1234
    314     \x{6ca}
    315     \x{a6c}
    316     \x{10a7}
    317     _ABC    
    318     ** Failers
    319     [] 
    320 
    321 /^\p{Xwd}+/8
    322     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    323 
    324 /^\p{Xwd}+?/8
    325     \x{6ca}\x{a6c}\x{10a7}_
    326 
    327 /^\p{Xwd}*/8
    328     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    329     
    330 /^\p{Xwd}{2,9}/8
    331     A_B12\x{6ca}\x{a6c}\x{10a7}
    332     
    333 /^\p{Xwd}{2,9}?/8
    334     \x{6ca}\x{a6c}\x{10a7}_
    335     
    336 /^[\p{Xwd}]/8
    337     ABCD1234_
    338     1234abcd_
    339     \x{6ca}
    340     \x{a6c}
    341     \x{10a7}   
    342     _ABC 
    343     ** Failers
    344     []   
    345  
    346 /^[\p{Xwd}]+/8
    347     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
    348 
    349 /-- A check not in UTF-8 mode --/
    350 
    351 /^[\p{Xwd}]+/
    352     ABCD1234_
    353     
    354 /-- Some negative checks --/
    355 
    356 /^[\P{Xwd}]+/8
    357     !.+\x{019}\x{35a}AB
    358 
    359 /^[\p{^Xwd}]+/8
    360     !.+\x{019}\x{35a}AB
    361 
    362 /[\D]/WBZ8
    363     1\x{3c8}2
    364 
    365 /[\d]/WBZ8
    366     >\x{6f4}<
    367 
    368 /[\S]/WBZ8
    369     \x{1680}\x{6f4}\x{1680}
    370 
    371 /[\s]/WBZ8
    372     >\x{1680}<
    373 
    374 /[\W]/WBZ8
    375     A\x{1712}B
    376 
    377 /[\w]/WBZ8
    378     >\x{1723}<
    379 
    380 /\D/WBZ8
    381     1\x{3c8}2
    382 
    383 /\d/WBZ8
    384     >\x{6f4}<
    385 
    386 /\S/WBZ8
    387     \x{1680}\x{6f4}\x{1680}
    388 
    389 /\s/WBZ8
    390     >\x{1680}>
    391 
    392 /\W/WBZ8
    393     A\x{1712}B
    394 
    395 /\w/WBZ8
    396     >\x{1723}<
    397 
    398 /[[:alpha:]]/WBZ
    399 
    400 /[[:lower:]]/WBZ
    401 
    402 /[[:upper:]]/WBZ
    403 
    404 /[[:alnum:]]/WBZ
    405 
    406 /[[:ascii:]]/WBZ
    407 
    408 /[[:blank:]]/WBZ
    409 
    410 /[[:cntrl:]]/WBZ
    411 
    412 /[[:digit:]]/WBZ
    413 
    414 /[[:graph:]]/WBZ
    415 
    416 /[[:print:]]/WBZ
    417 
    418 /[[:punct:]]/WBZ
    419 
    420 /[[:space:]]/WBZ
    421 
    422 /[[:word:]]/WBZ
    423 
    424 /[[:xdigit:]]/WBZ
    425 
    426 /-- Unicode properties for \b and \B --/
    427 
    428 /\b...\B/8W
    429     abc_
    430     \x{37e}abc\x{376} 
    431     \x{37e}\x{376}\x{371}\x{393}\x{394} 
    432     !\x{c0}++\x{c1}\x{c2} 
    433     !\x{c0}+++++ 
    434 
    435 /-- Without PCRE_UCP, non-ASCII always fail, even if < 256  --/
    436 
    437 /\b...\B/8
    438     abc_
    439     ** Failers 
    440     \x{37e}abc\x{376} 
    441     \x{37e}\x{376}\x{371}\x{393}\x{394} 
    442     !\x{c0}++\x{c1}\x{c2} 
    443     !\x{c0}+++++ 
    444 
    445 /-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties  --/
    446 
    447 /\b...\B/W
    448     abc_
    449     !\x{c0}++\x{c1}\x{c2} 
    450     !\x{c0}+++++ 
    451 
    452 /-- POSIX interface --/
    453 
    454 /\w/P
    455     +++\x{c2}
    456 
    457 /\w/WP
    458     +++\x{c2}
    459     
    460 /-- Some of these are silly, but they check various combinations --/
    461 
    462 /[[:^alpha:][:^cntrl:]]+/8WBZ
    463     123
    464     abc 
    465 
    466 /[[:^cntrl:][:^alpha:]]+/8WBZ
    467     123
    468     abc 
    469 
    470 /[[:alpha:]]+/8WBZ
    471     abc
    472 
    473 /[[:^alpha:]\S]+/8WBZ
    474     123
    475     abc 
    476 
    477 /[^\d]+/8WBZ
    478     abc123
    479     abc\x{123}
    480     \x{660}abc   
    481 
    482 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI
    483     \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
    484     \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
    485 
    486 /\p{Xps}*/SI
    487 
    488 /\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ
    489 
    490 /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ
    491 
    492 /\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ
    493 
    494 /\p{Han}+X\p{Greek}+\x{370}/BZ8
    495 
    496 /\p{Xan}+!\p{Xan}+A/BZ
    497 
    498 /\p{Xsp}+!\p{Xsp}\t/BZ
    499 
    500 /\p{Xps}+!\p{Xps}\t/BZ
    501 
    502 /\p{Xwd}+!\p{Xwd}_/BZ
    503 
    504 /A+\p{N}A+\dB+\p{N}*B+\d*/WBZ
    505 
    506 /-- End of testinput12 --/
    507