Home | History | Annotate | Download | only in testdata
      1 <?xml version="1.0" encoding="UTF-8"?>
      2 
      3 <!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved -->
      4 
      5 <!-- Test data file for string search  -->
      6 <!DOCTYPE stringsearch-tests [  <!-- Internal DTD describing the layout of the test cases in this file. -->
      7 <!ELEMENT stringsearch-tests (test-case+)>  <!-- Root element: one or more test cases. -->
      8 <!ATTLIST stringsearch-tests debug IDREF #IMPLIED >  <!-- Optional reference to the id of one test-case; presumably singles that case out for debugging (confirm against the test driver). -->
      9 <!ELEMENT test-case (pattern, pre?, m?, post?)>  <!-- Children appear in this fixed order; only pattern is mandatory. -->
     10 <!ATTLIST test-case 
     11           id ID #REQUIRED
     12           locale CDATA "en" 
     13           strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY" 
     14           norm (ON | OFF) "OFF"
     15           alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE"
     16           >  <!-- id is required and must be unique; every other attribute falls back to the default declared above when omitted. -->
     17 
     18 <!ELEMENT pattern (#PCDATA)>  <!-- The string to search for. -->
     19 <!ELEMENT pre  (#PCDATA)>  <!-- Target text preceding the expected match, if any. -->
     20 <!ELEMENT m    (#PCDATA)>  <!-- Target text expected to be matched; no-match cases such as test02 omit it. -->
     21 <!ELEMENT post (#PCDATA)>  <!-- Target text following the expected match, if any. -->
     22 ]>
     23 
     24 <stringsearch-tests>
     25   <!-- debug="test11"     (for copying into the above element)  -->
     26     
     27     <!-- Very simple match  -->
     28     <test-case id="test01" >
     29        <pattern>abc</pattern>
     30        <pre>xxx</pre><m>abc</m><post>yyy</post>
     31     </test-case>
     32     
     33     <!-- Very simple no-match  -->
     34     <test-case id="test02" >
     35        <pattern>abc</pattern>
     36        <pre>xxx</pre><post>yyy</post>
     37     </test-case>
     38 
     39     <!-- Match after several near-misses. -->
     40     <test-case id="test03" >
     41        <pattern>string</pattern>
     42        <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
     43     </test-case>
     44     
     45     <test-case id="test04" strength="PRIMARY" >
     46        <pattern>FUSS</pattern>
     47        <pre>abc</pre><m>fuss</m><post>sss</post>
     48     </test-case>
     49     
     50     <test-case id="test05" strength="PRIMARY" >
     51        <pattern>FUSS</pattern>
     52        <pre>abc</pre><m>fu</m><post>sss</post>
     53     </test-case>
     54 
     55   <test-case id="test05.5" strength="PRIMARY" >
     56     <pattern>fuss</pattern>
     57     <pre>a </pre>
     58     <m>fu</m>
     59     <post>ball table</post>
     60   </test-case>
     61 
     62   <test-case id="test06" strength="PRIMARY" >
     63       <pattern>fu</pattern>
     64        <pre>abc</pre><m>fuss</m><post>xyz</post>
     65     </test-case>
     66     
     67     <test-case id="test07" strength="SECONDARY" >
     68       <pattern>fu</pattern>
     69       <pre>abcfussxyz</pre>
     70     </test-case>
     71     
     72     <test-case id="test08" strength="PRIMARY" >
     73       <pattern>fus</pattern>
     74       <pre>abcfu</pre><post>xyz</post>
     75     </test-case>
     76     
     77     <!-- A good match following an initial match that failed because
     78          of not ending on a character boundary -->
     79     <test-case id="test09" strength="PRIMARY">
     80       <pattern>fus</pattern>
     81       <pre>fu  </pre><m>fus</m><post>sss</post>
     82     </test-case>
     83 
     84 
     85     <!-- Test cases from usrchdat.c  BREAKITERATOREXACT -->
     86 
     87     <test-case id="test10" strength="TERTIARY">
     88       <pattern>fox</pattern>
     89       <m>fox</m><post>y fox</post>
     90     </test-case>
     91 
     92     <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">
     93       <pattern>toe</pattern>
     94       <pre>This is a </pre><m>T</m><post>ne</post>
     95     </test-case>
     96     
     97     <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">
     98       <pattern>toe</pattern>
     99       <pre>This is a </pre><post>Tne</post>
    100     </test-case>
    101     
    102     <test-case id="test12" strength="TERTIARY">
    103       <pattern>e</pattern>
    104       <pre>tsting that  dos not match </pre><m>e</m><post></post>
    105     </test-case>
    106     
    107     <test-case id="test13" strength="PRIMARY" locale="fr">
    108       <pattern>e</pattern>
    109       <pre></pre><m></m><post></post>
    110     </test-case>
    111     
    112     <test-case id="test14" strength="PRIMARY" locale="fr">
    113       <pattern>O</pattern>
    114       <pre>C</pre><m>O\u0302</m><post>T</post>
    115     </test-case>
    116 
    117 
    118     <!-- Test cases from usrchdat.c  STRENGTH -->
    119 
    120 
    121     <test-case id="test15" strength="PRIMARY" locale="en">
    122       <pattern>fox</pattern>
    123       <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
    124     </test-case>
    125     
    126     <test-case id="test16" strength="PRIMARY" locale="fr">
    127       <pattern>peche</pattern>
    128       <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
    129     </test-case>
    130     
    131     <test-case id="test17" strength="PRIMARY" locale="fr">
    132       <pattern>peche</pattern>
    133       <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
    134     </test-case>
    135     
    136     <test-case id="test18" strength="PRIMARY" locale="fr">
    137       <pattern>peche</pattern>
    138       <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
    139     </test-case>
    140     
    141     <test-case id="test19" strength="PRIMARY" locale="fr">
    142       <pattern>peche</pattern>
    143       <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
    144     </test-case>
    145     
    146     <test-case id="test20" strength="PRIMARY" locale="es">
    147       <pattern>channel</pattern>
    148       <pre>A </pre><m>channel</m><post>, </post>
    149     </test-case>
    150     
    151     <test-case id="test21" strength="PRIMARY" locale="es">
    152       <pattern>channel</pattern>
    153       <pre>A </pre><m>CHANNEL</m><post>, </post>
    154     </test-case>
    155     
    156     <test-case id="test22" strength="PRIMARY" locale="es">
    157       <pattern>channel</pattern>
    158       <pre>A </pre><m>Channel</m><post>s, </post>
    159     </test-case>
    160     
    161     <test-case id="test23" strength="PRIMARY" locale="es">
    162       <pattern>channel</pattern>
    163       <pre>A </pre><m>channel</m><post>... </post>
    164     </test-case>
    165     
    166     <test-case id="test24" strength="TERTIARY" locale="en">
    167       <pattern>A\u0300</pattern>
    168       <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
    169     </test-case>
    170     
    171     <!-- TODO:  In the original test data, this test matched at IDENTICAL strength.
    172                 Doesn't seem right.  The characters are different.
    173                 -->
    174     <test-case id="test24a" strength="IDENTICAL" locale="en">
    175       <pattern>A\u0300</pattern>
    176       <pre>At IDENTICAL, shoud this match?  </pre><m>\u00c0</m><post></post>
    177     </test-case>
    178 
    179   <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
    180     <pattern>A\u0300</pattern>
    181     <pre>At IDENTICAL, shoud this match?  </pre>
    182     <m>\u00c0</m>
    183     <post></post>
    184   </test-case>
    185 
    186   <test-case id="test25" strength="SECONDARY" locale="en">
    187       <pattern></pattern>
    188       <pre>12</pre><m></m><post> </post>
    189     </test-case>
    190     
    191     <test-case id="test26" strength="SECONDARY" locale="en">
    192       <pattern>A</pattern>
    193       <pre>12</pre><m>a</m><post>...</post>
    194     </test-case>
    195 
    196 
    197     <!--  Test Cases from usrchdat.c,  VARIABLE -->
    198     <test-case id="test27" strength="TERTIARY" locale="en">
    199       <pattern>blackbird</pattern>
    200       <pre>black-bird </pre><m>blackbird</m><post>...</post>
    201     </test-case>
    202 
    203     <test-case id="test28" strength="TERTIARY" locale="en">
    204       <pattern>go</pattern>
    205       <pre> on</pre>
    206     </test-case>
    207 
    208     <!-- TODO:  this gives a U_ILLEGAL_ARGUMENT error when opening
    209                 the UStringSearch.  How did the original test run? -->
    210     <!--
    211     <test-case id="test29" strength="PRIMARY" locale="en">
    212       <pattern>  </pattern>
    213       <pre></pre><m></m><post>abc</post>
    214     </test-case>
    215     -->
    216 
    217     <test-case id="test30" strength="SECONDARY" locale="en">
    218       <pattern>abc</pattern>
    219       <pre>  a bc   ab c    a  bc     ab  c"</pre>
    220     </test-case>
    221 
    222     <test-case id="test31" strength="SECONDARY" locale="en">
    223       <pattern>abc</pattern>
    224       <pre>           ---------------</pre>
    225     </test-case>
    226 
    227 
    228     <!--  Normalization test cases from usrchdat.c  -->
    229     <test-case id="test32" strength="TERTIARY"  norm="ON">
    230       <pattern>a\u0325\u0300</pattern>
    231       <pre></pre><m>a\u0300\u0325</m>
    232     </test-case>
    233 
    234 
    235     <test-case id="test32a" strength="TERTIARY"  norm="OFF">
    236       <pattern>a\u0325\u0300</pattern>
    237       <pre>a\u0300\u0325</pre>
    238     </test-case>
    239 
    240 
    241     <!-- COMPOSITEBOUNDARIES from usrchdat.c
    242          Boundaries are not identical to original test data because
    243          of matching only full combining sequences
    244     -->
    245     <test-case id="test40" strength="TERTIARY">
    246       <pattern>A</pattern>
    247       <pre>\u00C0</pre>   <!-- \u00C0 -->
    248     </test-case>
    249     
    250     <test-case id="test41" strength="TERTIARY">
    251       <pattern>A</pattern>
    252       <pre></pre><m>A</m><post>C</post>
    253     </test-case>
    254     
    255     <test-case id="test42" strength="TERTIARY">
    256       <pattern>A\u030A</pattern>
    257       <pre>\u01FA</pre>
    258     </test-case>
    259 
    260 
    261 
    262     <!-- SUPPLEMENTARYCANONICAL from usrchdat.c  -->
    263     <test-case id="test50" strength="TERTIARY">
    264       <pattern>\uD800\uDC00</pattern>
    265       <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
    266       <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
    267     </test-case>
    268     
    269     <test-case id="test51" strength="TERTIARY">
    270       <pattern>\\uD834\\uDDB9</pattern>
    271       <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
    272     </test-case>
    273 
    274     <test-case id="test52" strength="TERTIARY">
    275       <pattern> \\uD834\\uDDB9 </pattern>
    276       <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post>
    277     </test-case>
    278     
    279     <test-case id="test53" strength="TERTIARY">
    280       <pattern>-\\uD834\\uDDB9-</pattern>
    281       <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
    282     </test-case>
    283     
    284     <test-case id="test54" strength="TERTIARY">
    285       <pattern>,\\uD834\\uDDB9,</pattern>
    286       <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
    287     </test-case>
    288     
    289     <test-case id="test55" strength="TERTIARY">
    290       <pattern>?\\uD834\\uDDB9?</pattern>
    291       <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
    292     </test-case>
    293     
    294 
    295     <!-- Long combining sequences  -->
    296     <!-- Backwards search fails because the pattern ends with ignorables
    297     <test-case id="test60" strength="PRIMARY">
    298       <pattern>A\u0301\u0301\u0301\u0301</pattern>
    299       <m>A\u0301\u0301\u0301\u0301\u0301</m>
    300     </test-case>
    301     -->
    302 
    303     <test-case id="test61" strength="TERTIARY">
    304       <pattern>A\u0301\u0301\u0301\u0301</pattern>
    305           <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
    306     </test-case>
    307     
    308     <test-case id="test62" strength="TERTIARY">
    309       <pattern>A\u0301\u0301\u0301\u0301</pattern>
    310             <m>A\u0301\u0301\u0301\u0301</m>
    311     </test-case>
    312 
    313     <!-- stand-alone combining marks don't match attached marks  -->
    314     <test-case id="test63" strength="TERTIARY">
    315       <pattern>\u0301</pattern>
    316       <pre>A\u0301\u0301\u0301\u0301</pre>
    317     </test-case>
    318     
    319     <test-case id="test64" strength="TERTIARY">
    320       <pattern>\u0301</pattern>
    321       <post>\u0301\u0301\u0301\u0301</post>
    322     </test-case>
    323 
    324   <!-- stand-alone combining mark does match an un-attached combining mark -->
    325     <test-case id="test65" strength="TERTIARY">
    326        <pattern>\u0301</pattern>
    327        <m>\u0301</m><post>A\u0301\u0301</post>
    328     </test-case>
    329 
    330     <test-case id="test66" strength="TERTIARY">
    331        <pattern>\u0301</pattern>
    332        <m>\u0301</m>
    333     </test-case>
    334           
    335     <!-- stand-alone combining marks at end of the target text -->
    336     <test-case id="test67" strength="TERTIARY">
    337        <pattern>\u0301</pattern>
    338        <pre>abcd\r</pre><m>\u0301</m>
    339     </test-case>
    340 
    341       <!-- attached combining marks at end of the target text, no match -->
    342     <test-case id="test68" strength="TERTIARY">
    343        <pattern>\u0301</pattern>
    344        <pre>abcd\u0301</pre>
    345     </test-case>
    346 
    347 
    348 
    349    <!-- no match within expansions at the start -->
    350     <test-case id="test70" strength="PRIMARY">
    351       <pattern>Eligature</pattern>
    352       <pre>ligature</pre>
    353     </test-case>
    354 
    355     <test-case id="test71" strength="PRIMARY">
    356       <pattern>AEligature</pattern>
    357       <m>ligature</m>
    358     </test-case>
    359 
    360     <test-case id="test72" strength="PRIMARY">
    361         <pattern>AEligature</pattern>
    362         <m>ligature</m>
    363     </test-case>
    364     
    365     <!-- unattached combining Tilde will not match a Tilde that is
    366          part of a composed character (\u00D1)  -->
    367     <test-case id="test73" strength="SECONDARY">
    368         <pattern>\u0303</pattern>  <!-- combining tilde -->
    369         <pre>&#x0d;</pre><m>\u0303</m>
    370     </test-case>
    371     
    372     <test-case id="test74" strength="SECONDARY">
    373         <pattern>\u0303</pattern>  <!-- combining tilde -->
    374         <pre> &#x0d;</pre><m>\u0303</m><post>a</post>
    375     </test-case>
    376 
    377   <test-case id="test75" strength="TERTIARY" locale="fr">
    378     <pattern>\u00EA</pattern>
    379     <pre>p</pre><m>\u00EA</m><post>che</post>
    380   </test-case>
    381 
    382   <test-case id="test76" strength="TERTIARY" locale="fr">
    383     <pattern>\u00EA</pattern>
    384     <pre>p</pre><m>e\u0302</m><post>che</post>
    385   </test-case>
    386 
    387   <test-case id="test77" strength="TERTIARY" locale="fr">
    388     <pattern>e\u0302</pattern>
    389     <pre>p</pre><m>\u00EA</m><post>che</post>
    390   </test-case>
    391 
    392   <!-- Test cases from ticket:5382 -->
    393   <test-case id="test78" strength="SECONDARY" locale="hu_HU">
    394     <pattern>\u0170</pattern>
    395     <m>\u0171</m>
    396     <post>12</post>
    397   </test-case>
    398 
    399   <test-case id="test79" strength="SECONDARY" locale="hu_HU">
    400     <pattern>\u0170</pattern>
    401     <pre>1</pre>
    402     <m>\u0171</m>
    403     <post>2</post>
    404   </test-case>
    405 
    406   <test-case id="test80" strength="SECONDARY" locale="hu_HU">
    407     <pattern>\u0170</pattern>
    408     <pre>12</pre>
    409     <m>\u0171</m>
    410   </test-case>
    411   
    412   <!-- Test cases from ticket:5959 -->
    413   <test-case id="test81" strength="SECONDARY">
    414     <pattern>\u2166</pattern>
    415     <m>VII</m>
    416   </test-case>
    417 
    418   <test-case id="test82" strength="SECONDARY">
    419     <pattern>VII</pattern>
    420     <m>\u2166</m>
    421   </test-case>
    422 
    423   <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
    424     <pattern>Universal Declaration of Human Rights</pattern>
    425     <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post>
    426   </test-case>
    427 
    428   <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en">
    429     <pattern>Universal Declaration of Human Rights</pattern>
    430     <pre>Proclaims this </pre>
    431     <m>Universal-Declaration-of-Human-Rights</m>
    432     <post> as a common standard of achievement for all peoples and all nations</post>
    433   </test-case>
    434 
    435   <test-case id="test84" strength="TERTIARY" locale="en">
    436     <pattern>\u05E9\u0591\u05E9</pattern>
    437     <m>\u05E9\u0592\u05E9</m>
    438   </test-case>
    439 
    440   <test-case id="test84b" strength="IDENTICAL" locale="en">
    441     <pattern>\u05E9\u0591\u05E9</pattern>
    442     <pre>\u05E9\u0592\u05E9</pre>
    443   </test-case>
    444 </stringsearch-tests>
    445   
    446