<?xml version="1.0" encoding="UTF-8"?>

<!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved -->

<!--
  Test data file for string search.

  Layout, as declared by the DTD below:
    * the root stringsearch-tests element holds one or more test-case
      elements and may carry a debug attribute (an IDREF naming one
      test-case id);
    * each test-case has a required, unique id plus the optional attributes
      locale, strength, norm and alternate_handling (defaults: "en",
      TERTIARY, OFF, NON_IGNORABLE);
    * the children of a test-case appear in the fixed order
      pattern, pre?, m?, post?.

  Judging by the annotated cases test01 and test02, m is the text the
  pattern is expected to match, pre and post are the text before and after
  it, and a case without m expects no match. The searched text is presumably
  pre + m + post joined together; confirm against the test driver.

  Whitespace inside pattern, pre, m and post is data. Never re-indent or
  wrap the text of those elements.

  NOTE(review): a few cases look as if non-ASCII literals were lost at some
  point (for example the pre text of test12, the empty pattern of test25,
  and test71 / test72 being identical). Compare with the upstream copy of
  this file before relying on them.
-->
<!DOCTYPE stringsearch-tests [
<!ELEMENT stringsearch-tests (test-case+)>
<!ATTLIST stringsearch-tests debug IDREF #IMPLIED >
<!ELEMENT test-case (pattern, pre?, m?, post?)>
<!ATTLIST test-case
    id ID #REQUIRED
    locale CDATA "en"
    strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY"
    norm (ON | OFF) "OFF"
    alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE"
>

<!ELEMENT pattern (#PCDATA)>
<!ELEMENT pre (#PCDATA)>
<!ELEMENT m (#PCDATA)>
<!ELEMENT post (#PCDATA)>
]>

<stringsearch-tests>
    <!-- debug="test11" (for copying into the above element) -->

    <!-- Very simple match -->
    <test-case id="test01">
        <pattern>abc</pattern>
        <pre>xxx</pre><m>abc</m><post>yyy</post>
    </test-case>

    <!-- Very simple no-match -->
    <test-case id="test02">
        <pattern>abc</pattern>
        <pre>xxx</pre><post>yyy</post>
    </test-case>

    <!-- Match after several near-misses. -->
    <test-case id="test03">
        <pattern>string</pattern>
        <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
    </test-case>

    <test-case id="test04" strength="PRIMARY">
        <pattern>FUSS</pattern>
        <pre>abc</pre><m>fuss</m><post>sss</post>
    </test-case>

    <test-case id="test05" strength="PRIMARY">
        <pattern>FUSS</pattern>
        <pre>abc</pre><m>fu</m><post>sss</post>
    </test-case>

    <test-case id="test05.5" strength="PRIMARY">
        <pattern>fuss</pattern>
        <pre>a </pre>
        <m>fu</m>
        <post>ball table</post>
    </test-case>

    <test-case id="test06" strength="PRIMARY">
        <pattern>fu</pattern>
        <pre>abc</pre><m>fuss</m><post>xyz</post>
    </test-case>

    <test-case id="test07" strength="SECONDARY">
        <pattern>fu</pattern>
        <pre>abcfussxyz</pre>
    </test-case>

    <test-case id="test08" strength="PRIMARY">
        <pattern>fus</pattern>
        <pre>abcfu</pre><post>xyz</post>
    </test-case>

    <!-- A good match following an initial match that failed because
         of not ending on a character boundary -->
    <test-case id="test09" strength="PRIMARY">
        <pattern>fus</pattern>
        <pre>fu </pre><m>fus</m><post>sss</post>
    </test-case>


    <!-- Test cases from usrchdat.c BREAKITERATOREXACT -->

    <test-case id="test10" strength="TERTIARY">
        <pattern>fox</pattern>
        <m>fox</m><post>y fox</post>
    </test-case>

    <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">
        <pattern>toe</pattern>
        <pre>This is a </pre><m>T</m><post>ne</post>
    </test-case>

    <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">
        <pattern>toe</pattern>
        <pre>This is a </pre><post>Tne</post>
    </test-case>

    <test-case id="test12" strength="TERTIARY">
        <pattern>e</pattern>
        <pre>tsting that dos not match </pre><m>e</m><post></post>
    </test-case>

    <test-case id="test13" strength="PRIMARY" locale="fr">
        <pattern>e</pattern>
        <pre></pre><m></m><post></post>
    </test-case>

    <test-case id="test14" strength="PRIMARY" locale="fr">
        <pattern>O</pattern>
        <pre>C</pre><m>O\u0302</m><post>T</post>
    </test-case>


    <!-- Test cases from usrchdat.c STRENGTH -->


    <test-case id="test15" strength="PRIMARY" locale="en">
        <pattern>fox</pattern>
        <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
    </test-case>

    <test-case id="test16" strength="PRIMARY" locale="fr">
        <pattern>peche</pattern>
        <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
    </test-case>

    <test-case id="test17" strength="PRIMARY" locale="fr">
        <pattern>peche</pattern>
        <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
    </test-case>

    <test-case id="test18" strength="PRIMARY" locale="fr">
        <pattern>peche</pattern>
        <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
    </test-case>

    <test-case id="test19" strength="PRIMARY" locale="fr">
        <pattern>peche</pattern>
        <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
    </test-case>

    <test-case id="test20" strength="PRIMARY" locale="es">
        <pattern>channel</pattern>
        <pre>A </pre><m>channel</m><post>, </post>
    </test-case>

    <test-case id="test21" strength="PRIMARY" locale="es">
        <pattern>channel</pattern>
        <pre>A </pre><m>CHANNEL</m><post>, </post>
    </test-case>

    <test-case id="test22" strength="PRIMARY" locale="es">
        <pattern>channel</pattern>
        <pre>A </pre><m>Channel</m><post>s, </post>
    </test-case>

    <test-case id="test23" strength="PRIMARY" locale="es">
        <!-- The post text below ends with a space and a line break. Whitespace
             is significant there, so the closing tag is deliberately not indented. -->
        <pattern>channel</pattern>
        <pre>A </pre><m>channel</m><post>... 
</post>
    </test-case>

    <test-case id="test24" strength="TERTIARY" locale="en">
        <pattern>A\u0300</pattern>
        <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
    </test-case>

    <!-- TODO: In the original test data, this test matched at IDENTICAL strength.
         Doesn't seem right. The characters are different.
    -->
    <test-case id="test24a" strength="IDENTICAL" locale="en">
        <pattern>A\u0300</pattern>
        <pre>At IDENTICAL, shoud this match? </pre><m>\u00c0</m><post></post>
    </test-case>

    <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
        <pattern>A\u0300</pattern>
        <pre>At IDENTICAL, shoud this match? </pre>
        <m>\u00c0</m>
        <post></post>
    </test-case>

    <test-case id="test25" strength="SECONDARY" locale="en">
        <pattern></pattern>
        <pre>12</pre><m></m><post> </post>
    </test-case>

    <test-case id="test26" strength="SECONDARY" locale="en">
        <pattern>A</pattern>
        <pre>12</pre><m>a</m><post>...</post>
    </test-case>


    <!-- Test Cases from usrchdat.c, VARIABLE -->
    <test-case id="test27" strength="TERTIARY" locale="en">
        <pattern>blackbird</pattern>
        <pre>black-bird </pre><m>blackbird</m><post>...</post>
    </test-case>

    <test-case id="test28" strength="TERTIARY" locale="en">
        <pattern>go</pattern>
        <pre> on</pre>
    </test-case>

    <!-- TODO: this gives an U_ILLEGAL_ARGUMENT error when opening
         the UStringSearch. How did the original test run? -->
    <!--
    <test-case id="test29" strength="PRIMARY" locale="en">
        <pattern> </pattern>
        <pre></pre><m></m><post>abc</post>
    </test-case>
    -->

    <test-case id="test30" strength="SECONDARY" locale="en">
        <pattern>abc</pattern>
        <pre> a bc ab c a bc ab c"</pre>
    </test-case>

    <test-case id="test31" strength="SECONDARY" locale="en">
        <pattern>abc</pattern>
        <pre> ---------------</pre>
    </test-case>


    <!-- Normalization test cases from usrchdat.c -->
    <test-case id="test32" strength="TERTIARY" norm="ON">
        <pattern>a\u0325\u0300</pattern>
        <pre></pre><m>a\u0300\u0325</m>
    </test-case>


    <test-case id="test32a" strength="TERTIARY" norm="OFF">
        <pattern>a\u0325\u0300</pattern>
        <pre>a\u0300\u0325</pre>
    </test-case>


    <!-- COMPOSITEBOUNDARIES from usrchdat.c
         Boundaries are not identical to original test data because
         of matching only full combining sequences
    -->
    <test-case id="test40" strength="TERTIARY">
        <pattern>A</pattern>
        <pre></pre> <!-- \u00C0 -->
    </test-case>

    <test-case id="test41" strength="TERTIARY">
        <pattern>A</pattern>
        <pre></pre><m>A</m><post>C</post>
    </test-case>

    <test-case id="test42" strength="TERTIARY">
        <pattern>A\u030A</pattern>
        <pre>\u01FA</pre>
    </test-case>



    <!-- SUPPLEMENTARYCANONICAL from usrchdat.c -->
    <test-case id="test50" strength="TERTIARY">
        <pattern>\uD800\uDC00</pattern>
        <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
        <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
    </test-case>

    <test-case id="test51" strength="TERTIARY">
        <pattern>\\uD834\\uDDB9</pattern>
        <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
    </test-case>

    <test-case id="test52" strength="TERTIARY">
        <!-- The post text below contains a line break after "this ". Whitespace
             is significant there, so the continuation is deliberately not indented. -->
        <pattern> \\uD834\\uDDB9 </pattern>
        <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this 
sentence</post>
    </test-case>

    <test-case id="test53" strength="TERTIARY">
        <pattern>-\\uD834\\uDDB9-</pattern>
        <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
    </test-case>

    <test-case id="test54" strength="TERTIARY">
        <pattern>,\\uD834\\uDDB9,</pattern>
        <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
    </test-case>

    <test-case id="test55" strength="TERTIARY">
        <pattern>?\\uD834\\uDDB9?</pattern>
        <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
    </test-case>


    <!-- Long combining sequences -->
    <!-- Backwards search fails because pattern ends w/ ignorables
    <test-case id="test60" strength="PRIMARY">
        <pattern>A\u0301\u0301\u0301\u0301</pattern>
        <m>A\u0301\u0301\u0301\u0301\u0301</m>
    </test-case>
    -->

    <test-case id="test61" strength="TERTIARY">
        <pattern>A\u0301\u0301\u0301\u0301</pattern>
        <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
    </test-case>

    <test-case id="test62" strength="TERTIARY">
        <pattern>A\u0301\u0301\u0301\u0301</pattern>
        <m>A\u0301\u0301\u0301\u0301</m>
    </test-case>

    <!-- stand-alone combining marks don't match attached marks -->
    <test-case id="test63" strength="TERTIARY">
        <pattern>\u0301</pattern>
        <pre>A\u0301\u0301\u0301\u0301</pre>
    </test-case>

    <test-case id="test64" strength="TERTIARY">
        <pattern>\u0301</pattern>
        <post>\u0301\u0301\u0301\u0301</post>
    </test-case>

    <!-- stand-alone combining mark does match an un-attached combining mark -->
    <test-case id="test65" strength="TERTIARY">
        <pattern>\u0301</pattern>
        <m>\u0301</m><post>A\u0301\u0301</post>
    </test-case>

    <test-case id="test66" strength="TERTIARY">
        <pattern>\u0301</pattern>
        <m>\u0301</m>
    </test-case>

    <!-- stand-alone combining marks at end of the target text -->
    <test-case id="test67" strength="TERTIARY">
        <pattern>\u0301</pattern>
        <pre>abcd\r</pre><m>\u0301</m>
    </test-case>

    <!-- attached combining marks at end of the target text, no match -->
    <test-case id="test68" strength="TERTIARY">
        <pattern>\u0301</pattern>
        <pre>abcd\u0301</pre>
    </test-case>



    <!-- no match within expansions at the start -->
    <test-case id="test70" strength="PRIMARY">
        <pattern>Eligature</pattern>
        <pre>ligature</pre>
    </test-case>

    <test-case id="test71" strength="PRIMARY">
        <pattern>AEligature</pattern>
        <m>ligature</m>
    </test-case>

    <test-case id="test72" strength="PRIMARY">
        <pattern>AEligature</pattern>
        <m>ligature</m>
    </test-case>

    <!-- unattached combining Tilde will not match a Tilde that is
         part of a composed (\u00D1) -->
    <test-case id="test73" strength="SECONDARY">
        <!-- The pre text below is a single line break. Whitespace is significant
             there, so the closing tag is deliberately not indented. -->
        <pattern>\u0303</pattern> <!-- combining tilde -->
        <pre>
</pre><m>\u0303</m>
    </test-case>

    <test-case id="test74" strength="SECONDARY">
        <!-- The pre text below is a space followed by a line break. Whitespace is
             significant there, so the closing tag is deliberately not indented. -->
        <pattern>\u0303</pattern> <!-- combining tilde -->
        <pre> 
</pre><m>\u0303</m><post>a</post>
    </test-case>

    <test-case id="test75" strength="TERTIARY" locale="fr">
        <pattern>\u00EA</pattern>
        <pre>p</pre><m>\u00EA</m><post>che</post>
    </test-case>

    <test-case id="test76" strength="TERTIARY" locale="fr">
        <pattern>\u00EA</pattern>
        <pre>p</pre><m>e\u0302</m><post>che</post>
    </test-case>

    <test-case id="test77" strength="TERTIARY" locale="fr">
        <pattern>e\u0302</pattern>
        <pre>p</pre><m>\u00EA</m><post>che</post>
    </test-case>

    <!-- Test cases from ticket:5382 -->
    <test-case id="test78" strength="SECONDARY" locale="hu_HU">
        <pattern>\u0170</pattern>
        <m>\u0171</m>
        <post>12</post>
    </test-case>

    <test-case id="test79" strength="SECONDARY" locale="hu_HU">
        <pattern>\u0170</pattern>
        <pre>1</pre>
        <m>\u0171</m>
        <post>2</post>
    </test-case>

    <test-case id="test80" strength="SECONDARY" locale="hu_HU">
        <pattern>\u0170</pattern>
        <pre>12</pre>
        <m>\u0171</m>
    </test-case>

    <!-- Test cases from ticket:5959 -->
    <test-case id="test81" strength="SECONDARY">
        <pattern>\u2166</pattern>
        <m>VII</m>
    </test-case>

    <test-case id="test82" strength="SECONDARY">
        <pattern>VII</pattern>
        <m>\u2166</m>
    </test-case>

    <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
        <pattern>Universal Declaration of Human Rights</pattern>
        <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post>
    </test-case>

    <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en">
        <pattern>Universal Declaration of Human Rights</pattern>
        <pre>Proclaims this </pre>
        <m>Universal-Declaration-of-Human-Rights</m>
        <post> as a common standard of achievement for all peoples and all nations</post>
    </test-case>

    <test-case id="test84" strength="TERTIARY" locale="en">
        <pattern>\u05E9\u0591\u05E9</pattern>
        <m>\u05E9\u0592\u05E9</m>
    </test-case>

    <test-case id="test84b" strength="IDENTICAL" locale="en">
        <pattern>\u05E9\u0591\u05E9</pattern>
        <pre>\u05E9\u0592\u05E9</pre>
    </test-case>
</stringsearch-tests>