Home | History | Annotate | Download | only in collator
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2002-2014, International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 
     11 /**
     12  * Port From:   ICU4C v2.1 : collate/CollationIteratorTest
     13  * Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp
     14  **/
     15 
     16 package android.icu.dev.test.collator;
     17 
     18 import java.text.CharacterIterator;
     19 import java.text.StringCharacterIterator;
     20 import java.util.Arrays;
     21 import java.util.Locale;
     22 
     23 import org.junit.Test;
     24 import org.junit.runner.RunWith;
     25 import org.junit.runners.JUnit4;
     26 
     27 import android.icu.dev.test.TestFmwk;
     28 import android.icu.lang.UCharacter;
     29 import android.icu.text.CollationElementIterator;
     30 import android.icu.text.Collator;
     31 import android.icu.text.RuleBasedCollator;
     32 import android.icu.text.UCharacterIterator;
     33 import android.icu.util.ULocale;
     34 import android.icu.testsharding.MainTestShard;
     35 
     36 @MainTestShard
     37 @RunWith(JUnit4.class)
     38 public class CollationIteratorTest extends TestFmwk {
     39 
    // Sample sentence iterated by TestOffset, TestPrevious and TestSetText.
    String test1 = "What subset of all possible test cases?";
    // Second sample sentence; TestSetText walks part of it before pointing the
    // same iterator at test1.
    String test2 = "has the highest probability of detecting";
     42 
     43     /*
     44      * @bug 4157299
     45      */
     46     @Test
     47     public void TestClearBuffers(/* char* par */) {
     48         RuleBasedCollator c = null;
     49         try {
     50             c = new RuleBasedCollator("&a < b < c & ab = d");
     51         } catch (Exception e) {
     52             warnln("Couldn't create a RuleBasedCollator.");
     53             return;
     54         }
     55 
     56         String source = "abcd";
     57         CollationElementIterator i = c.getCollationElementIterator(source);
     58         int e0 = 0;
     59         try {
     60             e0 = i.next();    // save the first collation element
     61         } catch (Exception e) {
     62             errln("call to i.next() failed.");
     63             return;
     64         }
     65 
     66         try {
     67             i.setOffset(3);        // go to the expanding character
     68         } catch (Exception e) {
     69             errln("call to i.setOffset(3) failed.");
     70             return;
     71         }
     72 
     73         try {
     74             i.next();                // but only use up half of it
     75         } catch (Exception e) {
     76             errln("call to i.next() failed.");
     77             return;
     78         }
     79 
     80         try {
     81             i.setOffset(0);        // go back to the beginning
     82         } catch (Exception e) {
     83             errln("call to i.setOffset(0) failed. ");
     84         }
     85 
     86         int e = 0;
     87         try {
     88             e = i.next();    // and get this one again
     89         } catch (Exception ee) {
     90             errln("call to i.next() failed. ");
     91             return;
     92         }
     93 
     94         if (e != e0) {
     95             errln("got 0x" + Integer.toHexString(e) + ", expected 0x" + Integer.toHexString(e0));
     96         }
     97     }
     98 
     99     /** @bug 4108762
    100      * Test for getMaxExpansion()
    101      */
    102     @Test
    103     public void TestMaxExpansion(/* char* par */) {
    104         int unassigned = 0xEFFFD;
    105         String rule = "&a < ab < c/aba < d < z < ch";
    106         RuleBasedCollator coll = null;
    107         try {
    108             coll = new RuleBasedCollator(rule);
    109         } catch (Exception e) {
    110             warnln("Fail to create RuleBasedCollator");
    111             return;
    112         }
    113         char ch = 0;
    114         String str = String.valueOf(ch);
    115 
    116         CollationElementIterator iter = coll.getCollationElementIterator(str);
    117 
    118         while (ch < 0xFFFF) {
    119             int count = 1;
    120             ch ++;
    121             str = String.valueOf(ch);
    122             iter.setText(str);
    123             int order = iter.previous();
    124 
    125             // thai management
    126             if (order == 0) {
    127                 order = iter.previous();
    128             }
    129 
    130             while (iter.previous() != CollationElementIterator.NULLORDER) {
    131                 count ++;
    132             }
    133 
    134             if (iter.getMaxExpansion(order) < count) {
    135                 errln("Failure at codepoint " + ch + ", maximum expansion count < " + count);
    136             }
    137         }
    138 
    139         // testing for exact max expansion
    140         ch = 0;
    141         while (ch < 0x61) {
    142             str = String.valueOf(ch);
    143             iter.setText(str);
    144             int order = iter.previous();
    145 
    146             if (iter.getMaxExpansion(order) != 1) {
    147                 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    148                       + " maximum expansion count == 1");
    149             }
    150             ch ++;
    151         }
    152 
    153         ch = 0x63;
    154         str = String.valueOf(ch);
    155         iter.setText(str);
    156         int temporder = iter.previous();
    157 
    158         if (iter.getMaxExpansion(temporder) != 3) {
    159             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    160                                   + " maximum expansion count == 3");
    161         }
    162 
    163         ch = 0x64;
    164         str = String.valueOf(ch);
    165         iter.setText(str);
    166         temporder = iter.previous();
    167 
    168         if (iter.getMaxExpansion(temporder) != 1) {
    169             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    170                                   + " maximum expansion count == 1");
    171         }
    172 
    173         str = UCharacter.toString(unassigned);
    174         iter.setText(str);
    175         temporder = iter.previous();
    176 
    177         if (iter.getMaxExpansion(temporder) != 2) {
    178             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    179                                   + " maximum expansion count == 2");
    180         }
    181 
    182 
    183         // testing jamo
    184         ch = 0x1165;
    185         str = String.valueOf(ch);
    186         iter.setText(str);
    187         temporder = iter.previous();
    188 
    189         if (iter.getMaxExpansion(temporder) > 3) {
    190             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    191                                           + " maximum expansion count < 3");
    192         }
    193 
    194         // testing special jamo &a<\u1165
    195         rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";
    196 
    197         try {
    198             coll = new RuleBasedCollator(rule);
    199         } catch (Exception e) {
    200             errln("Fail to create RuleBasedCollator");
    201             return;
    202         }
    203         iter = coll.getCollationElementIterator(str);
    204 
    205         temporder = iter.previous();
    206 
    207         if (iter.getMaxExpansion(temporder) != 6) {
    208             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    209                                          + " maximum expansion count == 6");
    210         }
    211     }
    212 
    213     /**
    214      * Test for getOffset() and setOffset()
    215      */
    216     @Test
    217     public void TestOffset(/* char* par */) {
    218         RuleBasedCollator en_us;
    219         try {
    220             en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    221         } catch (Exception e) {
    222             warnln("ERROR: in creation of collator of ENGLISH locale");
    223             return;
    224         }
    225 
    226         CollationElementIterator iter = en_us.getCollationElementIterator(test1);
    227         // testing boundaries
    228         iter.setOffset(0);
    229         if (iter.previous() != CollationElementIterator.NULLORDER) {
    230             errln("Error: After setting offset to 0, we should be at the end "
    231                   + "of the backwards iteration");
    232         }
    233         iter.setOffset(test1.length());
    234         if (iter.next() != CollationElementIterator.NULLORDER) {
    235             errln("Error: After setting offset to the end of the string, we "
    236                   + "should be at the end of the forwards iteration");
    237         }
    238 
    239         // Run all the way through the iterator, then get the offset
    240         int[] orders = CollationTest.getOrders(iter);
    241         logln("orders.length = " + orders.length);
    242 
    243         int offset = iter.getOffset();
    244 
    245         if (offset != test1.length()) {
    246             String msg1 = "offset at end != length: ";
    247             String msg2 = " vs ";
    248             errln(msg1 + offset + msg2 + test1.length());
    249         }
    250 
    251         // Now set the offset back to the beginning and see if it works
    252         CollationElementIterator pristine = en_us.getCollationElementIterator(test1);
    253 
    254         try {
    255             iter.setOffset(0);
    256         } catch(Exception e) {
    257             errln("setOffset failed.");
    258         }
    259         assertEqual(iter, pristine);
    260 
    261         // setting offset in the middle of a contraction
    262         String contraction = "change";
    263         RuleBasedCollator tailored = null;
    264         try {
    265             tailored = new RuleBasedCollator("& a < ch");
    266         } catch (Exception e) {
    267             errln("Error: in creation of Spanish collator");
    268             return;
    269         }
    270         iter = tailored.getCollationElementIterator(contraction);
    271         int order[] = CollationTest.getOrders(iter);
    272         iter.setOffset(1); // sets offset in the middle of ch
    273         int order2[] = CollationTest.getOrders(iter);
    274         if (!Arrays.equals(order, order2)) {
    275             errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
    276         }
    277         contraction = "peache";
    278         iter = tailored.getCollationElementIterator(contraction);
    279         iter.setOffset(3);
    280         order = CollationTest.getOrders(iter);
    281         iter.setOffset(4); // sets offset in the middle of ch
    282         order2 = CollationTest.getOrders(iter);
    283         if (!Arrays.equals(order, order2)) {
    284             errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
    285         }
    286         // setting offset in the middle of a surrogate pair
    287         String surrogate = "\ud800\udc00str";
    288         iter = tailored.getCollationElementIterator(surrogate);
    289         order = CollationTest.getOrders(iter);
    290         iter.setOffset(1); // sets offset in the middle of surrogate
    291         order2 = CollationTest.getOrders(iter);
    292         if (!Arrays.equals(order, order2)) {
    293             errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
    294         }
    295         surrogate = "simple\ud800\udc00str";
    296         iter = tailored.getCollationElementIterator(surrogate);
    297         iter.setOffset(6);
    298         order = CollationTest.getOrders(iter);
    299         iter.setOffset(7); // sets offset in the middle of surrogate
    300         order2 = CollationTest.getOrders(iter);
    301         if (!Arrays.equals(order, order2)) {
    302             errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
    303         }
    304         // TODO: try iterating halfway through a messy string.
    305     }
    306 
    307 
    308 
    309     void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
    310         int c1, c2, count = 0;
    311         do {
    312             c1 = i1.next();
    313             c2 = i2.next();
    314             if (c1 != c2) {
    315                 errln("    " + count + ": strength(0x" +
    316                     Integer.toHexString(c1) + ") != strength(0x" + Integer.toHexString(c2) + ")");
    317                 break;
    318             }
    319             count += 1;
    320         } while (c1 != CollationElementIterator.NULLORDER);
    321         CollationTest.backAndForth(this, i1);
    322         CollationTest.backAndForth(this, i2);
    323     }
    324 
    325     /**
    326      * Test for CollationElementIterator.previous()
    327      *
    328      * @bug 4108758 - Make sure it works with contracting characters
    329      *
    330      */
    331     @Test
    332     public void TestPrevious(/* char* par */) {
    333         RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    334         CollationElementIterator iter = en_us.getCollationElementIterator(test1);
    335 
    336         // A basic test to see if it's working at all
    337         CollationTest.backAndForth(this, iter);
    338 
    339         // Test with a contracting character sequence
    340         String source;
    341         RuleBasedCollator c1 = null;
    342         try {
    343             c1 = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    344         } catch (Exception e) {
    345             errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
    346             return;
    347         }
    348 
    349         source = "abchdcba";
    350         iter = c1.getCollationElementIterator(source);
    351         CollationTest.backAndForth(this, iter);
    352 
    353         // Test with an expanding character sequence
    354         RuleBasedCollator c2 = null;
    355         try {
    356             c2 = new RuleBasedCollator("&a < b < c/abd < d");
    357         } catch (Exception e ) {
    358             errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
    359             return;
    360         }
    361 
    362         source = "abcd";
    363         iter = c2.getCollationElementIterator(source);
    364         CollationTest.backAndForth(this, iter);
    365 
    366         // Now try both
    367         RuleBasedCollator c3 = null;
    368         try {
    369             c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
    370         } catch (Exception e) {
    371             errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
    372             return;
    373         }
    374 
    375         source = "abcdbchdc";
    376         iter = c3.getCollationElementIterator(source);
    377         CollationTest.backAndForth(this, iter);
    378 
    379         source= "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
    380         Collator c4 = null;
    381         try {
    382             c4 = Collator.getInstance(new Locale("th", "TH", ""));
    383         } catch (Exception e) {
    384             errln("Couldn't create a collator");
    385             return;
    386         }
    387 
    388         iter = ((RuleBasedCollator)c4).getCollationElementIterator(source);
    389         CollationTest.backAndForth(this, iter);
    390 
    391         source= "\u0061\u30CF\u3099\u30FC";
    392         Collator c5 = null;
    393         try {
    394             c5 = Collator.getInstance(new Locale("ja", "JP", ""));
    395         } catch (Exception e) {
    396             errln("Couldn't create Japanese collator\n");
    397             return;
    398         }
    399         iter = ((RuleBasedCollator)c5).getCollationElementIterator(source);
    400 
    401         CollationTest.backAndForth(this, iter);
    402     }
    403 
    404 
    405 
    406     /**
    407      * Test for setText()
    408      */
    409     @Test
    410     public void TestSetText(/* char* par */) {
    411         RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    412         CollationElementIterator iter1 = en_us.getCollationElementIterator(test1);
    413         CollationElementIterator iter2 = en_us.getCollationElementIterator(test2);
    414 
    415         // Run through the second iterator just to exercise it
    416         int c = iter2.next();
    417         int i = 0;
    418 
    419         while ( ++i < 10 && c != CollationElementIterator.NULLORDER) {
    420             try {
    421                 c = iter2.next();
    422             } catch (Exception e) {
    423                 errln("iter2.next() returned an error.");
    424                 break;
    425             }
    426         }
    427 
    428         // Now set it to point to the same string as the first iterator
    429         try {
    430             iter2.setText(test1);
    431         } catch (Exception e) {
    432             errln("call to iter2->setText(test1) failed.");
    433             return;
    434         }
    435         assertEqual(iter1, iter2);
    436 
    437         iter1.reset();
    438         //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
    439         CharacterIterator chariter = new StringCharacterIterator(test1);
    440         try {
    441             iter2.setText(chariter);
    442         } catch (Exception e ) {
    443             errln("call to iter2->setText(chariter(test1)) failed.");
    444             return;
    445         }
    446         assertEqual(iter1, iter2);
    447 
    448         iter1.reset();
    449         //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
    450         UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
    451         try {
    452             iter2.setText(uchariter);
    453         } catch (Exception e ) {
    454             errln("call to iter2->setText(uchariter(test1)) failed.");
    455             return;
    456         }
    457         assertEqual(iter1, iter2);
    458     }
    459 
    460     /**
    461      * Test for CollationElementIterator previous and next for the whole set of
    462      * unicode characters.
    463      */
    464     @Test
    465     public void TestUnicodeChar() {
    466         RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    467         CollationElementIterator iter;
    468         char codepoint;
    469         StringBuffer source = new StringBuffer();
    470         source.append("\u0e4d\u0e4e\u0e4f");
    471         // source.append("\u04e8\u04e9");
    472         iter = en_us.getCollationElementIterator(source.toString());
    473         // A basic test to see if it's working at all
    474         CollationTest.backAndForth(this, iter);
    475         for (codepoint = 1; codepoint < 0xFFFE;) {
    476             source.delete(0, source.length());
    477             while (codepoint % 0xFF != 0) {
    478                 if (UCharacter.isDefined(codepoint)) {
    479                     source.append(codepoint);
    480                 }
    481                 codepoint ++;
    482             }
    483 
    484             if (UCharacter.isDefined(codepoint)) {
    485                 source.append(codepoint);
    486             }
    487 
    488             if (codepoint != 0xFFFF) {
    489                 codepoint ++;
    490             }
    491             /*if (codepoint >= 0x04fc) {
    492                 System.out.println("codepoint " + Integer.toHexString(codepoint));
    493                 String str = source.substring(230, 232);
    494                 System.out.println(android.icu.impl.Utility.escape(str));
    495                 System.out.println("codepoint " + Integer.toHexString(codepoint)
    496                                    + "length " + str.length());
    497                 iter = en_us.getCollationElementIterator(str);
    498                 CollationTest.backAndForth(this, iter);
    499             }
    500             */
    501             iter = en_us.getCollationElementIterator(source.toString());
    502             // A basic test to see if it's working at all
    503             CollationTest.backAndForth(this, iter);
    504         }
    505     }
    506 
    507     /**
    508      * Test for CollationElementIterator previous and next for the whole set of
    509      * unicode characters with normalization on.
    510      */
    511     @Test
    512     public void TestNormalizedUnicodeChar()
    513     {
    514         // thai should have normalization on
    515         RuleBasedCollator th_th = null;
    516         try {
    517             th_th = (RuleBasedCollator)Collator.getInstance(
    518                                                        new Locale("th", "TH"));
    519         } catch (Exception e) {
    520             warnln("Error creating Thai collator");
    521             return;
    522         }
    523         StringBuffer source = new StringBuffer();
    524         source.append('\uFDFA');
    525         CollationElementIterator iter
    526                         = th_th.getCollationElementIterator(source.toString());
    527         CollationTest.backAndForth(this, iter);
    528         for (char codepoint = 0x1; codepoint < 0xfffe;) {
    529             source.delete(0, source.length());
    530             while (codepoint % 0xFF != 0) {
    531                 if (UCharacter.isDefined(codepoint)) {
    532                     source.append(codepoint);
    533                 }
    534                 codepoint ++;
    535             }
    536 
    537             if (UCharacter.isDefined(codepoint)) {
    538                 source.append(codepoint);
    539             }
    540 
    541             if (codepoint != 0xFFFF) {
    542                 codepoint ++;
    543             }
    544 
    545             /*if (((int)codepoint) >= 0xfe00) {
    546                 String str = source.substring(185, 190);
    547                 System.out.println(android.icu.impl.Utility.escape(str));
    548                 System.out.println("codepoint "
    549                                    + Integer.toHexString(codepoint)
    550                                    + "length " + str.length());
    551                 iter = th_th.getCollationElementIterator(str);
    552                 CollationTest.backAndForth(this, iter);
    553             */
    554             iter = th_th.getCollationElementIterator(source.toString());
    555             // A basic test to see if it's working at all
    556             CollationTest.backAndForth(this, iter);
    557         }
    558     }
    559 
    560     /**
    561     * Testing the discontiguous contractions
    562     */
    563     @Test
    564     public void TestDiscontiguous()
    565     {
    566         String rulestr ="&z < AB < X\u0300 < ABC < X\u0300\u0315";
    567         String src[] = {"ADB", "ADBC", "A\u0315B", "A\u0315BC",
    568                         // base character blocked
    569                         "XD\u0300", "XD\u0300\u0315",
    570                         // non blocking combining character
    571                         "X\u0319\u0300", "X\u0319\u0300\u0315",
    572                         // blocking combining character
    573                         "X\u0314\u0300", "X\u0314\u0300\u0315",
    574                         // contraction prefix
    575                         "ABDC", "AB\u0315C","X\u0300D\u0315",
    576                         "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
    577                         // ends not with a contraction character
    578                         "X\u0319\u0300D", "X\u0319\u0300\u0315D",
    579                         "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
    580                         "X\u0300\u031A\u0315D"
    581         };
    582         String tgt[] = {// non blocking combining character
    583                         "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
    584                         // base character blocked
    585                         "X D \u0300", "X D \u0300\u0315",
    586                         // non blocking combining character
    587                         "X\u0300 \u0319", "X\u0300\u0315 \u0319",
    588                         // blocking combining character
    589                         "X \u0314 \u0300", "X \u0314 \u0300\u0315",
    590                         // contraction prefix
    591                         "AB DC", "AB \u0315 C","X\u0300 D \u0315",
    592                         "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
    593                         // ends not with a contraction character
    594                         "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
    595                         "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
    596                         "X\u0300 \u031A\u0315D"
    597         };
    598         int count = 0;
    599         try {
    600             RuleBasedCollator coll = new RuleBasedCollator(rulestr);
    601             CollationElementIterator iter
    602                                         = coll.getCollationElementIterator("");
    603             CollationElementIterator resultiter
    604                                         = coll.getCollationElementIterator("");
    605             while (count < src.length) {
    606                 iter.setText(src[count]);
    607                 int s = 0;
    608                 while (s < tgt[count].length()) {
    609                     int e = tgt[count].indexOf(' ', s);
    610                     if (e < 0) {
    611                         e = tgt[count].length();
    612                     }
    613                     String resultstr = tgt[count].substring(s, e);
    614                     resultiter.setText(resultstr);
    615                     int ce = resultiter.next();
    616                     while (ce != CollationElementIterator.NULLORDER) {
    617                         if (ce != iter.next()) {
    618                             errln("Discontiguos contraction test mismatch at"
    619                                   + count);
    620                             return;
    621                         }
    622                         ce = resultiter.next();
    623                     }
    624                     s = e + 1;
    625                 }
    626                 iter.reset();
    627                 CollationTest.backAndForth(this, iter);
    628                 count ++;
    629             }
    630         }
    631         catch (Exception e) {
    632             warnln("Error running discontiguous tests " + e.toString());
    633         }
    634     }
    635 
    636     /**
    637     * Test the incremental normalization
    638     */
    639     @Test
    640     public void TestNormalization()
    641     {
    642         String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
    643         String testdata[] = {"\u1ED9", "o\u0323\u0302",
    644                             "\u0300\u0315", "\u0315\u0300",
    645                             "A\u0300\u0315B", "A\u0315\u0300B",
    646                             "A\u0316\u0315B", "A\u0315\u0316B",
    647                             "\u0316\u0300\u0315", "\u0315\u0300\u0316",
    648                             "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
    649                             "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"};
    650         RuleBasedCollator coll = null;
    651         try {
    652             coll = new RuleBasedCollator(rules);
    653             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    654         } catch (Exception e) {
    655             warnln("ERROR: in creation of collator using rules " + rules);
    656             return;
    657         }
    658 
    659         CollationElementIterator iter = coll.getCollationElementIterator("testing");
    660         for (int count = 0; count < testdata.length; count ++) {
    661             iter.setText(testdata[count]);
    662             CollationTest.backAndForth(this, iter);
    663         }
    664     }
    665 
    666     /**
    667      * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
    668      * normalization on AND jamo tailoring, among other things.
    669      *
    670      * Note: This test is sensitive to changes of the root collator,
    671      * for example whether the ae-ligature maps to three CEs (as in the DUCET)
    672      * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
    673      * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
    674      * For example, the DUCET's artificial secondary CE in the ae-ligature
    675      * may map to two 32-bit iterator CEs (as it did until ICU 52).
    676      */
    677     @Test
    678     public void TestSearchCollatorElements()
    679     {
    680         String tsceText =
    681             " \uAC00" +              // simple LV Hangul
    682             " \uAC01" +              // simple LVT Hangul
    683             " \uAC0F" +              // LVTT, last jamo expands for search
    684             " \uAFFF" +              // LLVVVTT, every jamo expands for search
    685             " \u1100\u1161\u11A8" +  // 0xAC01 as conjoining jamo
    686             " \u3131\u314F\u3131" +  // 0xAC01 as compatibility jamo
    687             " \u1100\u1161\u11B6" +  // 0xAC0F as conjoining jamo; last expands for search
    688             " \u1101\u1170\u11B6" +  // 0xAFFF as conjoining jamo; all expand for search
    689             " \u00E6" +              // small letter ae, expands
    690             " \u1E4D" +              // small letter o with tilde and acute, decomposes
    691             " ";
    692 
    693         int[] rootStandardOffsets = {
    694             0,  1,2,
    695             2,  3,4,4,
    696             4,  5,6,6,
    697             6,  7,8,8,
    698             8,  9,10,11,
    699             12, 13,14,15,
    700             16, 17,18,19,
    701             20, 21,22,23,
    702             24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    703             26, 27,28,28,
    704             28,
    705             29
    706         };
    707 
    708         int[] rootSearchOffsets = {
    709             0,  1,2,
    710             2,  3,4,4,
    711             4,  5,6,6,6,
    712             6,  7,8,8,8,8,8,8,
    713             8,  9,10,11,
    714             12, 13,14,15,
    715             16, 17,18,19,20,
    716             20, 21,22,22,23,23,23,24,
    717             24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    718             26, 27,28,28,
    719             28,
    720             29
    721         };
    722 
    723         class TSCEItem {
    724             private String localeString;
    725             private int[] offsets;
    726             TSCEItem(String locStr, int[] offs) {
    727                 localeString = locStr;
    728                 offsets = offs;
    729             }
    730             public String getLocaleString() { return localeString; }
    731             public int[] getOffsets() { return offsets; }
    732         }
    733         final TSCEItem[] tsceItems = {
    734             new TSCEItem( "root",                  rootStandardOffsets ),
    735             new TSCEItem( "root@collation=search", rootSearchOffsets   ),
    736         };
    737 
    738         for (TSCEItem tsceItem: tsceItems) {
    739             String localeString = tsceItem.getLocaleString();
    740             ULocale uloc = new ULocale(localeString);
    741             RuleBasedCollator col = null;
    742             try {
    743                 col = (RuleBasedCollator)Collator.getInstance(uloc);
    744             } catch (Exception e) {
    745                 errln("Error: in locale " + localeString + ", err in Collator.getInstance");
    746                 continue;
    747             }
    748             CollationElementIterator uce = col.getCollationElementIterator(tsceText);
    749             int[] offsets = tsceItem.getOffsets();
    750             int ioff, noff = offsets.length;
    751             int offset, element;
    752 
    753             ioff = 0;
    754             do {
    755                 offset = uce.getOffset();
    756                 element = uce.next();
    757                 logln(String.format("(%s) offset=%2d  ce=%08x\n", tsceItem.localeString, offset, element));
    758                 if (element == 0) {
    759                     errln("Error: in locale " + localeString + ", CEIterator next() returned element 0");
    760                 }
    761                 if ( ioff < noff ) {
    762                     if ( offset != offsets[ioff] ) {
    763                         errln("Error: in locale " + localeString + ", expected CEIterator next()->getOffset " + offsets[ioff] + ", got " + offset);
    764                         //ioff = noff;
    765                         //break;
    766                     }
    767                     ioff++;
    768                 } else {
    769                     errln("Error: in locale " + localeString + ", CEIterator next() returned more elements than expected");
    770                 }
    771             } while (element != CollationElementIterator.NULLORDER);
    772             if ( ioff < noff ) {
    773                 errln("Error: in locale " + localeString + ", CEIterator next() returned fewer elements than expected");
    774             }
    775 
    776             // backwards test
    777             uce.setOffset(tsceText.length());
    778             ioff = noff;
    779             do {
    780                 offset = uce.getOffset();
    781                 element = uce.previous();
    782                 if (element == 0) {
    783                     errln("Error: in locale " + localeString + ", CEIterator previous() returned element 0");
    784                 }
    785                 if ( ioff > 0 ) {
    786                     ioff--;
    787                     if ( offset != offsets[ioff] ) {
    788                         errln("Error: in locale " + localeString + ", expected CEIterator previous()->getOffset " + offsets[ioff] + ", got " + offset);
    789                         //ioff = 0;
    790                         //break;
    791                     }
    792                 } else {
    793                     errln("Error: in locale " + localeString + ", CEIterator previous() returned more elements than expected");
    794                 }
    795             } while (element != CollationElementIterator.NULLORDER);
    796             if ( ioff > 0 ) {
    797                 errln("Error: in locale " + localeString + ", CEIterator previous() returned fewer elements than expected");
    798             }
    799         }
    800     }
    801 }
    802