Home | History | Annotate | Download | only in collator
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2002-2014, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 
     10 /**
     11  * Port From:   ICU4C v2.1 : collate/CollationIteratorTest
     12  * Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp
     13  **/
     14 
     15 package com.ibm.icu.dev.test.collator;
     16 
     17 import java.text.CharacterIterator;
     18 import java.text.StringCharacterIterator;
     19 import java.util.Arrays;
     20 import java.util.Locale;
     21 
     22 import org.junit.Test;
     23 import org.junit.runner.RunWith;
     24 import org.junit.runners.JUnit4;
     25 
     26 import com.ibm.icu.dev.test.TestFmwk;
     27 import com.ibm.icu.lang.UCharacter;
     28 import com.ibm.icu.text.CollationElementIterator;
     29 import com.ibm.icu.text.Collator;
     30 import com.ibm.icu.text.RuleBasedCollator;
     31 import com.ibm.icu.text.UCharacterIterator;
     32 import com.ibm.icu.util.ULocale;
     33 
     34 @RunWith(JUnit4.class)
     35 public class CollationIteratorTest extends TestFmwk {
     36 
    // Sample sentences shared by the tests in this class as iterator input text.
    String test1 = "What subset of all possible test cases?";
    String test2 = "has the highest probability of detecting";
     39 
     40     /*
     41      * @bug 4157299
     42      */
     43     @Test
     44     public void TestClearBuffers(/* char* par */) {
     45         RuleBasedCollator c = null;
     46         try {
     47             c = new RuleBasedCollator("&a < b < c & ab = d");
     48         } catch (Exception e) {
     49             warnln("Couldn't create a RuleBasedCollator.");
     50             return;
     51         }
     52 
     53         String source = "abcd";
     54         CollationElementIterator i = c.getCollationElementIterator(source);
     55         int e0 = 0;
     56         try {
     57             e0 = i.next();    // save the first collation element
     58         } catch (Exception e) {
     59             errln("call to i.next() failed.");
     60             return;
     61         }
     62 
     63         try {
     64             i.setOffset(3);        // go to the expanding character
     65         } catch (Exception e) {
     66             errln("call to i.setOffset(3) failed.");
     67             return;
     68         }
     69 
     70         try {
     71             i.next();                // but only use up half of it
     72         } catch (Exception e) {
     73             errln("call to i.next() failed.");
     74             return;
     75         }
     76 
     77         try {
     78             i.setOffset(0);        // go back to the beginning
     79         } catch (Exception e) {
     80             errln("call to i.setOffset(0) failed. ");
     81         }
     82 
     83         int e = 0;
     84         try {
     85             e = i.next();    // and get this one again
     86         } catch (Exception ee) {
     87             errln("call to i.next() failed. ");
     88             return;
     89         }
     90 
     91         if (e != e0) {
     92             errln("got 0x" + Integer.toHexString(e) + ", expected 0x" + Integer.toHexString(e0));
     93         }
     94     }
     95 
     96     /** @bug 4108762
     97      * Test for getMaxExpansion()
     98      */
     99     @Test
    100     public void TestMaxExpansion(/* char* par */) {
    101         int unassigned = 0xEFFFD;
    102         String rule = "&a < ab < c/aba < d < z < ch";
    103         RuleBasedCollator coll = null;
    104         try {
    105             coll = new RuleBasedCollator(rule);
    106         } catch (Exception e) {
    107             warnln("Fail to create RuleBasedCollator");
    108             return;
    109         }
    110         char ch = 0;
    111         String str = String.valueOf(ch);
    112 
    113         CollationElementIterator iter = coll.getCollationElementIterator(str);
    114 
    115         while (ch < 0xFFFF) {
    116             int count = 1;
    117             ch ++;
    118             str = String.valueOf(ch);
    119             iter.setText(str);
    120             int order = iter.previous();
    121 
    122             // thai management
    123             if (order == 0) {
    124                 order = iter.previous();
    125             }
    126 
    127             while (iter.previous() != CollationElementIterator.NULLORDER) {
    128                 count ++;
    129             }
    130 
    131             if (iter.getMaxExpansion(order) < count) {
    132                 errln("Failure at codepoint " + ch + ", maximum expansion count < " + count);
    133             }
    134         }
    135 
    136         // testing for exact max expansion
    137         ch = 0;
    138         while (ch < 0x61) {
    139             str = String.valueOf(ch);
    140             iter.setText(str);
    141             int order = iter.previous();
    142 
    143             if (iter.getMaxExpansion(order) != 1) {
    144                 errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    145                       + " maximum expansion count == 1");
    146             }
    147             ch ++;
    148         }
    149 
    150         ch = 0x63;
    151         str = String.valueOf(ch);
    152         iter.setText(str);
    153         int temporder = iter.previous();
    154 
    155         if (iter.getMaxExpansion(temporder) != 3) {
    156             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    157                                   + " maximum expansion count == 3");
    158         }
    159 
    160         ch = 0x64;
    161         str = String.valueOf(ch);
    162         iter.setText(str);
    163         temporder = iter.previous();
    164 
    165         if (iter.getMaxExpansion(temporder) != 1) {
    166             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    167                                   + " maximum expansion count == 1");
    168         }
    169 
    170         str = UCharacter.toString(unassigned);
    171         iter.setText(str);
    172         temporder = iter.previous();
    173 
    174         if (iter.getMaxExpansion(temporder) != 2) {
    175             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    176                                   + " maximum expansion count == 2");
    177         }
    178 
    179 
    180         // testing jamo
    181         ch = 0x1165;
    182         str = String.valueOf(ch);
    183         iter.setText(str);
    184         temporder = iter.previous();
    185 
    186         if (iter.getMaxExpansion(temporder) > 3) {
    187             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    188                                           + " maximum expansion count < 3");
    189         }
    190 
    191         // testing special jamo &a<\u1165
    192         rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";
    193 
    194         try {
    195             coll = new RuleBasedCollator(rule);
    196         } catch (Exception e) {
    197             errln("Fail to create RuleBasedCollator");
    198             return;
    199         }
    200         iter = coll.getCollationElementIterator(str);
    201 
    202         temporder = iter.previous();
    203 
    204         if (iter.getMaxExpansion(temporder) != 6) {
    205             errln("Failure at codepoint 0x" + Integer.toHexString(ch)
    206                                          + " maximum expansion count == 6");
    207         }
    208     }
    209 
    210     /**
    211      * Test for getOffset() and setOffset()
    212      */
    213     @Test
    214     public void TestOffset(/* char* par */) {
    215         RuleBasedCollator en_us;
    216         try {
    217             en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    218         } catch (Exception e) {
    219             warnln("ERROR: in creation of collator of ENGLISH locale");
    220             return;
    221         }
    222 
    223         CollationElementIterator iter = en_us.getCollationElementIterator(test1);
    224         // testing boundaries
    225         iter.setOffset(0);
    226         if (iter.previous() != CollationElementIterator.NULLORDER) {
    227             errln("Error: After setting offset to 0, we should be at the end "
    228                   + "of the backwards iteration");
    229         }
    230         iter.setOffset(test1.length());
    231         if (iter.next() != CollationElementIterator.NULLORDER) {
    232             errln("Error: After setting offset to the end of the string, we "
    233                   + "should be at the end of the forwards iteration");
    234         }
    235 
    236         // Run all the way through the iterator, then get the offset
    237         int[] orders = CollationTest.getOrders(iter);
    238         logln("orders.length = " + orders.length);
    239 
    240         int offset = iter.getOffset();
    241 
    242         if (offset != test1.length()) {
    243             String msg1 = "offset at end != length: ";
    244             String msg2 = " vs ";
    245             errln(msg1 + offset + msg2 + test1.length());
    246         }
    247 
    248         // Now set the offset back to the beginning and see if it works
    249         CollationElementIterator pristine = en_us.getCollationElementIterator(test1);
    250 
    251         try {
    252             iter.setOffset(0);
    253         } catch(Exception e) {
    254             errln("setOffset failed.");
    255         }
    256         assertEqual(iter, pristine);
    257 
    258         // setting offset in the middle of a contraction
    259         String contraction = "change";
    260         RuleBasedCollator tailored = null;
    261         try {
    262             tailored = new RuleBasedCollator("& a < ch");
    263         } catch (Exception e) {
    264             errln("Error: in creation of Spanish collator");
    265             return;
    266         }
    267         iter = tailored.getCollationElementIterator(contraction);
    268         int order[] = CollationTest.getOrders(iter);
    269         iter.setOffset(1); // sets offset in the middle of ch
    270         int order2[] = CollationTest.getOrders(iter);
    271         if (!Arrays.equals(order, order2)) {
    272             errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
    273         }
    274         contraction = "peache";
    275         iter = tailored.getCollationElementIterator(contraction);
    276         iter.setOffset(3);
    277         order = CollationTest.getOrders(iter);
    278         iter.setOffset(4); // sets offset in the middle of ch
    279         order2 = CollationTest.getOrders(iter);
    280         if (!Arrays.equals(order, order2)) {
    281             errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
    282         }
    283         // setting offset in the middle of a surrogate pair
    284         String surrogate = "\ud800\udc00str";
    285         iter = tailored.getCollationElementIterator(surrogate);
    286         order = CollationTest.getOrders(iter);
    287         iter.setOffset(1); // sets offset in the middle of surrogate
    288         order2 = CollationTest.getOrders(iter);
    289         if (!Arrays.equals(order, order2)) {
    290             errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
    291         }
    292         surrogate = "simple\ud800\udc00str";
    293         iter = tailored.getCollationElementIterator(surrogate);
    294         iter.setOffset(6);
    295         order = CollationTest.getOrders(iter);
    296         iter.setOffset(7); // sets offset in the middle of surrogate
    297         order2 = CollationTest.getOrders(iter);
    298         if (!Arrays.equals(order, order2)) {
    299             errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
    300         }
    301         // TODO: try iterating halfway through a messy string.
    302     }
    303 
    304 
    305 
    306     void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
    307         int c1, c2, count = 0;
    308         do {
    309             c1 = i1.next();
    310             c2 = i2.next();
    311             if (c1 != c2) {
    312                 errln("    " + count + ": strength(0x" +
    313                     Integer.toHexString(c1) + ") != strength(0x" + Integer.toHexString(c2) + ")");
    314                 break;
    315             }
    316             count += 1;
    317         } while (c1 != CollationElementIterator.NULLORDER);
    318         CollationTest.backAndForth(this, i1);
    319         CollationTest.backAndForth(this, i2);
    320     }
    321 
    322     /**
    323      * Test for CollationElementIterator.previous()
    324      *
    325      * @bug 4108758 - Make sure it works with contracting characters
    326      *
    327      */
    328     @Test
    329     public void TestPrevious(/* char* par */) {
    330         RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    331         CollationElementIterator iter = en_us.getCollationElementIterator(test1);
    332 
    333         // A basic test to see if it's working at all
    334         CollationTest.backAndForth(this, iter);
    335 
    336         // Test with a contracting character sequence
    337         String source;
    338         RuleBasedCollator c1 = null;
    339         try {
    340             c1 = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    341         } catch (Exception e) {
    342             errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
    343             return;
    344         }
    345 
    346         source = "abchdcba";
    347         iter = c1.getCollationElementIterator(source);
    348         CollationTest.backAndForth(this, iter);
    349 
    350         // Test with an expanding character sequence
    351         RuleBasedCollator c2 = null;
    352         try {
    353             c2 = new RuleBasedCollator("&a < b < c/abd < d");
    354         } catch (Exception e ) {
    355             errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
    356             return;
    357         }
    358 
    359         source = "abcd";
    360         iter = c2.getCollationElementIterator(source);
    361         CollationTest.backAndForth(this, iter);
    362 
    363         // Now try both
    364         RuleBasedCollator c3 = null;
    365         try {
    366             c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
    367         } catch (Exception e) {
    368             errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
    369             return;
    370         }
    371 
    372         source = "abcdbchdc";
    373         iter = c3.getCollationElementIterator(source);
    374         CollationTest.backAndForth(this, iter);
    375 
    376         source= "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
    377         Collator c4 = null;
    378         try {
    379             c4 = Collator.getInstance(new Locale("th", "TH", ""));
    380         } catch (Exception e) {
    381             errln("Couldn't create a collator");
    382             return;
    383         }
    384 
    385         iter = ((RuleBasedCollator)c4).getCollationElementIterator(source);
    386         CollationTest.backAndForth(this, iter);
    387 
    388         source= "\u0061\u30CF\u3099\u30FC";
    389         Collator c5 = null;
    390         try {
    391             c5 = Collator.getInstance(new Locale("ja", "JP", ""));
    392         } catch (Exception e) {
    393             errln("Couldn't create Japanese collator\n");
    394             return;
    395         }
    396         iter = ((RuleBasedCollator)c5).getCollationElementIterator(source);
    397 
    398         CollationTest.backAndForth(this, iter);
    399     }
    400 
    401 
    402 
    403     /**
    404      * Test for setText()
    405      */
    406     @Test
    407     public void TestSetText(/* char* par */) {
    408         RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    409         CollationElementIterator iter1 = en_us.getCollationElementIterator(test1);
    410         CollationElementIterator iter2 = en_us.getCollationElementIterator(test2);
    411 
    412         // Run through the second iterator just to exercise it
    413         int c = iter2.next();
    414         int i = 0;
    415 
    416         while ( ++i < 10 && c != CollationElementIterator.NULLORDER) {
    417             try {
    418                 c = iter2.next();
    419             } catch (Exception e) {
    420                 errln("iter2.next() returned an error.");
    421                 break;
    422             }
    423         }
    424 
    425         // Now set it to point to the same string as the first iterator
    426         try {
    427             iter2.setText(test1);
    428         } catch (Exception e) {
    429             errln("call to iter2->setText(test1) failed.");
    430             return;
    431         }
    432         assertEqual(iter1, iter2);
    433 
    434         iter1.reset();
    435         //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
    436         CharacterIterator chariter = new StringCharacterIterator(test1);
    437         try {
    438             iter2.setText(chariter);
    439         } catch (Exception e ) {
    440             errln("call to iter2->setText(chariter(test1)) failed.");
    441             return;
    442         }
    443         assertEqual(iter1, iter2);
    444 
    445         iter1.reset();
    446         //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
    447         UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
    448         try {
    449             iter2.setText(uchariter);
    450         } catch (Exception e ) {
    451             errln("call to iter2->setText(uchariter(test1)) failed.");
    452             return;
    453         }
    454         assertEqual(iter1, iter2);
    455     }
    456 
    457     /**
    458      * Test for CollationElementIterator previous and next for the whole set of
    459      * unicode characters.
    460      */
    461     @Test
    462     public void TestUnicodeChar() {
    463         RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
    464         CollationElementIterator iter;
    465         char codepoint;
    466         StringBuffer source = new StringBuffer();
    467         source.append("\u0e4d\u0e4e\u0e4f");
    468         // source.append("\u04e8\u04e9");
    469         iter = en_us.getCollationElementIterator(source.toString());
    470         // A basic test to see if it's working at all
    471         CollationTest.backAndForth(this, iter);
    472         for (codepoint = 1; codepoint < 0xFFFE;) {
    473             source.delete(0, source.length());
    474             while (codepoint % 0xFF != 0) {
    475                 if (UCharacter.isDefined(codepoint)) {
    476                     source.append(codepoint);
    477                 }
    478                 codepoint ++;
    479             }
    480 
    481             if (UCharacter.isDefined(codepoint)) {
    482                 source.append(codepoint);
    483             }
    484 
    485             if (codepoint != 0xFFFF) {
    486                 codepoint ++;
    487             }
    488             /*if (codepoint >= 0x04fc) {
    489                 System.out.println("codepoint " + Integer.toHexString(codepoint));
    490                 String str = source.substring(230, 232);
    491                 System.out.println(com.ibm.icu.impl.Utility.escape(str));
    492                 System.out.println("codepoint " + Integer.toHexString(codepoint)
    493                                    + "length " + str.length());
    494                 iter = en_us.getCollationElementIterator(str);
    495                 CollationTest.backAndForth(this, iter);
    496             }
    497             */
    498             iter = en_us.getCollationElementIterator(source.toString());
    499             // A basic test to see if it's working at all
    500             CollationTest.backAndForth(this, iter);
    501         }
    502     }
    503 
    504     /**
    505      * Test for CollationElementIterator previous and next for the whole set of
    506      * unicode characters with normalization on.
    507      */
    508     @Test
    509     public void TestNormalizedUnicodeChar()
    510     {
    511         // thai should have normalization on
    512         RuleBasedCollator th_th = null;
    513         try {
    514             th_th = (RuleBasedCollator)Collator.getInstance(
    515                                                        new Locale("th", "TH"));
    516         } catch (Exception e) {
    517             warnln("Error creating Thai collator");
    518             return;
    519         }
    520         StringBuffer source = new StringBuffer();
    521         source.append('\uFDFA');
    522         CollationElementIterator iter
    523                         = th_th.getCollationElementIterator(source.toString());
    524         CollationTest.backAndForth(this, iter);
    525         for (char codepoint = 0x1; codepoint < 0xfffe;) {
    526             source.delete(0, source.length());
    527             while (codepoint % 0xFF != 0) {
    528                 if (UCharacter.isDefined(codepoint)) {
    529                     source.append(codepoint);
    530                 }
    531                 codepoint ++;
    532             }
    533 
    534             if (UCharacter.isDefined(codepoint)) {
    535                 source.append(codepoint);
    536             }
    537 
    538             if (codepoint != 0xFFFF) {
    539                 codepoint ++;
    540             }
    541 
    542             /*if (((int)codepoint) >= 0xfe00) {
    543                 String str = source.substring(185, 190);
    544                 System.out.println(com.ibm.icu.impl.Utility.escape(str));
    545                 System.out.println("codepoint "
    546                                    + Integer.toHexString(codepoint)
    547                                    + "length " + str.length());
    548                 iter = th_th.getCollationElementIterator(str);
    549                 CollationTest.backAndForth(this, iter);
    550             */
    551             iter = th_th.getCollationElementIterator(source.toString());
    552             // A basic test to see if it's working at all
    553             CollationTest.backAndForth(this, iter);
    554         }
    555     }
    556 
    557     /**
    558     * Testing the discontiguous contractions
    559     */
    560     @Test
    561     public void TestDiscontiguous()
    562     {
    563         String rulestr ="&z < AB < X\u0300 < ABC < X\u0300\u0315";
    564         String src[] = {"ADB", "ADBC", "A\u0315B", "A\u0315BC",
    565                         // base character blocked
    566                         "XD\u0300", "XD\u0300\u0315",
    567                         // non blocking combining character
    568                         "X\u0319\u0300", "X\u0319\u0300\u0315",
    569                         // blocking combining character
    570                         "X\u0314\u0300", "X\u0314\u0300\u0315",
    571                         // contraction prefix
    572                         "ABDC", "AB\u0315C","X\u0300D\u0315",
    573                         "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
    574                         // ends not with a contraction character
    575                         "X\u0319\u0300D", "X\u0319\u0300\u0315D",
    576                         "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
    577                         "X\u0300\u031A\u0315D"
    578         };
    579         String tgt[] = {// non blocking combining character
    580                         "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
    581                         // base character blocked
    582                         "X D \u0300", "X D \u0300\u0315",
    583                         // non blocking combining character
    584                         "X\u0300 \u0319", "X\u0300\u0315 \u0319",
    585                         // blocking combining character
    586                         "X \u0314 \u0300", "X \u0314 \u0300\u0315",
    587                         // contraction prefix
    588                         "AB DC", "AB \u0315 C","X\u0300 D \u0315",
    589                         "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
    590                         // ends not with a contraction character
    591                         "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
    592                         "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
    593                         "X\u0300 \u031A\u0315D"
    594         };
    595         int count = 0;
    596         try {
    597             RuleBasedCollator coll = new RuleBasedCollator(rulestr);
    598             CollationElementIterator iter
    599                                         = coll.getCollationElementIterator("");
    600             CollationElementIterator resultiter
    601                                         = coll.getCollationElementIterator("");
    602             while (count < src.length) {
    603                 iter.setText(src[count]);
    604                 int s = 0;
    605                 while (s < tgt[count].length()) {
    606                     int e = tgt[count].indexOf(' ', s);
    607                     if (e < 0) {
    608                         e = tgt[count].length();
    609                     }
    610                     String resultstr = tgt[count].substring(s, e);
    611                     resultiter.setText(resultstr);
    612                     int ce = resultiter.next();
    613                     while (ce != CollationElementIterator.NULLORDER) {
    614                         if (ce != iter.next()) {
    615                             errln("Discontiguos contraction test mismatch at"
    616                                   + count);
    617                             return;
    618                         }
    619                         ce = resultiter.next();
    620                     }
    621                     s = e + 1;
    622                 }
    623                 iter.reset();
    624                 CollationTest.backAndForth(this, iter);
    625                 count ++;
    626             }
    627         }
    628         catch (Exception e) {
    629             warnln("Error running discontiguous tests " + e.toString());
    630         }
    631     }
    632 
    633     /**
    634     * Test the incremental normalization
    635     */
    636     @Test
    637     public void TestNormalization()
    638     {
    639         String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
    640         String testdata[] = {"\u1ED9", "o\u0323\u0302",
    641                             "\u0300\u0315", "\u0315\u0300",
    642                             "A\u0300\u0315B", "A\u0315\u0300B",
    643                             "A\u0316\u0315B", "A\u0315\u0316B",
    644                             "\u0316\u0300\u0315", "\u0315\u0300\u0316",
    645                             "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
    646                             "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"};
    647         RuleBasedCollator coll = null;
    648         try {
    649             coll = new RuleBasedCollator(rules);
    650             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    651         } catch (Exception e) {
    652             warnln("ERROR: in creation of collator using rules " + rules);
    653             return;
    654         }
    655 
    656         CollationElementIterator iter = coll.getCollationElementIterator("testing");
    657         for (int count = 0; count < testdata.length; count ++) {
    658             iter.setText(testdata[count]);
    659             CollationTest.backAndForth(this, iter);
    660         }
    661     }
    662 
    663     /**
    664      * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
    665      * normalization on AND jamo tailoring, among other things.
    666      *
    667      * Note: This test is sensitive to changes of the root collator,
    668      * for example whether the ae-ligature maps to three CEs (as in the DUCET)
    669      * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
    670      * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
    671      * For example, the DUCET's artificial secondary CE in the ae-ligature
    672      * may map to two 32-bit iterator CEs (as it did until ICU 52).
    673      */
    674     @Test
    675     public void TestSearchCollatorElements()
    676     {
    677         String tsceText =
    678             " \uAC00" +              // simple LV Hangul
    679             " \uAC01" +              // simple LVT Hangul
    680             " \uAC0F" +              // LVTT, last jamo expands for search
    681             " \uAFFF" +              // LLVVVTT, every jamo expands for search
    682             " \u1100\u1161\u11A8" +  // 0xAC01 as conjoining jamo
    683             " \u3131\u314F\u3131" +  // 0xAC01 as compatibility jamo
    684             " \u1100\u1161\u11B6" +  // 0xAC0F as conjoining jamo; last expands for search
    685             " \u1101\u1170\u11B6" +  // 0xAFFF as conjoining jamo; all expand for search
    686             " \u00E6" +              // small letter ae, expands
    687             " \u1E4D" +              // small letter o with tilde and acute, decomposes
    688             " ";
    689 
    690         int[] rootStandardOffsets = {
    691             0,  1,2,
    692             2,  3,4,4,
    693             4,  5,6,6,
    694             6,  7,8,8,
    695             8,  9,10,11,
    696             12, 13,14,15,
    697             16, 17,18,19,
    698             20, 21,22,23,
    699             24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    700             26, 27,28,28,
    701             28,
    702             29
    703         };
    704 
    705         int[] rootSearchOffsets = {
    706             0,  1,2,
    707             2,  3,4,4,
    708             4,  5,6,6,6,
    709             6,  7,8,8,8,8,8,8,
    710             8,  9,10,11,
    711             12, 13,14,15,
    712             16, 17,18,19,20,
    713             20, 21,22,22,23,23,23,24,
    714             24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    715             26, 27,28,28,
    716             28,
    717             29
    718         };
    719 
    720         class TSCEItem {
    721             private String localeString;
    722             private int[] offsets;
    723             TSCEItem(String locStr, int[] offs) {
    724                 localeString = locStr;
    725                 offsets = offs;
    726             }
    727             public String getLocaleString() { return localeString; }
    728             public int[] getOffsets() { return offsets; }
    729         }
    730         final TSCEItem[] tsceItems = {
    731             new TSCEItem( "root",                  rootStandardOffsets ),
    732             new TSCEItem( "root@collation=search", rootSearchOffsets   ),
    733         };
    734 
    735         for (TSCEItem tsceItem: tsceItems) {
    736             String localeString = tsceItem.getLocaleString();
    737             ULocale uloc = new ULocale(localeString);
    738             RuleBasedCollator col = null;
    739             try {
    740                 col = (RuleBasedCollator)Collator.getInstance(uloc);
    741             } catch (Exception e) {
    742                 errln("Error: in locale " + localeString + ", err in Collator.getInstance");
    743                 continue;
    744             }
    745             CollationElementIterator uce = col.getCollationElementIterator(tsceText);
    746             int[] offsets = tsceItem.getOffsets();
    747             int ioff, noff = offsets.length;
    748             int offset, element;
    749 
    750             ioff = 0;
    751             do {
    752                 offset = uce.getOffset();
    753                 element = uce.next();
    754                 logln(String.format("(%s) offset=%2d  ce=%08x\n", tsceItem.localeString, offset, element));
    755                 if (element == 0) {
    756                     errln("Error: in locale " + localeString + ", CEIterator next() returned element 0");
    757                 }
    758                 if ( ioff < noff ) {
    759                     if ( offset != offsets[ioff] ) {
    760                         errln("Error: in locale " + localeString + ", expected CEIterator next()->getOffset " + offsets[ioff] + ", got " + offset);
    761                         //ioff = noff;
    762                         //break;
    763                     }
    764                     ioff++;
    765                 } else {
    766                     errln("Error: in locale " + localeString + ", CEIterator next() returned more elements than expected");
    767                 }
    768             } while (element != CollationElementIterator.NULLORDER);
    769             if ( ioff < noff ) {
    770                 errln("Error: in locale " + localeString + ", CEIterator next() returned fewer elements than expected");
    771             }
    772 
    773             // backwards test
    774             uce.setOffset(tsceText.length());
    775             ioff = noff;
    776             do {
    777                 offset = uce.getOffset();
    778                 element = uce.previous();
    779                 if (element == 0) {
    780                     errln("Error: in locale " + localeString + ", CEIterator previous() returned element 0");
    781                 }
    782                 if ( ioff > 0 ) {
    783                     ioff--;
    784                     if ( offset != offsets[ioff] ) {
    785                         errln("Error: in locale " + localeString + ", expected CEIterator previous()->getOffset " + offsets[ioff] + ", got " + offset);
    786                         //ioff = 0;
    787                         //break;
    788                     }
    789                 } else {
    790                     errln("Error: in locale " + localeString + ", CEIterator previous() returned more elements than expected");
    791                 }
    792             } while (element != CollationElementIterator.NULLORDER);
    793             if ( ioff > 0 ) {
    794                 errln("Error: in locale " + localeString + ", CEIterator previous() returned fewer elements than expected");
    795             }
    796         }
    797     }
    798 }
    799