Home | History | Annotate | Download | only in collator
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 2002-2014, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  *******************************************************************************
      6  */
      7 
      8 /**
      9  * Port From:   ICU4C v2.1 : Collate/CollationDummyTest
     10  * Source File: $ICU4CRoot/source/test/intltest/allcoll.cpp
     11  *              $ICU4CRoot/source/test/cintltst/callcoll.c
     12  **/
     13 
     14 package com.ibm.icu.dev.test.collator;
     15 
     16 import java.util.Locale;
     17 
     18 import com.ibm.icu.dev.test.TestFmwk;
     19 import com.ibm.icu.text.CollationElementIterator;
     20 import com.ibm.icu.text.CollationKey;
     21 import com.ibm.icu.text.Collator;
     22 import com.ibm.icu.text.Normalizer;
     23 import com.ibm.icu.text.RuleBasedCollator;
     24 
     25 public class CollationDummyTest extends TestFmwk {
     26     public static void main(String[] args) throws Exception {
     27         new CollationDummyTest().run(args);
     28     }
     29 
     30     //testSourceCases[][] and testTargetCases[][], testCases[][] are ported from the file callcoll.c in icu4c
     31     private static char[][] testSourceCases = {
     32         {0x61, 0x62, 0x27, 0x63},
     33         {0x63, 0x6f, 0x2d, 0x6f, 0x70},
     34         {0x61, 0x62},
     35         {0x61, 0x6d, 0x70, 0x65, 0x72, 0x73, 0x61, 0x64},
     36         {0x61, 0x6c, 0x6c},
     37         {0x66, 0x6f, 0x75, 0x72},
     38         {0x66, 0x69, 0x76, 0x65},
     39         {0x31},
     40         {0x31},
     41         {0x31},                                            //  10
     42         {0x32},
     43         {0x32},
     44         {0x48, 0x65, 0x6c, 0x6c, 0x6f},
     45         {0x61, 0x3c, 0x62},
     46         {0x61, 0x3c, 0x62},
     47         {0x61, 0x63, 0x63},
     48         {0x61, 0x63, 0x48, 0x63},  //  simple test
     49         {0x70, 0x00EA, 0x63, 0x68, 0x65},
     50         {0x61, 0x62, 0x63},
     51         {0x61, 0x62, 0x63},                                  //  20
     52         {0x61, 0x62, 0x63},
     53         {0x61, 0x62, 0x63},
     54         {0x61, 0x62, 0x63},
     55         {0x61, 0x00E6, 0x63},
     56         {0x61, 0x63, 0x48, 0x63},  //  primary test
     57         {0x62, 0x6c, 0x61, 0x63, 0x6b},
     58         {0x66, 0x6f, 0x75, 0x72},
     59         {0x66, 0x69, 0x76, 0x65},
     60         {0x31},
     61         {0x61, 0x62, 0x63},                                        //  30
     62         {0x61, 0x62, 0x63},
     63         {0x61, 0x62, 0x63, 0x48},
     64         {0x61, 0x62, 0x63},
     65         {0x61, 0x63, 0x48, 0x63},                              //  34
     66         {0x61, 0x63, 0x65, 0x30},
     67         {0x31, 0x30},
     68         {0x70, 0x00EA,0x30}                                    // 37
     69     };
     70 
     71     private static char[][] testTargetCases = {
     72         {0x61, 0x62, 0x63, 0x27},
     73         {0x43, 0x4f, 0x4f, 0x50},
     74         {0x61, 0x62, 0x63},
     75         {0x26},
     76         {0x26},
     77         {0x34},
     78         {0x35},
     79         {0x6f, 0x6e, 0x65},
     80         {0x6e, 0x6e, 0x65},
     81         {0x70, 0x6e, 0x65},                                  //  10
     82         {0x74, 0x77, 0x6f},
     83         {0x75, 0x77, 0x6f},
     84         {0x68, 0x65, 0x6c, 0x6c, 0x4f},
     85         {0x61, 0x3c, 0x3d, 0x62},
     86         {0x61, 0x62, 0x63},
     87         {0x61, 0x43, 0x48, 0x63},
     88         {0x61, 0x43, 0x48, 0x63},  //  simple test
     89         {0x70, 0x00E9, 0x63, 0x68, 0x00E9},
     90         {0x61, 0x62, 0x63},
     91         {0x61, 0x42, 0x43},                                  //  20
     92         {0x61, 0x62, 0x63, 0x68},
     93         {0x61, 0x62, 0x64},
     94         {0x00E4, 0x62, 0x63},
     95         {0x61, 0x00C6, 0x63},
     96         {0x61, 0x43, 0x48, 0x63},  //  primary test
     97         {0x62, 0x6c, 0x61, 0x63, 0x6b, 0x2d, 0x62, 0x69, 0x72, 0x64},
     98         {0x34},
     99         {0x35},
    100         {0x6f, 0x6e, 0x65},
    101         {0x61, 0x62, 0x63},
    102         {0x61, 0x42, 0x63},                                  //  30
    103         {0x61, 0x62, 0x63, 0x68},
    104         {0x61, 0x62, 0x64},
    105         {0x61, 0x43, 0x48, 0x63},                                //  34
    106         {0x61, 0x63, 0x65, 0x30},
    107         {0x31, 0x30},
    108         {0x70, 0x00EB,0x30}                                    // 37
    109     };
    110 
    111     private static char[][] testCases = {
    112         {0x61},
    113         {0x41},
    114         {0x00e4},
    115         {0x00c4},
    116         {0x61, 0x65},
    117         {0x61, 0x45},
    118         {0x41, 0x65},
    119         {0x41, 0x45},
    120         {0x00e6},
    121         {0x00c6},
    122         {0x62},
    123         {0x63},
    124         {0x7a}
    125     };
    126 
    127     int[] results = {
    128         -1,
    129         -1, //Collator::GREATER,
    130         -1,
    131         -1,
    132         -1,
    133         -1,
    134         -1,
    135         1,
    136         1,
    137         -1,                                     //  10
    138         1,
    139         -1,
    140         1,
    141         1,
    142         -1,
    143         -1,
    144         -1,
    145     //  test primary > 17
    146         0,
    147         0,
    148         0,                                    //  20
    149         -1,
    150         -1,
    151         0,
    152         0,
    153         0,
    154         -1,
    155     //  test secondary > 26
    156         0,
    157         0,
    158         0,
    159         0,
    160         0,                                    //  30
    161         0,
    162         -1,
    163         0,                                     //  34
    164         0,
    165         0,
    166         -1
    167     };
    168 
    // Not referenced anywhere in the visible part of this file.
    final int MAX_TOKEN_LEN = 16;

    // Collator shared by the table-driven tests; assigned in init() from a
    // tailoring rule string.
    public RuleBasedCollator myCollation;

    // No-argument constructor; does no set-up of its own (see init()).
    public CollationDummyTest() {
    }
    175     protected void init() throws Exception{
    176         String ruleset = "& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
    177         // String ruleset = "& Four, 4";
    178         myCollation = null;
    179         myCollation = new RuleBasedCollator(ruleset);
    180     }
    181 
    182     // perform test with strength tertiary
    183     public void TestTertiary() {
    184         int i = 0;
    185         myCollation.setStrength(Collator.TERTIARY);
    186         for (i = 0; i < 17 ; i++) {
    187             doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    188         }
    189     }
    190 
    191     // perform test with strength PRIMARY
    192     public void TestPrimary() {
    193        // problem in strcollinc for unfinshed contractions
    194        myCollation.setStrength(Collator.PRIMARY);
    195         for (int i = 17; i < 26 ; i++) {
    196             doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    197         }
    198     }
    199 
    200     //perform test with strength SECONDARY
    201     public void TestSecondary() {
    202         int i;
    203         myCollation.setStrength(Collator.SECONDARY);
    204         for (i = 26; i < 34; i++) {
    205             doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    206         }
    207     }
    208 
    209     // perform extra tests
    210     public void TestExtra() {
    211         int i, j;
    212         myCollation.setStrength(Collator.TERTIARY);
    213         for (i = 0; i < testCases.length - 1; i++) {
    214             for (j = i + 1; j < testCases.length; j += 1) {
    215                 doTest(myCollation, testCases[i], testCases[j], -1);
    216             }
    217         }
    218     }
    219 
    220     public void TestIdentical() {
    221         int i;
    222         myCollation.setStrength(Collator.IDENTICAL);
    223         for (i= 34; i<37; i++) {
    224             doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    225         }
    226     }
    227 
    228     public void TestJB581() {
    229         String source = "THISISATEST.";
    230         String target = "Thisisatest.";
    231         Collator coll = null;
    232         try {
    233             coll = Collator.getInstance(Locale.ENGLISH);
    234         } catch (Exception e) {
    235             errln("ERROR: Failed to create the collator for : en_US\n");
    236             return;
    237         }
    238 
    239         int result = coll.compare(source, target);
    240         // result is 1, secondary differences only for ignorable space characters
    241         if (result != 1) {
    242             errln("Comparing two strings with only secondary differences in C failed.\n");
    243             return;
    244         }
    245 
    246         // To compare them with just primary differences
    247         coll.setStrength(Collator.PRIMARY);
    248         result = coll.compare(source, target);
    249         // result is 0
    250         if (result != 0) {
    251             errln("Comparing two strings with no differences in C failed.\n");
    252             return;
    253         }
    254 
    255         // Now, do the same comparison with keys
    256         CollationKey sourceKeyOut, targetKeyOut;
    257         sourceKeyOut = coll.getCollationKey(source);
    258         targetKeyOut = coll.getCollationKey(target);
    259         result = sourceKeyOut.compareTo(targetKeyOut);
    260         if (result != 0) {
    261             errln("Comparing two strings with sort keys in C failed.\n");
    262             return;
    263         }
    264     }
    265 
    266     //TestSurrogates() is ported from cintltst/callcoll.c
    267 
    268     /**
    269     * Tests surrogate support.
    270     */
    271     public void TestSurrogates()
    272     {
    273         String rules = "&z<'\ud800\udc00'<'\ud800\udc0a\u0308'<A";
    274         String source[] = {"z",
    275                            "\uD800\uDC00",
    276                            "\ud800\udc0a\u0308",
    277                            "\ud800\udc02"
    278         };
    279 
    280         String target[] = {"\uD800\uDC00",
    281                            "\ud800\udc0a\u0308",
    282                            "A",
    283                            "\ud800\udc03"
    284         };
    285 
    286         // this test is to verify the supplementary sort key order in the english
    287         // collator
    288         Collator enCollation;
    289         try {
    290             enCollation = Collator.getInstance(Locale.ENGLISH);
    291         } catch (Exception e) {
    292             errln("ERROR: Failed to create the collator for ENGLISH");
    293             return;
    294         }
    295 
    296         myCollation.setStrength(Collator.TERTIARY);
    297         int count = 0;
    298         // logln("start of english collation supplementary characters test\n");
    299         while (count < 2) {
    300             doTest(enCollation, source[count], target[count], -1);
    301             count ++;
    302         }
    303         doTest(enCollation, source[count], target[count], 1);
    304 
    305         // logln("start of tailored collation supplementary characters test\n");
    306         count = 0;
    307         Collator newCollation;
    308         try {
    309             newCollation = new RuleBasedCollator(rules);
    310         } catch (Exception e) {
    311             errln("ERROR: Failed to create the collator for rules");
    312             return;
    313         }
    314 
    315         // tests getting collation elements for surrogates for tailored rules
    316         while (count < 4) {
    317             doTest(newCollation, source[count], target[count], -1);
    318             count ++;
    319         }
    320 
    321         // tests that \uD801\uDC01 still has the same value, not changed
    322         CollationKey enKey = enCollation.getCollationKey(source[3]);
    323         CollationKey newKey = newCollation.getCollationKey(source[3]);
    324         int keyResult = enKey.compareTo(newKey);
    325         if(keyResult != 0) {
    326             errln("Failed : non-tailored supplementary characters should have the same value\n");
    327         }
    328     }
    329 
    330     private static final boolean SUPPORT_VARIABLE_TOP_RELATION = false;
    331     //TestVariableTop() is ported from cintltst/callcoll.c
    332     /**
    333     * Tests the [variable top] tag in rule syntax. Since the default [alternate]
    334     * tag has the value shifted, any codepoints before [variable top] should give
    335     * a primary ce of 0.
    336     */
    337     public void TestVariableTop() {
    338         /*
    339          * Starting with ICU 53, setting the variable top via a pseudo relation string
    340          * is not supported any more.
    341          * It was replaced by the [maxVariable symbol] setting.
    342          * See ICU tickets #9958 and #8032.
    343          */
    344         if(!SUPPORT_VARIABLE_TOP_RELATION) { return; }
    345         String rule = "&z = [variable top]";
    346         Collator  myColl;
    347         Collator  enColl;
    348         char[] source = new char[1];
    349         char ch;
    350         int expected[] = {0};
    351 
    352         try {
    353             enColl = Collator.getInstance(Locale.ENGLISH);
    354         } catch (Exception e) {
    355             errln("ERROR: Failed to create the collator for ENGLISH");
    356             return;
    357         }
    358 
    359         try{
    360             myColl = new RuleBasedCollator(rule);
    361         } catch(Exception e){
    362             errln("Fail to create RuleBasedCollator with rules:" + rule);
    363             return;
    364         }
    365         enColl.setStrength(Collator.PRIMARY);
    366         myColl.setStrength(Collator.PRIMARY);
    367 
    368         ((RuleBasedCollator)enColl).setAlternateHandlingShifted(true);
    369         ((RuleBasedCollator)myColl).setAlternateHandlingShifted(true);
    370 
    371         if(((RuleBasedCollator)enColl).isAlternateHandlingShifted() != true) {
    372             errln("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    373         }
    374 
    375         // space is supposed to be a variable
    376         CollationKey key = enColl.getCollationKey(" ");
    377         byte[] result = key.toByteArray();
    378 
    379         for(int i = 0; i < result.length; i++) {
    380             if(result[i]!= expected[i]) {
    381                 errln("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    382                 break;
    383             }
    384         }
    385 
    386         ch = 'a';
    387         while (ch < 'z') {
    388             source[0] = ch;
    389             key = myColl.getCollationKey(new String(source));
    390             result = key.toByteArray();
    391 
    392             for(int i = 0; i < result.length; i++) {
    393                 if(result[i]!= expected[i]) {
    394                     errln("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    395                     break;
    396                 }
    397             }
    398             ch ++;
    399         }
    400     }
    401 
    402     public void TestJB1401() {
    403         Collator     myCollator = null;
    404         char[] NFD_UnsafeStartChars = {
    405             0x0f73,          // Tibetan Vowel Sign II
    406             0x0f75,          // Tibetan Vowel Sign UU
    407             0x0f81,          // Tibetan Vowel Sign Reversed II
    408             0
    409         };
    410         int i;
    411 
    412         try{
    413             myCollator = Collator.getInstance(Locale.ENGLISH);
    414         } catch(Exception e) {
    415             errln("ERROR: Failed to create the collator for ENGLISH");
    416             return;
    417         }
    418         myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    419         for (i=0; ; i++) {
    420             // Get the next funny character to be tested, and set up the
    421             // three test strings X, Y, Z, consisting of an A-grave + test char,
    422             // in original form, NFD, and then NFC form.
    423             char c = NFD_UnsafeStartChars[i];
    424             if (c==0) {break;}
    425 
    426             String x = "\u00C0" + c;       // \u00C0 is A Grave
    427             String y;
    428             String z;
    429 
    430             try{
    431                 y = Normalizer.decompose(x, false);
    432                 z = Normalizer.decompose(y, true);
    433             } catch (Exception e) {
    434                 errln("ERROR: Failed to normalize test of character" + c);
    435                 return;
    436             }
    437 
    438             // Collation test.  All three strings should be equal.
    439             // doTest does both strcoll and sort keys, with params in both orders.
    440             doTest(myCollator, x, y, 0);
    441             doTest(myCollator, x, z, 0);
    442             doTest(myCollator, y, z, 0);
    443 
    444             // Run collation element iterators over the three strings.  Results should be same for each.
    445 
    446             {
    447                 CollationElementIterator ceiX, ceiY, ceiZ;
    448                 int ceX, ceY, ceZ;
    449                 int j;
    450                 try {
    451                     ceiX = ((RuleBasedCollator)myCollator).getCollationElementIterator(x);
    452                     ceiY = ((RuleBasedCollator)myCollator).getCollationElementIterator(y);
    453                     ceiZ = ((RuleBasedCollator)myCollator).getCollationElementIterator(z);
    454                 } catch(Exception e) {
    455                     errln("ERROR: getCollationElementIterator failed");
    456                     return;
    457                 }
    458 
    459                 for (j=0;; j++) {
    460                     try{
    461                         ceX = ceiX.next();
    462                         ceY = ceiY.next();
    463                         ceZ = ceiZ.next();
    464                     } catch (Exception e) {
    465                         errln("ERROR: CollationElementIterator.next failed for iteration " + j);
    466                         break;
    467                     }
    468 
    469                     if (ceX != ceY || ceY != ceZ) {
    470                         errln("ERROR: ucol_next failed for iteration " + j);
    471                         break;
    472                     }
    473                     if (ceX == CollationElementIterator.NULLORDER) {
    474                         break;
    475                     }
    476                 }
    477             }
    478         }
    479     }
    480 
    481     // main test method called with different strengths,
    482     // tests comparison of custum collation with different strengths
    483 
    484     private void doTest(Collator collation, char[] source, char[] target, int result) {
    485         String s = new String(source);
    486         String t = new String(target);
    487         doTestVariant(collation, s, t, result);
    488         if(result == -1) {
    489             doTestVariant(collation, t, s, 1);
    490         } else if(result == 1) {
    491             doTestVariant(collation, t, s, -1);
    492         } else {
    493             doTestVariant(collation, t, s, 0);
    494         }
    495     }
    496 
    497     // main test method called with different strengths,
    498     // tests comparison of custum collation with different strengths
    499 
    500     private void doTest(Collator collation,String s, String t, int result) {
    501         doTestVariant(collation, s, t, result);
    502         if(result == -1) {
    503             doTestVariant(collation, t, s, 1);
    504         } else if(result == 1) {
    505             doTestVariant(collation, t, s, -1);
    506         } else {
    507             doTestVariant(collation, t, s, 0);
    508         }
    509     }
    510 
    511     private void doTestVariant(Collator collation, String source, String target, int result) {
    512         int compareResult = collation.compare(source, target);
    513         CollationKey srckey , tgtkey;
    514         srckey = collation.getCollationKey(source);
    515         tgtkey = collation.getCollationKey(target);
    516         int keyResult = srckey.compareTo(tgtkey);
    517         if (compareResult != result) {
    518             errln("String comparison failed in variant test\n");
    519         }
    520         if (keyResult != result) {
    521             errln("Collation key comparison failed in variant test\n");
    522         }
    523     }
    524 }