Home | History | Annotate | Download | only in normalizer
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2010, International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 package android.icu.dev.test.normalizer;
     11 
     12 import java.util.Collection;
     13 import java.util.Iterator;
     14 import java.util.Set;
     15 import java.util.SortedSet;
     16 import java.util.TreeSet;
     17 
     18 import org.junit.Test;
     19 import org.junit.runner.RunWith;
     20 import org.junit.runners.JUnit4;
     21 
     22 import android.icu.dev.test.TestFmwk;
     23 import android.icu.impl.Utility;
     24 import android.icu.lang.UCharacter;
     25 import android.icu.text.CanonicalIterator;
     26 import android.icu.text.Normalizer;
     27 import android.icu.text.UTF16;
     28 import android.icu.testsharding.MainTestShard;
     29 
     30 
     31 // TODO: fit into test framework
     32 
     33 @MainTestShard
     34 @RunWith(JUnit4.class)
     35 public class TestCanonicalIterator extends TestFmwk {
     36 
     37     static final boolean SHOW_NAMES = false;
     38 
     39     static final String testArray[][] = {
     40         {"\u00C5d\u0307\u0327", "A\u030Ad\u0307\u0327, A\u030Ad\u0327\u0307, A\u030A\u1E0B\u0327, "
     41             + "A\u030A\u1E11\u0307, \u00C5d\u0307\u0327, \u00C5d\u0327\u0307, "
     42             + "\u00C5\u1E0B\u0327, \u00C5\u1E11\u0307, \u212Bd\u0307\u0327, "
     43             + "\u212Bd\u0327\u0307, \u212B\u1E0B\u0327, \u212B\u1E11\u0307"},
     44         {"\u010d\u017E", "c\u030Cz\u030C, c\u030C\u017E, \u010Dz\u030C, \u010D\u017E"},
     45         {"x\u0307\u0327", "x\u0307\u0327, x\u0327\u0307, \u1E8B\u0327"},
     46     };
     47 
     48     @Test
     49     public void TestExhaustive() {
     50         int counter = 0;
     51         CanonicalIterator it = new CanonicalIterator("");
     52         /*
     53         CanonicalIterator slowIt = new CanonicalIterator("");
     54         slowIt.SKIP_ZEROS = false;
     55         */
     56         //Transliterator name = Transliterator.getInstance("[^\\u0020-\\u007F] name");
     57         //Set itSet = new TreeSet();
     58         //Set slowItSet = new TreeSet();
     59 
     60 
     61         for (int i = 0; i < 0x10FFFF; ++i) {
     62 
     63             // skip characters we know don't have decomps
     64             int type = UCharacter.getType(i);
     65             if (type == Character.UNASSIGNED || type == Character.PRIVATE_USE
     66                 || type == Character.SURROGATE) continue;
     67 
     68             if ((++counter % 5000) == 0) logln("Testing " + Utility.hex(i,0));
     69 
     70             String s = UTF16.valueOf(i);
     71             characterTest(s, i, it);
     72 
     73             characterTest(s + "\u0345", i, it);
     74         }
     75     }
     76 
     77     public int TestSpeed() {
     78          // skip unless verbose
     79         if (!isVerbose()) return 0;
     80 
     81            String s = "\uAC01\u0345";
     82 
     83         CanonicalIterator it = new CanonicalIterator(s);
     84         double start, end;
     85         int x = 0; // just to keep code from optimizing away.
     86         int iterations = 10000;
     87         double slowDelta = 0;
     88 
     89         /*
     90         CanonicalIterator slowIt = new CanonicalIterator(s);
     91         slowIt.SKIP_ZEROS = false;
     92 
     93         start = System.currentTimeMillis();
     94         for (int i = 0; i < iterations; ++i) {
     95             slowIt.setSource(s);
     96             while (true) {
     97                 String item = slowIt.next();
     98                 if (item == null) break;
     99                 x += item.length();
    100             }
    101         }
    102         end = System.currentTimeMillis();
    103         double slowDelta = (end-start) / iterations;
    104         logln("Slow iteration: " + slowDelta);
    105         */
    106 
    107         start = System.currentTimeMillis();
    108         for (int i = 0; i < iterations; ++i) {
    109             it.setSource(s);
    110             while (true) {
    111                 String item = it.next();
    112                 if (item == null) break;
    113                 x += item.length();
    114             }
    115         }
    116         end = System.currentTimeMillis();
    117         double fastDelta = (end-start) / iterations;
    118         logln("Fast iteration: " + fastDelta + (slowDelta != 0 ? ", " + (fastDelta/slowDelta) : ""));
    119 
    120 
    121         return x;
    122     }
    123 
    124     @Test
    125     public void TestBasic() {
    126 //      This is not interesting anymore as the data is already built
    127 //      beforehand
    128 
    129 //        check build
    130 //        UnicodeSet ss = CanonicalIterator.getSafeStart();
    131 //        logln("Safe Start: " + ss.toPattern(true));
    132 //        ss = CanonicalIterator.getStarts('a');
    133 //        expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
    134 //            new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
    135 //            + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
    136 //                );
    137 
    138         // check permute
    139         // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
    140 
    141         Set results = new TreeSet();
    142         CanonicalIterator.permute("ABC", false, results);
    143         expectEqual("Simple permutation ", "", collectionToString(results), "ABC, ACB, BAC, BCA, CAB, CBA");
    144 
    145         // try samples
    146         SortedSet set = new TreeSet();
    147         for (int i = 0; i < testArray.length; ++i) {
    148             //logln("Results for: " + name.transliterate(testArray[i]));
    149             CanonicalIterator it = new CanonicalIterator(testArray[i][0]);
    150            // int counter = 0;
    151             set.clear();
    152             String first = null;
    153             while (true) {
    154                 String result = it.next();
    155                 if(first==null){
    156                     first = result;
    157                 }
    158                 if (result == null) break;
    159                 set.add(result); // sort them
    160                 //logln(++counter + ": " + hex.transliterate(result));
    161                 //logln(" = " + name.transliterate(result));
    162             }
    163             expectEqual(i + ": ", testArray[i][0], collectionToString(set), testArray[i][1]);
    164             it.reset();
    165             if(!it.next().equals(first)){
    166                 errln("CanonicalIterator.reset() failed");
    167             }
    168             if(!it.getSource().equals(Normalizer.normalize(testArray[i][0],Normalizer.NFD))){
    169                 errln("CanonicalIterator.getSource() does not return NFD of input source");
    170             }
    171         }
    172     }
    173 
    174     private void expectEqual(String message, String item, Object a, Object b) {
    175         if (!a.equals(b)) {
    176             errln("FAIL: " + message + getReadable(item));
    177             errln("\t" + getReadable(a));
    178             errln("\t" + getReadable(b));
    179         } else {
    180             logln("Checked: " + message + getReadable(item));
    181             logln("\t" + getReadable(a));
    182             logln("\t" + getReadable(b));
    183         }
    184     }
    185 
    186     //Transliterator name = null;
    187     //Transliterator hex = null;
    188 
    189     public String getReadable(Object obj) {
    190         if (obj == null) return "null";
    191         String s = obj.toString();
    192         if (s.length() == 0) return "";
    193         // set up for readable display
    194         //if (name == null) name = Transliterator.getInstance("[^\\ -\\u007F] name");
    195         //if (hex == null) hex = Transliterator.getInstance("[^\\ -\\u007F] hex");
    196         return "[" + (SHOW_NAMES ? hex(s) + "; " : "") + hex(s) + "]";
    197     }
    198 
    199     private void characterTest(String s, int ch, CanonicalIterator it)
    200     {
    201         int mixedCounter = 0;
    202         int lastMixedCounter = -1;
    203         boolean gotDecomp = false;
    204         boolean gotComp = false;
    205         boolean gotSource = false;
    206         String decomp = Normalizer.decompose(s, false);
    207         String comp = Normalizer.compose(s, false);
    208 
    209         // skip characters that don't have either decomp.
    210         // need quick test for this!
    211         if (s.equals(decomp) && s.equals(comp)) return;
    212 
    213         it.setSource(s);
    214 
    215         while (true) {
    216             String item = it.next();
    217             if (item == null) break;
    218             if (item.equals(s)) gotSource = true;
    219             if (item.equals(decomp)) gotDecomp = true;
    220             if (item.equals(comp)) gotComp = true;
    221             if ((mixedCounter & 0x7F) == 0 && (ch < 0xAD00 || ch > 0xAC00 + 11172)) {
    222                 if (lastMixedCounter != mixedCounter) {
    223                     logln("");
    224                     lastMixedCounter = mixedCounter;
    225                 }
    226                 logln("\t" + mixedCounter + "\t" + hex(item)
    227                 + (item.equals(s) ? "\t(*original*)" : "")
    228                 + (item.equals(decomp) ? "\t(*decomp*)" : "")
    229                 + (item.equals(comp) ? "\t(*comp*)" : "")
    230                 );
    231             }
    232 
    233         }
    234 
    235         // check that zeros optimization doesn't mess up.
    236         /*
    237         if (true) {
    238             it.reset();
    239             itSet.clear();
    240             while (true) {
    241                 String item = it.next();
    242                 if (item == null) break;
    243                 itSet.add(item);
    244             }
    245             slowIt.setSource(s);
    246             slowItSet.clear();
    247             while (true) {
    248                 String item = slowIt.next();
    249                 if (item == null) break;
    250                 slowItSet.add(item);
    251             }
    252             if (!itSet.equals(slowItSet)) {
    253                 errln("Zero optimization failure with " + getReadable(s));
    254             }
    255         }
    256         */
    257 
    258         mixedCounter++;
    259         if (!gotSource || !gotDecomp || !gotComp) {
    260             errln("FAIL CanonicalIterator: " + s + " decomp: " +decomp+" comp: "+comp);
    261             it.reset();
    262             for(String item=it.next();item!=null;item=it.next()){
    263                 err(item + "    ");
    264             }
    265             errln("");
    266         }
    267     }
    268 
    269     static String collectionToString(Collection col) {
    270         StringBuffer result = new StringBuffer();
    271         Iterator it = col.iterator();
    272         while (it.hasNext()) {
    273             if (result.length() != 0) result.append(", ");
    274             result.append(it.next().toString());
    275         }
    276         return result.toString();
    277     }
    278 }