Home | History | Annotate | Download | only in normalizer
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 1996-2010, International Business Machines Corporation and    *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.dev.test.normalizer;
     10 
     11 import org.junit.Test;
     12 import org.junit.runner.RunWith;
     13 import org.junit.runners.JUnit4;
     14 
     15 import com.ibm.icu.dev.test.TestFmwk;
     16 import com.ibm.icu.impl.Utility;
     17 import com.ibm.icu.lang.UCharacter;
     18 import com.ibm.icu.lang.UProperty;
     19 import com.ibm.icu.text.ComposedCharIter;
     20 import com.ibm.icu.text.Normalizer;
     21 import com.ibm.icu.text.StringCharacterIterator;
     22 
     23 @RunWith(JUnit4.class)
     24 public class TestDeprecatedNormalizerAPI extends TestFmwk
     25 {
     26     public TestDeprecatedNormalizerAPI() {
     27     }
     28 
     29     @Test
     30     public void TestNormalizerAPI(){
     31          // instantiate a Normalizer from a CharacterIterator
     32         String s=Utility.unescape("a\u0308\uac00\\U0002f800");
     33         // make s a bit longer and more interesting
     34         java.text.CharacterIterator iter = new StringCharacterIterator(s+s);
     35         //test deprecated constructors
     36         Normalizer norm = new Normalizer(iter, Normalizer.NFC,0);
     37         if(norm.next()!=0xe4) {
     38             errln("error in Normalizer(CharacterIterator).next()");
     39         }
     40         Normalizer norm2 = new Normalizer(s,Normalizer.NFC,0);
     41         if(norm2.next()!=0xe4) {
     42             errln("error in Normalizer(CharacterIterator).next()");
     43         }
     44         // test clone(), ==, and hashCode()
     45         Normalizer clone=(Normalizer)norm.clone();
     46         if(clone.getBeginIndex()!= norm.getBeginIndex()){
     47            errln("error in Normalizer.getBeginIndex()");
     48         }
     49 
     50         if(clone.getEndIndex()!= norm.getEndIndex()){
     51            errln("error in Normalizer.getEndIndex()");
     52         }
     53         // test setOption() and getOption()
     54         clone.setOption(0xaa0000, true);
     55         clone.setOption(0x20000, false);
     56         if(clone.getOption(0x880000) ==0|| clone.getOption(0x20000)==1) {
     57            errln("error in Normalizer::setOption() or Normalizer::getOption()");
     58         }
     59         //test deprecated normalize method
     60         Normalizer.normalize(s,Normalizer.NFC,0);
     61         //test deprecated compose method
     62         Normalizer.compose(s,false,0);
     63         //test deprecated decompose method
     64         Normalizer.decompose(s,false,0);
     65 
     66     }
     67 
     68     /**
     69      * Run through all of the characters returned by a composed-char iterator
     70      * and make sure that:
     71      * <ul>
     72      * <li>a) They do indeed have decompositions.
     73      * <li>b) The decomposition according to the iterator is the same as
     74      *          returned by Normalizer.decompose().
     75      * <li>c) All characters <em>not</em> returned by the iterator do not
     76      *          have decompositions.
     77      * </ul>
     78      */
     79     @Test
     80     public void TestComposedCharIter() {
     81         doTestComposedChars(false);
     82     }
     83 
     84     private void doTestComposedChars(boolean compat) {
     85         int options = Normalizer.IGNORE_HANGUL;
     86         ComposedCharIter iter = new ComposedCharIter(compat, options);
     87 
     88         char lastChar = 0;
     89 
     90         while (iter.hasNext()) {
     91             char ch = iter.next();
     92 
     93             // Test all characters between the last one and this one to make
     94             // sure that they don't have decompositions
     95             assertNoDecomp(lastChar, ch, compat, options);
     96             lastChar = ch;
     97 
     98             // Now make sure that the decompositions for this character
     99             // make sense
    100             String chString   = new StringBuffer().append(ch).toString();
    101             String iterDecomp = iter.decomposition();
    102             String normDecomp = Normalizer.decompose(chString, compat);
    103 
    104             if (iterDecomp.equals(chString)) {
    105                 errln("ERROR: " + hex(ch) + " has identical decomp");
    106             }
    107             else if (!iterDecomp.equals(normDecomp)) {
    108                 errln("ERROR: Normalizer decomp for " + hex(ch) + " (" + hex(normDecomp) + ")"
    109                     + " != iter decomp (" + hex(iterDecomp) + ")" );
    110             }
    111         }
    112         assertNoDecomp(lastChar, '\uFFFF', compat, options);
    113     }
    114 
    115     void assertNoDecomp(char start, char limit, boolean compat, int options)
    116     {
    117         for (char x = ++start; x < limit; x++) {
    118             String xString   = new StringBuffer().append(x).toString();
    119             String decomp = Normalizer.decompose(xString, compat);
    120             if (!decomp.equals(xString)) {
    121                 errln("ERROR: " + hex(x) + " has decomposition (" + hex(decomp) + ")"
    122                     + " but was not returned by iterator");
    123             }
    124         }
    125     }
    126 
    127 
    128     @Test
    129     public void TestRoundTrip() {
    130         int options = Normalizer.IGNORE_HANGUL;
    131         boolean compat = false;
    132 
    133         ComposedCharIter iter = new ComposedCharIter(false, options);
    134         while (iter.hasNext()) {
    135             final char ch = iter.next();
    136 
    137             String chStr = String.valueOf(ch);
    138             String decomp = iter.decomposition();
    139             String comp = Normalizer.compose(decomp, compat);
    140 
    141             if (UCharacter.hasBinaryProperty(ch, UProperty.FULL_COMPOSITION_EXCLUSION)) {
    142                 logln("Skipped excluded char " + hex(ch) + " (" + UCharacter.getName(ch) + ")" );
    143                 continue;
    144             }
    145 
    146             // Avoid disparaged characters
    147             if (decomp.length() == 4) continue;
    148 
    149             if (!comp.equals(chStr)) {
    150                 errln("ERROR: Round trip invalid: " + hex(chStr) + " --> " + hex(decomp)
    151                     + " --> " + hex(comp));
    152 
    153                 errln("  char decomp is '" + decomp + "'");
    154             }
    155         }
    156     }
    157 }
    158