Home | History | Annotate | Download | only in normalizer
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2015, International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 
     11 package android.icu.dev.test.normalizer;
     12 
     13 import java.io.BufferedReader;
     14 import java.io.IOException;
     15 
     16 import org.junit.Ignore;
     17 import org.junit.Test;
     18 import org.junit.runner.RunWith;
     19 import org.junit.runners.JUnit4;
     20 
     21 import android.icu.dev.test.TestFmwk;
     22 import android.icu.dev.test.TestUtil;
     23 import android.icu.text.UTF16;
     24 import android.icu.text.UnicodeSet;
     25 import android.icu.testsharding.MainTestShard;
     26 
     27 
     28 @MainTestShard
     29 @RunWith(JUnit4.class)
     30 public class UnicodeNormalizerConformanceTest extends TestFmwk {
     31 
     32     UnicodeNormalizer normalizer_C, normalizer_D, normalizer_KC, normalizer_KD;
     33 
     34     public UnicodeNormalizerConformanceTest() {
     35         // Doesn't matter what the string and mode are; we'll change
     36         // them later as needed.
     37         normalizer_C = new UnicodeNormalizer(UnicodeNormalizer.C, true);
     38         normalizer_D = new UnicodeNormalizer(UnicodeNormalizer.D, false);
     39         normalizer_KC = new UnicodeNormalizer(UnicodeNormalizer.KC, false);
     40         normalizer_KD = new UnicodeNormalizer(UnicodeNormalizer.KD, false);
     41 
     42     }
     43     // more interesting conformance test cases, not in the unicode.org NormalizationTest.txt
     44     static  String[] moreCases ={
     45         // Markus 2001aug30
     46         "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0",
     47 
     48         // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129
     49         "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1"
     50     };
     51 
     52     /**
     53      * Test the conformance of NewNormalizer to
     54      * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt.
     55      * This file must be located at the path specified as TEST_SUITE_FILE.
     56      */
     57     @Test
     58     public void TestConformance() throws Exception{
     59         String line = null;
     60         String[] fields = new String[5];
     61         StringBuffer buf = new StringBuffer();
     62         int passCount = 0;
     63         int failCount = 0;
     64         UnicodeSet other = new UnicodeSet(0, 0x10ffff);
     65         int c=0;
     66         BufferedReader input = null;
     67         try {
     68             input = TestUtil.getDataReader("unicode/NormalizationTest.txt");
     69             for (int count = 0;;++count) {
     70                 line = input.readLine();
     71                 if (line == null) {
     72                     //read the extra test cases
     73                     if(count > moreCases.length) {
     74                         count = 0;
     75                     } else if(count == moreCases.length) {
     76                         // all done
     77                         break;
     78                     }
     79                     line = moreCases[count++];
     80                 }
     81                 if (line.length() == 0) continue;
     82 
     83                 // Expect 5 columns of this format:
     84                 // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments>
     85 
     86                 // Skip comments
     87                 if (line.charAt(0) == '#'  || line.charAt(0)=='@') continue;
     88 
     89                 // Parse out the fields
     90                 hexsplit(line, ';', fields, buf);
     91 
     92                 // Remove a single code point from the "other" UnicodeSet
     93                 if(fields[0].length()==UTF16.moveCodePointOffset(fields[0],0, 1)) {
     94                     c=UTF16.charAt(fields[0],0);
     95                     if(0xac20<=c && c<=0xd73f) {
     96                         // not an exhaustive test run: skip most Hangul syllables
     97                         if(c==0xac20) {
     98                             other.remove(0xac20, 0xd73f);
     99                         }
    100                         continue;
    101                     }
    102                     other.remove(c);
    103                 }
    104                 if (checkConformance(fields, line)) {
    105                     ++passCount;
    106                 } else {
    107                     ++failCount;
    108                 }
    109                 if ((count % 1000) == 999) {
    110                     logln("Line " + (count+1));
    111                 }
    112             }
    113         } catch (IOException ex) {
    114             ex.printStackTrace();
    115             throw new IllegalArgumentException("Couldn't read file "
    116               + ex.getClass().getName() + " " + ex.getMessage()
    117               + " line = " + line
    118               );
    119         } finally {
    120             if (input != null) {
    121                 try {
    122                     input.close();
    123                 } catch (Exception ignored) {
    124                 }
    125             }
    126         }
    127 
    128         if (failCount != 0) {
    129             errln("Total: " + failCount + " lines failed, " +
    130                   passCount + " lines passed");
    131         } else {
    132             logln("Total: " + passCount + " lines passed");
    133         }
    134     }
    135 
    136     /**
    137      * Verify the conformance of the given line of the Unicode
    138      * normalization (UTR 15) test suite file.  For each line,
    139      * there are five columns, corresponding to field[0]..field[4].
    140      *
    141      * The following invariants must be true for all conformant implementations
    142      *  c2 == NFC(c1) == NFC(c2) == NFC(c3)
    143      *  c3 == NFD(c1) == NFD(c2) == NFD(c3)
    144      *  c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
    145      *  c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
    146      *
    147      * @param field the 5 columns
    148      * @param line the source line from the test suite file
    149      * @return true if the test passes
    150      */
    151     private boolean checkConformance(String[] field, String line) throws Exception{
    152         boolean pass = true;
    153        // StringBuffer buf = new StringBuffer(); // scratch
    154         String out;
    155         int i=0;
    156         for (i=0; i<5; ++i) {
    157             if (i<3) {
    158                 out = normalizer_C.normalize(field[i]);
    159                 pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c" + (i+1));
    160 
    161                 out = normalizer_D.normalize(field[i]);
    162                 pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c" + (i+1));
    163 
    164             }
    165             out = normalizer_KC.normalize(field[i]);
    166             pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c" + (i+1));
    167 
    168             out = normalizer_KD.normalize(field[i]);
    169             pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c" + (i+1));
    170 
    171         }
    172 
    173         if (!pass) {
    174             errln("FAIL: " + line);
    175         }
    176 
    177         return pass;
    178     }
    179 
    180     /**
    181      * @param op name of normalization form, e.g., "KC"
    182      * @param s string being normalized
    183      * @param got value received
    184      * @param exp expected value
    185      * @param msg description of this test
    186      * @returns true if got == exp
    187      */
    188     private boolean assertEqual(String op, String s, String got,
    189                                 String exp, String msg) {
    190         if (exp.equals(got)) {
    191             return true;
    192         }
    193         errln(("      " + msg + ") " + op + "(" + s + ")=" + hex(got) +
    194                              ", exp. " + hex(exp)));
    195         return false;
    196     }
    197 
    198     /**
    199      * Split a string into pieces based on the given delimiter
    200      * character.  Then, parse the resultant fields from hex into
    201      * characters.  That is, "0040 0400;0C00;0899" -> new String[] {
    202      * "\u0040\u0400", "\u0C00", "\u0899" }.  The output is assumed to
    203      * be of the proper length already, and exactly output.length
    204      * fields are parsed.  If there are too few an exception is
    205      * thrown.  If there are too many the extras are ignored.
    206      *
    207      * @param buf scratch buffer
    208      */
    209     private static void hexsplit(String s, char delimiter,
    210                                  String[] output, StringBuffer buf) {
    211         int i;
    212         int pos = 0;
    213         for (i=0; i<output.length; ++i) {
    214             int delim = s.indexOf(delimiter, pos);
    215             if (delim < 0) {
    216                 throw new IllegalArgumentException("Missing field in " + s);
    217             }
    218             // Our field is from pos..delim-1.
    219             buf.setLength(0);
    220 
    221             String toHex = s.substring(pos,delim);
    222             pos = delim;
    223             int index = 0;
    224             int len = toHex.length();
    225             while(index< len){
    226                 if(toHex.charAt(index)==' '){
    227                     index++;
    228                 }else{
    229                     int spacePos = toHex.indexOf(' ', index);
    230                     if(spacePos==-1){
    231                         appendInt(buf,toHex.substring(index,len),s);
    232                         spacePos = len;
    233                     }else{
    234                         appendInt(buf,toHex.substring(index, spacePos),s);
    235                     }
    236                     index = spacePos+1;
    237                 }
    238             }
    239 
    240             if (buf.length() < 1) {
    241                 throw new IllegalArgumentException("Empty field " + i + " in " + s);
    242             }
    243             output[i] = buf.toString();
    244             ++pos; // Skip over delim
    245         }
    246     }
    247     public static void appendInt(StringBuffer buf, String strToHex, String s){
    248         int hex = Integer.parseInt(strToHex,16);
    249         if (hex < 0 ) {
    250             throw new IllegalArgumentException("Out of range hex " +
    251                                                 hex + " in " + s);
    252         }else if (hex > 0xFFFF){
    253             buf.append((char)((hex>>10)+0xd7c0));
    254             buf.append((char)((hex&0x3ff)|0xdc00));
    255         }else{
    256             buf.append((char) hex);
    257         }
    258     }
    259 
    260     // Specific tests for debugging.  These are generally failures
    261     // taken from the conformance file, but culled out to make
    262     // debugging easier.  These can be eliminated without affecting
    263     // coverage.
    264     @Ignore
    265     @Test
    266     public void _hideTestCase6() throws Exception{
    267         _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;");
    268     }
    269 
    270     private void _testOneLine(String line) throws Exception{
    271         String[] fields = new String[5];
    272         StringBuffer buf = new StringBuffer();
    273         // Parse out the fields
    274         hexsplit(line, ';', fields, buf);
    275         checkConformance(fields, line);
    276     }
    277 
    278 
    279 }
    280