Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.util.Set;
      4 
      5 import org.unicode.cldr.test.DisplayAndInputProcessor;
      6 import org.unicode.cldr.util.CLDRConfig;
      7 import org.unicode.cldr.util.CLDRFile;
      8 import org.unicode.cldr.util.CLDRFile.ExemplarType;
      9 import org.unicode.cldr.util.Factory;
     10 
     11 import com.ibm.icu.dev.test.TestFmwk;
     12 import com.ibm.icu.lang.CharSequences;
     13 import com.ibm.icu.text.UnicodeSet;
     14 import com.ibm.icu.text.UnicodeSetIterator;
     15 
     16 public class TestDisplayAndInputProcessor extends TestFmwk {
     17 
     18     CLDRConfig info = CLDRConfig.getInstance();
     19 
     20     public static void main(String[] args) {
     21         new TestDisplayAndInputProcessor().run(args);
     22     }
     23 
     24     public void TestAll() {
     25         showCldrFile(info.getEnglish());
     26         showCldrFile(info.getCLDRFile("ar", true));
     27         showCldrFile(info.getCLDRFile("ja", true));
     28         showCldrFile(info.getCLDRFile("hi", true));
     29         showCldrFile(info.getCLDRFile("wae", true));
     30     }
     31 
     32     public void TestAExemplars() {
     33         UnicodeSet test = new UnicodeSet();
     34         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info.getEnglish(), true);
     35         Exception[] internalException = new Exception[1];
     36 
     37         for (String s : new UnicodeSet("[!-#%-\\]_a-~-- ------- ------- -\\\\]")) {
     38             test.clear().add(s);
     39             String value = test.toPattern(false);
     40             String path = CLDRFile.getExemplarPath(ExemplarType.numbers);
     41 
     42             String display = daip.processForDisplay(path, value);
     43             internalException[0] = null;
     44             String input = daip.processInput(path, display, internalException);
     45 
     46             try {
     47                 UnicodeSet roundTrip = new UnicodeSet(input);
     48                 if (!assertEquals(test.toString() + "=>" + display, test, roundTrip)) {
     49                     input = daip.processInput(path, display, internalException); // for debugging
     50                 }
     51             } catch (Exception e) {
     52                 errln(test.toString() + "=>" + display + ": Failed to parse " + input);
     53             }
     54         }
     55     }
     56 
     57     public void TestTasawaq() {
     58         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
     59             .getCLDRFile("twq", true));
     60         // time for data driven test
     61         final String input = "[Z \u017E ]";
     62         final String expect = "[z \u017E]"; // lower case
     63         String value = daip.processInput(
     64             "//ldml/characters/exemplarCharacters", input, null);
     65         if (!value.equals(expect)) {
     66             errln("Tasawaq incorrectly normalized with output: '" + value
     67                 + "', expected '" + expect + "'");
     68         }
     69     }
     70 
     71     public void TestMalayalam() {
     72         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
     73             .getCLDRFile("ml", false));
     74         String value = daip.processInput(
     75             "//ldml/localeDisplayNames/languages/language[@type=\"alg\"]",
     76             " ", null);
     77         if (!value
     78             .equals("\u0D05\u0D7D\u0D17\u0D4B\u0D7A\u0D15\u0D4D\u0D2F\u0D7B \u0D2D\u0D3E\u0D37")) {
     79             errln("Malayalam incorrectly normalized with output: " + value);
     80         }
     81     }
     82 
     83     public void TestRomanian() {
     84         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
     85             .getCLDRFile("ro", false));
     86         String value = daip
     87             .processInput(
     88                 "//ldml/localeDisplayNames/types/type[@type=\"hant\"][@key=\"numbers\"]",
     89                 "Numerale chineze\u015Fti tradi\u0163ionale", null);
     90         if (!value.equals("Numerale chineze\u0219ti tradi\u021Bionale")) {
     91             errln("Romanian incorrectly normalized: " + value);
     92         }
     93     }
     94 
     95     public void TestMyanmarZawgyi() {
     96         // Check that the Zawgyi detector and Zawgyi->Unicode converter perform
     97         // correctly.
     98         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
     99             .getCLDRFile("my", false));
    100         String z_mi = " ( )";
    101         String u_mi = " ( )";
    102 
    103         // Check that z_mi is detected as Zawgyi, and converted to u_mi.
    104         // Check that the converted version is detected as Unicode.
    105         String converted_z_mi = daip.processInput("", z_mi, null);
    106         if (!converted_z_mi.equals(u_mi)) {
    107             errln("Myanmar Zawgyi value incorrectly normalized: \n " + z_mi
    108                 + " to \n" + ">" + converted_z_mi + "<, expected\n" + ">"
    109                 + u_mi + "<");
    110         }
    111         String converted_u_mi = daip.processInput("", u_mi, null);
    112         if (!converted_u_mi.equals(u_mi)) {
    113             errln("Myanmar Unicode value incorrectly changed:\n" + u_mi
    114                 + " to\n" + converted_u_mi);
    115         }
    116         // TODO(ccorn): test other strings with the converter.
    117         String mixed_latn_zawgyi = "ABCDE " + z_mi + "XYZ";
    118         String mixed_latn_unicode = "ABCDE " + u_mi + "XYZ";
    119         String converted_mixed = daip.processInput("", mixed_latn_zawgyi, null);
    120         if (!converted_mixed.equals(mixed_latn_unicode)) {
    121             errln("Myanmar mixed value incorrectly normalized:"
    122                 + converted_mixed.length() + "\n" + mixed_latn_zawgyi
    123                 + " to " + mixed_latn_unicode.length() + "\n"
    124                 + converted_mixed + ", expected\n" + mixed_latn_unicode);
    125         }
    126 
    127         // Test 1039 conversion - simple cases.
    128         String z1039 = "\u1031\u1019\u102c\u1004\u1039\u1038\u101b\u102e\u0020\u0028\u1014"
    129             + "\u101A\u1030\u1038\u1007\u102E\u101C\u1014\u1039\u1000\u107D\u103C\u1014\u1039\u1038\u101B\u103D\u102D";
    130         String u103a = "\u1019\u1031\u102c\u1004\u103a\u1038\u101b\u102e\u0020\u0028\u1014"
    131             + "\u101A\u1030\u1038\u1007\u102E\u101C\u1014\u103A\u1000\u103B\u103D\u1014\u103A\u1038\u101B\u103E\u102D";
    132         String converted_1039 = daip.processInput("", z1039, null);
    133         if (!converted_1039.equals(u103a)) {
    134             errln("Myanmar #1039 (Unicode) was changed: \n" + z1039 + " to \n"
    135                 + converted_1039 + ", expected \n" + u103a);
    136         }
    137 
    138         String z0 = "\u1000\u1005\u102C\u1038\u101E\u1019\u102C\u1038"; // Test
    139         // #0
    140         String converted_0 = daip.processInput("", z0, null);
    141         if (!converted_0.equals(z0)) {
    142             errln("Myanmar #0 (Unicode) was changed: " + z0 + " to "
    143                 + converted_0);
    144         }
    145 
    146         String z5 = "\u1021\u101E\u1004\u1039\u1038\u1019\u103D"; // Test #5
    147         String u5 = "\u1021\u101E\u1004\u103A\u1038\u1019\u103E";
    148         String converted_5 = daip.processInput("", z5, null);
    149         if (!converted_5.equals(u5)) {
    150             errln("Myanmar #5 incorrectly normalized: " + z5 + " to "
    151                 + converted_5);
    152         }
    153 
    154         String z_zero = "\u1031\u1040\u1037";
    155         String u_zero = "\u101d\u1031\u1037";
    156         String converted_zero = daip.processInput("", z_zero, null);
    157         if (!converted_zero.equals(u_zero)) {
    158             errln("Myanmar with diacritics and zero incorrectly normalized:\n"
    159                 + z_zero + " to\n" + converted_zero + '\n' + u_zero);
    160         }
    161         // Check that multiple digits are not converted.
    162         z_zero = "\u1041\u1040\u1037";
    163         u_zero = "\u1041\u1040\u1037";
    164         converted_zero = daip.processInput("", z_zero, null);
    165         if (!converted_zero.equals(u_zero)) {
    166             errln("Myanmar with two zeros incorrectly normalized:\n" + z_zero
    167                 + " to\n" + converted_zero + '\n' + u_zero);
    168         }
    169 
    170         // More checks that Unicode is not converted.
    171         String is_unicode = "\u1019\u101B\u103E\u102D\u101E\u1031\u102C";
    172         String check_is_unicode = daip.processInput("", is_unicode, null);
    173         if (!check_is_unicode.equals(is_unicode)) {
    174             errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
    175                 + check_is_unicode);
    176         }
    177         is_unicode = "\u1001\u103B\u103c";
    178         check_is_unicode = daip.processInput("", is_unicode, null);
    179         if (!check_is_unicode.equals(is_unicode)) {
    180             errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
    181                 + check_is_unicode);
    182         }
    183         is_unicode = "\u1001\u103E\u103A";
    184         check_is_unicode = daip.processInput("", is_unicode, null);
    185         if (!check_is_unicode.equals(is_unicode)) {
    186             errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
    187                 + check_is_unicode);
    188         }
    189     }
    190 
    191     public void TestCompactNumberFormats() {
    192         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
    193             info.getEnglish(), false);
    194         String xpath = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"] ";
    195         String value = daip.processInput(xpath, "0.00K.", null);
    196         assertEquals("Period not correctly quoted", "0K'.'", value);
    197         value = daip.processInput(xpath, "00.0K'.'", null);
    198         assertEquals("Quotes should not be double-quoted", "00K'.'", value);
    199         value = daip.processForDisplay(xpath, "0.0 K'.'");
    200         assertEquals("There should be no quotes left", "0.0 K.", value);
    201     }
    202 
    203     public void TestPatternCanonicalization() {
    204         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
    205             info.getEnglish(), false);
    206         String xpath = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
    207         String value = daip.processInput(xpath, "#,###,##0.###", null);
    208         assertEquals("Format not correctly canonicalized", "#,##0.###", value);
    209     }
    210 
    211     public void TestCurrencyFormatSpaces() {
    212         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
    213             info.getEnglish(), false);
    214         String xpath = "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
    215         String value = daip.processInput(xpath, " #,##0.00", null); // breaking
    216         // space
    217         assertEquals("Breaking space not replaced", "#,##0.00", value); // non-breaking
    218         // space
    219     }
    220 
    221     private Boolean usesModifierApostrophe(CLDRFile testFile) {
    222         char MODIFIER_LETTER_APOSTROPHE = '\u02BC';
    223         String exemplarSet = testFile
    224             .getWinningValue("//ldml/characters/exemplarCharacters");
    225         UnicodeSet mainExemplarSet = new UnicodeSet(exemplarSet);
    226         UnicodeSetIterator usi = new UnicodeSetIterator(mainExemplarSet);
    227         while (usi.next()) {
    228             if (usi.codepoint == MODIFIER_LETTER_APOSTROPHE
    229                 || (usi.codepoint == UnicodeSetIterator.IS_STRING && usi
    230                     .getString().indexOf(MODIFIER_LETTER_APOSTROPHE) >= 0)) {
    231                 return true;
    232             }
    233         }
    234         return false;
    235     }
    236 
    237     public void TestModifierApostropheLocales() {
    238         Factory f = info.getFullCldrFactory();
    239         Set<String> allLanguages = f.getAvailableLanguages();
    240         for (String thisLanguage : allLanguages) {
    241             CLDRFile thisLanguageFile = f.make(thisLanguage, true);
    242             try {
    243                 if (usesModifierApostrophe(thisLanguageFile)) {
    244                     if (!DisplayAndInputProcessor.LANGUAGES_USING_MODIFIER_APOSTROPHE
    245                         .contains(thisLanguage)) {
    246                         errln("Language : "
    247                             + thisLanguage
    248                             + " uses MODIFIER_LETTER_APOSROPHE, but is not on the list in DAIP.LANGUAGES_USING_MODIFIER_APOSTROPHE");
    249                     }
    250                 } else {
    251                     if (DisplayAndInputProcessor.LANGUAGES_USING_MODIFIER_APOSTROPHE
    252                         .contains(thisLanguage)) {
    253                         errln("Language : "
    254                             + thisLanguage
    255                             + "is on the list in DAIP.LANGUAGES_USING_MODIFIER_APOSTROPHE, but the main exemplars don't use this character.");
    256                     }
    257                 }
    258             } catch(Throwable t) {
    259                 t.printStackTrace();
    260                 errln("Error in " + thisLanguage + " - " + t.getMessage());
    261             }
    262         }
    263     }
    264 
    265     public void TestQuoteNormalization() {
    266         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
    267             info.getEnglish(), false);
    268         String xpath = "//ldml/units/unitLength[@type=\"narrow\"]/unitPattern[@count=\"one\"]";
    269         String value = daip.processInput(xpath, "{0}''", null); // breaking
    270         // space
    271         assertEquals("Quotes not normalized", "{0}", value); // non-breaking
    272         // space
    273     }
    274 
    275     private void showCldrFile(final CLDRFile cldrFile) {
    276         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFile,
    277             true);
    278         Exception[] internalException = new Exception[1];
    279         for (String path : cldrFile) {
    280             String value = cldrFile.getStringValue(path);
    281             if (value.equals("[\\- , . %  + 0-9]")) {
    282                 int debug = 0;
    283             }
    284             String display = daip.processForDisplay(path, value);
    285             internalException[0] = null;
    286             String input = daip.processInput(path, display, internalException);
    287             String diff = diff(value, input, path);
    288             if (diff != null) {
    289                 errln(cldrFile.getLocaleID() + "\tNo roundtrip in DAIP:"
    290                     + "\n\t  value<"
    291                     + value
    292                     + ">\n\tdisplay<"
    293                     + display
    294                     + ">\n\t  input<"
    295                     + input
    296                     + ">\n\t   diff<"
    297                     + diff
    298                     + (internalException[0] != null ? ">\n\texcep<"
    299                         + internalException[0] : "")
    300                     + ">\n\tpath<"
    301                     + path + ">");
    302                 daip.processInput(path, value, internalException); // for
    303                 // debugging
    304             } else if (!CharSequences.equals(value, display)
    305                 || !CharSequences.equals(value, input)
    306                 || internalException[0] != null) {
    307                 logln("DAIP Changes"
    308                     + "\n\tvalue<"
    309                     + value
    310                     + ">\n\tdisplay<"
    311                     + display
    312                     + ">\n\tinput<"
    313                     + input
    314                     + ">\n\tdiff<"
    315                     + diff
    316                     + (internalException[0] != null ? ">\n\texcep<"
    317                         + internalException[0] : "")
    318                     + ">\n\tpath<"
    319                     + path + ">");
    320             }
    321         }
    322     }
    323 
    324     private String diff(String value, String input, String path) {
    325         if (value.equals(input)) {
    326             return null;
    327         }
    328         if (path.contains("/exemplarCharacters") || path.contains("/parseLenient")) {
    329             try {
    330                 UnicodeSet s1 = new UnicodeSet(value);
    331                 UnicodeSet s2 = new UnicodeSet(input);
    332                 if (!s1.equals(s2)) {
    333                     UnicodeSet temp = new UnicodeSet(s1).removeAll(s2);
    334                     UnicodeSet temp2 = new UnicodeSet(s2).removeAll(s1);
    335                     temp.addAll(temp2);
    336                     return temp.toPattern(true);
    337                 }
    338                 return null;
    339             } catch (Exception e) {
    340                 // TODO: handle exception
    341             }
    342         }
    343         String value2 = value.replace('[', '(').replace(']', ')')
    344             .replace('', '').replace('', '');
    345         if (value2.equals(input)) {
    346             return null;
    347         }
    348         return "?";
    349     }
    350 }
    351