Home | History | Annotate | Download | only in unittest
      1 package org.unicode.cldr.unittest;
      2 
      3 import java.util.ArrayList;
      4 import java.util.HashSet;
      5 import java.util.List;
      6 import java.util.Set;
      7 
      8 import org.unicode.cldr.icu.NewLdml2IcuConverter;
      9 import org.unicode.cldr.util.CLDRConfig;
     10 import org.unicode.cldr.util.CLDRFile;
     11 import org.unicode.cldr.util.CLDRFile.DraftStatus;
     12 import org.unicode.cldr.util.CLDRPaths;
     13 import org.unicode.cldr.util.CldrUtility;
     14 import org.unicode.cldr.util.CldrUtility.VariableReplacer;
     15 import org.unicode.cldr.util.Pair;
     16 import org.unicode.cldr.util.RegexFileParser;
     17 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
     18 import org.unicode.cldr.util.RegexFileParser.VariableProcessor;
     19 import org.unicode.cldr.util.RegexLookup;
     20 import org.unicode.cldr.util.RegexLookup.RegexFinder;
     21 import org.unicode.cldr.util.XMLFileReader;
     22 import org.unicode.cldr.util.XPathParts;
     23 
     24 import com.ibm.icu.dev.test.TestFmwk;
     25 import com.ibm.icu.text.Transform;
     26 
     27 /**
     28  * Tests the parts of the Ldml2IcuConverter that uses RegexLookups to convert
     29  * values to ICU. Data that is converted using other methods isn't tested here.
     30  *
     31  * @author jchye
     32  */
     33 public class TestLdml2ICU extends TestFmwk {
     34     private static final boolean DEBUG = false;
     35 
     36     static final CLDRConfig info = CLDRConfig.getInstance();
     37 
     38     private static final Transform<String, RegexFinder> XPATH_TRANSFORM = new Transform<String, RegexFinder>() {
     39         public RegexFinder transform(String source) {
     40             final String newSource = source.replace("[@", "\\[@");
     41             return new RegexFinder("^" + newSource + "$");
     42         }
     43     };
     44 
     45     public static void main(String[] args) {
     46         new TestLdml2ICU().run(args);
     47     }
     48 
     49     enum ExclusionType {
     50         UNCONVERTED, IGNORE, // May be converted or not, but we don't care
     51         WARNING;
     52         public static Transform<String, Pair<ExclusionType, String>> TRANSFORM = new Transform<String, Pair<ExclusionType, String>>() {
     53             public Pair<ExclusionType, String> transform(String source) {
     54                 String value = null;
     55                 if (source.contains(";")) {
     56                     String[] split = source.split("\\s*;\\s*");
     57                     source = split[0];
     58                     value = split[1];
     59                 }
     60                 ExclusionType type = ExclusionType
     61                     .valueOf(source.toUpperCase());
     62                 return Pair.of(type, value);
     63             }
     64         };
     65     }
     66 
     67     static final RegexLookup<Pair<ExclusionType, String>> exclusions = RegexLookup
     68         .of(ExclusionType.TRANSFORM)
     69         .setPatternTransform(RegexLookup.RegexFinderTransformPath)
     70         .loadFromFile(TestLdml2ICU.class, "../util/data/testLdml2Icu.txt");
     71 
     72     public void TestEnglish() {
     73         checkLocaleRegexes("en");
     74     }
     75 
     76     public void TestArabic() {
     77         checkLocaleRegexes("ar");
     78     }
     79 
     80     public void TestRoot() {
     81         checkLocaleRegexes("root");
     82     }
     83 
     84     public void TestRussian() {
     85         checkLocaleRegexes("ru");
     86     }
     87 
     88     public void TestJapanese() {
     89         checkLocaleRegexes("ja");
     90     }
     91 
     92     public void TestTamil() {
     93         checkLocaleRegexes("ta");
     94     }
     95 
     96     public void TestSupplemental() {
     97         checkSupplementalRegexes("supplementalData");
     98     }
     99 
    100     public void TestSupplmentalMetadata() {
    101         checkSupplementalRegexes("supplementalMetadata");
    102     }
    103 
    104 //    public void TestTelephoneCodeData() {
    105 //        checkSupplementalRegexes("telephoneCodeData");
    106 //    }
    107 //
    108     public void TestMetaZones() {
    109         checkSupplementalRegexes("metaZones");
    110     }
    111 
    112     public void TestLanguageInfo() {
    113         checkSupplementalRegexes("languageInfo");
    114     }
    115 
    116     public void TestLikelySubtags() {
    117         checkSupplementalRegexes("likelySubtags");
    118     }
    119 
    120     public void TestNumberingSystems() {
    121         checkSupplementalRegexes("numberingSystems");
    122     }
    123 
    124     public void TestWindowsZones() {
    125         checkSupplementalRegexes("windowsZones");
    126     }
    127 
    128     public void TestGenderList() {
    129         checkSupplementalRegexes("genderList");
    130     }
    131 
    132 //    public void TestPostalCodeData() {
    133 //        checkSupplementalRegexes("postalCodeData");
    134 //    }
    135 
    136     /**
    137      * Loads the regex files used to convert XPaths to ICU paths.
    138      */
    139     private static RegexLookup<Object> loadRegexes(String filename) {
    140         final RegexLookup<Object> lookup = RegexLookup.of()
    141             .setPatternTransform(XPATH_TRANSFORM);
    142         RegexFileParser parser = new RegexFileParser();
    143         parser.setLineParser(new RegexLineParser() {
    144             int patternNum = 0;
    145 
    146             @Override
    147             public void parse(String line) {
    148                 int pos = line.indexOf(";");
    149                 // We only care about the patterns.
    150                 if (pos == 0)
    151                     return;
    152                 String pattern = pos < 0 ? line : line.substring(0, pos).trim();
    153                 lookup.add(pattern, patternNum++);
    154             }
    155         });
    156         parser.setVariableProcessor(new VariableProcessor() {
    157             VariableReplacer variables = new VariableReplacer();
    158 
    159             @Override
    160             public void add(String variableName, String value) {
    161                 if (value.startsWith("//")) { // is xpath
    162                     value = "[^\"]++";
    163                 }
    164                 variables.add(variableName, value);
    165             }
    166 
    167             @Override
    168             public String replace(String str) {
    169                 return variables.replace(str);
    170             }
    171 
    172         });
    173         parser.parse(NewLdml2IcuConverter.class, filename);
    174         return lookup;
    175     }
    176 
    177     /**
    178      * Checks conversion of XML files in the supplemental directory.
    179      *
    180      * @param name
    181      *            the name of the XML file to be converted (minus the extension)
    182      */
    183     private void checkSupplementalRegexes(String name) {
    184         RegexLookup<Object> lookup = loadRegexes("ldml2icu_supplemental.txt");
    185         List<Pair<String, String>> cldrData = new ArrayList<Pair<String, String>>();
    186         XMLFileReader.loadPathValues(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY
    187             + name + ".xml", cldrData, true);
    188         XPathParts parts = new XPathParts();
    189         for (Pair<String, String> pair : cldrData) {
    190             String xpath = CLDRFile.getNondraftNonaltXPath(pair.getFirst());
    191             xpath = parts.set(xpath).toString();
    192             checkPath(lookup, xpath, pair.getSecond());
    193         }
    194     }
    195 
    196     Set<String> unconverted = new HashSet<String>();
    197 
    198     /**
    199      * Checks if an xpath was matched by a RegexLookup.
    200      */
    201     private <T> void checkPath(RegexLookup<T> lookup, String xpath, String value) {
    202         Pair<ExclusionType, String> exclusionInfo = exclusions.get(xpath);
    203         ExclusionType exclusionType = null;
    204         if (exclusionInfo != null) {
    205             exclusionType = exclusionInfo.getFirst();
    206         }
    207 
    208         if (lookup.get(xpath) == null) {
    209             String errorMessage = "CLDR xpath  <" + xpath + "> with value <"
    210                 + value + "> was not converted to ICU.";
    211             if (exclusionType == null) {
    212                 CldrUtility.logRegexLookup(this, lookup, xpath);
    213                 errln(errorMessage);
    214             } else if (exclusionType == ExclusionType.WARNING) {
    215                 logln(errorMessage);
    216             } else if (exclusionType == ExclusionType.UNCONVERTED) {
    217                 String template = xpath.replaceAll("\"[^\"]++\"", "*");
    218                 if (!unconverted.add(template)) {
    219                     logln("Not converted: " + xpath);
    220                 }
    221             }
    222         } else if (exclusionType == ExclusionType.UNCONVERTED) {
    223             CldrUtility.logRegexLookup(this, exclusions, xpath);
    224             errln("CLDR xpath <"
    225                 + xpath
    226                 + "> is in the exclusions list but was matched. "
    227                 + "To make the test pass, remove the relevant regex from org/unicode/cldr/util/data/testLdml2Icu.txt");
    228         }
    229     }
    230 
    231     /**
    232      * Checks conversion of XML locale files.
    233      *
    234      * @param name
    235      *            the name of the XML file to be converted (minus the extension)
    236      */
    237     private void checkLocaleRegexes(String locale) {
    238         CLDRFile plain = info.getCldrFactory().make(locale, false,
    239             DraftStatus.contributed);
    240         RegexLookup<Object> lookup = loadRegexes("ldml2icu_locale.txt");
    241         for (String xpath : plain) {
    242             if (DEBUG && xpath.contains("defaultNumberingSystem")) {
    243                 int debug = 0;
    244             }
    245             String fullPath = CLDRFile.getNondraftNonaltXPath(plain
    246                 .getFullXPath(xpath));
    247             checkPath(lookup, fullPath, plain.getStringValue(xpath));
    248         }
    249     }
    250 }
    251