1 package org.unicode.cldr.unittest; 2 3 import java.util.ArrayList; 4 import java.util.HashSet; 5 import java.util.List; 6 import java.util.Set; 7 8 import org.unicode.cldr.icu.NewLdml2IcuConverter; 9 import org.unicode.cldr.util.CLDRConfig; 10 import org.unicode.cldr.util.CLDRFile; 11 import org.unicode.cldr.util.CLDRFile.DraftStatus; 12 import org.unicode.cldr.util.CLDRPaths; 13 import org.unicode.cldr.util.CldrUtility; 14 import org.unicode.cldr.util.CldrUtility.VariableReplacer; 15 import org.unicode.cldr.util.Pair; 16 import org.unicode.cldr.util.RegexFileParser; 17 import org.unicode.cldr.util.RegexFileParser.RegexLineParser; 18 import org.unicode.cldr.util.RegexFileParser.VariableProcessor; 19 import org.unicode.cldr.util.RegexLookup; 20 import org.unicode.cldr.util.RegexLookup.RegexFinder; 21 import org.unicode.cldr.util.XMLFileReader; 22 import org.unicode.cldr.util.XPathParts; 23 24 import com.ibm.icu.dev.test.TestFmwk; 25 import com.ibm.icu.text.Transform; 26 27 /** 28 * Tests the parts of the Ldml2IcuConverter that uses RegexLookups to convert 29 * values to ICU. Data that is converted using other methods isn't tested here. 30 * 31 * @author jchye 32 */ 33 public class TestLdml2ICU extends TestFmwk { 34 private static final boolean DEBUG = false; 35 36 static final CLDRConfig info = CLDRConfig.getInstance(); 37 38 private static final Transform<String, RegexFinder> XPATH_TRANSFORM = new Transform<String, RegexFinder>() { 39 public RegexFinder transform(String source) { 40 final String newSource = source.replace("[@", "\\[@"); 41 return new RegexFinder("^" + newSource + "$"); 42 } 43 }; 44 45 public static void main(String[] args) { 46 new TestLdml2ICU().run(args); 47 } 48 49 enum ExclusionType { 50 UNCONVERTED, IGNORE, // May be converted or not, but we don't care 51 WARNING; 52 public static Transform<String, Pair<ExclusionType, String>> TRANSFORM = new Transform<String, Pair<ExclusionType, String>>() { 53 public Pair<ExclusionType, String> transform(String source) { 54 String value = null; 55 if (source.contains(";")) { 56 String[] split = source.split("\\s*;\\s*"); 57 source = split[0]; 58 value = split[1]; 59 } 60 ExclusionType type = ExclusionType 61 .valueOf(source.toUpperCase()); 62 return Pair.of(type, value); 63 } 64 }; 65 } 66 67 static final RegexLookup<Pair<ExclusionType, String>> exclusions = RegexLookup 68 .of(ExclusionType.TRANSFORM) 69 .setPatternTransform(RegexLookup.RegexFinderTransformPath) 70 .loadFromFile(TestLdml2ICU.class, "../util/data/testLdml2Icu.txt"); 71 72 public void TestEnglish() { 73 checkLocaleRegexes("en"); 74 } 75 76 public void TestArabic() { 77 checkLocaleRegexes("ar"); 78 } 79 80 public void TestRoot() { 81 checkLocaleRegexes("root"); 82 } 83 84 public void TestRussian() { 85 checkLocaleRegexes("ru"); 86 } 87 88 public void TestJapanese() { 89 checkLocaleRegexes("ja"); 90 } 91 92 public void TestTamil() { 93 checkLocaleRegexes("ta"); 94 } 95 96 public void TestSupplemental() { 97 checkSupplementalRegexes("supplementalData"); 98 } 99 100 public void TestSupplmentalMetadata() { 101 checkSupplementalRegexes("supplementalMetadata"); 102 } 103 104 // public void TestTelephoneCodeData() { 105 // checkSupplementalRegexes("telephoneCodeData"); 106 // } 107 // 108 public void TestMetaZones() { 109 checkSupplementalRegexes("metaZones"); 110 } 111 112 public void TestLanguageInfo() { 113 checkSupplementalRegexes("languageInfo"); 114 } 115 116 public void TestLikelySubtags() { 117 checkSupplementalRegexes("likelySubtags"); 118 } 119 120 public void TestNumberingSystems() { 121 checkSupplementalRegexes("numberingSystems"); 122 } 123 124 public void TestWindowsZones() { 125 checkSupplementalRegexes("windowsZones"); 126 } 127 128 public void TestGenderList() { 129 checkSupplementalRegexes("genderList"); 130 } 131 132 // public void TestPostalCodeData() { 133 // checkSupplementalRegexes("postalCodeData"); 134 // } 135 136 /** 137 * Loads the regex files used to convert XPaths to ICU paths. 138 */ 139 private static RegexLookup<Object> loadRegexes(String filename) { 140 final RegexLookup<Object> lookup = RegexLookup.of() 141 .setPatternTransform(XPATH_TRANSFORM); 142 RegexFileParser parser = new RegexFileParser(); 143 parser.setLineParser(new RegexLineParser() { 144 int patternNum = 0; 145 146 @Override 147 public void parse(String line) { 148 int pos = line.indexOf(";"); 149 // We only care about the patterns. 150 if (pos == 0) 151 return; 152 String pattern = pos < 0 ? line : line.substring(0, pos).trim(); 153 lookup.add(pattern, patternNum++); 154 } 155 }); 156 parser.setVariableProcessor(new VariableProcessor() { 157 VariableReplacer variables = new VariableReplacer(); 158 159 @Override 160 public void add(String variableName, String value) { 161 if (value.startsWith("//")) { // is xpath 162 value = "[^\"]++"; 163 } 164 variables.add(variableName, value); 165 } 166 167 @Override 168 public String replace(String str) { 169 return variables.replace(str); 170 } 171 172 }); 173 parser.parse(NewLdml2IcuConverter.class, filename); 174 return lookup; 175 } 176 177 /** 178 * Checks conversion of XML files in the supplemental directory. 179 * 180 * @param name 181 * the name of the XML file to be converted (minus the extension) 182 */ 183 private void checkSupplementalRegexes(String name) { 184 RegexLookup<Object> lookup = loadRegexes("ldml2icu_supplemental.txt"); 185 List<Pair<String, String>> cldrData = new ArrayList<Pair<String, String>>(); 186 XMLFileReader.loadPathValues(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY 187 + name + ".xml", cldrData, true); 188 XPathParts parts = new XPathParts(); 189 for (Pair<String, String> pair : cldrData) { 190 String xpath = CLDRFile.getNondraftNonaltXPath(pair.getFirst()); 191 xpath = parts.set(xpath).toString(); 192 checkPath(lookup, xpath, pair.getSecond()); 193 } 194 } 195 196 Set<String> unconverted = new HashSet<String>(); 197 198 /** 199 * Checks if an xpath was matched by a RegexLookup. 200 */ 201 private <T> void checkPath(RegexLookup<T> lookup, String xpath, String value) { 202 Pair<ExclusionType, String> exclusionInfo = exclusions.get(xpath); 203 ExclusionType exclusionType = null; 204 if (exclusionInfo != null) { 205 exclusionType = exclusionInfo.getFirst(); 206 } 207 208 if (lookup.get(xpath) == null) { 209 String errorMessage = "CLDR xpath <" + xpath + "> with value <" 210 + value + "> was not converted to ICU."; 211 if (exclusionType == null) { 212 CldrUtility.logRegexLookup(this, lookup, xpath); 213 errln(errorMessage); 214 } else if (exclusionType == ExclusionType.WARNING) { 215 logln(errorMessage); 216 } else if (exclusionType == ExclusionType.UNCONVERTED) { 217 String template = xpath.replaceAll("\"[^\"]++\"", "*"); 218 if (!unconverted.add(template)) { 219 logln("Not converted: " + xpath); 220 } 221 } 222 } else if (exclusionType == ExclusionType.UNCONVERTED) { 223 CldrUtility.logRegexLookup(this, exclusions, xpath); 224 errln("CLDR xpath <" 225 + xpath 226 + "> is in the exclusions list but was matched. " 227 + "To make the test pass, remove the relevant regex from org/unicode/cldr/util/data/testLdml2Icu.txt"); 228 } 229 } 230 231 /** 232 * Checks conversion of XML locale files. 233 * 234 * @param name 235 * the name of the XML file to be converted (minus the extension) 236 */ 237 private void checkLocaleRegexes(String locale) { 238 CLDRFile plain = info.getCldrFactory().make(locale, false, 239 DraftStatus.contributed); 240 RegexLookup<Object> lookup = loadRegexes("ldml2icu_locale.txt"); 241 for (String xpath : plain) { 242 if (DEBUG && xpath.contains("defaultNumberingSystem")) { 243 int debug = 0; 244 } 245 String fullPath = CLDRFile.getNondraftNonaltXPath(plain 246 .getFullXPath(xpath)); 247 checkPath(lookup, fullPath, plain.getStringValue(xpath)); 248 } 249 } 250 } 251