1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.FileInputStream; 5 import java.io.IOException; 6 import java.io.PrintWriter; 7 import java.io.StringWriter; 8 import java.util.ArrayList; 9 import java.util.Arrays; 10 import java.util.Calendar; 11 import java.util.Collections; 12 import java.util.Date; 13 import java.util.EnumSet; 14 import java.util.HashMap; 15 import java.util.HashSet; 16 import java.util.Iterator; 17 import java.util.LinkedHashMap; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Locale; 21 import java.util.Map; 22 import java.util.Map.Entry; 23 import java.util.Set; 24 import java.util.TreeMap; 25 import java.util.TreeSet; 26 import java.util.regex.Matcher; 27 import java.util.regex.Pattern; 28 29 import org.unicode.cldr.draft.FileUtilities; 30 import org.unicode.cldr.test.CheckExemplars; 31 import org.unicode.cldr.test.CoverageLevel2; 32 import org.unicode.cldr.test.DisplayAndInputProcessor; 33 import org.unicode.cldr.test.QuickCheck; 34 import org.unicode.cldr.tool.Option.Options; 35 import org.unicode.cldr.util.Builder; 36 import org.unicode.cldr.util.CLDRFile; 37 import org.unicode.cldr.util.CLDRPaths; 38 import org.unicode.cldr.util.Factory; 39 import org.unicode.cldr.util.FileCopier; 40 import org.unicode.cldr.util.LanguageTagParser; 41 import org.unicode.cldr.util.Level; 42 import org.unicode.cldr.util.PathDescription; 43 import org.unicode.cldr.util.PatternCache; 44 import org.unicode.cldr.util.PatternPlaceholders; 45 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo; 46 import org.unicode.cldr.util.PrettyPath; 47 import org.unicode.cldr.util.RegexLookup; 48 import org.unicode.cldr.util.RegexLookup.Finder; 49 import org.unicode.cldr.util.RegexUtilities; 50 import org.unicode.cldr.util.StandardCodes; 51 import org.unicode.cldr.util.StringId; 52 import org.unicode.cldr.util.SupplementalDataInfo; 53 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 54 
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 56 import org.unicode.cldr.util.TransliteratorUtilities; 57 import org.unicode.cldr.util.With; 58 import org.unicode.cldr.util.XMLFileReader; 59 import org.unicode.cldr.util.XMLSource; 60 import org.unicode.cldr.util.XPathParts; 61 import org.xml.sax.Attributes; 62 import org.xml.sax.ContentHandler; 63 import org.xml.sax.ErrorHandler; 64 import org.xml.sax.InputSource; 65 import org.xml.sax.Locator; 66 import org.xml.sax.SAXException; 67 import org.xml.sax.SAXParseException; 68 import org.xml.sax.XMLReader; 69 70 import com.ibm.icu.dev.util.CollectionUtilities; 71 import com.ibm.icu.impl.Relation; 72 import com.ibm.icu.impl.Row; 73 import com.ibm.icu.impl.Row.R2; 74 import com.ibm.icu.lang.CharSequences; 75 import com.ibm.icu.text.BreakIterator; 76 import com.ibm.icu.text.DateFormat; 77 import com.ibm.icu.text.MessageFormat; 78 import com.ibm.icu.text.PluralRules; 79 import com.ibm.icu.text.SimpleDateFormat; 80 import com.ibm.icu.text.Transform; 81 import com.ibm.icu.text.UnicodeSet; 82 import com.ibm.icu.util.Output; 83 import com.ibm.icu.util.TimeZone; 84 import com.ibm.icu.util.ULocale; 85 86 public class GenerateXMB { 87 private static final String DEBUG_PATH = "[@type=\"day\"]/unitPattern[@count=\"1\"]"; 88 89 static StandardCodes sc = StandardCodes.make(); 90 91 static final String DATE; 92 static { 93 DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); 94 DATE = dateFormat.format(new Date()); 95 } 96 static final String stock = "en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK"; 97 private static final HashSet<String> REGION_LOCALES = new HashSet<String>(Arrays.asList(stock.split("\\|"))); 98 99 final static Options myOptions = new Options("In 
normal usage, you set the -t option for the target.") 100 .add("target", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/xmb/", 101 "The target directory for building. Will generate an English .xmb file, and .wsb files for other languages.") 102 .add( 103 "file", 104 ".*", 105 stock, 106 "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering") 107 // "^(sl|fr)$", 108 .add("path", ".*", "Filter the information based on path name, using a regex argument") 109 // "dates.*(pattern|available)", 110 .add("content", ".*", "Filter the information based on content name, using a regex argument") 111 .add("jason", ".*", "Generate JSON versions instead") 112 .add("zone", null, "Show metazoneinfo and exit") 113 .add("wsb", ".*", "Show metazoneinfo and exit") 114 .add("kompare", ".*", CLDRPaths.BASE_DIRECTORY + "../DATA/cldr/common/google-bulk-imports", 115 "Compare data with directory; generate files in -target.") 116 .add("project_name", 'n', ".*", "CLDR", "The ID of the project."); 117 118 static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance(); 119 // static Matcher contentMatcher; 120 static Matcher pathMatcher; 121 static RegexLookup<String> pathFindRemover = new RegexLookup<String>().loadFromFile(GenerateXMB.class, 122 "xmbSkip.txt");; // .compile("//ldml/dates/calendars/calendar\\[@type=\"(?!gregorian).*").matcher(""); 123 static PrettyPath prettyPath = new PrettyPath(); 124 static int errors = 0; 125 static Relation<String, String> path2errors = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 126 127 // enum Handling {SKIP}; 128 static final Matcher datePatternMatcher = PatternCache.get("dates.*(pattern|available)").matcher(""); 129 130 public static final boolean DEBUG = false; 131 132 private static final HashSet<String> SKIP_LOCALES = new HashSet<String>( 133 Arrays.asList(new String[] { "en", "root" })); 134 135 public static String DTD_VERSION; 136 137 
private static String projectId; 138 139 enum PlaceholderType { 140 BRACES, // e.g. {NAME} 141 XML, // e.g. <ph name='NAME' /> 142 XML_EXAMPLE // e.g. <ph name='NAME' /><ex>EXAMPLE</ex>{0}</ph> 143 }; 144 145 public static void main(String[] args) throws Exception { 146 myOptions.parse(args, true); 147 Option option; 148 option = myOptions.get("zone"); 149 if (option.doesOccur()) { 150 showMetazoneInfo(); 151 return; 152 } 153 option = myOptions.get("file"); 154 String fileMatcherString = option.getValue(); 155 option = myOptions.get("content"); 156 Matcher contentMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null; 157 option = myOptions.get("path"); 158 pathMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null; 159 160 String targetDir = myOptions.get("target").getValue(); 161 countFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "counts.txt"); 162 163 Factory cldrFactory1 = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 164 CLDRFile english = cldrFactory1.make("en", true); 165 CLDRFile englishTop = cldrFactory1.make("en", false); 166 DTD_VERSION = englishTop.getDtdVersion(); 167 168 CLDRFile root = cldrFactory1.make("en", true); 169 170 showDefaultContents(targetDir, english); 171 EnglishInfo englishInfo = new EnglishInfo(targetDir, english, root); 172 173 option = myOptions.get("kompare"); 174 if (option.doesOccur()) { 175 compareDirectory = option.getValue(); 176 compareFiles(fileMatcherString, contentMatcher, targetDir, cldrFactory1, english, englishInfo); 177 return; 178 } 179 180 if (myOptions.get("wsb").doesOccur()) { 181 displayWsb(myOptions.get("wsb").getValue(), englishInfo); 182 return; 183 } 184 185 projectId = myOptions.get("project_name").getValue(); 186 187 writeFile(targetDir, "en", englishInfo, english, true, false); 188 writeFile(targetDir + "/filtered/", "en", englishInfo, english, true, true); 189 190 // TODO: 191 // Replace {0}... 
with placeholders (Mostly done, but need better examples) 192 // Replace datetime fields (MMM, L, ...) with placeholders 193 // Skip items that we don't need translated (most language names, script names, deprecated region names, etc. 194 // Add descriptions 195 // Add pages with detailed descriptions, and links from the descriptions 196 // Represent the items with count= as ICUSyntax 197 // Filter items that we don't want to get translated, and add others that we need even if not in English 198 // Rewire items that are in undistinguished attributes 199 // Test each xml file for validity 200 // Generate strings that let the user choose the placeholder style hh vs HH,...??? 201 202 Factory cldrFactory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, fileMatcherString); 203 LanguageTagParser ltp = new LanguageTagParser(); 204 205 for (String file : cldrFactory2.getAvailable()) { 206 if (SKIP_LOCALES.contains(file)) { 207 continue; 208 } 209 210 // skip all locales with regions (with certain exceptions) 211 if (ltp.set(file).getRegion().length() != 0) { 212 if (!REGION_LOCALES.contains(file)) { 213 continue; 214 } 215 } 216 217 // skip anything without plural rules 218 final PluralInfo plurals = supplementalDataInfo.getPlurals(file, false); 219 if (plurals == null) { 220 System.out.println("Skipping " + file + ", no plural rules"); 221 continue; 222 } 223 224 CLDRFile cldrFile = cldrFactory2.make(file, true); 225 writeFile(targetDir + "/wsb/", file, englishInfo, cldrFile, false, false); 226 writeFile(targetDir + "/wsb/filtered/", file, englishInfo, cldrFile, false, true); 227 countFile.flush(); 228 } 229 countFile.close(); 230 PrintWriter errorFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "errors.txt"); 231 for (Entry<String, Set<String>> entry : path2errors.keyValuesSet()) { 232 errorFile.println(entry); 233 } 234 errorFile.close(); 235 System.out.println("Errors: " + (errors + path2errors.size())); 236 } 237 238 private static void compareFiles(String 
fileMatcherString, Matcher contentMatcher, String targetDir, 239 Factory cldrFactory1, CLDRFile english, 240 EnglishInfo englishInfo) throws IOException { 241 SubmittedPathFixer fixer = new SubmittedPathFixer(); 242 Factory cldrFactory2 = Factory.make(compareDirectory, fileMatcherString); 243 PrintWriter output = null; 244 PrintWriter log = FileUtilities.openUTF8Writer(targetDir + "/log/", "skipped.txt"); 245 246 for (String file : cldrFactory2.getAvailable()) { 247 // System.out.println("Checking " + file); 248 CLDRFile submitted = cldrFactory2.make(file, false); 249 CLDRFile trunk = cldrFactory1.make(file, true); 250 for (String path : With.in(submitted.iterator(null, submitted.getComparator()))) { 251 if (pathMatcher != null && !pathMatcher.reset(path).matches()) { 252 continue; 253 } 254 String submittedValue = submitted.getStringValue(path); 255 if (contentMatcher != null && !contentMatcher.reset(submittedValue).matches()) { 256 continue; 257 } 258 PathStatus pathStatus = shouldSkipPath(path, submittedValue); 259 if (pathStatus == PathStatus.SKIP) { 260 continue; 261 } 262 263 // fix alt 264 String trunkPath = fixer.fix(path, false); 265 String trunkValue = trunk.getStringValue(trunkPath); 266 if (CharSequences.equals(submittedValue, trunkValue)) { 267 continue; 268 } 269 if (output == null) { 270 output = FileUtilities.openUTF8Writer(targetDir, file + ".txt"); 271 output.println("ID\tEnglish\tSource\tRelease\tDescription"); 272 } 273 String englishValue = english.getStringValue(trunkPath); 274 final PathInfo pathInfo = englishInfo.getPathInfo(trunkPath); 275 String description; 276 if (pathInfo == null) { 277 log.println(file + "\tDescription unavailable for " + trunkPath); 278 errors++; 279 String temp = fixer.fix(path, true); 280 englishInfo.getPathInfo(trunkPath); 281 continue; 282 } else { 283 description = pathInfo.getDescription(); 284 } 285 long id = StringId.getId(trunkPath); 286 if (englishValue == null) { 287 log.println(file + "\tEmpty English for 
" + trunkPath); 288 errors++; 289 continue; 290 } 291 output.println(id + "\t" + ssquote(englishValue, false) + "\t" + ssquote(submittedValue, false) + "\t" 292 + ssquote(trunkValue, true) + "\t" + description); 293 } 294 if (output != null) { 295 output.close(); 296 output = null; 297 } 298 log.flush(); 299 } 300 log.close(); 301 } 302 303 static Output<String[]> matches = new Output<String[]>(); 304 static List<String> failures = new ArrayList<String>(); 305 static Output<Finder> matcherFound = new Output<Finder>(); 306 307 enum PathStatus { 308 SKIP, KEEP, MAYBE 309 } 310 311 public static PathStatus shouldSkipPath(String path, String value) { 312 // skip if 313 List<String> myFailures = null; 314 if (false && path.contains("currencies") && path.contains("symbol")) { 315 myFailures = failures; 316 } 317 String skipPath = pathFindRemover.get(path, null, matches, matcherFound, myFailures); 318 if (myFailures != null && failures.size() != 0) { 319 System.out.println("Failures\n\t" + CollectionUtilities.join(failures, "\n\t")); 320 failures.clear(); 321 } 322 if (skipPath == null || skipPath.equals("MAYBE")) { 323 return PathStatus.MAYBE; 324 } else if (skipPath.equals("VALUE")) { 325 return value.equals(matches.value[1]) ? PathStatus.SKIP : PathStatus.MAYBE; 326 } else if (skipPath.equals("SKIP")) { 327 return PathStatus.SKIP; 328 } else if (skipPath.equals("KEEP")) { 329 return PathStatus.KEEP; 330 } 331 throw new IllegalArgumentException("Unexpected xmbSkip.txt value: " + skipPath); 332 } 333 334 private static String ssquote(String englishValue, boolean showRemoved) { 335 if (englishValue == null) { 336 return showRemoved ? 
"[removed]" : "[empty]"; 337 } 338 englishValue = englishValue.replace("\"", """); 339 return englishValue; 340 } 341 342 static class SubmittedPathFixer { 343 private static final Pattern PATH_FIX = PatternCache.get("\\[@alt=\"" + 344 "(?:proposed|((?!proposed)[-a-zA-Z0-9]*)-proposed)" + 345 "-u\\d+-implicit[0-9.]+" + 346 "(?:-proposed-u\\d+-implicit[0-9.]+)?" + // NOTE: we allow duplicated alt values because of a generation 347 // bug. 348 // -proposed-u971-implicit2.0 349 "\"]"); 350 static Matcher pathFix = PATH_FIX.matcher(""); 351 352 public String fix(String path, boolean debug) { 353 if (pathFix.reset(path).find()) { 354 if (debug) { 355 // debug in case we get a mismatch 356 String temp = "REGEX:\t" + 357 RegexUtilities.showMismatch(PATH_FIX, path.substring(pathFix.start(0))); 358 } 359 final String group = pathFix.group(1); 360 String replacement = group == null ? "" : "[@alt=\"" + group + "\"]"; 361 String trunkPath = path.substring(0, pathFix.start(0)) + replacement + path.substring(pathFix.end(0)); 362 // HACK because of change in CLDR defaults 363 if (trunkPath.startsWith("//ldml/numbers/symbols/")) { 364 trunkPath = "//ldml/numbers/symbols[@numberSystem=\"latn\"]/" 365 + trunkPath.substring("//ldml/numbers/symbols/".length()); 366 } 367 return trunkPath; 368 } 369 return path; 370 } 371 372 } 373 374 private static void showDefaultContents(String targetDir, CLDRFile english) throws IOException { 375 PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "locales.txt"); 376 String[] locales = stock.split("\\|"); 377 Set<R2<String, String>> sorted = new TreeSet<R2<String, String>>(); 378 for (String locale : locales) { 379 if (locale.isEmpty()) continue; 380 String name = english.getName(locale); 381 R2<String, String> row = Row.of(name, locale); 382 sorted.add(row); 383 } 384 Set<String> defaultContents = supplementalDataInfo.getDefaultContentLocales(); 385 386 for (R2<String, String> row : sorted) { 387 String locale = row.get1(); 388 
String dlocale = getDefaultContentLocale(locale, defaultContents); 389 out.println(row.get0() + "\t" + locale + "\t" + english.getName(dlocale) + "\t" + dlocale); 390 } 391 out.close(); 392 } 393 394 private static String getDefaultContentLocale(String locale, Set<String> defaultContents) { 395 String best = null; 396 for (String s : defaultContents) { 397 if (s.startsWith(locale)) { 398 if (best == null) { 399 best = s; 400 } else if (s.length() < best.length()) { 401 best = s; 402 } 403 } 404 } 405 if (best == null) { 406 return locale; 407 } 408 return best; 409 } 410 411 static final Pattern COUNT_OR_ALT_ATTRIBUTE = PatternCache.get("\\[@(count)=\"([^\"]*)\"]"); 412 static final Pattern PLURAL_XPATH = Pattern 413 .compile("//ldml/(units/unit|numbers/(decimal|currency)Formats).*\\[@count=\"\\w+\"].*"); 414 static final Pattern SKIP_EXEMPLAR_TEST = PatternCache.get( 415 "/(currencySpacing" 416 + "|hourFormat" 417 + "|exemplarCharacters" 418 + "|pattern" 419 + "|localizedPatternChars" 420 + "|segmentations" 421 + "|dateFormatItem" 422 + "|references" 423 + "|unitPattern" 424 + "|intervalFormatItem" 425 + "|localeDisplayNames/variants/" 426 + "|commonlyUsed" 427 + "|currency.*/symbol" 428 + "|symbols/(exponential|nan))"); 429 430 static final Matcher skipExemplarTest = SKIP_EXEMPLAR_TEST.matcher(""); 431 static final UnicodeSet ASCII_LATIN = new UnicodeSet("[A-Za-z]").freeze(); 432 static final UnicodeSet LATIN = new UnicodeSet("[:sc=Latn:]").freeze(); 433 434 static final Matcher keepFromRoot = PatternCache.get("/(exemplarCity|currencies/currency.*/symbol)").matcher(""); 435 static final Matcher currencyDisplayName = Pattern 436 .compile("/currencies/currency\\[@type=\"([^\"]*)\"]/displayName").matcher(""); 437 438 private static void writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile, 439 boolean isEnglish, boolean filter) throws IOException { 440 441 String extension = "xml"; 442 XPathParts xpathParts = new XPathParts(); 443 
Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 444 Set<String> seenStarred = new HashSet<String>(); 445 446 Relation<String, Row.R2<PathInfo, String>> countItems = Relation.of( 447 new TreeMap<String, Set<Row.R2<PathInfo, String>>>(), TreeSet.class); 448 Matcher countMatcher = COUNT_OR_ALT_ATTRIBUTE.matcher(""); 449 int lineCount = 0; 450 int wordCount = 0; 451 int messageCount = 0; 452 453 StringWriter buffer = new StringWriter(); 454 PrintWriter out1 = new PrintWriter(buffer); 455 StringWriter buffer3 = new StringWriter(); 456 PrintWriter out3 = new PrintWriter(buffer3); 457 UnicodeSet exemplars = getExemplars(cldrFile); 458 459 for (PathInfo pathInfo : englishInfo) { 460 if (false && pathInfo.id == 46139888945574604L) { // for debugging 461 System.out.println("?"); 462 } 463 String path = pathInfo.getPath(); 464 String value; 465 if (isEnglish) { 466 value = pathInfo.englishValue; 467 } else { 468 value = cldrFile.getStringValue(path); 469 } 470 // Remove quotes from number formats (we'll put them back in during 471 // post-processing). 472 // TODO: we should actually call daip.processForDisplay() here, but 473 // it does more stuff than we need it to do, e.g. stripping the 474 // brackets from exemplarCharacters. 
475 if (DisplayAndInputProcessor.NUMBER_FORMAT_XPATH.matcher(path).matches()) { 476 value = value.replace("'", ""); 477 } 478 479 // skip root if not English 480 if (!isEnglish && value != null && !keepFromRoot.reset(path).find()) { // note that mismatched script will 481 // be checked later 482 String locale = cldrFile.getSourceLocaleID(path, null); 483 if (locale.equals("root")) { 484 reasonsToPaths.put("root", path + "\t" + value); 485 continue; 486 } 487 if (locale.equals(XMLSource.CODE_FALLBACK_ID)) { 488 reasonsToPaths.put("codeFallback", path + "\t" + value); 489 continue; 490 } 491 } 492 boolean hasPlurals = PLURAL_XPATH.matcher(path).matches(); 493 if (filter && !hasPlurals) { 494 String starred = pathInfo.getStarredPath(); 495 if (seenStarred.contains(starred)) { 496 continue; 497 } 498 seenStarred.add(starred); 499 } 500 if (value == null) { 501 reasonsToPaths.put("missing", path + " " + value); 502 continue; 503 } 504 if (!isEnglish) { 505 String fullPath = cldrFile.getFullXPath(path); 506 if (fullPath.contains("draft")) { 507 xpathParts.set(fullPath); 508 String draftValue = xpathParts.getAttributeValue(-1, "draft"); 509 if (!draftValue.equals("contributed")) { 510 reasonsToPaths.put(draftValue, path + "\t" + value); 511 continue; 512 } 513 } 514 } 515 if (!isEnglish 516 && !exemplars.containsAll(value) 517 && !skipExemplarTest.reset(path).find()) { 518 // check for special cases in currency names. 
If the code itself occurs in the name, that's ok 519 // ldml/numbers/currencies/currency[@type="XXX"]/displayName 520 boolean bad = true; 521 if (currencyDisplayName.reset(path).find()) { 522 String code = currencyDisplayName.group(1); 523 String value2 = value.replace(code, ""); 524 bad = !exemplars.containsAll(value2); 525 } 526 if (bad) { 527 UnicodeSet diff = new UnicodeSet().addAll(value).removeAll(exemplars); 528 reasonsToPaths.put("exemplars", path + "\t" + value + "\t" + diff); 529 continue; 530 } 531 } 532 // String fullPath = cldrFile.getStringValue(path); 533 // //ldml/units/unit[@type="day"]/unitPattern[@count="one"] 534 if (hasPlurals) { 535 countMatcher.reset(path).find(); 536 String countLessPath = countMatcher.replaceAll(""); 537 countItems.put(countLessPath, Row.of(pathInfo, value)); 538 continue; 539 } 540 if (!isEnglish && pathInfo.changedEnglish) { 541 reasonsToPaths.put("changed-english", path); 542 } else { 543 writePathInfo(out1, pathInfo, value, isEnglish); 544 messageCount++; 545 } 546 if (isEnglish) { 547 writeJavaInfo(out3, pathInfo.getStringId(), pathInfo.getPath(), value); 548 } 549 wordCount += pathInfo.wordCount; 550 ++lineCount; 551 } 552 R2<Integer, Integer> lineWordCount = writeCountPathInfo(out1, out3, cldrFile.getLocaleID(), countItems, 553 isEnglish, filter); 554 messageCount += lineWordCount.get0(); 555 lineCount += lineWordCount.get0(); 556 wordCount += lineWordCount.get1(); 557 if (!filter && countItems.size() != lineWordCount.get0().intValue()) { 558 System.out.println(localeId + "\t" + countItems.size() + "\t" + lineWordCount.get0().intValue()); 559 } 560 out1.flush(); 561 out3.flush(); 562 563 String file = LanguageCodeConverter.toGoogleLocaleId(localeId); 564 String localeName = englishInfo.getName(localeId); 565 PrintWriter out = FileUtilities.openUTF8Writer(targetDir, file + "." 
+ extension); 566 567 if (isEnglish) { 568 FileCopier.copy(GenerateXMB.class, "xmb-dtd.xml", out); 569 // FileUtilities.appendFile(GenerateXMB.class, "xmb-dtd.xml", out); 570 out.println("<!-- " + localeName + " -->"); 571 out.println("<messagebundle class='" + projectId + "'> <!-- version: " + DTD_VERSION + ", date: " + DATE 572 + " -->"); 573 out.println(buffer.toString()); 574 out.println("</messagebundle>"); 575 576 PrintWriter out3File = FileUtilities.openUTF8Writer(targetDir, "IdToPath.java"); 577 out3File.println("package org.unicode.cldr.tool;"); 578 out3File.println(); 579 out3File.println("import java.util.HashMap;"); 580 out3File.println(); 581 out3File.println("/**"); 582 out3File.println(" * Autogenerated by GenerateXMB for use by ConvertXTB."); 583 out3File.println(" * Do not manually edit this file."); 584 out3File.println(" */"); 585 out3File.println("public class IdToPath {"); 586 out3File.println(" static final HashMap<String,String> map = new HashMap<String,String>();"); 587 out3File.println(" public static String getPath(String id) {"); 588 out3File.println(" return map.get(id);"); 589 out3File.println(" }"); 590 out3File.println(" static {"); 591 out3File.println(" String[][] data = {"); 592 out3File.println(buffer3); 593 out3File.println(" };"); 594 out3File.println(" for (String[] pair : data) {"); 595 out3File.println(" map.put(pair[0], pair[1]);"); 596 out3File.println(" }"); 597 out3File.println(" }"); 598 out3File.println("}"); 599 out3File.close(); 600 } else { 601 602 // FileUtilities.appendFile(GenerateXMB.class, "wsb-dtd.xml", out); 603 FileCopier.copy(GenerateXMB.class, "wsb-dtd.xml", out); 604 out.println("<!-- " + localeName + " -->"); 605 out.println("<worldserverbundles lazarus_id='dummy' date='" + DATE + "'> <!-- version: " + DTD_VERSION 606 + " -->"); 607 out.println(" <worldserverbundle project_id='" + projectId + "' message_count='" + messageCount + "'>"); 608 out.println(buffer.toString()); 609 out.println(" 
</worldserverbundle>"); 610 out.println("</worldserverbundles>"); 611 } 612 out.close(); 613 QuickCheck.check(new File(targetDir, file + "." + extension)); 614 if (!filter) { 615 countFile.println(file + "\t" + lineCount + "\t" + wordCount); 616 } 617 if (!isEnglish && !filter) { 618 writeReasons(reasonsToPaths, targetDir, file); 619 } 620 } 621 622 private static void writeJavaInfo(PrintWriter out3, String id, String path, String value) { 623 out3.println(" {\"" + id + "\",\"" + path.replace("\"", "\\\"") + "\",\"" 624 + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"},"); 625 } 626 627 private static UnicodeSet getExemplars(CLDRFile cldrFile) { 628 UnicodeSet exemplars = cldrFile.getExemplarSet("", CLDRFile.WinningChoice.WINNING); 629 boolean isLatin = exemplars.containsSome(ASCII_LATIN); 630 exemplars.addAll(CheckExemplars.AlwaysOK); 631 UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", CLDRFile.WinningChoice.WINNING); 632 if (auxExemplars != null) { 633 exemplars.addAll(auxExemplars); 634 } 635 if (!isLatin) { 636 exemplars.removeAll(LATIN); 637 } 638 exemplars.freeze(); 639 return exemplars; 640 } 641 642 static final Pattern COUNT_ATTRIBUTE = PatternCache.get("\\[@count=\"([^\"]*)\"]"); 643 static final Pattern PLURAL_NUMBER = PatternCache.get("(decimal|number)Format"); 644 645 private static Row.R2<Integer, Integer> writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale, 646 Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter) { 647 Matcher m = COUNT_ATTRIBUTE.matcher(""); 648 int wordCount = 0; 649 PluralInfo pluralInfo = supplementalDataInfo.getPlurals(locale); 650 int lineCount = 0; 651 Set<String> errorSet = new LinkedHashSet<String>(); 652 for (Entry<String, Set<R2<PathInfo, String>>> entry : countItems.keyValuesSet()) { 653 String countLessPath = entry.getKey(); 654 Map<String, String> fullValues = new TreeMap<String, String>(); 655 PathInfo pathInfo = null; 656 String value = null; 657 
for (R2<PathInfo, String> entry2 : entry.getValue()) { 658 PathInfo pathInfoN = entry2.get0(); 659 m.reset(pathInfoN.getPath()).find(); 660 String count = m.group(1); 661 if (count.equals("other")) { 662 pathInfo = pathInfoN; 663 } 664 value = entry2.get1(); 665 fullValues.put(count, value); 666 } 667 if (pathInfo == null) { 668 continue; 669 } 670 if (fullValues.size() < 2) { 671 // if we don't have two count values, skip 672 System.out.println(locale + "\tMust have 2 count values: " + entry.getKey()); 673 continue; 674 } 675 String fullPlurals = showPlurals(fullValues, locale, pathInfo, pluralInfo, isEnglish, errorSet); 676 if (fullPlurals == null) { 677 System.out.println(locale + "\tCan't format plurals for: " + entry.getKey() + "\t" + errorSet); 678 errors++; 679 continue; 680 } 681 682 out.println(); 683 out.println(" <!-- " 684 // + prettyPath.getPrettyPath(pathInfo.getPath(), false) + " ; " 685 + countLessPath + " -->"); 686 out.println(" <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'"); 687 out.println(" >" + fullPlurals + "</msg>"); 688 // Use the last plural value in the loop because we only need it for example purposes. 
689 writeJavaInfo(out3, pathInfo.getStringId(), countLessPath, value); 690 // if (!isEnglish || pathInfo.placeholderReplacements != null) { 691 // out.println("\t<!-- English original:\t" + pathInfo.getEnglishValue() + "\t-->"); 692 // } 693 out.flush(); 694 ++lineCount; 695 wordCount += pathInfo.wordCount * 3; 696 if (filter) { 697 break; 698 } 699 } 700 return Row.of(lineCount, wordCount); 701 } 702 703 static final String[] PLURAL_KEYS = { "=0", "=1", "zero", "one", "two", "few", "many", "other" }; 704 static final String[] EXTRA_PLURAL_KEYS = { "0", "1", "zero", "one", "two", "few", "many" }; 705 706 private static String showPlurals(Map<String, String> values, 707 String locale, PathInfo pathInfo, PluralInfo pluralInfo, 708 boolean isEnglish, Set<String> errorSet) { 709 errorSet.clear(); 710 /* 711 * Desired output for English XMB 712 * <msg desc= 713 * "[ICU Syntax] Plural forms for a number of hours. These are special messages: before translating, see cldr.org/translation/plurals." 714 * > 715 * {LENGTH, select, 716 * abbreviated { 717 * {NUMBER_OF_HOURS, plural, 718 * =0 {0 hrs} 719 * =1 {1 hr} 720 * zero {# hrs} 721 * one {# hrs} 722 * two {# hrs} 723 * few {# hrs} 724 * many {# hrs} 725 * other {# hrs}}} 726 * full { 727 * {NUMBER_OF_HOURS, plural, 728 * =0 {0 hours} 729 * =1 {1 hour} 730 * zero {# hours} 731 * one {# hours} 732 * two {# hours} 733 * few {# hours} 734 * many {# hours} 735 * other {# hours}}}} 736 * </msg> 737 * 738 * NOTE: For the WSB, the format has to match the following, WITHOUT LFs 739 * 740 * <msg id='1431840205484292448' desc='[ICU Syntax] who is viewing? This message requires special attention. 741 * Please follow the instructions here: 742 * https://sites.google.com/a/google.com/localization-info-site/Home/training/icusyntax'> 743 * <ph name='[PLURAL_NUM_USERS_OFFSET_1]' ex='Special placeholder used in [ICU Syntax] messages, see 744 * instructions page.'/> 745 * <ph name='[=0]'/>No one else is viewing. 
746 * <ph name='[=1]'/><ph name='USERNAME' ex='Bob'/> is viewing. 747 * <ph name='[=2]'/><ph name='USERNAME' ex='Bob'/> and one other are viewing. 748 * <ph name='[ZERO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 749 * <ph name='[ONE]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 750 * <ph name='[TWO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 751 * <ph name='[FEW]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 752 * <ph name='[MANY]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 753 * <ph name='[OTHER]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing. 754 * <ph name='[END_PLURAL]'/> 755 * </msg> 756 */ 757 Matcher matcher = PLURAL_NUMBER.matcher(pathInfo.getPath()); 758 String var = null; 759 if (matcher.find()) { 760 // Plural doesn't use placeholders so create a label. 761 var = matcher.group(1).toUpperCase() + "_NUMBER"; 762 } else { 763 var = pathInfo.getFirstVariable(); 764 } 765 766 StringBuilder result = new StringBuilder(); 767 if (isEnglish) { 768 result.append('{') 769 // .append("PLURAL_") 770 .append(var).append(",plural,"); 771 } else { 772 result.append("<ph name='[PLURAL_").append(var).append("]'/>"); // ex='Special placeholder used in [ICU 773 // Syntax] messages, see instructions page.' 774 } 775 for (String key : PLURAL_KEYS) { 776 String value; 777 String coreKey = key.startsWith("=") ? 
key.substring(1, 2) : key; 778 value = values.get(coreKey); 779 if (value == null) { 780 if (key.startsWith("=")) { 781 String stringCount = key.substring(1); 782 // handle both =x case, and the category 783 int intCount = Integer.parseInt(stringCount); 784 Count count = pluralInfo.getCount(intCount); 785 value = values.get(count.toString()); 786 if (value == null) { 787 errorSet.add("Bad key/value " + key + "='" + value + "' in " + values); 788 return null; 789 } 790 value = value.replace("{0}", stringCount); 791 } else { 792 value = values.get("other"); 793 if (value == null) { 794 errorSet.add("No 'other' value in " + values); 795 return null; 796 } 797 } 798 } 799 String newValue = MessageFormat.format(MessageFormat.autoQuoteApostrophe(value), 800 new Object[] { key.startsWith("=") ? key.substring(1, 2) : "#" }); 801 PlaceholderType type = isEnglish ? PlaceholderType.BRACES : PlaceholderType.XML; 802 newValue = pathInfo.transformValue(newValue, type); 803 if (isEnglish) { 804 result.append("\n ").append(key).append(" {").append(newValue).append('}'); 805 } else { 806 String prefix = key.toUpperCase(Locale.ENGLISH); 807 result.append("<!--\n --><ph name='[").append(prefix).append("]'/>").append(newValue); 808 } 809 } 810 if (isEnglish) { 811 result.append('}'); 812 } else { 813 result.append("<!--\n --><ph name='[END_PLURAL]'/>"); 814 } 815 return result.toString(); 816 } 817 818 private static void writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish) { 819 out.println(); 820 out.println(" <!-- " + pathInfo.getPath() + " -->"); 821 out.println(" <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'"); 822 PlaceholderType type = isEnglish ? 
PlaceholderType.XML_EXAMPLE : PlaceholderType.XML; 823 String transformValue = pathInfo.transformValue(value, type); 824 out.println(" >" + transformValue + "</msg>"); 825 value = TransliteratorUtilities.toHTML.transform(value); 826 if (!value.equals(transformValue) && (!isEnglish || pathInfo.placeholders != null)) { 827 out.println(" <!-- English original: " + value + " -->"); 828 } 829 out.flush(); 830 } 831 832 private static void writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename) 833 throws IOException { 834 targetDir += "/skipped/"; 835 filename += ".txt"; 836 PrintWriter out = FileUtilities.openUTF8Writer(targetDir, filename); 837 out.println("# " + DATE); 838 for (Entry<String, Set<String>> reasonToSet : reasonsToPaths.keyValuesSet()) { 839 for (String path : reasonToSet.getValue()) { 840 out.println(reasonToSet.getKey() + " " + path); 841 } 842 } 843 out.close(); 844 } 845 846 static class PathInfo implements Comparable<PathInfo> { 847 private static final Pattern PLACEHOLDER = PatternCache.get("\\{(\\d)}"); 848 849 private final String path; 850 private final Long id; 851 private final String stringId; 852 private final String englishValue; 853 private final boolean changedEnglish; 854 private final Map<String, PlaceholderInfo> placeholders; 855 private final String description; 856 private final String starredPath; 857 private final int wordCount; 858 859 private static final BreakIterator bi = BreakIterator.getWordInstance(ULocale.ENGLISH); 860 private static final UnicodeSet ALPHABETIC = new UnicodeSet("[:Alphabetic:]"); 861 862 public PathInfo(String path, String englishValue, boolean changedEnglish, 863 Map<String, PlaceholderInfo> placeholders, 864 String description, String starredPath) { 865 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) { 866 int x = 0; 867 } 868 if (description == null) { 869 path2errors.put(path, "missing description"); 870 } 871 this.path = path; 872 long id = StringId.getId(path); 
873 this.id = id; 874 stringId = String.valueOf(id); 875 this.englishValue = englishValue; 876 this.changedEnglish = changedEnglish; 877 this.placeholders = placeholders; 878 this.description = description == null ? null : description.intern(); 879 this.starredPath = starredPath; 880 // count words 881 int tempCount = 0; 882 bi.setText(englishValue); 883 int start = bi.first(); 884 for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) { 885 String word = englishValue.substring(start, end); 886 if (ALPHABETIC.containsSome(word)) { 887 ++tempCount; 888 } 889 } 890 wordCount = tempCount == 0 ? 1 : tempCount; 891 } 892 893 public String getFirstVariable() { 894 // ... name='FIRST_PART_OF_TEXT' ... 895 PlaceholderInfo info = placeholders.get("{0}"); 896 if (info == null) { 897 throw new IllegalArgumentException("Missing {0} for " + this); 898 } 899 return info.name; 900 } 901 902 public String getPath() { 903 return path; 904 } 905 906 public Long getId() { 907 return id; 908 } 909 910 public String getStringId() { 911 return stringId; 912 } 913 914 public String getEnglishValue() { 915 return englishValue; 916 } 917 918 public String getDescription() { 919 return description; 920 } 921 922 public String getStarredPath() { 923 return starredPath; 924 } 925 926 public Map<String, String> getPlaceholderReplacementsToOriginal() { 927 if (placeholders == null) return null; 928 Map<String, String> placeholderOutput = new LinkedHashMap<String, String>(); 929 for (String id : placeholders.keySet()) { 930 placeholderOutput.put(id, getPlaceholderWithExample(id)); 931 } 932 return placeholderOutput; 933 } 934 935 private String getPlaceholderWithExample(String placeholder) { 936 PlaceholderInfo info = placeholders.get(placeholder); 937 // <ph name='x'><ex>xxx</ex>yyy</ph> 938 return "<ph name='" + info.name + "'><ex>" + info.example + "</ex>" + placeholder + "</ph>"; 939 } 940 941 // static DateTimePatternGenerator.FormatParser formatParser = new 
DateTimePatternGenerator.FormatParser(); 942 943 private String transformValue(String value, PlaceholderType type) { 944 value = TransliteratorUtilities.toHTML.transform(value); 945 if (placeholders == null) return value; 946 947 String placeholderFormat = ""; 948 switch (type) { 949 case BRACES: 950 placeholderFormat = "'{'{0}'}'"; 951 break; 952 case XML: 953 placeholderFormat = "<ph name=''[{0}]'' />"; 954 break; 955 case XML_EXAMPLE: 956 placeholderFormat = "<ph name=''{0}''><ex>{1}</ex>'{'{2}'}'</ph>"; 957 break; 958 } 959 Matcher matcher = PLACEHOLDER.matcher(value); 960 StringBuffer buffer = new StringBuffer(); 961 int start = 0; 962 while (matcher.find()) { 963 buffer.append(value.substring(start, matcher.start())); 964 PlaceholderInfo info = placeholders.get(matcher.group()); 965 buffer.append(MessageFormat.format(placeholderFormat, 966 new Object[] { info.name, info.example, matcher.group(1) })); 967 start = matcher.end(); 968 } 969 buffer.append(value.substring(start)); 970 return buffer.toString(); 971 } 972 973 private String replacePlaceholders(String value, String placeholderStart, String placeholderEnd) { 974 Matcher matcher = PLACEHOLDER.matcher(value); 975 StringBuffer buffer = new StringBuffer(); 976 int start = 0; 977 while (matcher.find()) { 978 buffer.append(value.substring(start, matcher.start())); 979 String name = placeholders.get(matcher.group()).name; 980 buffer.append(placeholderStart).append(name).append(placeholderEnd); 981 start = matcher.end(); 982 } 983 buffer.append(value.substring(start)); 984 return buffer.toString(); 985 } 986 987 @Override 988 public int compareTo(PathInfo arg0) { 989 return path.compareTo(arg0.path); 990 } 991 992 public String toString() { 993 return path; 994 } 995 } 996 997 static class EnglishInfo implements Iterable<PathInfo> { 998 999 final Map<String, PathInfo> pathToPathInfo = new TreeMap<String, PathInfo>(); 1000 final Map<Long, PathInfo> longToPathInfo = new HashMap<Long, PathInfo>(); 1001 final 
CLDRFile english; 1002 1003 PathInfo getPathInfo(long hash) { 1004 return longToPathInfo.get(hash); 1005 } 1006 1007 public String getName(String localeId) { 1008 return english.getName(localeId); 1009 } 1010 1011 PathInfo getPathInfo(String path) { 1012 return pathToPathInfo.get(path); 1013 } 1014 1015 EnglishInfo(String targetDir, CLDRFile english, CLDRFile root) throws Exception { 1016 1017 Map<String, String> oldPathValueMap = ReadXMB.load(CLDRPaths.BASE_DIRECTORY + 1018 "/cldr-tools/org/unicode/cldr/unittest/data/xmb/", 1019 "en.xml"); 1020 1021 PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance(); 1022 1023 this.english = english; 1024 // we don't want the fully resolved paths, but we do want the direct inheritance from root. 1025 //Status status = new Status(); 1026 Map<String, List<Set<String>>> starredPaths = new TreeMap<String, List<Set<String>>>(); 1027 1028 HashSet<String> metazonePaths = new HashSet<String>(); 1029 // ^//ldml/dates/timeZoneNames/metazone\[@type="([^"]*)"] 1030 for (MetazoneInfo metazoneInfo : MetazoneInfo.METAZONE_LIST) { 1031 for (String item : metazoneInfo.getTypes()) { 1032 String path = "//ldml/dates/timeZoneNames/metazone[@type=\"" + metazoneInfo.metazoneId + "\"]" 1033 + item; 1034 metazonePaths.add(path); 1035 } 1036 } 1037 1038 // TODO add short countries 1039 HashSet<String> extraLanguages = new HashSet<String>(); 1040 // ldml/localeDisplayNames/languages/language[@type=".*"] 1041 1042 for (String langId : PathDescription.EXTRA_LANGUAGES) { 1043 String langPath = "//ldml/localeDisplayNames/languages/language[@type=\"" + langId + "\"]"; 1044 extraLanguages.add(langPath); 1045 } 1046 1047 Set<String> sorted = Builder.with(new TreeSet<String>()) 1048 .addAll(english) 1049 .removeAll( 1050 new Transform<String, Boolean>() { 1051 public Boolean transform(String source) { 1052 return source.startsWith("//ldml/dates/timeZoneNames/metazone") ? 
Boolean.TRUE 1053 : Boolean.FALSE; 1054 } 1055 }) 1056 .get(); 1057 sorted.addAll(metazonePaths); 1058 if (DEBUG) { 1059 TreeSet<String> diffs = new TreeSet<String>(extraLanguages); 1060 diffs.removeAll(sorted); 1061 System.out.println(diffs); 1062 } 1063 sorted.addAll(extraLanguages); 1064 1065 // add the extra Count items. 1066 Map<String, String> extras = new HashMap<String, String>(); 1067 Matcher m = COUNT_ATTRIBUTE.matcher(""); 1068 1069 for (String path : sorted) { 1070 if (path.contains("[@count=\"")) { 1071 m.reset(path).find(); 1072 for (String key : EXTRA_PLURAL_KEYS) { 1073 String path2 = path.substring(0, m.start(1)) + key + path.substring(m.end(1)); 1074 extras.put(path2, path); 1075 } 1076 } 1077 // if (path.contains("ellipsis")) { 1078 // System.out.println(path); 1079 // } 1080 } 1081 sorted.addAll(extras.keySet()); 1082 1083 Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 1084 Set<String> missingDescriptions = new TreeSet<String>(); 1085 //Output<String[]> pathArguments = new Output<String[]>(); 1086 1087 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en"); 1088 RegexLookup<Boolean> coverageAllow = new RegexLookup<Boolean>() 1089 .add("^//ldml/localeDisplayNames/keys/key", true) 1090 .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(jv|zxx|gsw|eo)\"]", true) 1091 .add("^//ldml/localeDisplayNames/scripts/script", true) 1092 .add("^//ldml/localeDisplayNames/types/type", true) 1093 .add( 1094 "^//ldml/dates/calendars/calendar\\[@type=\"[^\"]*\"]/dayPeriods/dayPeriodContext\\[@type=\"format\"]", 1095 true); 1096 1097 // TODO: for each count='other' path, add the other keywords and values 1098 PathDescription pathDescription = new PathDescription(GenerateXMB.supplementalDataInfo, english, extras, 1099 starredPaths, PathDescription.ErrorHandling.SKIP); 1100 1101 for (String path : sorted) { 1102 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) { 1103 int x = 0; 1104 } 
1105 String value = english.getStringValue(path); 1106 Level level = coverageLevel.getLevel(path); 1107 if (value == null) { 1108 value = "[EMPTY]"; 1109 addSkipReasons(reasonsToPaths, "empty-value", level, path, value); 1110 continue; 1111 } 1112 if (pathMatcher != null 1113 && !pathMatcher.reset(path).find()) { 1114 addSkipReasons(reasonsToPaths, "path-parameter", level, path, value); 1115 continue; 1116 } 1117 PathStatus pathStatus = shouldSkipPath(path, value); 1118 if (pathStatus == PathStatus.SKIP) { 1119 addSkipReasons(reasonsToPaths, "path-remove", level, path, value); 1120 continue; 1121 } 1122 1123 if (level.compareTo(Level.MODERN) > 0 && pathStatus != PathStatus.KEEP) { 1124 if (coverageAllow.get(path) == null) { // HACK 1125 addSkipReasons(reasonsToPaths, "coverage", level, path, value); 1126 continue; 1127 } else { 1128 addSkipReasons(reasonsToPaths, "coverage*", level, path, value); 1129 continue; 1130 // System.out.println("Not skipping " + path); 1131 } 1132 } 1133 1134 String description = pathDescription.getDescription(path, value, level, null); 1135 EnumSet<PathDescription.Status> descriptionStatus = pathDescription.getStatus(); 1136 if (!descriptionStatus.isEmpty()) { 1137 addSkipReasons(reasonsToPaths, descriptionStatus.toString(), level, path, value); 1138 description = null; 1139 } else { 1140 description = "[ICU CLDR] " + description; 1141 } 1142 1143 String oldValue = oldPathValueMap.get(path); 1144 boolean changedEnglish = !value.equals(oldValue); 1145 PathInfo row = new PathInfo(path, value, changedEnglish, patternPlaceholders.get(path), description, 1146 pathDescription.getStarredPathOutput()); 1147 1148 if (description == PathDescription.MISSING_DESCRIPTION) { 1149 missingDescriptions.add(pathDescription.getStarredPathOutput()); 1150 } 1151 1152 Long hash = row.getId(); 1153 if (longToPathInfo.containsKey(hash)) { 1154 throw new IllegalArgumentException("Id collision for " 1155 + path + " and " + longToPathInfo.get(hash).getPath()); 
1156 } 1157 pathToPathInfo.put(path, row); 1158 longToPathInfo.put(hash, row); 1159 if (value.contains("{0}") && patternPlaceholders.get(path) == null) { 1160 System.out.println("ERROR, no placeholders for {0}...: " + path + " ; " + value); 1161 } 1162 } 1163 1164 PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-paths.txt"); 1165 out.println("# " + DATE); 1166 for (Entry<String, List<Set<String>>> starredPath : starredPaths.entrySet()) { 1167 out.println(starredPath.getKey() + "\t\t" + starredPath.getValue()); 1168 } 1169 out.close(); 1170 out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-missingDescriptions.txt"); 1171 out.println("# " + DATE); 1172 for (String starredPath : missingDescriptions) { 1173 // ^//ldml/dates/timeZoneNames/zone\[@type=".*"]/exemplarCity ; ROOT timezone ; The name of a city in: 1174 // {0}. See cldr.org/xxxx. 1175 out.println(toRegexPath(starredPath) + "\t;\tDESCRIPTION\t" + starredPaths.get(starredPath)); 1176 } 1177 out.close(); 1178 writeReasons(reasonsToPaths, targetDir, "en"); 1179 } 1180 1181 private String toRegexPath(String starredPath) { 1182 String result = starredPath.replace("[", "\\["); 1183 result = result.replace("\".*\"", "\"([^\"]*)\""); 1184 return "^" + result; 1185 } 1186 1187 @Override 1188 public Iterator<PathInfo> iterator() { 1189 return pathToPathInfo.values().iterator(); 1190 } 1191 } 1192 1193 static void addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level, 1194 String path, String value) { 1195 reasonsToPaths.put(descriptionStatus + "\t" + level, path + "\t" + value); 1196 } 1197 1198 // Get Date-Time in milliseconds 1199 private static long getDateTimeinMillis(int year, int month, int date) { 1200 Calendar cal = Calendar.getInstance(); 1201 cal.set(year, month, date); 1202 return cal.getTimeInMillis(); 1203 } 1204 1205 static final long START_TIME = getDateTimeinMillis(2000, 1, 0); 1206 static final long END_TIME = 
/** Step, in milliseconds (15 minutes), used when comparing zone offsets. */
static final long DELTA_TIME = 15 * 60 * 1000;
/** Step, in milliseconds (90 days), used when probing a zone for daylight time. */
static final long MIN_DAYLIGHT_PERIOD = 90L * 24 * 60 * 60 * 1000;

/** Canonical zone ids that are in daylight time at one or more probe instants before END_TIME. */
static final Set<String> HAS_DAYLIGHT;
static {
    Set<String> daylightZoneIds = new HashSet<String>();
    Date probe = new Date();
    for (String zoneId : sc.getCanonicalTimeZones()) {
        if (observesDaylightInScanRange(TimeZone.getTimeZone(zoneId), probe)) {
            daylightZoneIds.add(zoneId);
        }
    }
    HAS_DAYLIGHT = Collections.unmodifiableSet(daylightZoneIds);
}

/**
 * Probes {@code zone} every MIN_DAYLIGHT_PERIOD, starting one period after START_TIME and
 * stopping before END_TIME; true as soon as one probe is in daylight time.
 * {@code probe} is a scratch object that is overwritten.
 */
private static boolean observesDaylightInScanRange(TimeZone zone, Date probe) {
    long time = START_TIME + MIN_DAYLIGHT_PERIOD;
    while (time < END_TIME) {
        probe.setTime(time);
        if (zone.inDaylightTime(probe)) {
            return true;
        }
        time += MIN_DAYLIGHT_PERIOD;
    }
    return false;
}

/**
 * Countries treated as having a single zone: a fixed special-case list, every country with
 * exactly one zone, and every country whose zones never differ in offset over the scan range.
 */
static final Set<String> SINGULAR_COUNTRIES;

private static PrintWriter countFile;
static {
    // start with certain special-case countries
    Set<String> singular = new HashSet<String>(
        Arrays.asList("CL EC ES NZ PT AQ FM GL KI UM PF".split(" ")));

    for (Entry<String, Set<String>> countryZones : sc.getCountryToZoneSet().entrySet()) {
        String country = countryZones.getKey();
        if (country.equals("001")) {
            continue;
        }
        Set<String> zoneIds = countryZones.getValue();
        if (zoneIds.size() == 1 || zonesShareOffsets(zoneIds)) {
            singular.add(country);
        }
    }
    SINGULAR_COUNTRIES = Collections.unmodifiableSet(singular);
}

/**
 * True when, at every DELTA_TIME step from START_TIME up to (not including) END_TIME, all the
 * given zones report the same offset.
 */
private static boolean zonesShareOffsets(Set<String> zoneIds) {
    List<TimeZone> zones = new ArrayList<TimeZone>();
    for (String zoneId : zoneIds) {
        zones.add(TimeZone.getTimeZone(zoneId));
    }
    // now cycle through the times and see if we find any differences
    for (long time = START_TIME; time < END_TIME; time += DELTA_TIME) {
        int firstOffset = Integer.MIN_VALUE; // sentinel: no zone examined yet at this instant
        for (TimeZone zone : zones) {
            int offset = zone.getOffset(time);
            if (firstOffset == Integer.MIN_VALUE) {
                firstOffset = offset;
            } else if (firstOffset != offset) {
                return false;
            }
        }
    }
    return true;
}
int offset = zone.getOffset(time); 1261 if (firstOffset == Integer.MIN_VALUE) { 1262 firstOffset = offset; 1263 } else { 1264 if (firstOffset != offset) { 1265 if (false) 1266 System.out.println(country 1267 + " Difference at: " + new Date(time) 1268 + ", " + zone.getDisplayName() + " " + (offset / 1000.0 / 60 / 60) 1269 + ", " + initial.iterator().next().getDisplayName() + " " 1270 + (firstOffset / 1000.0 / 60 / 60)); 1271 continue main; 1272 } 1273 } 1274 } 1275 } 1276 singularCountries.add(country); 1277 } 1278 SINGULAR_COUNTRIES = Collections.unmodifiableSet(singularCountries); 1279 } 1280 1281 static final class MetazoneInfo { 1282 1283 /** 1284 * @param metazoneId 1285 * @param singleCountry 1286 * @param hasDaylight 1287 * @param zonesForCountry 1288 * @param regionToZone 1289 */ 1290 public MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight) { 1291 this.golden = golden; 1292 this.metazoneId = metazoneId; 1293 this.singleCountry = singleCountry; 1294 this.hasDaylight = hasDaylight; 1295 } 1296 1297 static final String[] GENERIC = { "/long/generic", 1298 // "/short/generic" 1299 }; 1300 static final String[] DAYLIGHT = { "/long/generic", "/long/standard", "/long/daylight", 1301 // "/short/generic", "/short/standard", "/short/daylight" 1302 }; 1303 1304 public String[] getTypes() { 1305 return hasDaylight ? 
DAYLIGHT : GENERIC; 1306 } 1307 1308 private final String metazoneId; 1309 private final String golden; 1310 private final boolean singleCountry; 1311 private final boolean hasDaylight; 1312 1313 static final List<MetazoneInfo> METAZONE_LIST; 1314 static { 1315 // Set<String> zones = supplementalDataInfo.getCanonicalTimeZones(); 1316 ArrayList<MetazoneInfo> result = new ArrayList<MetazoneInfo>(); 1317 1318 Map<String, String> zoneToCountry = sc.getZoneToCounty(); 1319 1320 Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone(); 1321 for (String metazone : supplementalDataInfo.getAllMetazones()) { 1322 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone); 1323 String golden = regionToZone.get("001"); 1324 if (golden == null) { 1325 throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone); 1326 } 1327 String region = zoneToCountry.get(golden); 1328 boolean isSingleCountry = SINGULAR_COUNTRIES.contains(region); 1329 if (isSingleCountry) { 1330 continue; 1331 } 1332 1333 // TimeZone goldenZone = TimeZone.getTimeZone(golden); 1334 1335 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(golden); 1336 if (metazoneRanges == null) { 1337 throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone); 1338 } 1339 MetazoneInfo item = new MetazoneInfo(metazone, golden, isSingleCountry, HAS_DAYLIGHT.contains(golden)); 1340 result.add(item); 1341 } 1342 METAZONE_LIST = Collections.unmodifiableList(result); 1343 } 1344 1345 public String toString() { 1346 return sc.getZoneToCounty().get(golden) 1347 + "\t" + metazoneId 1348 + "\t" + golden 1349 + "\t" + (singleCountry ? "singleCountry" : "") 1350 + "\t" + (hasDaylight ? 
"useDaylightTime" : "") 1351 // + ": " + zonesForCountry 1352 // + "\t" + regionToZone; 1353 ; 1354 } 1355 } 1356 1357 static void showMetazoneInfo() { 1358 System.out.println("\nZones in multiple metazones\n"); 1359 1360 for (String zone : sc.getCanonicalTimeZones()) { 1361 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(zone); 1362 if (metazoneRanges == null) { 1363 System.out.println("Zone doesn't have metazone! " + zone); 1364 continue; 1365 } 1366 if (metazoneRanges.size() != 1) { 1367 for (MetaZoneRange range : metazoneRanges) { 1368 System.out.println(zone + ":\t" + range); 1369 } 1370 System.out.println(); 1371 } 1372 } 1373 1374 System.out.println("\nMetazoneInfo\n"); 1375 1376 for (boolean singleCountry : new boolean[] { false }) { 1377 for (boolean hasDaylight : new boolean[] { false, true }) { 1378 for (MetazoneInfo mzone : MetazoneInfo.METAZONE_LIST) { 1379 if (mzone.hasDaylight != hasDaylight) continue; 1380 if (mzone.singleCountry != singleCountry) continue; 1381 System.out.println(mzone); 1382 } 1383 } 1384 } 1385 } 1386 1387 private static void displayWsb(String file, EnglishInfo info) { 1388 try { 1389 String[] parts = file.split("/"); 1390 ULocale locale = new ULocale(parts[parts.length - 2]); 1391 FileInputStream fis = new FileInputStream(file); 1392 XMLReader xmlReader = XMLFileReader.createXMLReader(false); 1393 xmlReader.setErrorHandler(new MyErrorHandler()); 1394 Map<String, String> data = new TreeMap<String, String>(); 1395 xmlReader.setContentHandler(new MyContentHandler(locale, data, info)); 1396 InputSource is = new InputSource(fis); 1397 is.setSystemId(file); 1398 xmlReader.parse(is); 1399 fis.close(); 1400 for (Entry<String, String> entity : data.entrySet()) { 1401 String path = entity.getKey(); 1402 String value = entity.getValue(); 1403 PathInfo pathInfo = info.getPathInfo(path); 1404 System.out.println(value + "\t" + (pathInfo == null ? "?" 
: pathInfo.englishValue) + "\t" + path); 1405 } 1406 } catch (SAXParseException e) { 1407 System.out.println("\t" + "Can't read " + file); 1408 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1409 } catch (SAXException e) { 1410 System.out.println("\t" + "Can't read " + file); 1411 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1412 } catch (IOException e) { 1413 System.out.println("\t" + "Can't read " + file); 1414 System.out.println("\t" + e.getClass() + "\t" + e.getMessage()); 1415 } 1416 } 1417 1418 static class MyErrorHandler implements ErrorHandler { 1419 public void error(SAXParseException exception) throws SAXException { 1420 System.out.println("\nerror: " + XMLFileReader.showSAX(exception)); 1421 throw exception; 1422 } 1423 1424 public void fatalError(SAXParseException exception) throws SAXException { 1425 System.out.println("\nfatalError: " + XMLFileReader.showSAX(exception)); 1426 throw exception; 1427 } 1428 1429 public void warning(SAXParseException exception) throws SAXException { 1430 System.out.println("\nwarning: " + XMLFileReader.showSAX(exception)); 1431 throw exception; 1432 } 1433 } 1434 1435 static class MyContentHandler implements ContentHandler { 1436 private static final boolean SHOW = false; 1437 private Map<String, String> myData; 1438 private EnglishInfo info; 1439 private PathInfo lastPathInfo; 1440 private StringBuilder currentText = new StringBuilder(); 1441 private long lastId; 1442 private String lastPluralTag; 1443 private Map<String, String> pluralTags = new LinkedHashMap<String, String>(); 1444 private Set<String> pluralKeywords; 1445 1446 public MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info) { 1447 myData = data; 1448 this.info = info; 1449 PluralRules rules = PluralRules.forLocale(locale); 1450 pluralKeywords = Builder.with(new HashSet<String>()).addAll(rules.getKeywords()).add("0").add("1").freeze(); 1451 } 1452 1453 @Override 1454 public void characters(char[] 
arg0, int arg1, int arg2) throws SAXException { 1455 String chars = String.valueOf(arg0, arg1, arg2); 1456 // if (SHOW) System.out.println("\t characters\t" + chars); 1457 currentText.append(chars); 1458 } 1459 1460 @Override 1461 public void endDocument() throws SAXException { 1462 if (SHOW) System.out.println("\t endDocument\t"); 1463 } 1464 1465 @Override 1466 public void endElement(String arg0, String arg1, String qName) throws SAXException { 1467 // if (SHOW) System.out.println("\t endElement\t" + arg0 + "\t" + arg1 + "\t" + qName); 1468 if (qName.equals("msg")) { 1469 String chars = currentText.toString().replace("\n", "").trim(); 1470 if (lastPathInfo == null) { 1471 System.out.println("***Missing path info for " + lastId + "\t" + chars); 1472 // myData.put("*** Missing path: " + lastId, chars); 1473 } else if (pluralTags.size() != 0) { 1474 for (Entry<String, String> pluralTagEntry : pluralTags.entrySet()) { 1475 String pluralTag = pluralTagEntry.getKey(); 1476 String pluralTagValue = pluralTagEntry.getValue(); 1477 if (pluralKeywords.contains(pluralTag)) { 1478 String fixedCount = lastPathInfo.path.replace("other", pluralTag); 1479 myData.put(fixedCount, pluralTagValue); 1480 } else { 1481 System.out.println("***Skipping " + pluralTag + "\t" + pluralTagValue); 1482 } 1483 } 1484 // myData.put(lastPathInfo.path, pluralTags.toString()); 1485 pluralTags.clear(); 1486 } else { 1487 myData.put(lastPathInfo.path, chars); 1488 } 1489 currentText.setLength(0); 1490 } 1491 } 1492 1493 @Override 1494 public void endPrefixMapping(String arg0) throws SAXException { 1495 if (SHOW) System.out.println("\t endPrefixMapping\t" + arg0); 1496 } 1497 1498 @Override 1499 public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException { 1500 if (SHOW) System.out.println("\t ignorableWhitespace\t" + String.valueOf(arg0, arg1, arg2)); 1501 } 1502 1503 @Override 1504 public void processingInstruction(String arg0, String arg1) throws SAXException { 1505 if 
(SHOW) System.out.println("\t processingInstruction\t" + arg0 + "\t" + arg1); 1506 } 1507 1508 @Override 1509 public void setDocumentLocator(Locator arg0) { 1510 if (SHOW) System.out.println("\t setDocumentLocator\t" + arg0); 1511 } 1512 1513 @Override 1514 public void skippedEntity(String arg0) throws SAXException { 1515 if (SHOW) System.out.println("\t skippedEntity\t" + arg0); 1516 } 1517 1518 @Override 1519 public void startDocument() throws SAXException { 1520 if (SHOW) System.out.println("\t startDocument\t"); 1521 } 1522 1523 @Override 1524 public void startElement(String arg0, String arg1, String qName, Attributes arg3) throws SAXException { 1525 // if (SHOW) System.out.println("\t startElement\t" + arg0 + "\t" + arg1 + "\t" + qName + "\t" + 1526 // showAttributes(arg3)); 1527 if (qName.equals("msg")) { 1528 lastId = Long.parseLong(arg3.getValue("id")); 1529 lastPathInfo = info.getPathInfo(lastId); 1530 currentText.setLength(0); 1531 } else if (qName.equals("ph")) { 1532 String name = arg3.getValue("name"); 1533 String original = lastPathInfo.getPlaceholderReplacementsToOriginal().get(name); 1534 if (original != null) { 1535 currentText.append(original); 1536 } else if (name.startsWith("[PLURAL_")) { 1537 pluralTags.clear(); 1538 lastPluralTag = "[START_PLURAL]"; 1539 } else { 1540 String pluralTag = PLURAL_TAGS.get(name); 1541 if (pluralTag != null) { 1542 String chars = currentText.toString().replace("\n", "").trim(); 1543 pluralTags.put(lastPluralTag, chars); 1544 currentText.setLength(0); 1545 lastPluralTag = pluralTag; 1546 } else { 1547 System.out.println("***Can't find " + name + " in " 1548 + lastPathInfo.getPlaceholderReplacementsToOriginal()); 1549 } 1550 } 1551 } 1552 } 1553 1554 private String showAttributes(Attributes atts) { 1555 String result = ""; 1556 for (int i = 0; i < atts.getLength(); ++i) { 1557 result += atts.getQName(i) + "=\"" + atts.getValue(i) + "\"\t"; 1558 } 1559 return result; 1560 } 1561 1562 @Override 1563 public void 
/**
 * Maps the {@code name} attribute of a plural marker {@code <ph>} element to the key under
 * which the following text is stored: "0" and "1" for the explicit-value markers, the
 * {@link PluralRules} keyword for the category markers, and the empty string for
 * {@code [END_PLURAL]}. The map is frozen by the builder.
 */
static final Map<String, String> PLURAL_TAGS = Builder.with(new HashMap<String, String>())
    .put("[=0]", "0")
    .put("[=1]", "1")
    .put("[ZERO]", PluralRules.KEYWORD_ZERO)
    .put("[ONE]", PluralRules.KEYWORD_ONE)
    .put("[TWO]", PluralRules.KEYWORD_TWO)
    .put("[FEW]", PluralRules.KEYWORD_FEW)
    .put("[MANY]", PluralRules.KEYWORD_MANY)
    .put("[OTHER]", PluralRules.KEYWORD_OTHER)
    .put("[END_PLURAL]", "")
    .freeze();

// NOTE(review): no use of this field is visible in this part of the file - confirm it is still needed.
private static String compareDirectory;
} // end of class GenerateXMB