Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.io.File;
      4 import java.io.FileInputStream;
      5 import java.io.IOException;
      6 import java.io.PrintWriter;
      7 import java.io.StringWriter;
      8 import java.util.ArrayList;
      9 import java.util.Arrays;
     10 import java.util.Calendar;
     11 import java.util.Collections;
     12 import java.util.Date;
     13 import java.util.EnumSet;
     14 import java.util.HashMap;
     15 import java.util.HashSet;
     16 import java.util.Iterator;
     17 import java.util.LinkedHashMap;
     18 import java.util.LinkedHashSet;
     19 import java.util.List;
     20 import java.util.Locale;
     21 import java.util.Map;
     22 import java.util.Map.Entry;
     23 import java.util.Set;
     24 import java.util.TreeMap;
     25 import java.util.TreeSet;
     26 import java.util.regex.Matcher;
     27 import java.util.regex.Pattern;
     28 
     29 import org.unicode.cldr.draft.FileUtilities;
     30 import org.unicode.cldr.test.CheckExemplars;
     31 import org.unicode.cldr.test.CoverageLevel2;
     32 import org.unicode.cldr.test.DisplayAndInputProcessor;
     33 import org.unicode.cldr.test.QuickCheck;
     34 import org.unicode.cldr.tool.Option.Options;
     35 import org.unicode.cldr.util.Builder;
     36 import org.unicode.cldr.util.CLDRFile;
     37 import org.unicode.cldr.util.CLDRPaths;
     38 import org.unicode.cldr.util.Factory;
     39 import org.unicode.cldr.util.FileCopier;
     40 import org.unicode.cldr.util.LanguageTagParser;
     41 import org.unicode.cldr.util.Level;
     42 import org.unicode.cldr.util.PathDescription;
     43 import org.unicode.cldr.util.PatternCache;
     44 import org.unicode.cldr.util.PatternPlaceholders;
     45 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
     46 import org.unicode.cldr.util.PrettyPath;
     47 import org.unicode.cldr.util.RegexLookup;
     48 import org.unicode.cldr.util.RegexLookup.Finder;
     49 import org.unicode.cldr.util.RegexUtilities;
     50 import org.unicode.cldr.util.StandardCodes;
     51 import org.unicode.cldr.util.StringId;
     52 import org.unicode.cldr.util.SupplementalDataInfo;
     53 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
     54 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
     55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
     56 import org.unicode.cldr.util.TransliteratorUtilities;
     57 import org.unicode.cldr.util.With;
     58 import org.unicode.cldr.util.XMLFileReader;
     59 import org.unicode.cldr.util.XMLSource;
     60 import org.unicode.cldr.util.XPathParts;
     61 import org.xml.sax.Attributes;
     62 import org.xml.sax.ContentHandler;
     63 import org.xml.sax.ErrorHandler;
     64 import org.xml.sax.InputSource;
     65 import org.xml.sax.Locator;
     66 import org.xml.sax.SAXException;
     67 import org.xml.sax.SAXParseException;
     68 import org.xml.sax.XMLReader;
     69 
     70 import com.ibm.icu.dev.util.CollectionUtilities;
     71 import com.ibm.icu.impl.Relation;
     72 import com.ibm.icu.impl.Row;
     73 import com.ibm.icu.impl.Row.R2;
     74 import com.ibm.icu.lang.CharSequences;
     75 import com.ibm.icu.text.BreakIterator;
     76 import com.ibm.icu.text.DateFormat;
     77 import com.ibm.icu.text.MessageFormat;
     78 import com.ibm.icu.text.PluralRules;
     79 import com.ibm.icu.text.SimpleDateFormat;
     80 import com.ibm.icu.text.Transform;
     81 import com.ibm.icu.text.UnicodeSet;
     82 import com.ibm.icu.util.Output;
     83 import com.ibm.icu.util.TimeZone;
     84 import com.ibm.icu.util.ULocale;
     85 
     86 public class GenerateXMB {
     87     private static final String DEBUG_PATH = "[@type=\"day\"]/unitPattern[@count=\"1\"]";
     88 
     89     static StandardCodes sc = StandardCodes.make();
     90 
     91     static final String DATE;
     92     static {
     93         DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
     94         DATE = dateFormat.format(new Date());
     95     }
     96     static final String stock = "en|ar|de|es|fr|it|ja|ko|nl|pl|ru|th|tr|pt|zh|zh_Hant|bg|ca|cs|da|el|fa|fi|fil|hi|hr|hu|id|lt|lv|ro|sk|sl|sr|sv|uk|vi|he|nb|et|ms|am|bn|gu|is|kn|ml|mr|sw|ta|te|ur|eu|gl|af|zu|en_GB|es_419|pt_PT|fr_CA|zh_Hant_HK";
     97     private static final HashSet<String> REGION_LOCALES = new HashSet<String>(Arrays.asList(stock.split("\\|")));
     98 
     99     final static Options myOptions = new Options("In normal usage, you set the -t option for the target.")
    100         .add("target", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/xmb/",
    101             "The target directory for building. Will generate an English .xmb file, and .wsb files for other languages.")
    102         .add(
    103             "file",
    104             ".*",
    105             stock,
    106             "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering")
    107         // "^(sl|fr)$",
    108         .add("path", ".*", "Filter the information based on path name, using a regex argument")
    109         // "dates.*(pattern|available)",
    110         .add("content", ".*", "Filter the information based on content name, using a regex argument")
    111         .add("jason", ".*", "Generate JSON versions instead")
    112         .add("zone", null, "Show metazoneinfo and exit")
    113         .add("wsb", ".*", "Show metazoneinfo and exit")
    114         .add("kompare", ".*", CLDRPaths.BASE_DIRECTORY + "../DATA/cldr/common/google-bulk-imports",
    115             "Compare data with directory; generate files in -target.")
    116         .add("project_name", 'n', ".*", "CLDR", "The ID of the project.");
    117 
    118     static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
    119     // static Matcher contentMatcher;
    120     static Matcher pathMatcher;
    121     static RegexLookup<String> pathFindRemover = new RegexLookup<String>().loadFromFile(GenerateXMB.class,
    122         "xmbSkip.txt");; // .compile("//ldml/dates/calendars/calendar\\[@type=\"(?!gregorian).*").matcher("");
    123     static PrettyPath prettyPath = new PrettyPath();
    124     static int errors = 0;
    125     static Relation<String, String> path2errors = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
    126 
    127     // enum Handling {SKIP};
    128     static final Matcher datePatternMatcher = PatternCache.get("dates.*(pattern|available)").matcher("");
    129 
    130     public static final boolean DEBUG = false;
    131 
    132     private static final HashSet<String> SKIP_LOCALES = new HashSet<String>(
    133         Arrays.asList(new String[] { "en", "root" }));
    134 
    135     public static String DTD_VERSION;
    136 
    137     private static String projectId;
    138 
    /** The notations in which a placeholder can be written out. */
    enum PlaceholderType {
        // e.g. {NAME}
        BRACES,

        // e.g. <ph name='NAME' />
        XML,

        // e.g. <ph name='NAME' /><ex>EXAMPLE</ex>{0}</ph>
        XML_EXAMPLE
    }
    144 
    145     public static void main(String[] args) throws Exception {
    146         myOptions.parse(args, true);
    147         Option option;
    148         option = myOptions.get("zone");
    149         if (option.doesOccur()) {
    150             showMetazoneInfo();
    151             return;
    152         }
    153         option = myOptions.get("file");
    154         String fileMatcherString = option.getValue();
    155         option = myOptions.get("content");
    156         Matcher contentMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null;
    157         option = myOptions.get("path");
    158         pathMatcher = option.doesOccur() ? PatternCache.get(option.getValue()).matcher("") : null;
    159 
    160         String targetDir = myOptions.get("target").getValue();
    161         countFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "counts.txt");
    162 
    163         Factory cldrFactory1 = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
    164         CLDRFile english = cldrFactory1.make("en", true);
    165         CLDRFile englishTop = cldrFactory1.make("en", false);
    166         DTD_VERSION = englishTop.getDtdVersion();
    167 
    168         CLDRFile root = cldrFactory1.make("en", true);
    169 
    170         showDefaultContents(targetDir, english);
    171         EnglishInfo englishInfo = new EnglishInfo(targetDir, english, root);
    172 
    173         option = myOptions.get("kompare");
    174         if (option.doesOccur()) {
    175             compareDirectory = option.getValue();
    176             compareFiles(fileMatcherString, contentMatcher, targetDir, cldrFactory1, english, englishInfo);
    177             return;
    178         }
    179 
    180         if (myOptions.get("wsb").doesOccur()) {
    181             displayWsb(myOptions.get("wsb").getValue(), englishInfo);
    182             return;
    183         }
    184 
    185         projectId = myOptions.get("project_name").getValue();
    186 
    187         writeFile(targetDir, "en", englishInfo, english, true, false);
    188         writeFile(targetDir + "/filtered/", "en", englishInfo, english, true, true);
    189 
    190         // TODO:
    191         // Replace {0}... with placeholders (Mostly done, but need better examples)
    192         // Replace datetime fields (MMM, L, ...) with placeholders
    193         // Skip items that we don't need translated (most language names, script names, deprecated region names, etc.
    194         // Add descriptions
    195         // Add pages with detailed descriptions, and links from the descriptions
    196         // Represent the items with count= as ICUSyntax
    197         // Filter items that we don't want to get translated, and add others that we need even if not in English
    198         // Rewire items that are in undistinguished attributes
    199         // Test each xml file for validity
    200         // Generate strings that let the user choose the placeholder style hh vs HH,...???
    201 
    202         Factory cldrFactory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, fileMatcherString);
    203         LanguageTagParser ltp = new LanguageTagParser();
    204 
    205         for (String file : cldrFactory2.getAvailable()) {
    206             if (SKIP_LOCALES.contains(file)) {
    207                 continue;
    208             }
    209 
    210             // skip all locales with regions (with certain exceptions)
    211             if (ltp.set(file).getRegion().length() != 0) {
    212                 if (!REGION_LOCALES.contains(file)) {
    213                     continue;
    214                 }
    215             }
    216 
    217             // skip anything without plural rules
    218             final PluralInfo plurals = supplementalDataInfo.getPlurals(file, false);
    219             if (plurals == null) {
    220                 System.out.println("Skipping " + file + ", no plural rules");
    221                 continue;
    222             }
    223 
    224             CLDRFile cldrFile = cldrFactory2.make(file, true);
    225             writeFile(targetDir + "/wsb/", file, englishInfo, cldrFile, false, false);
    226             writeFile(targetDir + "/wsb/filtered/", file, englishInfo, cldrFile, false, true);
    227             countFile.flush();
    228         }
    229         countFile.close();
    230         PrintWriter errorFile = FileUtilities.openUTF8Writer(targetDir + "/log/", "errors.txt");
    231         for (Entry<String, Set<String>> entry : path2errors.keyValuesSet()) {
    232             errorFile.println(entry);
    233         }
    234         errorFile.close();
    235         System.out.println("Errors: " + (errors + path2errors.size()));
    236     }
    237 
    238     private static void compareFiles(String fileMatcherString, Matcher contentMatcher, String targetDir,
    239         Factory cldrFactory1, CLDRFile english,
    240         EnglishInfo englishInfo) throws IOException {
    241         SubmittedPathFixer fixer = new SubmittedPathFixer();
    242         Factory cldrFactory2 = Factory.make(compareDirectory, fileMatcherString);
    243         PrintWriter output = null;
    244         PrintWriter log = FileUtilities.openUTF8Writer(targetDir + "/log/", "skipped.txt");
    245 
    246         for (String file : cldrFactory2.getAvailable()) {
    247             // System.out.println("Checking " + file);
    248             CLDRFile submitted = cldrFactory2.make(file, false);
    249             CLDRFile trunk = cldrFactory1.make(file, true);
    250             for (String path : With.in(submitted.iterator(null, submitted.getComparator()))) {
    251                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
    252                     continue;
    253                 }
    254                 String submittedValue = submitted.getStringValue(path);
    255                 if (contentMatcher != null && !contentMatcher.reset(submittedValue).matches()) {
    256                     continue;
    257                 }
    258                 PathStatus pathStatus = shouldSkipPath(path, submittedValue);
    259                 if (pathStatus == PathStatus.SKIP) {
    260                     continue;
    261                 }
    262 
    263                 // fix alt
    264                 String trunkPath = fixer.fix(path, false);
    265                 String trunkValue = trunk.getStringValue(trunkPath);
    266                 if (CharSequences.equals(submittedValue, trunkValue)) {
    267                     continue;
    268                 }
    269                 if (output == null) {
    270                     output = FileUtilities.openUTF8Writer(targetDir, file + ".txt");
    271                     output.println("ID\tEnglish\tSource\tRelease\tDescription");
    272                 }
    273                 String englishValue = english.getStringValue(trunkPath);
    274                 final PathInfo pathInfo = englishInfo.getPathInfo(trunkPath);
    275                 String description;
    276                 if (pathInfo == null) {
    277                     log.println(file + "\tDescription unavailable for " + trunkPath);
    278                     errors++;
    279                     String temp = fixer.fix(path, true);
    280                     englishInfo.getPathInfo(trunkPath);
    281                     continue;
    282                 } else {
    283                     description = pathInfo.getDescription();
    284                 }
    285                 long id = StringId.getId(trunkPath);
    286                 if (englishValue == null) {
    287                     log.println(file + "\tEmpty English for " + trunkPath);
    288                     errors++;
    289                     continue;
    290                 }
    291                 output.println(id + "\t" + ssquote(englishValue, false) + "\t" + ssquote(submittedValue, false) + "\t"
    292                     + ssquote(trunkValue, true) + "\t" + description);
    293             }
    294             if (output != null) {
    295                 output.close();
    296                 output = null;
    297             }
    298             log.flush();
    299         }
    300         log.close();
    301     }
    302 
    303     static Output<String[]> matches = new Output<String[]>();
    304     static List<String> failures = new ArrayList<String>();
    305     static Output<Finder> matcherFound = new Output<Finder>();
    306 
    /** Result of {@code shouldSkipPath}; it mirrors the rule values SKIP, KEEP and MAYBE of xmbSkip.txt. */
    enum PathStatus {
        SKIP,
        KEEP,
        MAYBE
    }
    310 
    311     public static PathStatus shouldSkipPath(String path, String value) {
    312         // skip if
    313         List<String> myFailures = null;
    314         if (false && path.contains("currencies") && path.contains("symbol")) {
    315             myFailures = failures;
    316         }
    317         String skipPath = pathFindRemover.get(path, null, matches, matcherFound, myFailures);
    318         if (myFailures != null && failures.size() != 0) {
    319             System.out.println("Failures\n\t" + CollectionUtilities.join(failures, "\n\t"));
    320             failures.clear();
    321         }
    322         if (skipPath == null || skipPath.equals("MAYBE")) {
    323             return PathStatus.MAYBE;
    324         } else if (skipPath.equals("VALUE")) {
    325             return value.equals(matches.value[1]) ? PathStatus.SKIP : PathStatus.MAYBE;
    326         } else if (skipPath.equals("SKIP")) {
    327             return PathStatus.SKIP;
    328         } else if (skipPath.equals("KEEP")) {
    329             return PathStatus.KEEP;
    330         }
    331         throw new IllegalArgumentException("Unexpected xmbSkip.txt value: " + skipPath);
    332     }
    333 
    334     private static String ssquote(String englishValue, boolean showRemoved) {
    335         if (englishValue == null) {
    336             return showRemoved ? "[removed]" : "[empty]";
    337         }
    338         englishValue = englishValue.replace("\"", "&quot;");
    339         return englishValue;
    340     }
    341 
    342     static class SubmittedPathFixer {
    343         private static final Pattern PATH_FIX = PatternCache.get("\\[@alt=\"" +
    344             "(?:proposed|((?!proposed)[-a-zA-Z0-9]*)-proposed)" +
    345             "-u\\d+-implicit[0-9.]+" +
    346             "(?:-proposed-u\\d+-implicit[0-9.]+)?" + // NOTE: we allow duplicated alt values because of a generation
    347             // bug.
    348             // -proposed-u971-implicit2.0
    349             "\"]");
    350         static Matcher pathFix = PATH_FIX.matcher("");
    351 
    352         public String fix(String path, boolean debug) {
    353             if (pathFix.reset(path).find()) {
    354                 if (debug) {
    355                     // debug in case we get a mismatch
    356                     String temp = "REGEX:\t" +
    357                         RegexUtilities.showMismatch(PATH_FIX, path.substring(pathFix.start(0)));
    358                 }
    359                 final String group = pathFix.group(1);
    360                 String replacement = group == null ? "" : "[@alt=\"" + group + "\"]";
    361                 String trunkPath = path.substring(0, pathFix.start(0)) + replacement + path.substring(pathFix.end(0));
    362                 // HACK because of change in CLDR defaults
    363                 if (trunkPath.startsWith("//ldml/numbers/symbols/")) {
    364                     trunkPath = "//ldml/numbers/symbols[@numberSystem=\"latn\"]/"
    365                         + trunkPath.substring("//ldml/numbers/symbols/".length());
    366                 }
    367                 return trunkPath;
    368             }
    369             return path;
    370         }
    371 
    372     }
    373 
    374     private static void showDefaultContents(String targetDir, CLDRFile english) throws IOException {
    375         PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "locales.txt");
    376         String[] locales = stock.split("\\|");
    377         Set<R2<String, String>> sorted = new TreeSet<R2<String, String>>();
    378         for (String locale : locales) {
    379             if (locale.isEmpty()) continue;
    380             String name = english.getName(locale);
    381             R2<String, String> row = Row.of(name, locale);
    382             sorted.add(row);
    383         }
    384         Set<String> defaultContents = supplementalDataInfo.getDefaultContentLocales();
    385 
    386         for (R2<String, String> row : sorted) {
    387             String locale = row.get1();
    388             String dlocale = getDefaultContentLocale(locale, defaultContents);
    389             out.println(row.get0() + "\t" + locale + "\t" + english.getName(dlocale) + "\t" + dlocale);
    390         }
    391         out.close();
    392     }
    393 
    394     private static String getDefaultContentLocale(String locale, Set<String> defaultContents) {
    395         String best = null;
    396         for (String s : defaultContents) {
    397             if (s.startsWith(locale)) {
    398                 if (best == null) {
    399                     best = s;
    400                 } else if (s.length() < best.length()) {
    401                     best = s;
    402                 }
    403             }
    404         }
    405         if (best == null) {
    406             return locale;
    407         }
    408         return best;
    409     }
    410 
    411     static final Pattern COUNT_OR_ALT_ATTRIBUTE = PatternCache.get("\\[@(count)=\"([^\"]*)\"]");
    412     static final Pattern PLURAL_XPATH = Pattern
    413         .compile("//ldml/(units/unit|numbers/(decimal|currency)Formats).*\\[@count=\"\\w+\"].*");
    414     static final Pattern SKIP_EXEMPLAR_TEST = PatternCache.get(
    415         "/(currencySpacing"
    416             + "|hourFormat"
    417             + "|exemplarCharacters"
    418             + "|pattern"
    419             + "|localizedPatternChars"
    420             + "|segmentations"
    421             + "|dateFormatItem"
    422             + "|references"
    423             + "|unitPattern"
    424             + "|intervalFormatItem"
    425             + "|localeDisplayNames/variants/"
    426             + "|commonlyUsed"
    427             + "|currency.*/symbol"
    428             + "|symbols/(exponential|nan))");
    429 
    430     static final Matcher skipExemplarTest = SKIP_EXEMPLAR_TEST.matcher("");
    431     static final UnicodeSet ASCII_LATIN = new UnicodeSet("[A-Za-z]").freeze();
    432     static final UnicodeSet LATIN = new UnicodeSet("[:sc=Latn:]").freeze();
    433 
    434     static final Matcher keepFromRoot = PatternCache.get("/(exemplarCity|currencies/currency.*/symbol)").matcher("");
    435     static final Matcher currencyDisplayName = Pattern
    436         .compile("/currencies/currency\\[@type=\"([^\"]*)\"]/displayName").matcher("");
    437 
    438     private static void writeFile(String targetDir, String localeId, EnglishInfo englishInfo, CLDRFile cldrFile,
    439         boolean isEnglish, boolean filter) throws IOException {
    440 
    441         String extension = "xml";
    442         XPathParts xpathParts = new XPathParts();
    443         Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
    444         Set<String> seenStarred = new HashSet<String>();
    445 
    446         Relation<String, Row.R2<PathInfo, String>> countItems = Relation.of(
    447             new TreeMap<String, Set<Row.R2<PathInfo, String>>>(), TreeSet.class);
    448         Matcher countMatcher = COUNT_OR_ALT_ATTRIBUTE.matcher("");
    449         int lineCount = 0;
    450         int wordCount = 0;
    451         int messageCount = 0;
    452 
    453         StringWriter buffer = new StringWriter();
    454         PrintWriter out1 = new PrintWriter(buffer);
    455         StringWriter buffer3 = new StringWriter();
    456         PrintWriter out3 = new PrintWriter(buffer3);
    457         UnicodeSet exemplars = getExemplars(cldrFile);
    458 
    459         for (PathInfo pathInfo : englishInfo) {
    460             if (false && pathInfo.id == 46139888945574604L) { // for debugging
    461                 System.out.println("?");
    462             }
    463             String path = pathInfo.getPath();
    464             String value;
    465             if (isEnglish) {
    466                 value = pathInfo.englishValue;
    467             } else {
    468                 value = cldrFile.getStringValue(path);
    469             }
    470             // Remove quotes from number formats (we'll put them back in during
    471             // post-processing).
    472             // TODO: we should actually call daip.processForDisplay() here, but
    473             // it does more stuff than we need it to do, e.g. stripping the
    474             // brackets from exemplarCharacters.
    475             if (DisplayAndInputProcessor.NUMBER_FORMAT_XPATH.matcher(path).matches()) {
    476                 value = value.replace("'", "");
    477             }
    478 
    479             // skip root if not English
    480             if (!isEnglish && value != null && !keepFromRoot.reset(path).find()) { // note that mismatched script will
    481                 // be checked later
    482                 String locale = cldrFile.getSourceLocaleID(path, null);
    483                 if (locale.equals("root")) {
    484                     reasonsToPaths.put("root", path + "\t" + value);
    485                     continue;
    486                 }
    487                 if (locale.equals(XMLSource.CODE_FALLBACK_ID)) {
    488                     reasonsToPaths.put("codeFallback", path + "\t" + value);
    489                     continue;
    490                 }
    491             }
    492             boolean hasPlurals = PLURAL_XPATH.matcher(path).matches();
    493             if (filter && !hasPlurals) {
    494                 String starred = pathInfo.getStarredPath();
    495                 if (seenStarred.contains(starred)) {
    496                     continue;
    497                 }
    498                 seenStarred.add(starred);
    499             }
    500             if (value == null) {
    501                 reasonsToPaths.put("missing", path + "	" + value);
    502                 continue;
    503             }
    504             if (!isEnglish) {
    505                 String fullPath = cldrFile.getFullXPath(path);
    506                 if (fullPath.contains("draft")) {
    507                     xpathParts.set(fullPath);
    508                     String draftValue = xpathParts.getAttributeValue(-1, "draft");
    509                     if (!draftValue.equals("contributed")) {
    510                         reasonsToPaths.put(draftValue, path + "\t" + value);
    511                         continue;
    512                     }
    513                 }
    514             }
    515             if (!isEnglish
    516                 && !exemplars.containsAll(value)
    517                 && !skipExemplarTest.reset(path).find()) {
    518                 // check for special cases in currency names. If the code itself occurs in the name, that's ok
    519                 // ldml/numbers/currencies/currency[@type="XXX"]/displayName
    520                 boolean bad = true;
    521                 if (currencyDisplayName.reset(path).find()) {
    522                     String code = currencyDisplayName.group(1);
    523                     String value2 = value.replace(code, "");
    524                     bad = !exemplars.containsAll(value2);
    525                 }
    526                 if (bad) {
    527                     UnicodeSet diff = new UnicodeSet().addAll(value).removeAll(exemplars);
    528                     reasonsToPaths.put("exemplars", path + "\t" + value + "\t" + diff);
    529                     continue;
    530                 }
    531             }
    532             // String fullPath = cldrFile.getStringValue(path);
    533             // //ldml/units/unit[@type="day"]/unitPattern[@count="one"]
    534             if (hasPlurals) {
    535                 countMatcher.reset(path).find();
    536                 String countLessPath = countMatcher.replaceAll("");
    537                 countItems.put(countLessPath, Row.of(pathInfo, value));
    538                 continue;
    539             }
    540             if (!isEnglish && pathInfo.changedEnglish) {
    541                 reasonsToPaths.put("changed-english", path);
    542             } else {
    543                 writePathInfo(out1, pathInfo, value, isEnglish);
    544                 messageCount++;
    545             }
    546             if (isEnglish) {
    547                 writeJavaInfo(out3, pathInfo.getStringId(), pathInfo.getPath(), value);
    548             }
    549             wordCount += pathInfo.wordCount;
    550             ++lineCount;
    551         }
    552         R2<Integer, Integer> lineWordCount = writeCountPathInfo(out1, out3, cldrFile.getLocaleID(), countItems,
    553             isEnglish, filter);
    554         messageCount += lineWordCount.get0();
    555         lineCount += lineWordCount.get0();
    556         wordCount += lineWordCount.get1();
    557         if (!filter && countItems.size() != lineWordCount.get0().intValue()) {
    558             System.out.println(localeId + "\t" + countItems.size() + "\t" + lineWordCount.get0().intValue());
    559         }
    560         out1.flush();
    561         out3.flush();
    562 
    563         String file = LanguageCodeConverter.toGoogleLocaleId(localeId);
    564         String localeName = englishInfo.getName(localeId);
    565         PrintWriter out = FileUtilities.openUTF8Writer(targetDir, file + "." + extension);
    566 
    567         if (isEnglish) {
    568             FileCopier.copy(GenerateXMB.class, "xmb-dtd.xml", out);
    569 //            FileUtilities.appendFile(GenerateXMB.class, "xmb-dtd.xml", out);
    570             out.println("<!-- " + localeName + " -->");
    571             out.println("<messagebundle class='" + projectId + "'> <!-- version: " + DTD_VERSION + ", date: " + DATE
    572                 + " -->");
    573             out.println(buffer.toString());
    574             out.println("</messagebundle>");
    575 
    576             PrintWriter out3File = FileUtilities.openUTF8Writer(targetDir, "IdToPath.java");
    577             out3File.println("package org.unicode.cldr.tool;");
    578             out3File.println();
    579             out3File.println("import java.util.HashMap;");
    580             out3File.println();
    581             out3File.println("/**");
    582             out3File.println(" * Autogenerated by GenerateXMB for use by ConvertXTB.");
    583             out3File.println(" * Do not manually edit this file.");
    584             out3File.println(" */");
    585             out3File.println("public class IdToPath {");
    586             out3File.println("  static final HashMap<String,String> map = new HashMap<String,String>();");
    587             out3File.println("  public static String getPath(String id) {");
    588             out3File.println("      return map.get(id);");
    589             out3File.println("  }");
    590             out3File.println("  static {");
    591             out3File.println("      String[][] data = {");
    592             out3File.println(buffer3);
    593             out3File.println("      };");
    594             out3File.println("      for (String[] pair : data) {");
    595             out3File.println("          map.put(pair[0], pair[1]);");
    596             out3File.println("      }");
    597             out3File.println("  }");
    598             out3File.println("}");
    599             out3File.close();
    600         } else {
    601 
    602 //            FileUtilities.appendFile(GenerateXMB.class, "wsb-dtd.xml", out);
    603             FileCopier.copy(GenerateXMB.class, "wsb-dtd.xml", out);
    604             out.println("<!-- " + localeName + " -->");
    605             out.println("<worldserverbundles lazarus_id='dummy' date='" + DATE + "'> <!-- version: " + DTD_VERSION
    606                 + " -->");
    607             out.println("  <worldserverbundle project_id='" + projectId + "' message_count='" + messageCount + "'>");
    608             out.println(buffer.toString());
    609             out.println("  </worldserverbundle>");
    610             out.println("</worldserverbundles>");
    611         }
    612         out.close();
    613         QuickCheck.check(new File(targetDir, file + "." + extension));
    614         if (!filter) {
    615             countFile.println(file + "\t" + lineCount + "\t" + wordCount);
    616         }
    617         if (!isEnglish && !filter) {
    618             writeReasons(reasonsToPaths, targetDir, file);
    619         }
    620     }
    621 
    622     private static void writeJavaInfo(PrintWriter out3, String id, String path, String value) {
    623         out3.println("              {\"" + id + "\",\"" + path.replace("\"", "\\\"") + "\",\""
    624             + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"},");
    625     }
    626 
    627     private static UnicodeSet getExemplars(CLDRFile cldrFile) {
    628         UnicodeSet exemplars = cldrFile.getExemplarSet("", CLDRFile.WinningChoice.WINNING);
    629         boolean isLatin = exemplars.containsSome(ASCII_LATIN);
    630         exemplars.addAll(CheckExemplars.AlwaysOK);
    631         UnicodeSet auxExemplars = cldrFile.getExemplarSet("auxiliary", CLDRFile.WinningChoice.WINNING);
    632         if (auxExemplars != null) {
    633             exemplars.addAll(auxExemplars);
    634         }
    635         if (!isLatin) {
    636             exemplars.removeAll(LATIN);
    637         }
    638         exemplars.freeze();
    639         return exemplars;
    640     }
    641 
    /** Matches a {@code [@count="..."]} attribute in a path; group 1 captures the text between the quotes. */
    static final Pattern COUNT_ATTRIBUTE = PatternCache.get("\\[@count=\"([^\"]*)\"]");
    /** Matches {@code decimalFormat} or {@code numberFormat} in a path; group 1 is the word before "Format". */
    static final Pattern PLURAL_NUMBER = PatternCache.get("(decimal|number)Format");
    644 
    645     private static Row.R2<Integer, Integer> writeCountPathInfo(PrintWriter out, PrintWriter out3, String locale,
    646         Relation<String, R2<PathInfo, String>> countItems, boolean isEnglish, boolean filter) {
    647         Matcher m = COUNT_ATTRIBUTE.matcher("");
    648         int wordCount = 0;
    649         PluralInfo pluralInfo = supplementalDataInfo.getPlurals(locale);
    650         int lineCount = 0;
    651         Set<String> errorSet = new LinkedHashSet<String>();
    652         for (Entry<String, Set<R2<PathInfo, String>>> entry : countItems.keyValuesSet()) {
    653             String countLessPath = entry.getKey();
    654             Map<String, String> fullValues = new TreeMap<String, String>();
    655             PathInfo pathInfo = null;
    656             String value = null;
    657             for (R2<PathInfo, String> entry2 : entry.getValue()) {
    658                 PathInfo pathInfoN = entry2.get0();
    659                 m.reset(pathInfoN.getPath()).find();
    660                 String count = m.group(1);
    661                 if (count.equals("other")) {
    662                     pathInfo = pathInfoN;
    663                 }
    664                 value = entry2.get1();
    665                 fullValues.put(count, value);
    666             }
    667             if (pathInfo == null) {
    668                 continue;
    669             }
    670             if (fullValues.size() < 2) {
    671                 // if we don't have two count values, skip
    672                 System.out.println(locale + "\tMust have 2 count values: " + entry.getKey());
    673                 continue;
    674             }
    675             String fullPlurals = showPlurals(fullValues, locale, pathInfo, pluralInfo, isEnglish, errorSet);
    676             if (fullPlurals == null) {
    677                 System.out.println(locale + "\tCan't format plurals for: " + entry.getKey() + "\t" + errorSet);
    678                 errors++;
    679                 continue;
    680             }
    681 
    682             out.println();
    683             out.println("    <!--    "
    684                 // + prettyPath.getPrettyPath(pathInfo.getPath(), false) + " ;    "
    685                 + countLessPath + "    -->");
    686             out.println("    <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'");
    687             out.println("     >" + fullPlurals + "</msg>");
    688             // Use the last plural value in the loop because we only need it for example purposes.
    689             writeJavaInfo(out3, pathInfo.getStringId(), countLessPath, value);
    690             // if (!isEnglish || pathInfo.placeholderReplacements != null) {
    691             // out.println("\t<!-- English original:\t" + pathInfo.getEnglishValue() + "\t-->");
    692             // }
    693             out.flush();
    694             ++lineCount;
    695             wordCount += pathInfo.wordCount * 3;
    696             if (filter) {
    697                 break;
    698             }
    699         }
    700         return Row.of(lineCount, wordCount);
    701     }
    702 
    /**
     * Plural selectors written by {@code showPlurals}, in output order. Entries starting with {@code =}
     * are explicit numeric cases; the rest are plural category names.
     */
    static final String[] PLURAL_KEYS = { "=0", "=1", "zero", "one", "two", "few", "many", "other" };
    /**
     * Count values that the {@code EnglishInfo} constructor substitutes into each {@code [@count="..."]}
     * path to synthesize additional paths.
     */
    static final String[] EXTRA_PLURAL_KEYS = { "0", "1", "zero", "one", "two", "few", "many" };
    705 
    706     private static String showPlurals(Map<String, String> values,
    707         String locale, PathInfo pathInfo, PluralInfo pluralInfo,
    708         boolean isEnglish, Set<String> errorSet) {
    709         errorSet.clear();
    710         /*
    711          * Desired output for English XMB
    712          * <msg desc=
    713          * "[ICU Syntax] Plural forms for a number of hours. These are special messages: before translating, see cldr.org/translation/plurals."
    714          * >
    715          * {LENGTH, select,
    716          * abbreviated {
    717          * {NUMBER_OF_HOURS, plural,
    718          * =0 {0 hrs}
    719          * =1 {1 hr}
    720          * zero {# hrs}
    721          * one {# hrs}
    722          * two {# hrs}
    723          * few {# hrs}
    724          * many {# hrs}
    725          * other {# hrs}}}
    726          * full {
    727          * {NUMBER_OF_HOURS, plural,
    728          * =0 {0 hours}
    729          * =1 {1 hour}
    730          * zero {# hours}
    731          * one {# hours}
    732          * two {# hours}
    733          * few {# hours}
    734          * many {# hours}
    735          * other {# hours}}}}
    736          * </msg>
    737          *
    738          * NOTE: For the WSB, the format has to match the following, WITHOUT LFs
    739          *
    740          * <msg id='1431840205484292448' desc='[ICU Syntax] who is viewing? This message requires special attention.
    741          * Please follow the instructions here:
    742          * https://sites.google.com/a/google.com/localization-info-site/Home/training/icusyntax'>
    743          * <ph name='[PLURAL_NUM_USERS_OFFSET_1]' ex='Special placeholder used in [ICU Syntax] messages, see
    744          * instructions page.'/>
    745          * <ph name='[=0]'/>No one else is viewing.
    746          * <ph name='[=1]'/><ph name='USERNAME' ex='Bob'/> is viewing.
    747          * <ph name='[=2]'/><ph name='USERNAME' ex='Bob'/> and one other are viewing.
    748          * <ph name='[ZERO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
    749          * <ph name='[ONE]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
    750          * <ph name='[TWO]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
    751          * <ph name='[FEW]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
    752          * <ph name='[MANY]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
    753          * <ph name='[OTHER]'/><ph name='USERNAME' ex='Bob'/> and # others are viewing.
    754          * <ph name='[END_PLURAL]'/>
    755          * </msg>
    756          */
    757         Matcher matcher = PLURAL_NUMBER.matcher(pathInfo.getPath());
    758         String var = null;
    759         if (matcher.find()) {
    760             // Plural doesn't use placeholders so create a label.
    761             var = matcher.group(1).toUpperCase() + "_NUMBER";
    762         } else {
    763             var = pathInfo.getFirstVariable();
    764         }
    765 
    766         StringBuilder result = new StringBuilder();
    767         if (isEnglish) {
    768             result.append('{')
    769                 // .append("PLURAL_")
    770                 .append(var).append(",plural,");
    771         } else {
    772             result.append("<ph name='[PLURAL_").append(var).append("]'/>"); // ex='Special placeholder used in [ICU
    773             // Syntax] messages, see instructions page.'
    774         }
    775         for (String key : PLURAL_KEYS) {
    776             String value;
    777             String coreKey = key.startsWith("=") ? key.substring(1, 2) : key;
    778             value = values.get(coreKey);
    779             if (value == null) {
    780                 if (key.startsWith("=")) {
    781                     String stringCount = key.substring(1);
    782                     // handle both =x case, and the category
    783                     int intCount = Integer.parseInt(stringCount);
    784                     Count count = pluralInfo.getCount(intCount);
    785                     value = values.get(count.toString());
    786                     if (value == null) {
    787                         errorSet.add("Bad key/value " + key + "='" + value + "' in " + values);
    788                         return null;
    789                     }
    790                     value = value.replace("{0}", stringCount);
    791                 } else {
    792                     value = values.get("other");
    793                     if (value == null) {
    794                         errorSet.add("No 'other' value in " + values);
    795                         return null;
    796                     }
    797                 }
    798             }
    799             String newValue = MessageFormat.format(MessageFormat.autoQuoteApostrophe(value),
    800                 new Object[] { key.startsWith("=") ? key.substring(1, 2) : "#" });
    801             PlaceholderType type = isEnglish ? PlaceholderType.BRACES : PlaceholderType.XML;
    802             newValue = pathInfo.transformValue(newValue, type);
    803             if (isEnglish) {
    804                 result.append("\n            ").append(key).append(" {").append(newValue).append('}');
    805             } else {
    806                 String prefix = key.toUpperCase(Locale.ENGLISH);
    807                 result.append("<!--\n        --><ph name='[").append(prefix).append("]'/>").append(newValue);
    808             }
    809         }
    810         if (isEnglish) {
    811             result.append('}');
    812         } else {
    813             result.append("<!--\n        --><ph name='[END_PLURAL]'/>");
    814         }
    815         return result.toString();
    816     }
    817 
    818     private static void writePathInfo(PrintWriter out, PathInfo pathInfo, String value, boolean isEnglish) {
    819         out.println();
    820         out.println("    <!--    " + pathInfo.getPath() + "    -->");
    821         out.println("    <msg id='" + pathInfo.getStringId() + "' desc='" + pathInfo.description + "'");
    822         PlaceholderType type = isEnglish ? PlaceholderType.XML_EXAMPLE : PlaceholderType.XML;
    823         String transformValue = pathInfo.transformValue(value, type);
    824         out.println("     >" + transformValue + "</msg>");
    825         value = TransliteratorUtilities.toHTML.transform(value);
    826         if (!value.equals(transformValue) && (!isEnglish || pathInfo.placeholders != null)) {
    827             out.println("    <!-- English original:    " + value + "    -->");
    828         }
    829         out.flush();
    830     }
    831 
    832     private static void writeReasons(Relation<String, String> reasonsToPaths, String targetDir, String filename)
    833         throws IOException {
    834         targetDir += "/skipped/";
    835         filename += ".txt";
    836         PrintWriter out = FileUtilities.openUTF8Writer(targetDir, filename);
    837         out.println("# " + DATE);
    838         for (Entry<String, Set<String>> reasonToSet : reasonsToPaths.keyValuesSet()) {
    839             for (String path : reasonToSet.getValue()) {
    840                 out.println(reasonToSet.getKey() + "    " + path);
    841             }
    842         }
    843         out.close();
    844     }
    845 
    846     static class PathInfo implements Comparable<PathInfo> {
    847         private static final Pattern PLACEHOLDER = PatternCache.get("\\{(\\d)}");
    848 
    849         private final String path;
    850         private final Long id;
    851         private final String stringId;
    852         private final String englishValue;
    853         private final boolean changedEnglish;
    854         private final Map<String, PlaceholderInfo> placeholders;
    855         private final String description;
    856         private final String starredPath;
    857         private final int wordCount;
    858 
    859         private static final BreakIterator bi = BreakIterator.getWordInstance(ULocale.ENGLISH);
    860         private static final UnicodeSet ALPHABETIC = new UnicodeSet("[:Alphabetic:]");
    861 
    862         public PathInfo(String path, String englishValue, boolean changedEnglish,
    863             Map<String, PlaceholderInfo> placeholders,
    864             String description, String starredPath) {
    865             if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) {
    866                 int x = 0;
    867             }
    868             if (description == null) {
    869                 path2errors.put(path, "missing description");
    870             }
    871             this.path = path;
    872             long id = StringId.getId(path);
    873             this.id = id;
    874             stringId = String.valueOf(id);
    875             this.englishValue = englishValue;
    876             this.changedEnglish = changedEnglish;
    877             this.placeholders = placeholders;
    878             this.description = description == null ? null : description.intern();
    879             this.starredPath = starredPath;
    880             // count words
    881             int tempCount = 0;
    882             bi.setText(englishValue);
    883             int start = bi.first();
    884             for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
    885                 String word = englishValue.substring(start, end);
    886                 if (ALPHABETIC.containsSome(word)) {
    887                     ++tempCount;
    888                 }
    889             }
    890             wordCount = tempCount == 0 ? 1 : tempCount;
    891         }
    892 
    893         public String getFirstVariable() {
    894             // ... name='FIRST_PART_OF_TEXT' ...
    895             PlaceholderInfo info = placeholders.get("{0}");
    896             if (info == null) {
    897                 throw new IllegalArgumentException("Missing {0} for " + this);
    898             }
    899             return info.name;
    900         }
    901 
    902         public String getPath() {
    903             return path;
    904         }
    905 
    906         public Long getId() {
    907             return id;
    908         }
    909 
    910         public String getStringId() {
    911             return stringId;
    912         }
    913 
    914         public String getEnglishValue() {
    915             return englishValue;
    916         }
    917 
    918         public String getDescription() {
    919             return description;
    920         }
    921 
    922         public String getStarredPath() {
    923             return starredPath;
    924         }
    925 
    926         public Map<String, String> getPlaceholderReplacementsToOriginal() {
    927             if (placeholders == null) return null;
    928             Map<String, String> placeholderOutput = new LinkedHashMap<String, String>();
    929             for (String id : placeholders.keySet()) {
    930                 placeholderOutput.put(id, getPlaceholderWithExample(id));
    931             }
    932             return placeholderOutput;
    933         }
    934 
    935         private String getPlaceholderWithExample(String placeholder) {
    936             PlaceholderInfo info = placeholders.get(placeholder);
    937             // <ph name='x'><ex>xxx</ex>yyy</ph>
    938             return "<ph name='" + info.name + "'><ex>" + info.example + "</ex>" + placeholder + "</ph>";
    939         }
    940 
    941         // static DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
    942 
    943         private String transformValue(String value, PlaceholderType type) {
    944             value = TransliteratorUtilities.toHTML.transform(value);
    945             if (placeholders == null) return value;
    946 
    947             String placeholderFormat = "";
    948             switch (type) {
    949             case BRACES:
    950                 placeholderFormat = "'{'{0}'}'";
    951                 break;
    952             case XML:
    953                 placeholderFormat = "<ph name=''[{0}]'' />";
    954                 break;
    955             case XML_EXAMPLE:
    956                 placeholderFormat = "<ph name=''{0}''><ex>{1}</ex>'{'{2}'}'</ph>";
    957                 break;
    958             }
    959             Matcher matcher = PLACEHOLDER.matcher(value);
    960             StringBuffer buffer = new StringBuffer();
    961             int start = 0;
    962             while (matcher.find()) {
    963                 buffer.append(value.substring(start, matcher.start()));
    964                 PlaceholderInfo info = placeholders.get(matcher.group());
    965                 buffer.append(MessageFormat.format(placeholderFormat,
    966                     new Object[] { info.name, info.example, matcher.group(1) }));
    967                 start = matcher.end();
    968             }
    969             buffer.append(value.substring(start));
    970             return buffer.toString();
    971         }
    972 
    973         private String replacePlaceholders(String value, String placeholderStart, String placeholderEnd) {
    974             Matcher matcher = PLACEHOLDER.matcher(value);
    975             StringBuffer buffer = new StringBuffer();
    976             int start = 0;
    977             while (matcher.find()) {
    978                 buffer.append(value.substring(start, matcher.start()));
    979                 String name = placeholders.get(matcher.group()).name;
    980                 buffer.append(placeholderStart).append(name).append(placeholderEnd);
    981                 start = matcher.end();
    982             }
    983             buffer.append(value.substring(start));
    984             return buffer.toString();
    985         }
    986 
    987         @Override
    988         public int compareTo(PathInfo arg0) {
    989             return path.compareTo(arg0.path);
    990         }
    991 
    992         public String toString() {
    993             return path;
    994         }
    995     }
    996 
    997     static class EnglishInfo implements Iterable<PathInfo> {
    998 
    999         final Map<String, PathInfo> pathToPathInfo = new TreeMap<String, PathInfo>();
   1000         final Map<Long, PathInfo> longToPathInfo = new HashMap<Long, PathInfo>();
   1001         final CLDRFile english;
   1002 
   1003         PathInfo getPathInfo(long hash) {
   1004             return longToPathInfo.get(hash);
   1005         }
   1006 
   1007         public String getName(String localeId) {
   1008             return english.getName(localeId);
   1009         }
   1010 
   1011         PathInfo getPathInfo(String path) {
   1012             return pathToPathInfo.get(path);
   1013         }
   1014 
   1015         EnglishInfo(String targetDir, CLDRFile english, CLDRFile root) throws Exception {
   1016 
   1017             Map<String, String> oldPathValueMap = ReadXMB.load(CLDRPaths.BASE_DIRECTORY +
   1018                 "/cldr-tools/org/unicode/cldr/unittest/data/xmb/",
   1019                 "en.xml");
   1020 
   1021             PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
   1022 
   1023             this.english = english;
   1024             // we don't want the fully resolved paths, but we do want the direct inheritance from root.
   1025             //Status status = new Status();
   1026             Map<String, List<Set<String>>> starredPaths = new TreeMap<String, List<Set<String>>>();
   1027 
   1028             HashSet<String> metazonePaths = new HashSet<String>();
   1029             // ^//ldml/dates/timeZoneNames/metazone\[@type="([^"]*)"]
   1030             for (MetazoneInfo metazoneInfo : MetazoneInfo.METAZONE_LIST) {
   1031                 for (String item : metazoneInfo.getTypes()) {
   1032                     String path = "//ldml/dates/timeZoneNames/metazone[@type=\"" + metazoneInfo.metazoneId + "\"]"
   1033                         + item;
   1034                     metazonePaths.add(path);
   1035                 }
   1036             }
   1037 
   1038             // TODO add short countries
   1039             HashSet<String> extraLanguages = new HashSet<String>();
   1040             // ldml/localeDisplayNames/languages/language[@type=".*"]
   1041 
   1042             for (String langId : PathDescription.EXTRA_LANGUAGES) {
   1043                 String langPath = "//ldml/localeDisplayNames/languages/language[@type=\"" + langId + "\"]";
   1044                 extraLanguages.add(langPath);
   1045             }
   1046 
   1047             Set<String> sorted = Builder.with(new TreeSet<String>())
   1048                 .addAll(english)
   1049                 .removeAll(
   1050                     new Transform<String, Boolean>() {
   1051                         public Boolean transform(String source) {
   1052                             return source.startsWith("//ldml/dates/timeZoneNames/metazone") ? Boolean.TRUE
   1053                                 : Boolean.FALSE;
   1054                         }
   1055                     })
   1056                 .get();
   1057             sorted.addAll(metazonePaths);
   1058             if (DEBUG) {
   1059                 TreeSet<String> diffs = new TreeSet<String>(extraLanguages);
   1060                 diffs.removeAll(sorted);
   1061                 System.out.println(diffs);
   1062             }
   1063             sorted.addAll(extraLanguages);
   1064 
   1065             // add the extra Count items.
   1066             Map<String, String> extras = new HashMap<String, String>();
   1067             Matcher m = COUNT_ATTRIBUTE.matcher("");
   1068 
   1069             for (String path : sorted) {
   1070                 if (path.contains("[@count=\"")) {
   1071                     m.reset(path).find();
   1072                     for (String key : EXTRA_PLURAL_KEYS) {
   1073                         String path2 = path.substring(0, m.start(1)) + key + path.substring(m.end(1));
   1074                         extras.put(path2, path);
   1075                     }
   1076                 }
   1077                 // if (path.contains("ellipsis")) {
   1078                 // System.out.println(path);
   1079                 // }
   1080             }
   1081             sorted.addAll(extras.keySet());
   1082 
   1083             Relation<String, String> reasonsToPaths = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
   1084             Set<String> missingDescriptions = new TreeSet<String>();
   1085             //Output<String[]> pathArguments = new Output<String[]>();
   1086 
   1087             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en");
   1088             RegexLookup<Boolean> coverageAllow = new RegexLookup<Boolean>()
   1089                 .add("^//ldml/localeDisplayNames/keys/key", true)
   1090                 .add("^//ldml/localeDisplayNames/languages/language\\[@type=\"(jv|zxx|gsw|eo)\"]", true)
   1091                 .add("^//ldml/localeDisplayNames/scripts/script", true)
   1092                 .add("^//ldml/localeDisplayNames/types/type", true)
   1093                 .add(
   1094                     "^//ldml/dates/calendars/calendar\\[@type=\"[^\"]*\"]/dayPeriods/dayPeriodContext\\[@type=\"format\"]",
   1095                     true);
   1096 
   1097             // TODO: for each count='other' path, add the other keywords and values
   1098             PathDescription pathDescription = new PathDescription(GenerateXMB.supplementalDataInfo, english, extras,
   1099                 starredPaths, PathDescription.ErrorHandling.SKIP);
   1100 
   1101             for (String path : sorted) {
   1102                 if (DEBUG_PATH != null && path.contains(DEBUG_PATH)) {
   1103                     int x = 0;
   1104                 }
   1105                 String value = english.getStringValue(path);
   1106                 Level level = coverageLevel.getLevel(path);
   1107                 if (value == null) {
   1108                     value = "[EMPTY]";
   1109                     addSkipReasons(reasonsToPaths, "empty-value", level, path, value);
   1110                     continue;
   1111                 }
   1112                 if (pathMatcher != null
   1113                     && !pathMatcher.reset(path).find()) {
   1114                     addSkipReasons(reasonsToPaths, "path-parameter", level, path, value);
   1115                     continue;
   1116                 }
   1117                 PathStatus pathStatus = shouldSkipPath(path, value);
   1118                 if (pathStatus == PathStatus.SKIP) {
   1119                     addSkipReasons(reasonsToPaths, "path-remove", level, path, value);
   1120                     continue;
   1121                 }
   1122 
   1123                 if (level.compareTo(Level.MODERN) > 0 && pathStatus != PathStatus.KEEP) {
   1124                     if (coverageAllow.get(path) == null) { // HACK
   1125                         addSkipReasons(reasonsToPaths, "coverage", level, path, value);
   1126                         continue;
   1127                     } else {
   1128                         addSkipReasons(reasonsToPaths, "coverage*", level, path, value);
   1129                         continue;
   1130                         // System.out.println("Not skipping " + path);
   1131                     }
   1132                 }
   1133 
   1134                 String description = pathDescription.getDescription(path, value, level, null);
   1135                 EnumSet<PathDescription.Status> descriptionStatus = pathDescription.getStatus();
   1136                 if (!descriptionStatus.isEmpty()) {
   1137                     addSkipReasons(reasonsToPaths, descriptionStatus.toString(), level, path, value);
   1138                     description = null;
   1139                 } else {
   1140                     description = "[ICU CLDR] " + description;
   1141                 }
   1142 
   1143                 String oldValue = oldPathValueMap.get(path);
   1144                 boolean changedEnglish = !value.equals(oldValue);
   1145                 PathInfo row = new PathInfo(path, value, changedEnglish, patternPlaceholders.get(path), description,
   1146                     pathDescription.getStarredPathOutput());
   1147 
   1148                 if (description == PathDescription.MISSING_DESCRIPTION) {
   1149                     missingDescriptions.add(pathDescription.getStarredPathOutput());
   1150                 }
   1151 
   1152                 Long hash = row.getId();
   1153                 if (longToPathInfo.containsKey(hash)) {
   1154                     throw new IllegalArgumentException("Id collision for "
   1155                         + path + " and " + longToPathInfo.get(hash).getPath());
   1156                 }
   1157                 pathToPathInfo.put(path, row);
   1158                 longToPathInfo.put(hash, row);
   1159                 if (value.contains("{0}") && patternPlaceholders.get(path) == null) {
   1160                     System.out.println("ERROR, no placeholders for {0}...: " + path + " ; " + value);
   1161                 }
   1162             }
   1163 
   1164             PrintWriter out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-paths.txt");
   1165             out.println("# " + DATE);
   1166             for (Entry<String, List<Set<String>>> starredPath : starredPaths.entrySet()) {
   1167                 out.println(starredPath.getKey() + "\t\t" + starredPath.getValue());
   1168             }
   1169             out.close();
   1170             out = FileUtilities.openUTF8Writer(targetDir + "/log/", "en-missingDescriptions.txt");
   1171             out.println("# " + DATE);
   1172             for (String starredPath : missingDescriptions) {
   1173                 // ^//ldml/dates/timeZoneNames/zone\[@type=".*"]/exemplarCity ; ROOT timezone ; The name of a city in:
   1174                 // {0}. See cldr.org/xxxx.
   1175                 out.println(toRegexPath(starredPath) + "\t;\tDESCRIPTION\t" + starredPaths.get(starredPath));
   1176             }
   1177             out.close();
   1178             writeReasons(reasonsToPaths, targetDir, "en");
   1179         }
   1180 
   1181         private String toRegexPath(String starredPath) {
   1182             String result = starredPath.replace("[", "\\[");
   1183             result = result.replace("\".*\"", "\"([^\"]*)\"");
   1184             return "^" + result;
   1185         }
   1186 
   1187         @Override
   1188         public Iterator<PathInfo> iterator() {
   1189             return pathToPathInfo.values().iterator();
   1190         }
   1191     }
   1192 
   1193     static void addSkipReasons(Relation<String, String> reasonsToPaths, String descriptionStatus, Level level,
   1194         String path, String value) {
   1195         reasonsToPaths.put(descriptionStatus + "\t" + level, path + "\t" + value);
   1196     }
   1197 
   1198     // Get Date-Time in milliseconds
   1199     private static long getDateTimeinMillis(int year, int month, int date) {
   1200         Calendar cal = Calendar.getInstance();
   1201         cal.set(year, month, date);
   1202         return cal.getTimeInMillis();
   1203     }
   1204 
    /**
     * Start of the sampled time range.
     * NOTE(review): Calendar months are zero-based, so month 1 is February and day 0 rolls back to
     * the last day of January 2000 - confirm this is the intended date.
     */
    static final long START_TIME = getDateTimeinMillis(2000, 1, 0);
    /** End (exclusive) of the sampled time range; same month/day convention as {@code START_TIME}, year 2015. */
    static final long END_TIME = getDateTimeinMillis(2015, 1, 0);
    /** Sampling step of 15 minutes, in milliseconds. */
    static final long DELTA_TIME = 15 * 60 * 1000;
    /** 90 days in milliseconds; the step used when probing zones for daylight time. */
    static final long MIN_DAYLIGHT_PERIOD = 90L * 24 * 60 * 60 * 1000;
   1209 
   1210     static final Set<String> HAS_DAYLIGHT;
   1211     static {
   1212         Set<String> hasDaylightTemp = new HashSet<String>();
   1213         Date date = new Date();
   1214         main: for (String zoneId : sc.getCanonicalTimeZones()) {
   1215             TimeZone zone = TimeZone.getTimeZone(zoneId);
   1216             for (long time = START_TIME + MIN_DAYLIGHT_PERIOD; time < END_TIME; time += MIN_DAYLIGHT_PERIOD) {
   1217                 date.setTime(time);
   1218                 if (zone.inDaylightTime(date)) {
   1219                     hasDaylightTemp.add(zoneId);
   1220                     if (false && !zone.useDaylightTime()) {
   1221                         System.out.println(zoneId + "\tuseDaylightTime()==false, but \tinDaylightTime(/" + date
   1222                             + "/)==true");
   1223                     }
   1224                     continue main;
   1225                 }
   1226             }
   1227         }
   1228         HAS_DAYLIGHT = Collections.unmodifiableSet(hasDaylightTemp);
   1229     }
   1230 
   1231     static final Set<String> SINGULAR_COUNTRIES;
   1232 
   1233     private static PrintWriter countFile;
   1234     static {
   1235         // start with certain special-case countries
   1236         Set<String> singularCountries = new HashSet<String>(
   1237             Arrays.asList("CL EC ES NZ PT AQ FM GL KI UM PF".split(" ")));
   1238 
   1239         Map<String, Set<String>> countryToZoneSet = sc.getCountryToZoneSet();
   1240 
   1241         main: for (Entry<String, Set<String>> countryZones : countryToZoneSet.entrySet()) {
   1242             String country = countryZones.getKey();
   1243             if (country.equals("001")) {
   1244                 continue;
   1245             }
   1246             Set<String> zones = countryZones.getValue();
   1247             if (zones.size() == 1) {
   1248                 singularCountries.add(country);
   1249                 continue;
   1250             }
   1251             // make a set of sets
   1252             List<TimeZone> initial = new ArrayList<TimeZone>();
   1253             for (String s : zones) {
   1254                 initial.add(TimeZone.getTimeZone(s));
   1255             }
   1256             // now cycle through the times and see if we find any differences
   1257             for (long time = START_TIME; time < END_TIME; time += DELTA_TIME) {
   1258                 int firstOffset = Integer.MIN_VALUE;
   1259                 for (TimeZone zone : initial) {
   1260                     int offset = zone.getOffset(time);
   1261                     if (firstOffset == Integer.MIN_VALUE) {
   1262                         firstOffset = offset;
   1263                     } else {
   1264                         if (firstOffset != offset) {
   1265                             if (false)
   1266                                 System.out.println(country
   1267                                     + " Difference at: " + new Date(time)
   1268                                     + ", " + zone.getDisplayName() + " " + (offset / 1000.0 / 60 / 60)
   1269                                     + ", " + initial.iterator().next().getDisplayName() + " "
   1270                                     + (firstOffset / 1000.0 / 60 / 60));
   1271                             continue main;
   1272                         }
   1273                     }
   1274                 }
   1275             }
   1276             singularCountries.add(country);
   1277         }
   1278         SINGULAR_COUNTRIES = Collections.unmodifiableSet(singularCountries);
   1279     }
   1280 
   1281     static final class MetazoneInfo {
   1282 
   1283         /**
   1284          * @param metazoneId
   1285          * @param singleCountry
   1286          * @param hasDaylight
   1287          * @param zonesForCountry
   1288          * @param regionToZone
   1289          */
   1290         public MetazoneInfo(String metazoneId, String golden, boolean singleCountry, boolean hasDaylight) {
   1291             this.golden = golden;
   1292             this.metazoneId = metazoneId;
   1293             this.singleCountry = singleCountry;
   1294             this.hasDaylight = hasDaylight;
   1295         }
   1296 
   1297         static final String[] GENERIC = { "/long/generic",
   1298             // "/short/generic"
   1299         };
   1300         static final String[] DAYLIGHT = { "/long/generic", "/long/standard", "/long/daylight",
   1301             // "/short/generic", "/short/standard", "/short/daylight"
   1302         };
   1303 
   1304         public String[] getTypes() {
   1305             return hasDaylight ? DAYLIGHT : GENERIC;
   1306         }
   1307 
   1308         private final String metazoneId;
   1309         private final String golden;
   1310         private final boolean singleCountry;
   1311         private final boolean hasDaylight;
   1312 
   1313         static final List<MetazoneInfo> METAZONE_LIST;
   1314         static {
   1315             // Set<String> zones = supplementalDataInfo.getCanonicalTimeZones();
   1316             ArrayList<MetazoneInfo> result = new ArrayList<MetazoneInfo>();
   1317 
   1318             Map<String, String> zoneToCountry = sc.getZoneToCounty();
   1319 
   1320             Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone();
   1321             for (String metazone : supplementalDataInfo.getAllMetazones()) {
   1322                 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone);
   1323                 String golden = regionToZone.get("001");
   1324                 if (golden == null) {
   1325                     throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone);
   1326                 }
   1327                 String region = zoneToCountry.get(golden);
   1328                 boolean isSingleCountry = SINGULAR_COUNTRIES.contains(region);
   1329                 if (isSingleCountry) {
   1330                     continue;
   1331                 }
   1332 
   1333                 // TimeZone goldenZone = TimeZone.getTimeZone(golden);
   1334 
   1335                 Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(golden);
   1336                 if (metazoneRanges == null) {
   1337                     throw new IllegalArgumentException("Missing golden zone " + metazone + ", " + regionToZone);
   1338                 }
   1339                 MetazoneInfo item = new MetazoneInfo(metazone, golden, isSingleCountry, HAS_DAYLIGHT.contains(golden));
   1340                 result.add(item);
   1341             }
   1342             METAZONE_LIST = Collections.unmodifiableList(result);
   1343         }
   1344 
   1345         public String toString() {
   1346             return sc.getZoneToCounty().get(golden)
   1347                 + "\t" + metazoneId
   1348                 + "\t" + golden
   1349                 + "\t" + (singleCountry ? "singleCountry" : "")
   1350                 + "\t" + (hasDaylight ? "useDaylightTime" : "")
   1351             // + ": " + zonesForCountry
   1352             // + "\t" + regionToZone;
   1353             ;
   1354         }
   1355     }
   1356 
   1357     static void showMetazoneInfo() {
   1358         System.out.println("\nZones in multiple metazones\n");
   1359 
   1360         for (String zone : sc.getCanonicalTimeZones()) {
   1361             Set<SupplementalDataInfo.MetaZoneRange> metazoneRanges = supplementalDataInfo.getMetaZoneRanges(zone);
   1362             if (metazoneRanges == null) {
   1363                 System.out.println("Zone doesn't have metazone! " + zone);
   1364                 continue;
   1365             }
   1366             if (metazoneRanges.size() != 1) {
   1367                 for (MetaZoneRange range : metazoneRanges) {
   1368                     System.out.println(zone + ":\t" + range);
   1369                 }
   1370                 System.out.println();
   1371             }
   1372         }
   1373 
   1374         System.out.println("\nMetazoneInfo\n");
   1375 
   1376         for (boolean singleCountry : new boolean[] { false }) {
   1377             for (boolean hasDaylight : new boolean[] { false, true }) {
   1378                 for (MetazoneInfo mzone : MetazoneInfo.METAZONE_LIST) {
   1379                     if (mzone.hasDaylight != hasDaylight) continue;
   1380                     if (mzone.singleCountry != singleCountry) continue;
   1381                     System.out.println(mzone);
   1382                 }
   1383             }
   1384         }
   1385     }
   1386 
   1387     private static void displayWsb(String file, EnglishInfo info) {
   1388         try {
   1389             String[] parts = file.split("/");
   1390             ULocale locale = new ULocale(parts[parts.length - 2]);
   1391             FileInputStream fis = new FileInputStream(file);
   1392             XMLReader xmlReader = XMLFileReader.createXMLReader(false);
   1393             xmlReader.setErrorHandler(new MyErrorHandler());
   1394             Map<String, String> data = new TreeMap<String, String>();
   1395             xmlReader.setContentHandler(new MyContentHandler(locale, data, info));
   1396             InputSource is = new InputSource(fis);
   1397             is.setSystemId(file);
   1398             xmlReader.parse(is);
   1399             fis.close();
   1400             for (Entry<String, String> entity : data.entrySet()) {
   1401                 String path = entity.getKey();
   1402                 String value = entity.getValue();
   1403                 PathInfo pathInfo = info.getPathInfo(path);
   1404                 System.out.println(value + "\t" + (pathInfo == null ? "?" : pathInfo.englishValue) + "\t" + path);
   1405             }
   1406         } catch (SAXParseException e) {
   1407             System.out.println("\t" + "Can't read " + file);
   1408             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
   1409         } catch (SAXException e) {
   1410             System.out.println("\t" + "Can't read " + file);
   1411             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
   1412         } catch (IOException e) {
   1413             System.out.println("\t" + "Can't read " + file);
   1414             System.out.println("\t" + e.getClass() + "\t" + e.getMessage());
   1415         }
   1416     }
   1417 
   1418     static class MyErrorHandler implements ErrorHandler {
   1419         public void error(SAXParseException exception) throws SAXException {
   1420             System.out.println("\nerror: " + XMLFileReader.showSAX(exception));
   1421             throw exception;
   1422         }
   1423 
   1424         public void fatalError(SAXParseException exception) throws SAXException {
   1425             System.out.println("\nfatalError: " + XMLFileReader.showSAX(exception));
   1426             throw exception;
   1427         }
   1428 
   1429         public void warning(SAXParseException exception) throws SAXException {
   1430             System.out.println("\nwarning: " + XMLFileReader.showSAX(exception));
   1431             throw exception;
   1432         }
   1433     }
   1434 
   1435     static class MyContentHandler implements ContentHandler {
   1436         private static final boolean SHOW = false;
   1437         private Map<String, String> myData;
   1438         private EnglishInfo info;
   1439         private PathInfo lastPathInfo;
   1440         private StringBuilder currentText = new StringBuilder();
   1441         private long lastId;
   1442         private String lastPluralTag;
   1443         private Map<String, String> pluralTags = new LinkedHashMap<String, String>();
   1444         private Set<String> pluralKeywords;
   1445 
   1446         public MyContentHandler(ULocale locale, Map<String, String> data, EnglishInfo info) {
   1447             myData = data;
   1448             this.info = info;
   1449             PluralRules rules = PluralRules.forLocale(locale);
   1450             pluralKeywords = Builder.with(new HashSet<String>()).addAll(rules.getKeywords()).add("0").add("1").freeze();
   1451         }
   1452 
   1453         @Override
   1454         public void characters(char[] arg0, int arg1, int arg2) throws SAXException {
   1455             String chars = String.valueOf(arg0, arg1, arg2);
   1456             // if (SHOW) System.out.println("\t characters\t" + chars);
   1457             currentText.append(chars);
   1458         }
   1459 
   1460         @Override
   1461         public void endDocument() throws SAXException {
   1462             if (SHOW) System.out.println("\t endDocument\t");
   1463         }
   1464 
   1465         @Override
   1466         public void endElement(String arg0, String arg1, String qName) throws SAXException {
   1467             // if (SHOW) System.out.println("\t endElement\t" + arg0 + "\t" + arg1 + "\t" + qName);
   1468             if (qName.equals("msg")) {
   1469                 String chars = currentText.toString().replace("\n", "").trim();
   1470                 if (lastPathInfo == null) {
   1471                     System.out.println("***Missing path info for " + lastId + "\t" + chars);
   1472                     // myData.put("*** Missing path: " + lastId, chars);
   1473                 } else if (pluralTags.size() != 0) {
   1474                     for (Entry<String, String> pluralTagEntry : pluralTags.entrySet()) {
   1475                         String pluralTag = pluralTagEntry.getKey();
   1476                         String pluralTagValue = pluralTagEntry.getValue();
   1477                         if (pluralKeywords.contains(pluralTag)) {
   1478                             String fixedCount = lastPathInfo.path.replace("other", pluralTag);
   1479                             myData.put(fixedCount, pluralTagValue);
   1480                         } else {
   1481                             System.out.println("***Skipping " + pluralTag + "\t" + pluralTagValue);
   1482                         }
   1483                     }
   1484                     // myData.put(lastPathInfo.path, pluralTags.toString());
   1485                     pluralTags.clear();
   1486                 } else {
   1487                     myData.put(lastPathInfo.path, chars);
   1488                 }
   1489                 currentText.setLength(0);
   1490             }
   1491         }
   1492 
   1493         @Override
   1494         public void endPrefixMapping(String arg0) throws SAXException {
   1495             if (SHOW) System.out.println("\t endPrefixMapping\t" + arg0);
   1496         }
   1497 
   1498         @Override
   1499         public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException {
   1500             if (SHOW) System.out.println("\t ignorableWhitespace\t" + String.valueOf(arg0, arg1, arg2));
   1501         }
   1502 
   1503         @Override
   1504         public void processingInstruction(String arg0, String arg1) throws SAXException {
   1505             if (SHOW) System.out.println("\t processingInstruction\t" + arg0 + "\t" + arg1);
   1506         }
   1507 
   1508         @Override
   1509         public void setDocumentLocator(Locator arg0) {
   1510             if (SHOW) System.out.println("\t setDocumentLocator\t" + arg0);
   1511         }
   1512 
   1513         @Override
   1514         public void skippedEntity(String arg0) throws SAXException {
   1515             if (SHOW) System.out.println("\t skippedEntity\t" + arg0);
   1516         }
   1517 
   1518         @Override
   1519         public void startDocument() throws SAXException {
   1520             if (SHOW) System.out.println("\t startDocument\t");
   1521         }
   1522 
   1523         @Override
   1524         public void startElement(String arg0, String arg1, String qName, Attributes arg3) throws SAXException {
   1525             // if (SHOW) System.out.println("\t startElement\t" + arg0 + "\t" + arg1 + "\t" + qName + "\t" +
   1526             // showAttributes(arg3));
   1527             if (qName.equals("msg")) {
   1528                 lastId = Long.parseLong(arg3.getValue("id"));
   1529                 lastPathInfo = info.getPathInfo(lastId);
   1530                 currentText.setLength(0);
   1531             } else if (qName.equals("ph")) {
   1532                 String name = arg3.getValue("name");
   1533                 String original = lastPathInfo.getPlaceholderReplacementsToOriginal().get(name);
   1534                 if (original != null) {
   1535                     currentText.append(original);
   1536                 } else if (name.startsWith("[PLURAL_")) {
   1537                     pluralTags.clear();
   1538                     lastPluralTag = "[START_PLURAL]";
   1539                 } else {
   1540                     String pluralTag = PLURAL_TAGS.get(name);
   1541                     if (pluralTag != null) {
   1542                         String chars = currentText.toString().replace("\n", "").trim();
   1543                         pluralTags.put(lastPluralTag, chars);
   1544                         currentText.setLength(0);
   1545                         lastPluralTag = pluralTag;
   1546                     } else {
   1547                         System.out.println("***Can't find " + name + " in "
   1548                             + lastPathInfo.getPlaceholderReplacementsToOriginal());
   1549                     }
   1550                 }
   1551             }
   1552         }
   1553 
   1554         private String showAttributes(Attributes atts) {
   1555             String result = "";
   1556             for (int i = 0; i < atts.getLength(); ++i) {
   1557                 result += atts.getQName(i) + "=\"" + atts.getValue(i) + "\"\t";
   1558             }
   1559             return result;
   1560         }
   1561 
   1562         @Override
   1563         public void startPrefixMapping(String arg0, String arg1) throws SAXException {
   1564             if (SHOW) System.out.println("\t startPrefixMapping\t" + arg0 + "\t" + arg1);
   1565         }
   1566     }
   1567 
    /**
     * Maps placeholder names to plural tags. {@code MyContentHandler.startElement} looks up the
     * {@code name} attribute of a {@code <ph>} element here when it is not a known placeholder
     * replacement and does not start with "[PLURAL_". "[=0]" and "[=1]" map to the explicit
     * forms "0" and "1"; "[END_PLURAL]" maps to the empty string.
     */
    static final Map<String, String> PLURAL_TAGS = Builder.with(new HashMap<String, String>())
        .put("[=0]", "0")
        .put("[=1]", "1")
        .put("[ZERO]", PluralRules.KEYWORD_ZERO)
        .put("[ONE]", PluralRules.KEYWORD_ONE)
        .put("[TWO]", PluralRules.KEYWORD_TWO)
        .put("[FEW]", PluralRules.KEYWORD_FEW)
        .put("[MANY]", PluralRules.KEYWORD_MANY)
        .put("[OTHER]", PluralRules.KEYWORD_OTHER)
        .put("[END_PLURAL]", "")
        .freeze();

    // NOTE(review): not referenced in this part of the file; presumably a directory to compare
    // output against. Confirm against the rest of the class.
    private static String compareDirectory;
   1581 }
   1582