Home | History | Annotate | Download | only in tool
      1 /*
      2  **********************************************************************
      3  * Copyright (c) 2002-2004, International Business Machines
      4  * Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  * Author: Mark Davis
      7  **********************************************************************
      8  */
      9 package org.unicode.cldr.tool;
     10 
     11 import java.io.File;
     12 import java.io.IOException;
     13 import java.io.PrintWriter;
     14 import java.util.Arrays;
     15 import java.util.Comparator;
     16 import java.util.Date;
     17 import java.util.EnumSet;
     18 import java.util.HashMap;
     19 import java.util.HashSet;
     20 import java.util.Iterator;
     21 import java.util.Locale;
     22 import java.util.Map;
     23 import java.util.Map.Entry;
     24 import java.util.Set;
     25 import java.util.TreeMap;
     26 import java.util.TreeSet;
     27 import java.util.regex.Matcher;
     28 
     29 import org.unicode.cldr.draft.FileUtilities;
     30 import org.unicode.cldr.tool.ShowData.DataShower;
     31 import org.unicode.cldr.util.CLDRFile;
     32 import org.unicode.cldr.util.CLDRFile.Status;
     33 import org.unicode.cldr.util.CLDRPaths;
     34 import org.unicode.cldr.util.CldrUtility;
     35 import org.unicode.cldr.util.Factory;
     36 import org.unicode.cldr.util.FileCopier;
     37 import org.unicode.cldr.util.LanguageTagParser;
     38 import org.unicode.cldr.util.LanguageTagParser.Fields;
     39 import org.unicode.cldr.util.LocaleIDParser;
     40 import org.unicode.cldr.util.PathHeader;
     41 import org.unicode.cldr.util.PathHeader.PageId;
     42 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
     43 import org.unicode.cldr.util.PatternCache;
     44 import org.unicode.cldr.util.SimpleFactory;
     45 import org.unicode.cldr.util.StringId;
     46 import org.unicode.cldr.util.TransliteratorUtilities;
     47 import org.unicode.cldr.util.XPathParts;
     48 import org.xml.sax.SAXException;
     49 
     50 import com.google.common.collect.ImmutableMap;
     51 import com.ibm.icu.dev.tool.UOption;
     52 import com.ibm.icu.dev.util.UnicodeMap;
     53 import com.ibm.icu.impl.Relation;
     54 import com.ibm.icu.impl.Utility;
     55 import com.ibm.icu.lang.UCharacter;
     56 import com.ibm.icu.lang.UScript;
     57 import com.ibm.icu.text.BreakIterator;
     58 import com.ibm.icu.text.Collator;
     59 import com.ibm.icu.text.Normalizer;
     60 import com.ibm.icu.text.RuleBasedCollator;
     61 import com.ibm.icu.text.RuleBasedNumberFormat;
     62 import com.ibm.icu.text.Transliterator;
     63 import com.ibm.icu.text.UTF16;
     64 import com.ibm.icu.text.UnicodeSet;
     65 import com.ibm.icu.text.UnicodeSetIterator;
     66 import com.ibm.icu.util.Output;
     67 import com.ibm.icu.util.ULocale;
     68 
     69 /**
     70  * This is a simple class that walks through the CLDR hierarchy.
     71  * It gathers together all the items from all the locales that share the
     72  * same element chain, and thus presents a "sideways" view of the data, in files called
 * by_type/X.html, where X is a type. X may be the concatenation of more than
 * one element, where the file would otherwise be too large.
     75  *
     76  * @author medavis
     77  */
     78 /*
     79  * Notes:
     80  * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
     81  * http://developers.sun.com/dev/coolstuff/xml/readme.html
     82  * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
     83  * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
     84  */
     85 public class GenerateSidewaysView {
    /** Name of the output sub-directory; appended to the chart directory to form the default destination. */
    private static final String DIR_NAME = "by_type";
    // Debug flags: compile-time switches, all off.
    static final boolean DEBUG = false;
    static final boolean DEBUG2 = false;
    static final boolean DEBUG_SHOW_ADD = false;
    static final boolean DEBUG_ELEMENT = false;
    static final boolean DEBUG_SHOW_BAT = false;

    static final boolean FIX_ZONE_ALIASES = true;

    // Indexes into the options array below; each value must equal the position of the
    // corresponding entry in that array.
    private static final int HELP1 = 0,
        HELP2 = 1,
        SOURCEDIR = 2,
        DESTDIR = 3,
        MATCH = 4,
        SKIP = 5,
        TZADIR = 6,
        NONVALIDATING = 7,
        SHOW_DTD = 8,
        TRANSLIT = 9,
        PATH = 10;

    // Command-line options, in the order given by the index constants above.
    // "path" has no default: when it is absent its value is null and no path filtering is applied.
    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.HELP_QUESTION_MARK(),
        UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
        UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
        UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
        UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
        UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault(
            "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
        UOption.create("nonvalidating", 'n', UOption.NO_ARG),
        UOption.create("dtd", 'w', UOption.NO_ARG),
        UOption.create("transliterate", 'y', UOption.NO_ARG),
        UOption.create("path", 'p', UOption.REQUIRES_ARG),
    };

    // Reusable matcher (reset per path) for CLDRFile.ALT_PROPOSED_PATTERN; paths that match it are
    // skipped while loading.
    private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
    // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF);
    /** Frozen set built from the pattern {@code [[:m:]]}. */
    protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();
    126 
    127     static int getFirstScript(UnicodeSet exemplars) {
    128         for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
    129             int script = UScript.getScript(it.codepoint);
    130             if (script == UScript.COMMON || script == UScript.INHERITED) {
    131                 continue;
    132             }
    133             return script;
    134         }
    135         return UScript.COMMON;
    136     }
    137 
    /**
     * Ordering used for the sorted sets in the exemplar charts: the root collator with numeric
     * collation and IDENTICAL strength, combined through a MultiComparator with a UTF-16 string
     * comparator (presumably as the tie-breaker; confirm against MultiComparator).
     */
    static Comparator<Object> UCA;
    static {
        RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
        UCA2.setNumericCollation(true);
        UCA2.setStrength(Collator.IDENTICAL);
        UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0));
    }

    // Collected data: path -> (value -> locale IDs having that value). Consumers of this map strip a
    // trailing "*" from locale IDs before displaying them.
    private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<PathHeader, Map<String, Set<String>>>();
    private static XPathParts parts = new XPathParts(null, null);
    // Reset at the top of main(); used there to report the elapsed time.
    private static long startTime = System.currentTimeMillis();

    // English collator configured with IDENTICAL strength and numeric collation.
    static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
    static {
        standardCollation.setStrength(Collator.IDENTICAL);
        standardCollation.setNumericCollation(true);
    }

    // The "en" file, assigned in main(); supplies English values and locale display names.
    private static CLDRFile english;
    // private static DataShower dataShower = new DataShower();
    // Matcher for the "path" option, or null when that option was not given (no filtering).
    private static Matcher pathMatcher;
    159 
    160     public static void main(String[] args) throws SAXException, IOException {
    161         startTime = System.currentTimeMillis();
    162         ToolUtilities.registerExtraTransliterators();
    163         UOption.parseArgs(args, options);
    164 
    165         pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher("");
    166 
    167         File[] paths = {
    168             new File(CLDRPaths.MAIN_DIRECTORY),
    169             new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
    170             new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
    171         };
    172         Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);
    173 
    174         // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value);
    175         english = cldrFactory.make("en", true);
    176         pathHeaderFactory = PathHeader.getFactory(english);
    177 
    178         FileCopier.ensureDirectoryExists(options[DESTDIR].value);
    179         FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", options[DESTDIR].value, "index.css");
    180 
    181         // now get the info
    182 
    183         loadInformation(cldrFactory);
    184         String oldMain = "";
    185         PrintWriter out = null;
    186 
    187         System.out.println("Getting types " + path_value_locales.size());
    188         // Set<String> types = new TreeSet<String>();
    189         // for (PathHeader path : path_value_locales.keySet()) {
    190         // String main = getFileName2(path);
    191         // if (!main.equals(oldMain)) {
    192         // oldMain = main;
    193         // types.add(main);
    194         // }
    195         // }
    196         String headerString = getHeader(path_value_locales.keySet());
    197         FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
    198             ImmutableMap.of(
    199                 "%header%", headerString,
    200                 "%version%", ToolConstants.CHART_DISPLAY_VERSION,
    201                 "%index-title%", "Main Charts Index",
    202                 "%date%", CldrUtility.isoFormatDateOnly(new Date())));
    203 //        FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
    204 //            new String[] { "%header%", headerString });
    205 
    206         System.out.println("Printing files in " + new File(options[DESTDIR].value).getAbsolutePath());
    207         // Transliterator toLatin = Transliterator.getInstance("any-latin");
    208         toHTML = TransliteratorUtilities.toHTML;
    209         // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");
    210 
    211         String oldHeader = "";
    212         Output<PrintWriter> tsvFile = new Output<>();
    213 
    214         for (PathHeader path : path_value_locales.keySet()) {
    215             String main = getFileName2(path, null);
    216             if (!main.equals(oldMain)) {
    217                 oldMain = main;
    218                 out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile);
    219                 out.println("<table class='table'>");
    220                 oldHeader = "";
    221             }
    222             String key = path.getCode();
    223             String anchor = toHTML.transliterate(key);
    224 
    225             String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path);
    226             String englishValue = english.getStringValue(originalPath);
    227             if (englishValue != null) {
    228                 englishValue = "English: " + englishValue + "";
    229             } else {
    230                 englishValue = "";
    231             }
    232 
    233             String header = path.getHeader();
    234             if (!header.equals(oldHeader) && !header.equals("null")) {
    235                 out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header)
    236                     + "</th></tr>");
    237                 oldHeader = header;
    238             }
    239             String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath()));
    240             out.println("<tr>" +
    241                 "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" +
    242                 "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" +
    243                 "</tr>");
    244             Map<String, Set<String>> value_locales = path_value_locales.get(path);
    245             for (String value : value_locales.keySet()) {
    246                 // String outValue = toHTML.transliterate(value);
    247                 // String transValue = value;
    248                 // try {
    249                 // transValue = toLatin.transliterate(value);
    250                 // } catch (RuntimeException e) {
    251                 // }
    252                 // if (!transValue.equals(value)) {
    253                 // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue + "</span>";
    254                 // }
    255                 String valueClass = " class='value'";
    256                 if (DataShower.getBidiStyle(value).length() != 0) {
    257                     valueClass = " class='rtl_value'";
    258                 }
    259                 out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>");
    260                 tsvFile.value.print(
    261                     path.getSection()
    262                         + "\t" + path.getPage()
    263                         + "\t" + path.getHeader()
    264                         + "\t" + path.getCode()
    265                         + "\t" + value
    266                         + "\t");
    267 
    268                 Set<String> locales = value_locales.get(value);
    269                 boolean first = true;
    270                 boolean containsRoot = locales.contains("root");
    271                 for (String locale : locales) {
    272                     if (first)
    273                         first = false;
    274                     else
    275                         out.print(" ");
    276                     if (locale.endsWith("*")) {
    277                         locale = locale.substring(0, locale.length() - 1);
    278                         out.print("<i>\u00B7" + locale + "\u00B7</i>");
    279                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
    280                     } else if (!containsRoot) {
    281                         out.print("\u00B7" + locale + "\u00B7");
    282                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
    283                     } else if (locale.contains("_")) {
    284                         // not same as root, but need to test for parent
    285                         // if the parent is not in the same list, then we include anyway.
    286                         // Cf http://unicode.org/cldr/trac/ticket/7228
    287                         String parent = LocaleIDParser.getParent(locale);
    288                         if (!locales.contains(parent)) {
    289                             out.print("<b>\u00B7" + locale + "\u00B7</b>");
    290                             tsvFile.value.print("\u00B7" + locale + "\u00B7");
    291                         }
    292                     }
    293                 }
    294                 if (containsRoot) {
    295                     out.print("<b>\u00B7all\u00B7others\u00B7</b>");
    296                     tsvFile.value.print("\u00B7all-others\u00B7");
    297                 }
    298                 out.println("</td></tr>");
    299                 tsvFile.value.println();
    300             }
    301         }
    302         for (String[] pair : EXEMPLARS) {
    303             showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile);
    304         }
    305         finish(out, tsvFile.value);
    306         finishAll(out, tsvFile.value);
    307         System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION)
    308             .format((System.currentTimeMillis() - startTime) / 1000.0));
    309     }
    310 
    311     // static Comparator UCA;
    312     // static {
    313     // RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
    314     // UCA2.setNumericCollation(true);
    315     // UCA2.setStrength(UCA2.IDENTICAL);
    316     // UCA = new CollectionUtilities.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0) );
    317     // }
    318 
    /**
     * The exemplar charts to generate. Each row is { path, variant, title }, passed in that order as
     * the pathName, variant and title arguments of showExemplars.
     */
    static final String[][] EXEMPLARS = {
        { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" },
        // TODO look at numbers, auxiliary
    };
    325 
    326     private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title,
    327         Output<PrintWriter> tsvFile)
    328         throws IOException {
    329         PathHeader cleanPath = fixPath(pathName, null);
    330         String filename = getFileName2(cleanPath, variant);
    331         out = start(out, filename, headerString, title, tsvFile);
    332         Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
    333 
    334         // TODO change logic so that aux characters characters work well.
    335 
    336         Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<String, UnicodeMap<Set<String>>>();
    337         // UnicodeMap mapping = new UnicodeMap();
    338         UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");
    339 
    340         // get the locale information
    341         UnicodeSet totalExemplars = new UnicodeSet();
    342         for (String value : value_locales.keySet()) {
    343             // flatten out UnicodeSet
    344             UnicodeSet exemplars = new UnicodeSet(value);
    345             if (variant.equals("main")) {
    346                 UnicodeSet extras = new UnicodeSet();
    347                 for (String item : exemplars) {
    348                     extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
    349                 }
    350                 exemplars.addAll(extras);
    351             }
    352             totalExemplars.addAll(exemplars);
    353             exemplars.removeAll(stuffToSkip);
    354 
    355             Set<String> locales = value_locales.get(value);
    356             //String script = UScript.getName(getFirstScript(exemplars));
    357             for (String locale : locales) {
    358                 checkTr(script_UnicodeMap);
    359                 String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
    360                 String script = LOCALE_TO_SCRIPT.get(key);
    361                 // try a few variants until we get the script
    362                 if (script == null && key.contains("_")) {
    363                     String simpleParent = LanguageTagParser.getSimpleParent(key);
    364                     script = LOCALE_TO_SCRIPT.get(simpleParent);
    365                     if (script == null && simpleParent.contains("_")) {
    366                         simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
    367                         script = LOCALE_TO_SCRIPT.get(simpleParent);
    368                     }
    369                 }
    370                 if (script == null) {
    371                     script = UScript.getName(UScript.UNKNOWN);
    372                 }
    373                 Set<String> temp = new HashSet<String>();
    374                 temp.add(locale);
    375                 checkTr(script_UnicodeMap);
    376                 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
    377                 if (mapping == null) {
    378                     script_UnicodeMap.put(script, mapping = new UnicodeMap<Set<String>>());
    379                 }
    380                 checkTr(script_UnicodeMap);
    381                 mapping.composeWith(exemplars, temp, setComposer);
    382                 checkTr(script_UnicodeMap);
    383             }
    384         }
    385         System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
    386         for (String script : script_UnicodeMap.keySet()) {
    387             UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
    388             writeCharToLocaleMapping(out, script, mapping);
    389         }
    390         return out;
    391     }
    392 
    393     private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
    394         UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
    395         if (unicodeMap == null) {
    396             return;
    397         }
    398         Set<String> foo = unicodeMap.get(0x21);
    399         if (foo == null) {
    400             return;
    401         }
    402         if (foo.contains("tr")) {
    403             System.out.println("huh?");
    404         }
    405     }
    406 
    407     private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
    408         BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for
    409         // script
    410         System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
    411         if (script.equals("Hangul")) { //  || script.equals("Common")
    412             return; // skip these
    413         }
    414         // find out all the locales and all the characters
    415         Set<String> allLocales = new TreeSet<String>(UCA);
    416         Set<String> allChars = new TreeSet<String>(UCA);
    417         Set<String> allStrings = new TreeSet<String>(UCA);
    418         for (Set<String> locales : mapping.getAvailableValues()) {
    419             allLocales.addAll(locales);
    420             UnicodeSet unicodeSet = mapping.keySet(locales);
    421             for (String item : unicodeSet) {
    422                 charBreaks.setText(item);
    423                 int endFirst = charBreaks.next();
    424                 if (endFirst == item.length()) {
    425                     allChars.add(item);
    426                 } else {
    427                     allStrings.add(item);
    428                 }
    429             }
    430         }
    431         // get the columns, and show them
    432         out.println("<table class='table' style='width:1%'>");
    433         out.println("<caption>" + script + "</caption>");
    434         exemplarHeader(out, allChars);
    435 
    436         for (String locale : allLocales) {
    437             String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>"
    438                 + cleanLocale(locale, true) + "</td>";
    439             out.println("<tr>");
    440             out.println(headerHeader);
    441 
    442             for (String item : allChars) {
    443                 // String exemplarsWithoutBrackets = displayExemplars(item);
    444                 if (mapping.get(item).contains(locale)) {
    445                     out.println("<td class='cell'" +
    446                         ">" + displayCharacter(item) + "</td>");
    447                 } else {
    448                     out.println("<td class='empty'>\u00a0</td>");
    449                 }
    450             }
    451             // now strings, if any
    452             StringBuilder strings = new StringBuilder();
    453             int lastLineStart = 0;
    454             for (String item : allStrings) {
    455                 // String exemplarsWithoutBrackets = displayExemplars(item);
    456                 if (mapping.get(item).contains(locale)) {
    457                     int str_len = strings.length();
    458                     if (str_len != 0) {
    459                         if (str_len - lastLineStart > 20) {
    460                             strings.append(System.lineSeparator());
    461                             lastLineStart = str_len;
    462                         } else {
    463                             strings.append(' ');
    464                         }
    465                     }
    466                     strings.append(displayCharacter(item));
    467                 }
    468             }
    469             if (strings.length() == 0) {
    470                 out.println("<td class='empty'>\u00a0</td>");
    471             } else {
    472                 out.println("<td class='cell nowrap'>" + displayCharacter(strings.toString()).replace(System.lineSeparator(), "<br>")
    473                     + "</td>");
    474             }
    475 
    476             out.println(headerHeader);
    477             out.println("</tr>");
    478         }
    479         exemplarHeader(out, allChars);
    480         out.println("</table>");
    481         out.flush();
    482     }
    483 
    484     private static String characterTitle(String item) {
    485         return ("title='U+" +
    486             toHTML.transform(
    487                 Utility.hex(item, 4, ", U+", true, new StringBuilder())
    488                     + " " + UCharacter.getName(item, ", "))
    489             + "'");
    490     }
    491 
    492     private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
    493         out.println("<tr>");
    494         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
    495         for (String item : allChars) {
    496             out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>");
    497         }
    498         out.println("<th class='head'>Clusters</th>");
    499         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
    500         out.println("</tr>");
    501     }
    502 
    503     static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();
    504 
    505     public static String displayCharacter(String item) {
    506         if (item.length() == 0) return "<i>none</i>";
    507         int ch = item.codePointAt(0);
    508         if (NONSPACING.contains(ch)) {
    509             item = "\u00a0" + item + "\u00a0";
    510         }
    511         String result = toHTML.transform(item);
    512         return result;
    513     }
    514 
    515     static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
    516     static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);
    517     static {
    518         allButScripts.remove(Fields.SCRIPT);
    519     }
    520 
    521     private static String cleanLocale(String item, boolean name) {
    522         if (item == null) {
    523             return "<i>null</i>";
    524         }
    525         boolean draft = item.endsWith("*");
    526         if (draft) {
    527             item = item.substring(0, item.length() - 1);
    528         }
    529         cleanLocaleParser.set(item);
    530         item = cleanLocaleParser.toString(allButScripts);
    531         String core = item;
    532         item = toHTML.transform(item);
    533         if (name) {
    534             item = english.getName(core);
    535             item = item == null ? "<i>null</i>" : toHTML.transform(item);
    536         }
    537         if (draft) {
    538             item = "<i>" + item + "</i>";
    539         }
    540         return item;
    541     }
    542 
    543     // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet lastChars, Set locales) {
    544     // String exemplarsWithoutBrackets = displayExemplars(lastChars);
    545     // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>");
    546     // for (String item : allLocales) {
    547     // String cleanItem;
    548     // if (locales.contains(item)) {
    549     // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>";
    550     // } else {
    551     // cleanItem = "<td class='value'>\u00a0</td>";
    552     // }
    553     // out.println(cleanItem);
    554     // }
    555     // out.println("</tr>");
    556     // }
    557 
    558     // private static final StringTransform MyTransform = new StringTransform() {
    559     //
    560     // public String transform(String source) {
    561     // StringBuilder builder = new StringBuilder();
    562     // int cp = 0;
    563     // builder.append("<span title='");
    564     // String prefix = "";
    565     // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
    566     // cp = UTF16.charAt(source, i);
    567     // if (i == 0) {
    568     // if (COMBINING.contains(cp)) {
    569     // prefix = "\u25CC";
    570     // }
    571     // } else {
    572     // builder.append(" + ");
    573     // }
    574     // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append(' ').append(UCharacter.getExtendedName(cp));
    575     // }
    576     // builder.append("'>").append(prefix).append(source).append("</span>");
    577     // return builder.toString();
    578     // }
    579     //
    580     // };
    581 
    582     // private static String displayExemplars(UnicodeSet lastChars) {
    583     // String exemplarsWithoutBrackets = new PrettyPrinter()
    584     // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT))
    585     // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)
    586     // .setStrength2(Collator.PRIMARY))
    587     // .setCompressRanges(true)
    588     // .setToQuote(ALL_CHARS)
    589     // .setQuoter(MyTransform)
    590     // .format(lastChars);
    591     // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, exemplarsWithoutBrackets.length() - 1);
    592     // return exemplarsWithoutBrackets;
    593     // }
    594 
    595     // private static boolean isNextCharacter(String last, String value) {
    596     // if (UTF16.hasMoreCodePointsThan(last, 1)) return false;
    597     // if (UTF16.hasMoreCodePointsThan(value, 1)) return false;
    598     // int lastChar = UTF16.charAt(last,0);
    599     // int valueChar = UTF16.charAt(value,0);
    600     // return lastChar + 1 == valueChar;
    601     // }
    602 
    603     static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<Set<String>>() {
    604         public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) {
    605             if (a == null) {
    606                 return b;
    607             } else if (b == null) {
    608                 return a;
    609             } else {
    610                 TreeSet<String> result = new TreeSet<String>(a);
    611                 result.addAll(b);
    612                 return result;
    613             }
    614         }
    615     };
    616 
    /**
     * Locale ID to script name. Filled while loading, from the first specific script found in each
     * locale's main exemplar set; read by showExemplars to group locales by script.
     */
    static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<String, String>();
    618 
    619     private static void loadInformation(Factory cldrFactory) {
    620         Set<String> alllocales = cldrFactory.getAvailable();
    621         String[] postFix = new String[] { "" };
    622         // gather all information
    623         // TODO tweek for value-laden attributes
    624         for (String localeID : alllocales) {
    625             System.out.println("Loading: " + localeID);
    626             System.out.flush();
    627 
    628             CLDRFile cldrFile;
    629             try {
    630                 cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
    631             } catch (IllegalArgumentException e) {
    632                 System.err.println("Couldn't open " + localeID);
    633                 continue;
    634             }
    635             if (cldrFile.isNonInheriting()) continue;
    636             for (String path : cldrFile) {
    637                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
    638                     continue;
    639                 }
    640                 if (altProposedMatcher.reset(path).matches()) {
    641                     continue;
    642                 }
    643                 if (path.indexOf("/alias") >= 0) continue;
    644                 if (path.indexOf("/identity") >= 0) continue;
    645                 if (path.indexOf("/references") >= 0) continue;
    646                 PathHeader cleanPath = fixPath(path, postFix);
    647                 final SurveyToolStatus surveyToolStatus = cleanPath.getSurveyToolStatus();
    648                 if (surveyToolStatus == SurveyToolStatus.DEPRECATED || surveyToolStatus == SurveyToolStatus.HIDE) {
    649                     // System.out.println("Skipping " + path);
    650                     continue;
    651                 }
    652                 String fullPath = cldrFile.getFullXPath(path);
    653                 String value = getValue(cldrFile, path, fullPath);
    654                 if (value == null) {
    655                     continue;
    656                 }
    657                 if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
    658                     || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) {
    659                     postFix[0] = "*";
    660                 }
    661                 if (path.equals("//ldml/characters/exemplarCharacters")) {
    662                     UnicodeSet exemplars = new UnicodeSet(value);
    663                     String script = UScript.getName(getFirstScript(exemplars));
    664                     LOCALE_TO_SCRIPT.put(localeID, script);
    665                 }
    666                 Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
    667                 if (value_locales == null) {
    668                     path_value_locales.put(cleanPath, value_locales = new TreeMap<String, Set<String>>(
    669                         standardCollation));
    670                 }
    671                 Set<String> locales = value_locales.get(value);
    672                 if (locales == null) {
    673                     value_locales.put(value, locales = new TreeSet<String>());
    674                 }
    675                 locales.add(localeID + postFix[0]);
    676             }
    677         }
    678         Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
    679         for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
    680             sorted.put(s.getValue(), s.getKey());
    681         }
    682         for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
    683             System.out.println(s);
    684         }
    685     }
    686 
    /**
     * Factory that {@code fixPath} uses to turn an xpath into a {@link PathHeader}.
     * Not initialized at its declaration, so it must be assigned before {@code fixPath} runs.
     */
    static PathHeader.Factory pathHeaderFactory;
    688 
    689     // static org.unicode.cldr.util.PrettyPath prettyPath = new org.unicode.cldr.util.PrettyPath();
    690     /**
    691      *
    692      */
    693     private static PathHeader fixPath(String path, String[] localePrefix) {
    694         if (localePrefix != null) localePrefix[0] = "";
    695         //        if (path.indexOf("[@alt=") >= 0 || path.indexOf("[@draft=") >= 0) {
    696         //            if (localePrefix != null) localePrefix[0] = "*";
    697         //            path = removeAttributes(path, skipSet);
    698         //        }
    699         // if (usePrettyPath) path = prettyPath.getPrettyPath(path);
    700         return pathHeaderFactory.fromPath(path);
    701     }
    702 
    703     private static String removeAttributes(String xpath, Set<String> skipAttributes) {
    704         XPathParts parts = new XPathParts(null, null).set(xpath);
    705         removeAttributes(parts, skipAttributes);
    706         return parts.toString();
    707     }
    708 
    709     /**
    710      *
    711      */
    712     private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) {
    713         for (int i = 0; i < parts.size(); ++i) {
    714             // String element = parts.getElement(i);
    715             Map<String, String> attributes = parts.getAttributes(i);
    716             for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) {
    717                 String attribute = it.next();
    718                 if (skipAttributes.contains(attribute)) it.remove();
    719             }
    720         }
    721     }
    722 
    /** Attribute names ({@code draft}, {@code alt}) that {@code getValue} strips before rendering a path element. */
    static Set<String> skipSet = new HashSet<String>(Arrays.asList("draft", "alt"));

    /**
     * Shared holder passed to {@code CLDRFile.getSourceLocaleID} in {@code getValue}, which then
     * reads its {@code pathWhereFound}. A single instance is reused for every call.
     */
    static Status status = new Status();
    726 
    727     /**
    728      *
    729      */
    730     private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
    731         String value = cldrFile.getStringValue(path);
    732         if (value == null) {
    733             System.out.println("Null value for " + path);
    734             return value;
    735         }
    736         cldrFile.getSourceLocaleID(path, status);
    737         if (!path.equals(status.pathWhereFound)) {
    738             // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]";
    739             value = null;
    740             return value;
    741         }
    742         if (value.length() == 0) {
    743             parts.set(fullPath);
    744             removeAttributes(parts, skipSet);
    745             int limit = parts.size();
    746             value = parts.toString(limit - 1, limit);
    747             return value;
    748         }
    749         return value;
    750     }
    751 
    752     private static String getFileName2(PathHeader header, String suffix) {
    753         String result = (header.getSection() + "." + header.getPage())
    754             .replace(" ", "_")
    755             .replace("/", "_")
    756             .replace("(", "_")
    757             .replace(")", "_");
    758         if (suffix != null) {
    759             result += "." + suffix;
    760         }
    761         return result.toLowerCase(Locale.ENGLISH);
    762     }
    763 
    /**
     * Two-slot array handed to {@code ShowData.getChartTemplate} in {@code writeHeader}.
     * Slot 0 is printed at the top of each page by {@code writeHeader}; slot 1 is printed at
     * the bottom by {@code finish}.
     */
    static String[] headerAndFooter = new String[2];
    // NOTE(review): getHeader uses TransliteratorUtilities.toHTML rather than this field;
    // confirm whether it is still referenced elsewhere in the file.
    private static Transliterator toHTML;
    766 
    767     /**
    768      * @param tsvFile TODO
    769      * @param path2
    770      *
    771      */
    772     private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)
    773         throws IOException {
    774         finish(out, tsvFile.value);
    775         out = writeHeader(main, title, tsvFile);
    776         out.println(headerString);
    777         return out;
    778     }
    779 
    780     public static String getHeader(Set<PathHeader> set) {
    781         StringBuffer out = new StringBuffer("<table class='simple'><tr>");
    782         String lastMain = "";
    783         String lastSub = "";
    784         for (PathHeader pathHeader : set) {
    785             String mainName = pathHeader.getSection();
    786             String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
    787             if (!mainName.equals(lastMain)) {
    788                 if (lastMain.length() != 0) {
    789                     out.append("</tr>" + System.lineSeparator() + "<tr>");
    790                 }
    791                 out.append("<th align='right' nowrap style='vertical-align: top'><b>"
    792                     + TransliteratorUtilities.toHTML.transform(mainName)
    793                     + ":&nbsp;</b></th><td>");
    794                 lastMain = mainName;
    795                 lastSub = subName;
    796             } else if (!subName.equals(lastSub)) {
    797                 out.append(" | ");
    798                 lastSub = subName;
    799             } else {
    800                 continue; // identical, skip
    801             }
    802             out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
    803             if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
    804                 for (String[] pair : EXEMPLARS) {
    805                     out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>");
    806                 }
    807             }
    808             continue;
    809         }
    810         return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
    811     }
    812 
    813     private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException {
    814         PrintWriter out;
    815         out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
    816         if (tsvFile.value == null) {
    817             tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
    818             tsvFile.value.println("# By-Type Data");
    819             tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales");
    820         }
    821 
    822         ShowData.getChartTemplate("By-Type Chart: " + title,
    823             ToolConstants.CHART_DISPLAY_VERSION,
    824             "",
    825             // "<link rel='stylesheet' type='text/css' href='by_type.css'>" +
    826             // "<style type='text/css'>" + Utility.LINE_SEPARATOR +
    827             // "h1 {margin-bottom:1em}" + Utility.LINE_SEPARATOR +
    828             // "</style>" + Utility.LINE_SEPARATOR,
    829             headerAndFooter, null, false);
    830         out.println(headerAndFooter[0]);
    831         return out;
    832     }
    833 
    834     /**
    835      * @param tsvFile TODO
    836      *
    837      */
    838     private static void finish(PrintWriter out, PrintWriter tsvFile) {
    839         if (out == null) return;
    840         out.println("</table>");
    841         out.println(headerAndFooter[1]);
    842         out.close();
    843     }
    844 
    845     private static void finishAll(PrintWriter out, PrintWriter tsvFile) {
    846         // TODO Auto-generated method stub
    847         tsvFile.println("# EOF");
    848         tsvFile.close();
    849     }
    850 }
    851