Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.io.IOException;
      4 import java.util.Collection;
      5 import java.util.Comparator;
      6 import java.util.LinkedHashSet;
      7 import java.util.Map;
      8 import java.util.Map.Entry;
      9 import java.util.Set;
     10 import java.util.TreeSet;
     11 
     12 import org.unicode.cldr.util.CLDRConfig;
     13 import org.unicode.cldr.util.CLDRFile;
     14 import org.unicode.cldr.util.CldrUtility;
     15 import org.unicode.cldr.util.Pair;
     16 import org.unicode.cldr.util.StandardCodes;
     17 import org.unicode.cldr.util.StandardCodes.LstrField;
     18 import org.unicode.cldr.util.StandardCodes.LstrType;
     19 
     20 import com.google.common.collect.ImmutableSet;
     21 import com.google.common.collect.ImmutableSet.Builder;
     22 import com.google.common.collect.Multimap;
     23 import com.ibm.icu.text.Collator;
     24 import com.ibm.icu.util.ULocale;
     25 
     26 public class ChartLanguageGroups extends Chart {
     27 
     28     private static final String SHOULD_NOT_BE_LEAF_NODE = "";
     29     private static final String LEAF_NODES = "";
     30     private static final String TREE_NODES = "";
     31 
     32     public static void main(String[] args) {
     33         new ChartLanguageGroups().writeChart(null);
     34     }
     35 
     36     static final Set<String> COLLECTIONS;
     37     static {
     38         Map<String, Map<LstrField, String>> languages = StandardCodes.getEnumLstreg().get(LstrType.language);
     39         Builder<String> _collections = ImmutableSet.<String> builder();
     40         for (Entry<String, Map<LstrField, String>> e : languages.entrySet()) {
     41             String scope = e.getValue().get(LstrField.Scope);
     42             if (scope != null
     43                 && "Collection".equalsIgnoreCase(scope)) {
     44                 _collections.add(e.getKey());
     45             }
     46         }
     47         COLLECTIONS = _collections.build();
     48     }
     49 
     50     @Override
     51     public String getDirectory() {
     52         return FormattedFileWriter.CHART_TARGET_DIR;
     53     }
     54 
     55     @Override
     56     public String getTitle() {
     57         return "Language Groups";
     58     }
     59 
     60     @Override
     61     public String getExplanation() {
     62         return "<p>This chart shows draft language groups based on data extracted from wikidata. "
     63             + "The <b>Status</b> cell indicates the nature of the items in the adjacent <b>Contained</b> cell:<p>"
     64             + "<ul>\n"
     65             + "<li>A " + TREE_NODES
     66             + " indicates that the contained languages are tree nodes (contain other languages or langauge groups), "
     67             + "and will be listed further down in the chart in a <b>Language Group</b> cell.</li>\n"
     68             + "<li>A " + LEAF_NODES
     69             + " indicates that the contained languages are leaf nodes (contain nothing).</li>\n"
     70             + "<li>A " + SHOULD_NOT_BE_LEAF_NODE
     71             + " before an item <i>in</i> a <b>Contained</b> cell indicates a leaf node that shouldnt be  that is, its ISO 639 Scope is "
     72             + "<a href='http://www-01.sil.org/iso639-3/scope.asp#C' target='_blank'>Collection</a>.</li>\n"
     73             + "</ul>\n"
     74             + "<p><b>Caveats:</b> Only the wikidata containment for "
     75             + "<a href='http://unicode.org/reports/tr35/#unicode_language_subtag'>valid language codes</a> is used."
     76             + "The containment data is not complete: "
     77             + "if a language doesn't appear in the chart it could be an isolate, or just be missing data."
     78             + "The data doesn't completely match wikipedias; there are some patches for CLDR languages.</p>\n";
     79     }
     80 
     81     Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH);
     82 
     83     @Override
     84     public void writeContents(FormattedFileWriter pw) throws IOException {
     85 
     86         Multimap<String, String> lg = CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups();
     87 
     88         TablePrinter tablePrinter = new TablePrinter()
     89             .addColumn("Language Group", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
     90             .setBreakSpans(true)
     91             .addColumn("Name", "class='source'", null, "class='source'", true)
     92             .addColumn("St.", "class='source'", null, "class='source'", true)
     93             .addColumn("Contained", "class='source'", null, "class='target'", true)
     94             .setBreakSpans(true);
     95 
     96         show(lg, "mul", tablePrinter);
     97         pw.write(tablePrinter.toTable());
     98     }
     99 
    100     private void show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter) {
    101         Collection<String> children = lg.get(parent);
    102         if (children == null || children.isEmpty()) {
    103             return;
    104         }
    105         TreeSet<Pair<String, String>> nameAndCode = new TreeSet<>(new Comparator<Pair<String, String>>() {
    106             @Override
    107             public int compare(Pair<String, String> o1, Pair<String, String> o2) {
    108                 int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
    109                 if (diff != 0) {
    110                     return diff;
    111                 }
    112                 return o1.getSecond().compareTo(o2.getSecond());
    113             }
    114 
    115         });
    116         for (String lang : children) {
    117             nameAndCode.add(Pair.of(getLangName(lang), lang));
    118         }
    119         StringBuilder treeList = new StringBuilder();
    120         StringBuilder leafList = new StringBuilder();
    121         LinkedHashSet<Pair<String, String>> nameAndCodeWithChildren = new LinkedHashSet<>();
    122         for (Pair<String, String> pair : nameAndCode) {
    123             String code = pair.getSecond();
    124             if (lg.containsKey(code)) {
    125                 addChildren(treeList, TREE_NODES, pair, false);
    126                 nameAndCodeWithChildren.add(pair);
    127             } else if (!code.equals("und")) {
    128                 addChildren(leafList, LEAF_NODES, pair, true);
    129             }
    130         }
    131         if (treeList.length() != 0) {
    132             addRow(parent, tablePrinter, TREE_NODES, treeList);
    133         }
    134         if (leafList.length() != 0) {
    135             addRow(parent, tablePrinter, LEAF_NODES, leafList);
    136         }
    137 
    138         for (Pair<String, String> pair : nameAndCodeWithChildren) {
    139             show(lg, pair.getSecond(), tablePrinter);
    140         }
    141     }
    142 
    143     private void addRow(String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) {
    144         tablePrinter.addRow()
    145             .addCell(parent)
    146             .addCell(getLangName(parent))
    147             .addCell(marker)
    148             .addCell(treeList.toString())
    149             .finishRow();
    150     }
    151 
    152     private void addChildren(StringBuilder treeList, String marker, Pair<String, String> pair, boolean showCollections) {
    153         if (treeList.length() != 0) {
    154             treeList.append("; ");
    155         }
    156         treeList.append(getPairName(pair, showCollections));
    157     }
    158 
    159     private String getPairName(Pair<String, String> pair, boolean showCollection) {
    160         return (showCollection && COLLECTIONS.contains(pair.getSecond())
    161             ? SHOULD_NOT_BE_LEAF_NODE + "" : "")
    162             + pair.getSecond() + "" + pair.getFirst() + "";
    163     }
    164 
    165     private String getLangName(String langCode) {
    166         return langCode.equals("mul") ? "All"
    167             : langCode.equals("zh") ? "Mandarin Chinese"
    168                 : ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode).replace(" (Other)", "").replace(" languages", "");
    169     }
    170 }
    171