Home | History | Annotate | Download | only in tool
      1 package org.unicode.cldr.tool;
      2 
      3 import java.io.BufferedReader;
      4 import java.io.IOException;
      5 import java.io.PrintWriter;
      6 import java.text.ParseException;
      7 import java.util.Arrays;
      8 import java.util.Collection;
      9 import java.util.Comparator;
     10 import java.util.Date;
     11 import java.util.HashMap;
     12 import java.util.Iterator;
     13 import java.util.List;
     14 import java.util.Locale;
     15 import java.util.Map;
     16 import java.util.Set;
     17 import java.util.TreeMap;
     18 import java.util.TreeSet;
     19 
     20 import org.unicode.cldr.util.CLDRFile;
     21 import org.unicode.cldr.util.CLDRPaths;
     22 import org.unicode.cldr.util.CldrUtility;
     23 import org.unicode.cldr.util.Factory;
     24 import org.unicode.cldr.util.Iso639Data;
     25 import org.unicode.cldr.util.Iso639Data.Scope;
     26 import org.unicode.cldr.util.Iso639Data.Type;
     27 import org.unicode.cldr.util.Log;
     28 import org.unicode.cldr.util.StandardCodes;
     29 import org.unicode.cldr.util.StandardCodes.LstrType;
     30 import org.unicode.cldr.util.SupplementalDataInfo;
     31 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
     32 import org.unicode.cldr.util.Validity;
     33 import org.unicode.cldr.util.Validity.Status;
     34 import org.unicode.cldr.util.XPathParts;
     35 
     36 import com.ibm.icu.dev.util.CollectionUtilities;
     37 import com.ibm.icu.impl.Relation;
     38 import com.ibm.icu.impl.Utility;
     39 import com.ibm.icu.text.Collator;
     40 import com.ibm.icu.text.DateFormat;
     41 import com.ibm.icu.text.DecimalFormat;
     42 import com.ibm.icu.text.NumberFormat;
     43 import com.ibm.icu.text.SimpleDateFormat;
     44 import com.ibm.icu.text.Transliterator;
     45 import com.ibm.icu.util.ULocale;
     46 
     47 public class GenerateEnums {
     48     private static final String CODE_INDENT = "  ";
     49 
     50     private static final String DATA_INDENT = "    ";
     51 
     52     private static final String LIST_INDENT = "              ";
     53 
     54     private StandardCodes sc = StandardCodes.make();
     55 
     56     private Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
     57 
     58 //    private Factory supplementalFactory = Factory.make(
     59 //        CLDRPaths.SUPPLEMENTAL_DIRECTORY, ".*");
     60 
     61     private Set<String> cldrCodes = new TreeSet<String>();
     62 
     63     // private Map enum_canonical = new TreeMap();
     64     private Map<String, String> enum_alpha3 = new TreeMap<String, String>();
     65 
     66     private Map<String, String> enum_UN = new TreeMap<String, String>();
     67 
     68     // private Map enum_FIPS10 = new TreeMap();
     69 
     70     // private Map enum_TLD = new TreeMap();
     71 
     72     private CLDRFile english = factory.make("en", false);
     73 
     74     private CLDRFile supplementalMetadata = factory.make("supplementalMetadata",
     75         false);
     76 
     77     private CLDRFile supplementalData = factory.make("supplementalData", false);
     78 
     79     private Relation<String, String> unlimitedCurrencyCodes;
     80 
     81     private Set<String> scripts = new TreeSet<String>();
     82 
     83     private Set<String> languages = new TreeSet<String>();
     84 
     85     public static void main(String[] args) throws IOException {
     86         GenerateEnums gen = new GenerateEnums();
     87         gen.showLanguageInfo();
     88         gen.loadCLDRData();
     89         gen.showCounts();
     90         gen.showCurrencies();
     91         gen.showLanguages();
     92         gen.showScripts();
     93         gen.showRegionCodeInfo();
     94         System.out.println("DONE");
     95     }
     96 
     97     private void showCounts() {
     98         System.out.format("Language Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
     99             "language").size());
    100         System.out.format("Script Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
    101             "script").size());
    102         System.out.format("Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR, sc.getGoodAvailableCodes(
    103             "territory").size());
    104     }
    105 
    106     private void showCurrencies() throws IOException {
    107         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
    108         Log.println();
    109         Log.println("Currency Data");
    110         Log.println();
    111         showGeneratedCommentStart(CODE_INDENT);
    112         compareSets("currencies from sup.data", currencyCodes, "valid currencies",
    113             validCurrencyCodes);
    114         Set<String> unused = new TreeSet<String>(validCurrencyCodes);
    115         unused.removeAll(currencyCodes);
    116         showCurrencies(currencyCodes);
    117         Log.println();
    118         showCurrencies(unused);
    119         Map<String, String> sorted = new TreeMap<String, String>(Collator
    120             .getInstance(ULocale.ENGLISH));
    121         for (String code : validCurrencyCodes) {
    122             if (unused.contains(code) && !code.equals("CLF"))
    123                 continue; // we include CLF for compatibility
    124             sorted.put(getName(code), code);
    125         }
    126         int lineLength = "  /** Belgian Franc */                                            BEF,"
    127             .length();
    128         for (String name : sorted.keySet()) {
    129             printRow(Log.getLog(), sorted.get(name), name, "currency", null,
    130                 lineLength);
    131         }
    132         showGeneratedCommentEnd(CODE_INDENT);
    133         Log.close();
    134     }
    135 
    136     private String getName(String code) {
    137         String result = english.getName(CLDRFile.CURRENCY_NAME, code);
    138         if (result == null) {
    139             result = code;
    140             System.out.println("Failed to find: " + code);
    141         }
    142         return result;
    143     }
    144 
    145     private void showCurrencies(Set<String> both) {
    146         // /** Afghani */ AFN,
    147         for (Iterator<String> it = both.iterator(); it.hasNext();) {
    148             String code = it.next();
    149             String englishName = getName(code);
    150             if (englishName == null) {
    151             }
    152             Set<String> regions = unlimitedCurrencyCodes.getAll(code);
    153             System.out
    154                 .println(code
    155                     + "\t"
    156                     + englishName
    157                     + "\t"
    158                     + (validCurrencyCodes.contains(code) ? currencyCodes
    159                         .contains(code) ? "" : "valid-only" : "supp-only")
    160                     + "\t"
    161                     + (regions != null ? regions : "unused"));
    162         }
    163     }
    164 
    165     private void showScripts() throws IOException {
    166         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
    167         Log.println();
    168         Log.println("Script Data");
    169         Log.println();
    170 
    171         showGeneratedCommentStart(CODE_INDENT);
    172         Map<String, String> code_replacements = new TreeMap<String, String>();
    173         int len = "  /** Arabic */                                        Arab,"
    174             .length();
    175         for (Iterator<String> it = scripts.iterator(); it.hasNext();) {
    176             String code = it.next();
    177             String englishName = english.getName(CLDRFile.SCRIPT_NAME, code);
    178             if (englishName == null)
    179                 continue;
    180             printRow(Log.getLog(), code, null, "script", code_replacements, len);
    181             // Log.println(" /**" + englishName + "*/ " + code + ",");
    182         }
    183         showGeneratedCommentEnd(CODE_INDENT);
    184         Log.close();
    185     }
    186 
    187     private void showLanguageInfo() throws IOException {
    188         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
    189         System.out.println();
    190         System.out.println("Language Converter");
    191         System.out.println();
    192         StringBuilder buffer = new StringBuilder();
    193         // language information
    194         for (String language : sc.getAvailableCodes("language")) {
    195             Scope scope = Iso639Data.getScope(language);
    196             if (scope == Scope.PrivateUse) {
    197                 continue;
    198             }
    199             buffer.setLength(0);
    200             String alpha3 = Iso639Data.toAlpha3(language);
    201             if (alpha3 != null) {
    202                 buffer.append(".add(\"" + alpha3 + "\")");
    203             }
    204             Type type = Iso639Data.getType(language);
    205             if (type != Type.Living) {
    206                 buffer.append(".add(Type." + type + ")");
    207             }
    208             if (scope != Scope.Individual) {
    209                 buffer.append(".add(Scope." + scope + ")");
    210             }
    211             if (buffer.length() > 0) {
    212                 Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
    213             }
    214         }
    215         Log.close();
    216     }
    217 
    218     private void showLanguages() throws IOException {
    219         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
    220         System.out.println();
    221         System.out.println("Language Data");
    222         System.out.println();
    223 
    224         for (Iterator<String> it = languages.iterator(); it.hasNext();) {
    225             String code = it.next();
    226             String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
    227             if (englishName == null)
    228                 continue;
    229             System.out.println("     /**" + englishName + "*/    " + code + ",");
    230         }
    231 
    232         showGeneratedCommentStart(LIST_INDENT);
    233         /*
    234          * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa
    235          * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + "
    236          * as ast ath aus av awa ay az ba bad bai bal ban bas bat be"
    237          */
    238         StringBuffer buffer = new StringBuffer();
    239         int lineLimit = 70 - LIST_INDENT.length();
    240         char lastChar = 0;
    241         for (Iterator<String> it = languages.iterator(); it.hasNext();) {
    242             String code = it.next();
    243             if (code.equals("root")) {
    244                 continue;
    245             }
    246             if (code.charAt(0) != lastChar
    247                 || buffer.length() + 1 + code.length() > lineLimit) {
    248                 if (buffer.length() != 0)
    249                     Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
    250                 buffer.setLength(0);
    251                 lastChar = code.charAt(0);
    252             }
    253             buffer.append(code).append(' ');
    254         }
    255         // remove the very last space
    256         if (buffer.charAt(buffer.length() - 1) == ' ') {
    257             buffer.setLength(buffer.length() - 1);
    258         }
    259         Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
    260 
    261         showGeneratedCommentEnd(LIST_INDENT);
    262         Log.close();
    263     }
    264 
    265     @SuppressWarnings("rawtypes")
    266     private Object join(Collection collection, String separator) {
    267         if (collection == null)
    268             return null;
    269         StringBuffer result = new StringBuffer();
    270         boolean first = true;
    271         for (Iterator it = collection.iterator(); it.hasNext();) {
    272             if (first)
    273                 first = false;
    274             else
    275                 result.append(separator);
    276             result.append(it.next());
    277         }
    278         return result.toString();
    279     }
    280 
    281     static NumberFormat threeDigit = new DecimalFormat("000");
    282 
    283     public void loadCLDRData() throws IOException {
    284         // BufferedReader codes = Utility.getUTF8Data("territory_codes.txt");
    285         // while (true) {
    286         // String line = codes.readLine();
    287         // if (line == null)
    288         // break;
    289         // line = line.split("#")[0].trim();
    290         // if (line.length() == 0)
    291         // continue;
    292         // String[] sourceValues = line.split("\\s+");
    293         // String[] values = new String[5];
    294         // for (int i = 0; i < values.length; ++i) {
    295         // if (i >= sourceValues.length || sourceValues[i].equals("-"))
    296         // values[i] = null;
    297         // else
    298         // values[i] = sourceValues[i];
    299         // }
    300         // String alpha2 = values[0];
    301         // cldrCodes.add(alpha2);
    302         // if (isPrivateUseRegion(alpha2))
    303         // continue;
    304         // String numeric = values[1];
    305         // String alpha3 = values[2];
    306         // String internet = values[3];
    307         // if (internet != null)
    308         // internet = internet.toUpperCase();
    309         // String fips10 = values[4];
    310         // String enumValue = enumName(alpha2);
    311         // enum_alpha3.put(enumValue, alpha3);
    312         // enum_UN.put(enumValue, numeric);
    313         // enum_FIPS10.put(enumValue, fips10);
    314         // enum_TLD.put(enumValue, internet);
    315         // }
    316         // codes.close();
    317         DecimalFormat threeDigits = new DecimalFormat("000");
    318         for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
    319             cldrCodes.add(value);
    320             if (isPrivateUseRegion(value)) continue;
    321             enum_UN.put(value,
    322                 threeDigits.format(supplementalDataInfo.getNumericTerritoryMapping().getAll(value).iterator().next()));
    323         }
    324         for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
    325             cldrCodes.add(value);
    326             if (isPrivateUseRegion(value)) continue;
    327             enum_alpha3.put(value, supplementalDataInfo.getAlpha3TerritoryMapping().getAll(value).iterator().next());
    328         }
    329 
    330         BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt");
    331         Map<String, String> macro_name = new TreeMap<String, String>();
    332         while (true) {
    333             String line = codes.readLine();
    334             if (line == null)
    335                 break;
    336             line = line.trim();
    337             if (line.length() == 0)
    338                 continue;
    339             if (line.charAt(0) < '0' || line.charAt(0) > '9') {
    340                 System.out.println("GenerateEnums: Skipping: " + line);
    341                 continue;
    342             }
    343             String[] sourceValues = line.split("\\s+");
    344             int code = Integer.parseInt(sourceValues[0]);
    345             String codeName = threeDigit.format(code);
    346             macro_name.put(codeName, line);
    347         }
    348         codes.close();
    349 //        String values = supplementalDataInfo.getValidityInfo().get("$territory").get1().trim();
    350         Map<Status, Set<String>> validRegions = Validity.getInstance().getStatusToCodes(LstrType.region);
    351         Set<String> regions = new TreeSet<String>();
    352         regions.addAll(validRegions.get(Status.regular));
    353         regions.addAll(validRegions.get(Status.macroregion));
    354 //        String[] validTerritories = values.split("\\s+");
    355 //        for (int i = 0; i < validTerritories.length; ++i) {
    356         for (String region : regions) {
    357             if (corrigendum.contains(region)) {
    358                 System.out.println("Skipping " + region + "\t\t"
    359                     + getEnglishName(region));
    360                 continue; // exception, corrigendum
    361             }
    362             if (isPrivateUseRegion(region))
    363                 continue;
    364             if (region.charAt(0) < 'A') {// numeric
    365                 enum_UN.put(enumName(region), region);
    366                 cldrCodes.add(region);
    367             } else {
    368                 if (enum_alpha3.get(region) == null) {
    369                     System.out.println("Missing alpha3 for: " + region);
    370                 }
    371             }
    372         }
    373         checkDuplicates(enum_UN);
    374         checkDuplicates(enum_alpha3);
    375         Set<String> availableCodes = new TreeSet<String>(sc.getAvailableCodes("territory"));
    376         compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
    377         Set<String> missing = new TreeSet<String>(availableCodes);
    378         missing.removeAll(cldrCodes);
    379         // don't care list: "003"
    380         // missing.remove("003");
    381         // missing.remove("172");
    382         // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
    383         missing.remove("EA");
    384         missing.remove("EZ");
    385         missing.remove("IC");
    386         missing.remove("QU");
    387         missing.remove("UN");
    388 
    389         if (missing.size() != 0) {
    390             throw new IllegalArgumentException("Codes in Registry but not in CLDR: "
    391                 + missing);
    392         }
    393 
    394         Set<String> UNValues = new TreeSet<String>(enum_UN.values());
    395 
    396         for (Iterator<String> it = macro_name.keySet().iterator(); it.hasNext();) {
    397             Object key = it.next();
    398             Object value = macro_name.get(key);
    399             if (!UNValues.contains(key)) {
    400                 System.out.println("Macro " + key + "\t" + value);
    401             }
    402 
    403         }
    404 
    405         for (Iterator<String> it = enum_UN.keySet().iterator(); it.hasNext();) {
    406             String region = it.next();
    407             String englishName = getEnglishName(region);
    408             if (englishName == null) {
    409                 englishName = "NULL"; // for debugging\
    410             }
    411             String rfcName = getRFC3066Name(region);
    412             if (!englishName.equals(rfcName)) {
    413                 System.out.println("Different names: {\"" + region + "\",\t\""
    414                     + englishName + " (" + rfcName + ")\"},");
    415             }
    416         }
    417 
    418         XPathParts parts = new XPathParts();
    419         getContainment();
    420 
    421         DateFormat[] simpleFormats = { new SimpleDateFormat("yyyy-MM-dd"),
    422             new SimpleDateFormat("yyyy-MM"), new SimpleDateFormat("yyyy"), };
    423         Date today = new Date();
    424         Date longAgo = new Date(1000 - 1900, 1, 1);
    425         currencyCodes = new TreeSet<String>();
    426         unlimitedCurrencyCodes = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
    427         for (Iterator<String> it = supplementalData
    428             .iterator("//supplementalData/currencyData/region"); it.hasNext();) {
    429             String path = it.next();
    430             parts.set(path);
    431             String region = parts.findAttributeValue("region", "iso3166");
    432             String code = parts.findAttributeValue("currency", "iso4217");
    433             String to = parts.findAttributeValue("currency", "to");
    434             main: if (to == null) {
    435                 unlimitedCurrencyCodes.put(code, region);
    436             } else {
    437                 for (int i = 0; i < simpleFormats.length; ++i) {
    438                     try {
    439                         Date foo = simpleFormats[i].parse(to);
    440                         if (foo.compareTo(longAgo) < 0) {
    441                             System.out.println("Date Error: can't parse " + to);
    442                             break main;
    443                         } else if (foo.compareTo(today) >= 0) {
    444                             unlimitedCurrencyCodes.put(code, region);
    445                         }
    446                         break main;
    447                     } catch (ParseException e) {
    448                     }
    449                 }
    450                 System.out.println("Date Error: can't parse " + to);
    451             }
    452             currencyCodes.add(code);
    453         }
    454 
    455         validCurrencyCodes = new TreeSet<String>();
    456         Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
    457         for (String code : bcp47CurrencyCodes) {
    458             validCurrencyCodes.add(code.toUpperCase());
    459         }
    460 
    461         scripts = supplementalDataInfo.getCLDRScriptCodes();
    462         languages = supplementalDataInfo.getCLDRLanguageCodes();
    463 
    464         // Set availableCodes = new TreeSet(sc.getAvailableCodes("territory"));
    465         // availableCodes.add("003");
    466         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
    467         // String code = (String) next())
    468         // canonicalRegion_UN.put(alpha2, numeric);
    469         // }
    470 
    471         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
    472         // String code = (String)it.next();
    473         // RegionCode region = map_id_canonical_RFC.get(code);
    474         // if (region != null) continue; // skip others
    475         // region = new RegionCode(code);
    476         // map_id_canonical_RFC.put(code,region);
    477         // map_canonical_id_RFC.put(region,code);
    478         // if ("A".compareTo(code) > 0) {
    479         // map_id_canonical_UN.put(code,region);
    480         // map_canonical_id_UN.put(region,code);
    481         // } else {
    482         // map_id_canonical_A2.put(code,region);
    483         // map_canonical_id_A2.put(region,code);
    484         // }
    485         // }
    486         // for (Iterator it = goodAvailableCodes.iterator(); it.hasNext();) {
    487         // String code = (String)it.next();
    488         // good.add(getInstance(code));
    489         // }
    490     }
    491 
    492     public void getContainment() {
    493         XPathParts parts = new XPathParts();
    494         // <group type="001" contains="002 009 019 142 150"/> <!--World -->
    495         for (Iterator<String> it = supplementalData
    496             .iterator("//supplementalData/territoryContainment/group"); it.hasNext();) {
    497             String path = it.next();
    498             String fullPath = supplementalData.getFullXPath(path);
    499             parts.set(fullPath);
    500             String container = parts.getAttributeValue(parts.size() - 1, "type");
    501             final String containedString = parts.getAttributeValue(-1, "contains");
    502             List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
    503             containment.put(container, contained);
    504         }
    505         // fix recursiveContainment.
    506         // for (String region : (Collection<String>)containment.keySet()) {
    507         // Set temp = new LinkedHashSet();
    508         // addContains(region, temp);
    509         // recursiveContainment.put(region, temp);
    510         // }
    511         Set<String> startingFromWorld = new TreeSet<String>();
    512         addContains("001", startingFromWorld);
    513         compareSets("World", startingFromWorld, "CLDR", cldrCodes);
    514         // generateContains();
    515     }
    516 
    517     private void generateContains() {
    518 
    519         for (String region : containment.keySet()) {
    520             List<String> plain = containment.get(region);
    521             // Collection recursive = (Collection)recursiveContainment.get(region);
    522 
    523             String setAsString = CldrUtility.join(plain, " ");
    524             // String setAsString2 = recursive.equals(plain) ? "" : ", " +
    525             // Utility.join(recursive," ");
    526             Log.println("\t\tadd(\"" + region + "\", \"" + setAsString + "\");");
    527         }
    528     }
    529 
    530     Map<String, List<String>> containment = new TreeMap<String, List<String>>();
    531 
    532     // Map recursiveContainment = new TreeMap();
    533 
    534     private void addContains(String string, Set<String> startingFromWorld) {
    535         startingFromWorld.add(string);
    536         List<String> contained = (List<String>) containment.get(string);
    537         if (contained == null)
    538             return;
    539         for (Iterator<String> it = contained.iterator(); it.hasNext();) {
    540             addContains(it.next(), startingFromWorld);
    541         }
    542     }
    543 
    544     @SuppressWarnings("rawtypes")
    545     private void compareSets(String name, Set availableCodes, String name2,
    546         Set cldrCodes) {
    547         Set temp = new TreeSet();
    548         temp.addAll(availableCodes);
    549         temp.removeAll(cldrCodes);
    550         System.out.println("In " + name + " but not in " + name2 + ": " + temp);
    551         temp.clear();
    552         temp.addAll(cldrCodes);
    553         temp.removeAll(availableCodes);
    554         System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
    555     }
    556 
    557     @SuppressWarnings("rawtypes")
    558     private void checkDuplicates(Map m) {
    559         Map backMap = new HashMap();
    560         for (Iterator it = m.keySet().iterator(); it.hasNext();) {
    561             Object key = it.next();
    562             Object o = m.get(key);
    563             Object otherKey = backMap.get(o);
    564             if (otherKey != null)
    565                 System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t"
    566                     + o);
    567             else
    568                 backMap.put(o, key);
    569         }
    570     }
    571 
    572     Set<String> corrigendum = new TreeSet<String>(Arrays.asList(new String[] { "QE", "833",
    573         "830", "172" })); // 003, 419
    574 
    575     private Map extraNames = CollectionUtilities.asMap(new String[][] {
    576         { "BU", "Burma" }, { "TP", "East Timor" }, { "YU", "Yugoslavia" },
    577         { "ZR", "Zaire" }, { "CD", "Congo (Kinshasa, Democratic Republic)" },
    578         { "CI", "Ivory Coast (Cote d'Ivoire)" },
    579         { "FM", "Micronesia (Federated States)" },
    580         { "TL", "East Timor (Timor-Leste)" },
    581         // {"155","Western Europe"},
    582 
    583     });
    584 
    585     private Set<String> currencyCodes;
    586 
    587     private Set<String> validCurrencyCodes;
    588 
    589     static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo
    590         .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
    591 
    592     /**
    593      * Get the RegionCode Enum
    594      *
    595      * @throws IOException
    596      */
    597     private void showRegionCodeInfo() throws IOException {
    598         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
    599         System.out.println();
    600         System.out.println("Data for RegionCode");
    601         System.out.println();
    602         showGeneratedCommentStart(CODE_INDENT);
    603 
    604         Set<String> reordered = new TreeSet<String>(new LengthFirstComparator());
    605         reordered.addAll(enum_UN.keySet());
    606         Map<String, String> code_replacements = new TreeMap<String, String>();
    607         int len = "  /** Polynesia */                                    UN061,"
    608             .length();
    609         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
    610             String region = it.next();
    611             printRow(Log.getLog(), region, null, "territory", code_replacements, len);
    612         }
    613         showGeneratedCommentEnd(CODE_INDENT);
    614         Log.close();
    615 
    616         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
    617         Log.println();
    618         Log.println("Data for ISO Region Codes");
    619         Log.println();
    620         for (String territory : supplementalDataInfo
    621             .getTerritoriesWithPopulationData()) {
    622             if (territory.equals("ZZ")) {
    623                 continue;
    624             }
    625             PopulationData popData = supplementalDataInfo
    626                 .getPopulationDataForTerritory(territory);
    627             // to("ak").add(Scope.Macrolanguage).add("aka");
    628             Log.formatln("    addRegion(RegionCode.%s, %s, %s, %s) // %s", territory,
    629                 format(popData.getPopulation()), format(popData
    630                     .getLiteratePopulation()
    631                     / popData.getPopulation()),
    632                 format(popData.getGdp()), english
    633                     .getName("territory", territory));
    634             // remove all the ISO 639-3 until they are part of BCP 47
    635             // we need to remove in earlier pass so we have the count
    636             Set<String> languages = new TreeSet<String>();
    637             for (String language : supplementalDataInfo
    638                 .getLanguagesForTerritoryWithPopulationData(territory)) {
    639                 if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
    640                     continue;
    641                 }
    642                 popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
    643                     language, territory);
    644                 if (popData.getPopulation() == 0
    645                     || Double.isNaN(popData.getLiteratePopulation()
    646                         / popData.getPopulation())) {
    647                     continue;
    648                 }
    649                 languages.add(language);
    650             }
    651             int count = languages.size();
    652             for (String language : languages) {
    653                 --count; // we need to know the last one
    654                 popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
    655                     language, territory);
    656                 Log.formatln("    .addLanguage(\"%s\", %s, %s)%s // %s", language,
    657                     format(popData.getPopulation()), format(popData
    658                         .getLiteratePopulation()
    659                         / popData.getPopulation()),
    660                     (count == 0 ? ";" : ""), english
    661                         .getName(language));
    662             }
    663         }
    664         Log.close();
    665 
    666         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
    667         Log.println();
    668         Log.println("Data for ISO Region Codes");
    669         Log.println();
    670         showGeneratedCommentStart(DATA_INDENT);
    671         // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are
    672         // containees
    673         reordered = new TreeSet<String>(new DeprecatedAndLengthFirstComparator("territory"));
    674         reordered.addAll(enum_UN.keySet());
    675         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
    676             String region = it.next();
    677             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
    678             // UN
    679             // name
    680             // int un = Integer.parseInt((String) enum_UN.get(region)); // get around
    681             // dumb octal
    682             // syntax
    683             String isoCode = (String) enum_alpha3.get(region);
    684             if (isoCode == null)
    685                 continue;
    686             Log.println(DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode."
    687                 + region + ");");
    688         }
    689         doAliases(code_replacements);
    690         showGeneratedCommentEnd(DATA_INDENT);
    691         Log.println();
    692         Log.println("Data for M.49 Region Codes");
    693         Log.println();
    694         showGeneratedCommentStart(DATA_INDENT);
    695 
    696         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
    697             String region = it.next();
    698             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
    699             // UN
    700             // name
    701             int un = Integer.parseInt((String) enum_UN.get(region), 10); // get
    702             // around
    703             // dumb
    704             // octal
    705             // syntax
    706             Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region
    707                 + ");");
    708         }
    709         doAliases(code_replacements);
    710 
    711         System.out.println("Plain list");
    712         for (Iterator<String> it = reordered.iterator(); it.hasNext();) {
    713             String region = it.next();
    714             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
    715             // UN
    716             // name
    717             String newCode = code_replacements.get(region);
    718             if (newCode != null)
    719                 continue;
    720 
    721             int un = Integer.parseInt((String) enum_UN.get(region), 10); // get
    722             // around
    723             // dumb
    724             // octal
    725             // syntax
    726             System.out.println(un + "\t" + region + "\t"
    727                 + english.getName("territory", region));
    728         }
    729 
    730         showGeneratedCommentEnd(DATA_INDENT);
    731 
    732         getContainment();
    733         Log.close();
    734     }
    735 
    736     static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);
    737 
    738     static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);
    739     static {
    740         nf.setMaximumFractionDigits(3);
    741         sf.setMaximumFractionDigits(3);
    742         nf.setGroupingUsed(false);
    743     }
    744 
    745     private String format(double value) {
    746         double newValue = CldrUtility.roundToDecimals(value, 3);
    747         String option1 = nf.format(newValue);
    748         String option2 = sf.format(value);
    749         return option1.length() <= option2.length() ? option1 : option2;
    750     }
    751 
    752     private void doAliases(Map<String, String> code_replacements) {
    753         for (String code : code_replacements.keySet()) {
    754             String newCode = code_replacements.get(code);
    755             if (newCode.length() == 0)
    756                 newCode = "ZZ";
    757             Log.println(DATA_INDENT + "addAlias(" + "RegionCode." + code + ", \""
    758                 + newCode + "\");");
    759         }
    760     }
    761 
    762     private void showGeneratedCommentEnd(String indent) {
    763         Log.println(indent + "/* End of generated code. */");
    764     }
    765 
    766     private void showGeneratedCommentStart(String indent) {
    767         Log.println(indent + "/*");
    768         Log.println(indent
    769             + " * The following information is generated from a tool,");
    770         Log.println(indent + " * as described on");
    771         Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
    772         Log.println(indent + " * Do not edit manually.");
    773         Log.println(indent + " * Start of generated code.");
    774         Log.println(indent + " */");
    775     }
    776 
    777     public final static class LengthFirstComparator implements Comparator<Object> {
    778         public int compare(Object a, Object b) {
    779             String as = a.toString();
    780             String bs = b.toString();
    781             if (as.length() < bs.length())
    782                 return -1;
    783             if (as.length() > bs.length())
    784                 return 1;
    785             return as.compareTo(bs);
    786         }
    787     }
    788 
    789     public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
    790         String type;
    791 
    792         DeprecatedAndLengthFirstComparator(String type) {
    793             this.type = type;
    794         }
    795 
    796         public int compare(Object a, Object b) {
    797             String as = a.toString();
    798             String bs = b.toString();
    799             String ar = getDeprecatedReplacement(type, as);
    800             String br = getDeprecatedReplacement(type, bs);
    801             // put the deprecated ones first, eg those that aren't null
    802             if (ar != null) {
    803                 if (br == null)
    804                     return -1;
    805             }
    806             if (br != null) {
    807                 if (ar == null)
    808                     return 1;
    809             }
    810             // now check the length
    811             if (as.length() < bs.length())
    812                 return -1;
    813             if (as.length() > bs.length())
    814                 return 1;
    815             return as.compareTo(bs);
    816         }
    817     }
    818 
    819     /**
    820      * Returns null if not deprecated, otherwise "" if there is no replacement,
    821      * otherwise the replacement.
    822      *
    823      * @return
    824      */
    825     public String getDeprecatedReplacement(String type, String cldrTypeValue) {
    826         if (type.equals("currency")) {
    827             return null;
    828         }
    829         String path = supplementalMetadata.getFullXPath(
    830             "//supplementalData/metadata/alias/" + type + "Alias[@type=\""
    831                 + cldrTypeValue + "\"]",
    832             true);
    833         if (path == null)
    834             return null;
    835         String replacement = new XPathParts().set(path).findAttributeValue(
    836             "territoryAlias", "replacement");
    837         if (replacement == null)
    838             return "";
    839         return replacement;
    840     }
    841 
    /**
     * Transliterator applied to the resolved English name in {@code printRow} before
     * the name is written into a Javadoc comment.
     * NOTE(review): the single rule's source set is written as an empty set ({@code []}),
     * so this presumably leaves text unchanged and acts as a placeholder - confirm.
     */
    static Transliterator doFallbacks = Transliterator.createFromRules("id",
        "[] > ''; ", Transliterator.FORWARD);
    844 
    845     private void printRow(PrintWriter out, String codeName, String englishName,
    846         String type, Map<String, String> code_replacements, int lineLength) {
    847         // int numeric = Integer.parseInt((String) enum_UN.get(codeName));
    848         // String alpha3 = (String) enum_alpha3.get(codeName);
    849         String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix
    850         // UN
    851         // name
    852         String replacement = getDeprecatedReplacement(type, cldrName);
    853 
    854         String resolvedEnglishName = englishName != null ? englishName : type
    855             .equals("territory") ? getEnglishName(codeName) : type
    856                 .equals("currency") ? getName(codeName) : english.getName(CLDRFile.SCRIPT_NAME, codeName);
    857         resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);
    858 
    859         String prefix = CODE_INDENT + "/** " + resolvedEnglishName; // + " - " +
    860         // threeDigit.format(numeric);
    861         String printedCodeName = codeName;
    862         if (replacement != null) {
    863             code_replacements.put(codeName, replacement);
    864             out.println(prefix);
    865             prefix = CODE_INDENT + " * @deprecated"
    866                 + (replacement.length() == 0 ? "" : " see " + replacement);
    867             printedCodeName = "@Deprecated " + printedCodeName;
    868         }
    869         prefix += " */";
    870 
    871         if (codeName.equals("UN001")) {
    872             out.println();
    873         }
    874         if (prefix.length() > lineLength - (printedCodeName.length() + 1)) {
    875             // break at last space
    876             int lastFit = prefix.lastIndexOf(' ', lineLength
    877                 - (printedCodeName.length() + 1) - 2);
    878             out.println(prefix.substring(0, lastFit));
    879             prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
    880         }
    881         out.print(prefix);
    882         out.print(Utility.repeat(" ", (lineLength
    883             - (prefix.length() + printedCodeName.length() + 1))));
    884         out.println(printedCodeName + ",");
    885     }
    886 
    887     private String getEnglishName(String codeName) {
    888         if (codeName.length() > 3)
    889             codeName = codeName.substring(2); // fix UN name
    890         String name = (String) extraNames.get(codeName);
    891         if (name != null)
    892             return name;
    893         name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
    894         if (name != null)
    895             return name;
    896         return codeName;
    897     }
    898 
    899     private String getRFC3066Name(String codeName) {
    900         if (codeName.length() > 2)
    901             codeName = codeName.substring(2); // fix UN name
    902         List<String> list = sc.getFullData("territory", codeName);
    903         if (list == null)
    904             return null;
    905         return (String) list.get(0);
    906     }
    907 
    908     private String enumName(String codeName) {
    909         return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
    910     }
    911 
    912     static String quote(Object input) {
    913         if (input != null)
    914             return '"' + input.toString().trim() + '"';
    915         return null;
    916     }
    917 
    918     static boolean isPrivateUseRegion(String codeName) {
    919         // AA, QM..QZ, XA..XZ, ZZ - CLDR codes
    920         if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
    921             return false;
    922         } else if (codeName.equals("AA") || codeName.equals("ZZ")) {
    923             return true;
    924         } else if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
    925             return true;
    926         } else if (codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0) {
    927             return true;
    928         }
    929         return false;
    930     }
    931     /*
    932      * <reset before="tertiary"></reset> <x><context></context><t></t></x>
    933      * <x><context></context><i></i></x>
    934      *
    935      * <x><context></context><i></i></x> <x><context></context><i></i></x>
    936      * <x><context></context><i></i></x> <x><context></context><i></i></x>
    937      * <x><context></context><i></i></x>
    938      *
    939      * <x><context></context><i></i></x> <x><context></context><i></i><extend></extend></x>
    940      * <x><context></context><i></i><extend></extend></x> <x><context></context><i></i><extend></extend></x>
    941      *
    942      * <x><context></context><i></i><extend></extend></x> <x><context></context><i></i><extend></extend></x>
    943      * <x><context></context><i></i><extend></extend></x> <x><context></context><i></i><extend></extend></x>
    944      *
    945      * <x><context></context><i></i><extend></extend></x>
    946      */
    947 }