Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUUncheckedIOException;
     17 
     18 public class IsoRegionData {
     19     static Map<String, String> _numeric = new HashMap<String, String>();
     20     static Map<String, String> _alpha3 = new HashMap<String, String>();
     21     static Map<String, String> _fips10 = new HashMap<String, String>();
     22     static Map<String, String> _internet = new HashMap<String, String>();
     23     static Set<String> other_internet = new TreeSet<String>();
     24     static Set<String> available = new HashSet<String>();
     25 
     26     static final UnicodeSet NMTOKEN = new UnicodeSet(
     27         "[\\-.0-\\:A-Z_a-z\\u00B7\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u037D\\u037F-\\u1FFF\\u200C\\u200D\\u203F\\u2040\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD\\U00010000-\\U000EFFFF]")
     28             .freeze();
     29 
     30     static {
     31         /*
     32          * # RFC3066; UN Numeric; ISO3166 Alpha-3, internet, FIPS-10
     33          * # whitespace delimited: - for empty
     34          * # See http://unstats.un.org/unsd/methods/m49/m49regin.htm
     35          * # and http://www.iso.org/iso/en/prods-services/iso3166ma/01whats-new/index.html
     36          * # See also http://www.cia.gov/cia/publications/factbook/appendix/appendix-d.html
     37          * # and http://data.iana.org/TLD/tlds-alpha-by-domain.txt for the latest domains
     38          * # and http://www.iana.org/cctld/cctld-whois.htm
     39          * # and https://www.icmwg.org/ccwg/documents/ISO3166-FIPS10-A2-Mapping/3166-1-A2--to-FIPS10-A2-mapping.htm
     40          * # for FIPS: http://earth-info.nga.mil/gns/html/fips_files.html
     41          * RS 688 SRB rs RB
     42          */
     43         try {
     44             BufferedReader codes;
     45             codes = CldrUtility.getUTF8Data("tlds-alpha-by-domain.txt");
     46 
     47             while (true) {
     48                 String line = codes.readLine();
     49                 if (line == null)
     50                     break;
     51                 line = line.split("#")[0].trim();
     52                 if (line.length() == 0)
     53                     continue;
     54                 // if (line.startsWith("XN--")) {
     55                 // try {
     56                 // line = Punycode.decode(line.substring(4), null).toString();
     57                 // if (!NMTOKEN.containsAll(line)) {
     58                 // System.err.println("!NMTOKEN:" + line);
     59                 // continue;
     60                 // }
     61                 // } catch (StringPrepParseException e) {
     62                 // throw new IllegalArgumentException(e);
     63                 // }
     64                 // }
     65                 other_internet.add(line);
     66             }
     67             codes.close();
     68 
     69             Set<String> errors = new LinkedHashSet<String>();
     70             codes = CldrUtility.getUTF8Data("territory_codes.txt");
     71             while (true) {
     72                 String line = codes.readLine();
     73                 if (line == null)
     74                     break;
     75                 line = line.split("#")[0].trim();
     76                 if (line.length() == 0)
     77                     continue;
     78                 String[] sourceValues = line.split("\\s+");
     79                 String[] values = new String[5];
     80                 for (int i = 0; i < values.length; ++i) {
     81                     if (i >= sourceValues.length || sourceValues[i].equals("-")) {
     82                         values[i] = null;
     83                     } else {
     84                         values[i] = sourceValues[i];
     85                     }
     86                 }
     87                 String alpha2 = values[0];
     88                 String numeric = values[1];
     89                 String alpha3 = values[2];
     90                 String internet = values[3];
     91                 if (internet != null) {
     92                     internet = internet.toUpperCase();
     93                     LinkedHashSet<String> internetStrings = new LinkedHashSet<String>(
     94                         Arrays.asList(internet.split("/")));
     95                     if (!other_internet.containsAll(internetStrings)) {
     96                         errors.addAll(internetStrings);
     97                         errors.removeAll(other_internet);
     98                     }
     99                     other_internet.removeAll(internetStrings);
    100                     internet = CollectionUtilities.join(internetStrings, " ");
    101                 }
    102                 String fips10 = values[4];
    103                 _numeric.put(alpha2, numeric);
    104                 _alpha3.put(alpha2, alpha3);
    105                 _fips10.put(alpha2, fips10);
    106                 _internet.put(alpha2, internet);
    107             }
    108             codes.close();
    109             if (errors.size() != 0) {
    110                 throw new IllegalArgumentException("Internet values illegal: " + errors);
    111             }
    112         } catch (IOException e) {
    113             throw new ICUUncheckedIOException(e);
    114         }
    115         _internet.put("ZZ", CollectionUtilities.join(other_internet, " "));
    116 
    117         other_internet = Collections.unmodifiableSet(other_internet);
    118 
    119         available.addAll(_numeric.keySet());
    120         available.addAll(_alpha3.keySet());
    121         available.addAll(_fips10.keySet());
    122         available.addAll(_internet.keySet());
    123 
    124         _numeric = Collections.unmodifiableMap(_numeric);
    125         _alpha3 = Collections.unmodifiableMap(_alpha3);
    126         _fips10 = Collections.unmodifiableMap(_fips10);
    127         _internet = Collections.unmodifiableMap(_internet);
    128         available = Collections.unmodifiableSet(available);
    129     }
    130 
    131     public static String getNumeric(String countryCodeAlpha2) {
    132         return _numeric.get(countryCodeAlpha2);
    133     }
    134 
    135     public static String get_alpha3(String countryCodeAlpha2) {
    136         return _alpha3.get(countryCodeAlpha2);
    137     }
    138 
    139     public static String get_fips10(String countryCodeAlpha2) {
    140         return _fips10.get(countryCodeAlpha2);
    141     }
    142 
    143     public static String get_internet(String countryCodeAlpha2) {
    144         return _internet.get(countryCodeAlpha2);
    145     }
    146 
    147     public static Set<String> getOtherInternet() {
    148         return other_internet;
    149     }
    150 
    151     public static Set<String> getAvailable() {
    152         return available;
    153     }
    154 }
    155