Home | History | Annotate | Download | only in util
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2011-2016, International Business Machines Corporation
      6  * All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.util;
     10 
     11 import java.util.ArrayList;
     12 import java.util.Arrays;
     13 import java.util.Collections;
     14 import java.util.HashMap;
     15 import java.util.List;
     16 import java.util.Map;
     17 import java.util.Set;
     18 import java.util.TreeSet;
     19 
     20 import com.ibm.icu.impl.ICUData;
     21 import com.ibm.icu.impl.ICUResourceBundle;
     22 
     23 /**
     24  * <code>Region</code> is the class representing a Unicode Region Code, also known as a
     25  * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of
     26  * "regions" as "countries" when defining the characteristics of a locale.  Region codes There are different
     27  * types of region codes that are important to distinguish.
     28  * <p>
     29  *  Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or
     30  *  selected economic and other grouping" as defined in
     31  *  UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm).
     32  *  These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO
     33  *  added for Outlying Oceania.  Not all UNM.49 codes are defined in LDML, but most of them are.
     34  *  Macroregions are represented in ICU by one of three region types: WORLD ( region code 001 ),
     35  *  CONTINENTS ( regions contained directly by WORLD ), and SUBCONTINENTS ( things contained directly
     36  *  by a continent ).
     37  *  <p>
     38  *  TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also
     39  *  include areas that are not separate countries, such as the code "AQ" for Antarctica or the code
     40  *  "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate
     41  *  codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows
     42  *  for the use of 3-digit codes in the future.
     43  *  <p>
     44  *  UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown,
     45  *  or that the value supplied as a region was invalid.
     46  *  <p>
     47  *  DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage,
     48  *  usually due to a country splitting into multiple territories or changing its name.
     49  *  <p>
     50  *  GROUPING - A widely understood grouping of territories that has a well defined membership such
     51  *  that a region code has been assigned for it.  Some of these are UNM.49 codes that do't fall into
     52  *  the world/continent/sub-continent hierarchy, while others are just well known groupings that have
     53  *  their own region code. Region "EU" (European Union) is one such region code that is a grouping.
     54  *  Groupings will never be returned by the getContainingRegion() API, since a different type of region
     55  *  ( WORLD, CONTINENT, or SUBCONTINENT ) will always be the containing region instead.
     56  *
     57  * @author       John Emmons
     58  * @stable ICU 50
     59  */
     60 
     61 public class Region implements Comparable<Region> {
     62 
     63     /**
     64      * RegionType is an enumeration defining the different types of regions.  Current possible
     65      * values are WORLD, CONTINENT, SUBCONTINENT, TERRITORY, GROUPING, DEPRECATED, and UNKNOWN.
     66      *
     67      * @stable ICU 50
     68      */
     69 
     70     public enum RegionType {
     71         /**
     72          * Type representing the unknown region.
     73          * @stable ICU 50
     74          */
     75         UNKNOWN,
     76 
     77         /**
     78          * Type representing a territory.
     79          * @stable ICU 50
     80          */
     81         TERRITORY,
     82 
     83         /**
     84          * Type representing the whole world.
     85          * @stable ICU 50
     86          */
     87         WORLD,
     88         /**
     89          * Type representing a continent.
     90          * @stable ICU 50
     91          */
     92         CONTINENT,
     93         /**
     94          * Type representing a sub-continent.
     95          * @stable ICU 50
     96          */
     97         SUBCONTINENT,
     98         /**
     99          * Type representing a grouping of territories that is not to be used in
    100          * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree.
    101          * @stable ICU 50
    102          */
    103         GROUPING,
    104         /**
    105          * Type representing a region whose code has been deprecated, usually
    106          * due to a country splitting into multiple territories or changing its name.
    107          * @stable ICU 50
    108          */
    109         DEPRECATED,
    110     }
    111 
    112     private String id;
    113     private int code;
    114     private RegionType type;
    115     private Region containingRegion = null;
    116     private Set<Region> containedRegions = new TreeSet<Region>();
    117     private List<Region> preferredValues = null;
    118 
    119     private static boolean regionDataIsLoaded = false;
    120 
    121     private static Map<String,Region> regionIDMap = null;       // Map from ID the regions
    122     private static Map<Integer,Region> numericCodeMap = null;   // Map from numeric code to the regions
    123     private static Map<String,Region> regionAliases = null;     // Aliases
    124 
    125     private static ArrayList<Region> regions = null;            // This is the main data structure where the Regions are stored.
    126     private static ArrayList<Set<Region>> availableRegions = null;
    127 
    128     private static final String UNKNOWN_REGION_ID = "ZZ";
    129     private static final String OUTLYING_OCEANIA_REGION_ID = "QO";
    130     private static final String WORLD_ID = "001";
    131 
    132     /*
    133      * Private default constructor.  Use factory methods only.
    134      */
    135     private Region () {}
    136 
    137     /*
    138      * Initializes the region data from the ICU resource bundles.  The region data
    139      * contains the basic relationships such as which regions are known, what the numeric
    140      * codes are, any known aliases, and the territory containment data.
    141      *
    142      * If the region data has already loaded, then this method simply returns without doing
    143      * anything meaningful.
    144      *
    145      */
    146     private static synchronized void loadRegionData() {
    147 
    148         if ( regionDataIsLoaded ) {
    149             return;
    150         }
    151 
    152         regionAliases = new HashMap<String,Region>();
    153         regionIDMap = new HashMap<String,Region>();
    154         numericCodeMap = new HashMap<Integer,Region>();
    155 
    156         availableRegions = new ArrayList<Set<Region>>(RegionType.values().length);
    157 
    158 
    159         UResourceBundle metadataAlias = null;
    160         UResourceBundle territoryAlias = null;
    161         UResourceBundle codeMappings = null;
    162         UResourceBundle idValidity = null;
    163         UResourceBundle regionList = null;
    164         UResourceBundle regionRegular = null;
    165         UResourceBundle regionMacro = null;
    166         UResourceBundle regionUnknown = null;
    167         UResourceBundle worldContainment = null;
    168         UResourceBundle territoryContainment = null;
    169         UResourceBundle groupingContainment = null;
    170 
    171         UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    172         metadataAlias = metadata.get("alias");
    173         territoryAlias = metadataAlias.get("territory");
    174 
    175         UResourceBundle supplementalData = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    176         codeMappings = supplementalData.get("codeMappings");
    177         idValidity = supplementalData.get("idValidity");
    178         regionList = idValidity.get("region");
    179         regionRegular = regionList.get("regular");
    180         regionMacro = regionList.get("macroregion");
    181         regionUnknown = regionList.get("unknown");
    182 
    183         territoryContainment = supplementalData.get("territoryContainment");
    184         worldContainment = territoryContainment.get("001");
    185         groupingContainment = territoryContainment.get("grouping");
    186 
    187         String[] continentsArr = worldContainment.getStringArray();
    188         List<String> continents = Arrays.asList(continentsArr);
    189         String[] groupingArr = groupingContainment.getStringArray();
    190         List<String> groupings = Arrays.asList(groupingArr);
    191         List<String> regionCodes = new ArrayList<String>();
    192 
    193         List<String> allRegions = new ArrayList<String>();
    194         allRegions.addAll(Arrays.asList(regionRegular.getStringArray()));
    195         allRegions.addAll(Arrays.asList(regionMacro.getStringArray()));
    196         allRegions.add(regionUnknown.getString());
    197 
    198         for ( String r : allRegions ) {
    199             int rangeMarkerLocation = r.indexOf("~");
    200             if ( rangeMarkerLocation > 0 ) {
    201                 StringBuilder regionName = new StringBuilder(r);
    202                 char endRange = regionName.charAt(rangeMarkerLocation+1);
    203                 regionName.setLength(rangeMarkerLocation);
    204                 char lastChar = regionName.charAt(rangeMarkerLocation-1);
    205                 while ( lastChar <= endRange ) {
    206                     String newRegion = regionName.toString();
    207                     regionCodes.add(newRegion);
    208                     lastChar++;
    209                     regionName.setCharAt(rangeMarkerLocation-1,lastChar);
    210                 }
    211             } else {
    212                 regionCodes.add(r);
    213             }
    214         }
    215 
    216         regions = new ArrayList<Region>(regionCodes.size());
    217 
    218         // First process the region codes and create the master array of regions.
    219         for ( String id : regionCodes) {
    220             Region r = new Region();
    221             r.id = id;
    222             r.type = RegionType.TERRITORY; // Only temporary - figure out the real type later once the aliases are known.
    223             regionIDMap.put(id, r);
    224             if ( id.matches("[0-9]{3}")) {
    225                 r.code = Integer.valueOf(id).intValue();
    226                 numericCodeMap.put(r.code, r);
    227                 r.type = RegionType.SUBCONTINENT;
    228             } else {
    229                 r.code = -1;
    230             }
    231             regions.add(r);
    232         }
    233 
    234 
    235         // Process the territory aliases
    236         for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
    237             UResourceBundle res = territoryAlias.get(i);
    238             String aliasFrom = res.getKey();
    239             String aliasTo = res.get("replacement").getString();
    240 
    241             if ( regionIDMap.containsKey(aliasTo) && !regionIDMap.containsKey(aliasFrom) ) { // This is just an alias from some string to a region
    242                 regionAliases.put(aliasFrom, regionIDMap.get(aliasTo));
    243             } else {
    244                 Region r;
    245                 if ( regionIDMap.containsKey(aliasFrom) ) {  // This is a deprecated region
    246                     r = regionIDMap.get(aliasFrom);
    247                 } else { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
    248                     r = new Region();
    249                     r.id = aliasFrom;
    250                     regionIDMap.put(aliasFrom, r);
    251                     if ( aliasFrom.matches("[0-9]{3}")) {
    252                         r.code = Integer.valueOf(aliasFrom).intValue();
    253                         numericCodeMap.put(r.code, r);
    254                     } else {
    255                         r.code = -1;
    256                     }
    257                     regions.add(r);
    258                 }
    259                 r.type = RegionType.DEPRECATED;
    260                 List<String> aliasToRegionStrings = Arrays.asList(aliasTo.split(" "));
    261                 r.preferredValues = new ArrayList<Region>();
    262                 for ( String s : aliasToRegionStrings ) {
    263                     if (regionIDMap.containsKey(s)) {
    264                         r.preferredValues.add(regionIDMap.get(s));
    265                     }
    266                 }
    267             }
    268         }
    269 
    270         // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
    271         for ( int i = 0 ; i < codeMappings.getSize(); i++ ) {
    272             UResourceBundle mapping = codeMappings.get(i);
    273             if ( mapping.getType() == UResourceBundle.ARRAY ) {
    274                 String [] codeMappingStrings = mapping.getStringArray();
    275                 String codeMappingID = codeMappingStrings[0];
    276                 Integer codeMappingNumber = Integer.valueOf(codeMappingStrings[1]);
    277                 String codeMapping3Letter = codeMappingStrings[2];
    278 
    279                 if ( regionIDMap.containsKey(codeMappingID)) {
    280                     Region r = regionIDMap.get(codeMappingID);
    281                     r.code = codeMappingNumber.intValue();
    282                     numericCodeMap.put(r.code, r);
    283                     regionAliases.put(codeMapping3Letter, r);
    284                 }
    285             }
    286         }
    287 
    288         // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
    289         Region r;
    290         if ( regionIDMap.containsKey(WORLD_ID)) {
    291             r = regionIDMap.get(WORLD_ID);
    292             r.type = RegionType.WORLD;
    293         }
    294 
    295         if ( regionIDMap.containsKey(UNKNOWN_REGION_ID)) {
    296             r = regionIDMap.get(UNKNOWN_REGION_ID);
    297             r.type = RegionType.UNKNOWN;
    298         }
    299 
    300         for ( String continent : continents ) {
    301             if (regionIDMap.containsKey(continent)) {
    302                 r = regionIDMap.get(continent);
    303                 r.type = RegionType.CONTINENT;
    304             }
    305         }
    306 
    307         for ( String grouping : groupings ) {
    308             if (regionIDMap.containsKey(grouping)) {
    309                 r = regionIDMap.get(grouping);
    310                 r.type = RegionType.GROUPING;
    311             }
    312         }
    313 
    314         // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
    315         // even though it looks like a territory code.  Need to handle it here.
    316 
    317         if ( regionIDMap.containsKey(OUTLYING_OCEANIA_REGION_ID)) {
    318             r = regionIDMap.get(OUTLYING_OCEANIA_REGION_ID);
    319             r.type = RegionType.SUBCONTINENT;
    320         }
    321 
    322         // Load territory containment info from the supplemental data.
    323         for ( int i = 0 ; i < territoryContainment.getSize(); i++ ) {
    324             UResourceBundle mapping = territoryContainment.get(i);
    325             String parent = mapping.getKey();
    326             if (parent.equals("containedGroupings") || parent.equals("deprecated")) {
    327                 continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip.
    328                 // #11232 is to do something useful with these.
    329             }
    330             Region parentRegion = regionIDMap.get(parent);
    331             for ( int j = 0 ; j < mapping.getSize(); j++ ) {
    332                 String child = mapping.getString(j);
    333                 Region childRegion = regionIDMap.get(child);
    334                 if ( parentRegion != null && childRegion != null ) {
    335 
    336                     // Add the child region to the set of regions contained by the parent
    337                     parentRegion.containedRegions.add(childRegion);
    338 
    339                     // Set the parent region to be the containing region of the child.
    340                     // Regions of type GROUPING can't be set as the parent, since another region
    341                     // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
    342                     if ( parentRegion.getType() != RegionType.GROUPING) {
    343                         childRegion.containingRegion = parentRegion;
    344                     }
    345                 }
    346             }
    347         }
    348 
    349         // Create the availableRegions lists
    350 
    351         for (int i = 0 ; i < RegionType.values().length ; i++) {
    352             availableRegions.add(new TreeSet<Region>());
    353         }
    354 
    355         for ( Region ar : regions ) {
    356             Set<Region> currentSet = availableRegions.get(ar.type.ordinal());
    357             currentSet.add(ar);
    358             availableRegions.set(ar.type.ordinal(),currentSet);
    359         }
    360 
    361         regionDataIsLoaded = true;
    362     }
    363 
    364     /** Returns a Region using the given region ID.  The region ID can be either a 2-letter ISO code,
    365      * 3-letter ISO code,  UNM.49 numeric code, or other valid Unicode Region Code as defined by the CLDR.
    366      * @param id The id of the region to be retrieved.
    367      * @return The corresponding region.
    368      * @throws NullPointerException if the supplied id is null.
    369      * @throws IllegalArgumentException if the supplied ID cannot be canonicalized to a Region ID that is known by ICU.
    370      * @stable ICU 50
    371      */
    372 
    373     public static Region getInstance(String id) {
    374 
    375         if ( id == null ) {
    376             throw new NullPointerException();
    377         }
    378 
    379         loadRegionData();
    380 
    381         Region r = regionIDMap.get(id);
    382 
    383         if ( r == null ) {
    384             r = regionAliases.get(id);
    385         }
    386 
    387         if ( r == null ) {
    388             throw new IllegalArgumentException("Unknown region id: " + id);
    389         }
    390 
    391         if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) {
    392             r = r.preferredValues.get(0);
    393         }
    394 
    395         return r;
    396     }
    397 
    398 
    399     /** Returns a Region using the given numeric code as defined by UNM.49
    400      * @param code The numeric code of the region to be retrieved.
    401      * @return The corresponding region.
    402      * @throws IllegalArgumentException if the supplied numeric code is not recognized.
    403      * @stable ICU 50
    404      */
    405 
    406     public static Region getInstance(int code) {
    407 
    408         loadRegionData();
    409 
    410         Region r = numericCodeMap.get(code);
    411 
    412         if ( r == null ) { // Just in case there's an alias that's numeric, try to find it.
    413             String pad = "";
    414             if ( code < 10 ) {
    415                 pad = "00";
    416             } else if ( code < 100 ) {
    417                 pad = "0";
    418             }
    419             String id = pad + Integer.toString(code);
    420             r = regionAliases.get(id);
    421         }
    422 
    423         if ( r == null ) {
    424             throw new IllegalArgumentException("Unknown region code: " + code);
    425         }
    426 
    427         if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) {
    428             r = r.preferredValues.get(0);
    429         }
    430 
    431         return r;
    432     }
    433 
    434 
    435     /** Used to retrieve all available regions of a specific type.
    436      *
    437      * @param type The type of regions to be returned ( TERRITORY, MACROREGION, etc. )
    438      * @return An unmodifiable set of all known regions that match the given type.
    439      * @stable ICU 50
    440      */
    441 
    442     public static Set<Region> getAvailable(RegionType type) {
    443 
    444         loadRegionData();
    445         return Collections.unmodifiableSet(availableRegions.get(type.ordinal()));
    446     }
    447 
    448 
    449     /** Used to determine the macroregion that geographically contains this region.
    450      *
    451      * @return The region that geographically contains this region.  Returns NULL if this region is
    452      *  code "001" (World) or "ZZ" (Unknown region).  For example, calling this method with region "IT" (Italy)
    453      *  returns the region "039" (Southern Europe).
    454      * @stable ICU 50
    455      */
    456 
    457     public Region getContainingRegion() {
    458         loadRegionData();
    459         return containingRegion;
    460     }
    461 
    462     /** Used to determine the macroregion that geographically contains this region and that matches the given type.
    463      *
    464      * @return The region that geographically contains this region and matches the given type.  May return NULL if
    465      *  no containing region can be found that matches the given type.  For example, calling this method with region "IT" (Italy)
    466      *  and type CONTINENT returns the region "150" (Europe).
    467      * @stable ICU 50
    468      */
    469 
    470     public Region getContainingRegion(RegionType type) {
    471         loadRegionData();
    472         if ( containingRegion == null ) {
    473             return null;
    474         }
    475         if ( containingRegion.type.equals(type)) {
    476             return containingRegion;
    477         } else {
    478             return containingRegion.getContainingRegion(type);
    479         }
    480     }
    481 
    482     /** Used to determine the sub-regions that are contained within this region.
    483      *
    484      * @return An unmodifiable set containing all the regions that are immediate children
    485      * of this region in the region hierarchy.  These returned regions could be either macro
    486      * regions, territories, or a mixture of the two, depending on the containment data as defined
    487      * in CLDR.  This API may return an empty set if this region doesn't have any sub-regions.
    488      * For example, calling this method with region "150" (Europe) returns a set containing
    489      * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe)
    490      * - "154" (Northern Europe) and "155" (Western Europe).
    491      *
    492      * @stable ICU 50
    493      */
    494 
    495     public Set<Region> getContainedRegions() {
    496         loadRegionData();
    497         return Collections.unmodifiableSet(containedRegions);
    498     }
    499 
    500     /** Used to determine all the regions that are contained within this region and that match the given type
    501      *
    502      * @return An unmodifiable set containing all the regions that are children of this region
    503      * anywhere in the region hierarchy and match the given type.  This API may return an empty set
    504      * if this region doesn't have any sub-regions that match the given type.
    505      * For example, calling this method with region "150" (Europe) and type "TERRITORY" returns a set
    506      *  containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
    507      * @stable ICU 50
    508      */
    509 
    510     public Set<Region> getContainedRegions(RegionType type) {
    511 
    512         loadRegionData();
    513 
    514         Set<Region> result = new TreeSet<Region>();
    515         Set<Region> cr = getContainedRegions();
    516 
    517         for ( Region r : cr ) {
    518             if ( r.getType() == type ) {
    519                 result.add(r);
    520             } else {
    521                 result.addAll(r.getContainedRegions(type));
    522             }
    523         }
    524         return Collections.unmodifiableSet(result);
    525     }
    526 
    527     /**
    528      * @return For deprecated regions, return an unmodifiable list of the regions that are the preferred replacement regions for this region.
    529      * Returns null for a non-deprecated region.  For example, calling this method with region "SU" (Soviet Union) would
    530      * return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
    531      *
    532      * @stable ICU 50
    533      */
    534     public List<Region> getPreferredValues() {
    535 
    536         loadRegionData();
    537 
    538         if ( type == RegionType.DEPRECATED) {
    539             return Collections.unmodifiableList(preferredValues);
    540         } else {
    541             return null;
    542         }
    543     }
    544 
    545     /**
    546      * @return Returns true if this region contains the supplied other region anywhere in the region hierarchy.
    547      *
    548      * @stable ICU 50
    549      */
    550     public boolean contains(Region other) {
    551 
    552         loadRegionData();
    553 
    554         if (containedRegions.contains(other)) {
    555             return true;
    556         } else {
    557             for (Region cr : containedRegions) {
    558                 if (cr.contains(other)) {
    559                     return true;
    560                 }
    561             }
    562         }
    563 
    564         return false;
    565     }
    566 
    567     /** Returns the string representation of this region
    568      *
    569      * @return The string representation of this region, which is its ID.
    570      *
    571      * @stable ICU 50
    572      */
    573 
    574     public String toString() {
    575         return id;
    576     }
    577 
    578     /**
    579      * Returns the numeric code for this region
    580      *
    581      * @return The numeric code for this region. Returns a negative value if the given region does not have a numeric
    582      *         code assigned to it. This is a very rare case and only occurs for a few very small territories.
    583      *
    584      * @stable ICU 50
    585      */
    586 
    587     public int getNumericCode() {
    588         return code;
    589     }
    590 
    591     /** Returns this region's type.
    592      *
    593      * @return This region's type classification, such as MACROREGION or TERRITORY.
    594      *
    595      * @stable ICU 50
    596      */
    597 
    598     public RegionType getType() {
    599         return type;
    600     }
    601 
    602     /**
    603      * {@inheritDoc}
    604      * @stable ICU 50
    605      */
    606     public int compareTo(Region other) {
    607         return id.compareTo(other.id);
    608     }
    609 }
    610