1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011-2016, International Business Machines Corporation 6 * All Rights Reserved. 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.util; 10 11 import java.util.ArrayList; 12 import java.util.Arrays; 13 import java.util.Collections; 14 import java.util.HashMap; 15 import java.util.List; 16 import java.util.Map; 17 import java.util.Set; 18 import java.util.TreeSet; 19 20 import com.ibm.icu.impl.ICUData; 21 import com.ibm.icu.impl.ICUResourceBundle; 22 23 /** 24 * <code>Region</code> is the class representing a Unicode Region Code, also known as a 25 * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of 26 * "regions" as "countries" when defining the characteristics of a locale. Region codes There are different 27 * types of region codes that are important to distinguish. 28 * <p> 29 * Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or 30 * selected economic and other grouping" as defined in 31 * UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm). 32 * These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO 33 * added for Outlying Oceania. Not all UNM.49 codes are defined in LDML, but most of them are. 34 * Macroregions are represented in ICU by one of three region types: WORLD ( region code 001 ), 35 * CONTINENTS ( regions contained directly by WORLD ), and SUBCONTINENTS ( things contained directly 36 * by a continent ). 37 * <p> 38 * TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also 39 * include areas that are not separate countries, such as the code "AQ" for Antarctica or the code 40 * "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate 41 * codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows 42 * for the use of 3-digit codes in the future. 43 * <p> 44 * UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown, 45 * or that the value supplied as a region was invalid. 46 * <p> 47 * DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage, 48 * usually due to a country splitting into multiple territories or changing its name. 49 * <p> 50 * GROUPING - A widely understood grouping of territories that has a well defined membership such 51 * that a region code has been assigned for it. Some of these are UNM.49 codes that do't fall into 52 * the world/continent/sub-continent hierarchy, while others are just well known groupings that have 53 * their own region code. Region "EU" (European Union) is one such region code that is a grouping. 54 * Groupings will never be returned by the getContainingRegion() API, since a different type of region 55 * ( WORLD, CONTINENT, or SUBCONTINENT ) will always be the containing region instead. 56 * 57 * @author John Emmons 58 * @stable ICU 50 59 */ 60 61 public class Region implements Comparable<Region> { 62 63 /** 64 * RegionType is an enumeration defining the different types of regions. Current possible 65 * values are WORLD, CONTINENT, SUBCONTINENT, TERRITORY, GROUPING, DEPRECATED, and UNKNOWN. 66 * 67 * @stable ICU 50 68 */ 69 70 public enum RegionType { 71 /** 72 * Type representing the unknown region. 73 * @stable ICU 50 74 */ 75 UNKNOWN, 76 77 /** 78 * Type representing a territory. 79 * @stable ICU 50 80 */ 81 TERRITORY, 82 83 /** 84 * Type representing the whole world. 85 * @stable ICU 50 86 */ 87 WORLD, 88 /** 89 * Type representing a continent. 90 * @stable ICU 50 91 */ 92 CONTINENT, 93 /** 94 * Type representing a sub-continent. 95 * @stable ICU 50 96 */ 97 SUBCONTINENT, 98 /** 99 * Type representing a grouping of territories that is not to be used in 100 * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree. 101 * @stable ICU 50 102 */ 103 GROUPING, 104 /** 105 * Type representing a region whose code has been deprecated, usually 106 * due to a country splitting into multiple territories or changing its name. 107 * @stable ICU 50 108 */ 109 DEPRECATED, 110 } 111 112 private String id; 113 private int code; 114 private RegionType type; 115 private Region containingRegion = null; 116 private Set<Region> containedRegions = new TreeSet<Region>(); 117 private List<Region> preferredValues = null; 118 119 private static boolean regionDataIsLoaded = false; 120 121 private static Map<String,Region> regionIDMap = null; // Map from ID the regions 122 private static Map<Integer,Region> numericCodeMap = null; // Map from numeric code to the regions 123 private static Map<String,Region> regionAliases = null; // Aliases 124 125 private static ArrayList<Region> regions = null; // This is the main data structure where the Regions are stored. 126 private static ArrayList<Set<Region>> availableRegions = null; 127 128 private static final String UNKNOWN_REGION_ID = "ZZ"; 129 private static final String OUTLYING_OCEANIA_REGION_ID = "QO"; 130 private static final String WORLD_ID = "001"; 131 132 /* 133 * Private default constructor. Use factory methods only. 134 */ 135 private Region () {} 136 137 /* 138 * Initializes the region data from the ICU resource bundles. The region data 139 * contains the basic relationships such as which regions are known, what the numeric 140 * codes are, any known aliases, and the territory containment data. 141 * 142 * If the region data has already loaded, then this method simply returns without doing 143 * anything meaningful. 144 * 145 */ 146 private static synchronized void loadRegionData() { 147 148 if ( regionDataIsLoaded ) { 149 return; 150 } 151 152 regionAliases = new HashMap<String,Region>(); 153 regionIDMap = new HashMap<String,Region>(); 154 numericCodeMap = new HashMap<Integer,Region>(); 155 156 availableRegions = new ArrayList<Set<Region>>(RegionType.values().length); 157 158 159 UResourceBundle metadataAlias = null; 160 UResourceBundle territoryAlias = null; 161 UResourceBundle codeMappings = null; 162 UResourceBundle idValidity = null; 163 UResourceBundle regionList = null; 164 UResourceBundle regionRegular = null; 165 UResourceBundle regionMacro = null; 166 UResourceBundle regionUnknown = null; 167 UResourceBundle worldContainment = null; 168 UResourceBundle territoryContainment = null; 169 UResourceBundle groupingContainment = null; 170 171 UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER); 172 metadataAlias = metadata.get("alias"); 173 territoryAlias = metadataAlias.get("territory"); 174 175 UResourceBundle supplementalData = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER); 176 codeMappings = supplementalData.get("codeMappings"); 177 idValidity = supplementalData.get("idValidity"); 178 regionList = idValidity.get("region"); 179 regionRegular = regionList.get("regular"); 180 regionMacro = regionList.get("macroregion"); 181 regionUnknown = regionList.get("unknown"); 182 183 territoryContainment = supplementalData.get("territoryContainment"); 184 worldContainment = territoryContainment.get("001"); 185 groupingContainment = territoryContainment.get("grouping"); 186 187 String[] continentsArr = worldContainment.getStringArray(); 188 List<String> continents = Arrays.asList(continentsArr); 189 String[] groupingArr = groupingContainment.getStringArray(); 190 List<String> groupings = Arrays.asList(groupingArr); 191 List<String> regionCodes = new ArrayList<String>(); 192 193 List<String> allRegions = new ArrayList<String>(); 194 allRegions.addAll(Arrays.asList(regionRegular.getStringArray())); 195 allRegions.addAll(Arrays.asList(regionMacro.getStringArray())); 196 allRegions.add(regionUnknown.getString()); 197 198 for ( String r : allRegions ) { 199 int rangeMarkerLocation = r.indexOf("~"); 200 if ( rangeMarkerLocation > 0 ) { 201 StringBuilder regionName = new StringBuilder(r); 202 char endRange = regionName.charAt(rangeMarkerLocation+1); 203 regionName.setLength(rangeMarkerLocation); 204 char lastChar = regionName.charAt(rangeMarkerLocation-1); 205 while ( lastChar <= endRange ) { 206 String newRegion = regionName.toString(); 207 regionCodes.add(newRegion); 208 lastChar++; 209 regionName.setCharAt(rangeMarkerLocation-1,lastChar); 210 } 211 } else { 212 regionCodes.add(r); 213 } 214 } 215 216 regions = new ArrayList<Region>(regionCodes.size()); 217 218 // First process the region codes and create the master array of regions. 219 for ( String id : regionCodes) { 220 Region r = new Region(); 221 r.id = id; 222 r.type = RegionType.TERRITORY; // Only temporary - figure out the real type later once the aliases are known. 223 regionIDMap.put(id, r); 224 if ( id.matches("[0-9]{3}")) { 225 r.code = Integer.valueOf(id).intValue(); 226 numericCodeMap.put(r.code, r); 227 r.type = RegionType.SUBCONTINENT; 228 } else { 229 r.code = -1; 230 } 231 regions.add(r); 232 } 233 234 235 // Process the territory aliases 236 for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) { 237 UResourceBundle res = territoryAlias.get(i); 238 String aliasFrom = res.getKey(); 239 String aliasTo = res.get("replacement").getString(); 240 241 if ( regionIDMap.containsKey(aliasTo) && !regionIDMap.containsKey(aliasFrom) ) { // This is just an alias from some string to a region 242 regionAliases.put(aliasFrom, regionIDMap.get(aliasTo)); 243 } else { 244 Region r; 245 if ( regionIDMap.containsKey(aliasFrom) ) { // This is a deprecated region 246 r = regionIDMap.get(aliasFrom); 247 } else { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. 248 r = new Region(); 249 r.id = aliasFrom; 250 regionIDMap.put(aliasFrom, r); 251 if ( aliasFrom.matches("[0-9]{3}")) { 252 r.code = Integer.valueOf(aliasFrom).intValue(); 253 numericCodeMap.put(r.code, r); 254 } else { 255 r.code = -1; 256 } 257 regions.add(r); 258 } 259 r.type = RegionType.DEPRECATED; 260 List<String> aliasToRegionStrings = Arrays.asList(aliasTo.split(" ")); 261 r.preferredValues = new ArrayList<Region>(); 262 for ( String s : aliasToRegionStrings ) { 263 if (regionIDMap.containsKey(s)) { 264 r.preferredValues.add(regionIDMap.get(s)); 265 } 266 } 267 } 268 } 269 270 // Process the code mappings - This will allow us to assign numeric codes to most of the territories. 271 for ( int i = 0 ; i < codeMappings.getSize(); i++ ) { 272 UResourceBundle mapping = codeMappings.get(i); 273 if ( mapping.getType() == UResourceBundle.ARRAY ) { 274 String [] codeMappingStrings = mapping.getStringArray(); 275 String codeMappingID = codeMappingStrings[0]; 276 Integer codeMappingNumber = Integer.valueOf(codeMappingStrings[1]); 277 String codeMapping3Letter = codeMappingStrings[2]; 278 279 if ( regionIDMap.containsKey(codeMappingID)) { 280 Region r = regionIDMap.get(codeMappingID); 281 r.code = codeMappingNumber.intValue(); 282 numericCodeMap.put(r.code, r); 283 regionAliases.put(codeMapping3Letter, r); 284 } 285 } 286 } 287 288 // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS 289 Region r; 290 if ( regionIDMap.containsKey(WORLD_ID)) { 291 r = regionIDMap.get(WORLD_ID); 292 r.type = RegionType.WORLD; 293 } 294 295 if ( regionIDMap.containsKey(UNKNOWN_REGION_ID)) { 296 r = regionIDMap.get(UNKNOWN_REGION_ID); 297 r.type = RegionType.UNKNOWN; 298 } 299 300 for ( String continent : continents ) { 301 if (regionIDMap.containsKey(continent)) { 302 r = regionIDMap.get(continent); 303 r.type = RegionType.CONTINENT; 304 } 305 } 306 307 for ( String grouping : groupings ) { 308 if (regionIDMap.containsKey(grouping)) { 309 r = regionIDMap.get(grouping); 310 r.type = RegionType.GROUPING; 311 } 312 } 313 314 // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR 315 // even though it looks like a territory code. Need to handle it here. 316 317 if ( regionIDMap.containsKey(OUTLYING_OCEANIA_REGION_ID)) { 318 r = regionIDMap.get(OUTLYING_OCEANIA_REGION_ID); 319 r.type = RegionType.SUBCONTINENT; 320 } 321 322 // Load territory containment info from the supplemental data. 323 for ( int i = 0 ; i < territoryContainment.getSize(); i++ ) { 324 UResourceBundle mapping = territoryContainment.get(i); 325 String parent = mapping.getKey(); 326 if (parent.equals("containedGroupings") || parent.equals("deprecated")) { 327 continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip. 328 // #11232 is to do something useful with these. 329 } 330 Region parentRegion = regionIDMap.get(parent); 331 for ( int j = 0 ; j < mapping.getSize(); j++ ) { 332 String child = mapping.getString(j); 333 Region childRegion = regionIDMap.get(child); 334 if ( parentRegion != null && childRegion != null ) { 335 336 // Add the child region to the set of regions contained by the parent 337 parentRegion.containedRegions.add(childRegion); 338 339 // Set the parent region to be the containing region of the child. 340 // Regions of type GROUPING can't be set as the parent, since another region 341 // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. 342 if ( parentRegion.getType() != RegionType.GROUPING) { 343 childRegion.containingRegion = parentRegion; 344 } 345 } 346 } 347 } 348 349 // Create the availableRegions lists 350 351 for (int i = 0 ; i < RegionType.values().length ; i++) { 352 availableRegions.add(new TreeSet<Region>()); 353 } 354 355 for ( Region ar : regions ) { 356 Set<Region> currentSet = availableRegions.get(ar.type.ordinal()); 357 currentSet.add(ar); 358 availableRegions.set(ar.type.ordinal(),currentSet); 359 } 360 361 regionDataIsLoaded = true; 362 } 363 364 /** Returns a Region using the given region ID. The region ID can be either a 2-letter ISO code, 365 * 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the CLDR. 366 * @param id The id of the region to be retrieved. 367 * @return The corresponding region. 368 * @throws NullPointerException if the supplied id is null. 369 * @throws IllegalArgumentException if the supplied ID cannot be canonicalized to a Region ID that is known by ICU. 370 * @stable ICU 50 371 */ 372 373 public static Region getInstance(String id) { 374 375 if ( id == null ) { 376 throw new NullPointerException(); 377 } 378 379 loadRegionData(); 380 381 Region r = regionIDMap.get(id); 382 383 if ( r == null ) { 384 r = regionAliases.get(id); 385 } 386 387 if ( r == null ) { 388 throw new IllegalArgumentException("Unknown region id: " + id); 389 } 390 391 if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) { 392 r = r.preferredValues.get(0); 393 } 394 395 return r; 396 } 397 398 399 /** Returns a Region using the given numeric code as defined by UNM.49 400 * @param code The numeric code of the region to be retrieved. 401 * @return The corresponding region. 402 * @throws IllegalArgumentException if the supplied numeric code is not recognized. 403 * @stable ICU 50 404 */ 405 406 public static Region getInstance(int code) { 407 408 loadRegionData(); 409 410 Region r = numericCodeMap.get(code); 411 412 if ( r == null ) { // Just in case there's an alias that's numeric, try to find it. 413 String pad = ""; 414 if ( code < 10 ) { 415 pad = "00"; 416 } else if ( code < 100 ) { 417 pad = "0"; 418 } 419 String id = pad + Integer.toString(code); 420 r = regionAliases.get(id); 421 } 422 423 if ( r == null ) { 424 throw new IllegalArgumentException("Unknown region code: " + code); 425 } 426 427 if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) { 428 r = r.preferredValues.get(0); 429 } 430 431 return r; 432 } 433 434 435 /** Used to retrieve all available regions of a specific type. 436 * 437 * @param type The type of regions to be returned ( TERRITORY, MACROREGION, etc. ) 438 * @return An unmodifiable set of all known regions that match the given type. 439 * @stable ICU 50 440 */ 441 442 public static Set<Region> getAvailable(RegionType type) { 443 444 loadRegionData(); 445 return Collections.unmodifiableSet(availableRegions.get(type.ordinal())); 446 } 447 448 449 /** Used to determine the macroregion that geographically contains this region. 450 * 451 * @return The region that geographically contains this region. Returns NULL if this region is 452 * code "001" (World) or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) 453 * returns the region "039" (Southern Europe). 454 * @stable ICU 50 455 */ 456 457 public Region getContainingRegion() { 458 loadRegionData(); 459 return containingRegion; 460 } 461 462 /** Used to determine the macroregion that geographically contains this region and that matches the given type. 463 * 464 * @return The region that geographically contains this region and matches the given type. May return NULL if 465 * no containing region can be found that matches the given type. For example, calling this method with region "IT" (Italy) 466 * and type CONTINENT returns the region "150" (Europe). 467 * @stable ICU 50 468 */ 469 470 public Region getContainingRegion(RegionType type) { 471 loadRegionData(); 472 if ( containingRegion == null ) { 473 return null; 474 } 475 if ( containingRegion.type.equals(type)) { 476 return containingRegion; 477 } else { 478 return containingRegion.getContainingRegion(type); 479 } 480 } 481 482 /** Used to determine the sub-regions that are contained within this region. 483 * 484 * @return An unmodifiable set containing all the regions that are immediate children 485 * of this region in the region hierarchy. These returned regions could be either macro 486 * regions, territories, or a mixture of the two, depending on the containment data as defined 487 * in CLDR. This API may return an empty set if this region doesn't have any sub-regions. 488 * For example, calling this method with region "150" (Europe) returns a set containing 489 * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) 490 * - "154" (Northern Europe) and "155" (Western Europe). 491 * 492 * @stable ICU 50 493 */ 494 495 public Set<Region> getContainedRegions() { 496 loadRegionData(); 497 return Collections.unmodifiableSet(containedRegions); 498 } 499 500 /** Used to determine all the regions that are contained within this region and that match the given type 501 * 502 * @return An unmodifiable set containing all the regions that are children of this region 503 * anywhere in the region hierarchy and match the given type. This API may return an empty set 504 * if this region doesn't have any sub-regions that match the given type. 505 * For example, calling this method with region "150" (Europe) and type "TERRITORY" returns a set 506 * containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. ) 507 * @stable ICU 50 508 */ 509 510 public Set<Region> getContainedRegions(RegionType type) { 511 512 loadRegionData(); 513 514 Set<Region> result = new TreeSet<Region>(); 515 Set<Region> cr = getContainedRegions(); 516 517 for ( Region r : cr ) { 518 if ( r.getType() == type ) { 519 result.add(r); 520 } else { 521 result.addAll(r.getContainedRegions(type)); 522 } 523 } 524 return Collections.unmodifiableSet(result); 525 } 526 527 /** 528 * @return For deprecated regions, return an unmodifiable list of the regions that are the preferred replacement regions for this region. 529 * Returns null for a non-deprecated region. For example, calling this method with region "SU" (Soviet Union) would 530 * return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc... 531 * 532 * @stable ICU 50 533 */ 534 public List<Region> getPreferredValues() { 535 536 loadRegionData(); 537 538 if ( type == RegionType.DEPRECATED) { 539 return Collections.unmodifiableList(preferredValues); 540 } else { 541 return null; 542 } 543 } 544 545 /** 546 * @return Returns true if this region contains the supplied other region anywhere in the region hierarchy. 547 * 548 * @stable ICU 50 549 */ 550 public boolean contains(Region other) { 551 552 loadRegionData(); 553 554 if (containedRegions.contains(other)) { 555 return true; 556 } else { 557 for (Region cr : containedRegions) { 558 if (cr.contains(other)) { 559 return true; 560 } 561 } 562 } 563 564 return false; 565 } 566 567 /** Returns the string representation of this region 568 * 569 * @return The string representation of this region, which is its ID. 570 * 571 * @stable ICU 50 572 */ 573 574 public String toString() { 575 return id; 576 } 577 578 /** 579 * Returns the numeric code for this region 580 * 581 * @return The numeric code for this region. Returns a negative value if the given region does not have a numeric 582 * code assigned to it. This is a very rare case and only occurs for a few very small territories. 583 * 584 * @stable ICU 50 585 */ 586 587 public int getNumericCode() { 588 return code; 589 } 590 591 /** Returns this region's type. 592 * 593 * @return This region's type classification, such as MACROREGION or TERRITORY. 594 * 595 * @stable ICU 50 596 */ 597 598 public RegionType getType() { 599 return type; 600 } 601 602 /** 603 * {@inheritDoc} 604 * @stable ICU 50 605 */ 606 public int compareTo(Region other) { 607 return id.compareTo(other.id); 608 } 609 } 610