Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2010, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #if !UCONFIG_NO_FORMATTING
     11 
     12 #include "zonemeta.h"
     13 
     14 #include "unicode/timezone.h"
     15 #include "unicode/ustring.h"
     16 #include "unicode/putil.h"
     17 
     18 #include "umutex.h"
     19 #include "uvector.h"
     20 #include "cmemory.h"
     21 #include "gregoimp.h"
     22 #include "cstring.h"
     23 #include "ucln_in.h"
     24 #include "uassert.h"
     25 
// Mutex guarding all of the lazily-built caches declared below.
static UMTX gZoneMetaLock = NULL;

// Metazone mapping table.
// Cache consulted and filled by ZoneMeta::getMetazoneMappings(): keys are
// heap-allocated UChar* zone IDs, values are UVector* mapping lists.
static UHashtable *gOlsonToMeta = NULL;
static UBool gOlsonToMetaInitialized = FALSE;

// Country info vectors.
// Caches used by ZoneMeta::getSingleCountry(): region codes already classified
// as having a single canonical zone, or more than one, respectively.
static U_NAMESPACE_QUALIFIER UVector *gSingleZoneCountries = NULL;
static U_NAMESPACE_QUALIFIER UVector *gMultiZonesCountries = NULL;
static UBool gCountryInfoVectorsInitialized = FALSE;
     36 
     37 U_CDECL_BEGIN
     38 
     39 
     40 /**
     41  * Cleanup callback func
     42  */
     43 static UBool U_CALLCONV zoneMeta_cleanup(void)
     44 {
     45      umtx_destroy(&gZoneMetaLock);
     46 
     47     if (gOlsonToMeta != NULL) {
     48         uhash_close(gOlsonToMeta);
     49         gOlsonToMeta = NULL;
     50     }
     51     gOlsonToMetaInitialized = FALSE;
     52 
     53     delete gSingleZoneCountries;
     54     delete gMultiZonesCountries;
     55     gCountryInfoVectorsInitialized = FALSE;
     56 
     57     return TRUE;
     58 }
     59 
     60 /**
     61  * Deleter for UChar* string
     62  */
     63 static void U_CALLCONV
     64 deleteUCharString(void *obj) {
     65     UChar *entry = (UChar*)obj;
     66     uprv_free(entry);
     67 }
     68 
     69 /**
     70  * Deleter for UVector
     71  */
     72 static void U_CALLCONV
     73 deleteUVector(void *obj) {
     74    delete (U_NAMESPACE_QUALIFIER UVector*) obj;
     75 }
     76 
     77 /**
     78  * Deleter for OlsonToMetaMappingEntry
     79  */
     80 static void U_CALLCONV
     81 deleteOlsonToMetaMappingEntry(void *obj) {
     82     U_NAMESPACE_QUALIFIER OlsonToMetaMappingEntry *entry = (U_NAMESPACE_QUALIFIER OlsonToMetaMappingEntry*)obj;
     83     uprv_free(entry);
     84 }
     85 
     86 U_CDECL_END
     87 
     88 U_NAMESPACE_BEGIN
     89 
// Capacity of the stack buffers that hold zone / metazone IDs and resource keys.
#define ZID_KEY_MAX 128

// Bundle name and table keys used when reading metazone data.
static const char gMetaZones[]          = "metaZones";
static const char gMetazoneInfo[]       = "metazoneInfo";
static const char gMapTimezonesTag[]    = "mapTimezones";

// Bundle name and table keys used when looking up CLDR canonical zone IDs.
static const char gKeyTypeData[]        = "keyTypeData";
static const char gTypeAliasTag[]       = "typeAlias";
static const char gTimezoneTag[]        = "timezone";

// Region key "001", as a char key for resource lookups...
static const char gWorldTag[]           = "001";

// ...and as a UChar string for comparison against TimeZone::getRegion() results.
static const UChar gWorld[] = {0x30, 0x30, 0x31, 0x00}; // "001"

// Validity range applied to a metazone mapping entry that carries no explicit
// from/to dates.
static const UChar gDefaultFrom[] = {0x31, 0x39, 0x37, 0x30, 0x2D, 0x30, 0x31, 0x2D, 0x30, 0x31,
                                     0x20, 0x30, 0x30, 0x3A, 0x30, 0x30, 0x00}; // "1970-01-01 00:00"
static const UChar gDefaultTo[]   = {0x39, 0x39, 0x39, 0x39, 0x2D, 0x31, 0x32, 0x2D, 0x33, 0x31,
                                     0x20, 0x32, 0x33, 0x3A, 0x35, 0x39, 0x00}; // "9999-12-31 23:59"

// Yields 0..9 for the code points U+0030..U+0039 and -1 for anything else.
// Note: the argument is evaluated more than once.
#define ASCII_DIGIT(c) (((c)>=0x30 && (c)<=0x39) ? (c)-0x30 : -1)
    110 
    111 /*
    112  * Convert a date string used by metazone mappings to UDate.
    113  * The format used by CLDR metazone mapping is "yyyy-MM-dd HH:mm".
    114  */
    115 static UDate
    116 parseDate (const UChar *text, UErrorCode &status) {
    117     if (U_FAILURE(status)) {
    118         return 0;
    119     }
    120     int32_t len = u_strlen(text);
    121     if (len != 16 && len != 10) {
    122         // It must be yyyy-MM-dd HH:mm (length 16) or yyyy-MM-dd (length 10)
    123         status = U_INVALID_FORMAT_ERROR;
    124         return 0;
    125     }
    126 
    127     int32_t year = 0, month = 0, day = 0, hour = 0, min = 0, n;
    128     int32_t idx;
    129 
    130     // "yyyy" (0 - 3)
    131     for (idx = 0; idx <= 3 && U_SUCCESS(status); idx++) {
    132         n = ASCII_DIGIT((int32_t)text[idx]);
    133         if (n >= 0) {
    134             year = 10*year + n;
    135         } else {
    136             status = U_INVALID_FORMAT_ERROR;
    137         }
    138     }
    139     // "MM" (5 - 6)
    140     for (idx = 5; idx <= 6 && U_SUCCESS(status); idx++) {
    141         n = ASCII_DIGIT((int32_t)text[idx]);
    142         if (n >= 0) {
    143             month = 10*month + n;
    144         } else {
    145             status = U_INVALID_FORMAT_ERROR;
    146         }
    147     }
    148     // "dd" (8 - 9)
    149     for (idx = 8; idx <= 9 && U_SUCCESS(status); idx++) {
    150         n = ASCII_DIGIT((int32_t)text[idx]);
    151         if (n >= 0) {
    152             day = 10*day + n;
    153         } else {
    154             status = U_INVALID_FORMAT_ERROR;
    155         }
    156     }
    157     if (len == 16) {
    158         // "HH" (11 - 12)
    159         for (idx = 11; idx <= 12 && U_SUCCESS(status); idx++) {
    160             n = ASCII_DIGIT((int32_t)text[idx]);
    161             if (n >= 0) {
    162                 hour = 10*hour + n;
    163             } else {
    164                 status = U_INVALID_FORMAT_ERROR;
    165             }
    166         }
    167         // "mm" (14 - 15)
    168         for (idx = 14; idx <= 15 && U_SUCCESS(status); idx++) {
    169             n = ASCII_DIGIT((int32_t)text[idx]);
    170             if (n >= 0) {
    171                 min = 10*min + n;
    172             } else {
    173                 status = U_INVALID_FORMAT_ERROR;
    174             }
    175         }
    176     }
    177 
    178     if (U_SUCCESS(status)) {
    179         UDate date = Grego::fieldsToDay(year, month - 1, day) * U_MILLIS_PER_DAY
    180             + hour * U_MILLIS_PER_HOUR + min * U_MILLIS_PER_MINUTE;
    181         return date;
    182     }
    183     return 0;
    184 }
    185 
    186 UnicodeString& U_EXPORT2
    187 ZoneMeta::getCanonicalSystemID(const UnicodeString &tzid, UnicodeString &systemID, UErrorCode& status) {
    188     // Dereference the input ID using the tz data first
    189     const UChar *canonical = TimeZone::dereferOlsonLink(tzid);
    190     if (canonical != NULL) {
    191         // check canonical mapping in CLDR
    192         char id[ZID_KEY_MAX];
    193         int32_t len = u_strlen(canonical);
    194         if (len < sizeof(id)) {
    195             u_UCharsToChars(canonical, id, len + 1 /* include the terminator */);
    196             // replace '/' with ':'
    197             char *p = id;
    198             while (*p) {
    199                 if (*p == '/') {
    200                     *p = ':';
    201                 }
    202                 p++;
    203             }
    204 
    205             UErrorCode tmpStatus = U_ZERO_ERROR;
    206             UResourceBundle *rb = ures_openDirect(NULL, gKeyTypeData, &tmpStatus);
    207             ures_getByKey(rb, gTypeAliasTag, rb, &tmpStatus);
    208             ures_getByKey(rb, gTimezoneTag, rb, &tmpStatus);
    209             const UChar *cldrCanonical = ures_getStringByKey(rb, id, NULL, &tmpStatus);
    210             if (U_SUCCESS(tmpStatus)) {
    211                 // canonical map found
    212                 canonical = cldrCanonical;
    213             }
    214             ures_close(rb);
    215         }
    216     }
    217     if (canonical == NULL) {
    218         systemID.remove();
    219         status = U_ILLEGAL_ARGUMENT_ERROR;
    220     } else {
    221         systemID.setTo(canonical);
    222     }
    223 
    224     return systemID;
    225 }
    226 
    227 UnicodeString& U_EXPORT2
    228 ZoneMeta::getCanonicalCountry(const UnicodeString &tzid, UnicodeString &canonicalCountry) {
    229     const UChar *region = TimeZone::getRegion(tzid);
    230     if (u_strcmp(gWorld, region) != 0) {
    231         canonicalCountry.setTo(region, -1);
    232     } else {
    233         canonicalCountry.remove();
    234     }
    235     return canonicalCountry;
    236 }
    237 
    238 UnicodeString& U_EXPORT2
    239 ZoneMeta::getSingleCountry(const UnicodeString &tzid, UnicodeString &country) {
    240     // Get canonical country for the zone
    241     const UChar *region = TimeZone::getRegion(tzid);
    242     if (u_strcmp(gWorld, region) == 0) {
    243         // special case - "001"
    244         country.remove();
    245         return country;
    246     }
    247 
    248     // Checking the cached results
    249     UErrorCode status = U_ZERO_ERROR;
    250     UBool initialized;
    251     UMTX_CHECK(&gZoneMetaLock, gCountryInfoVectorsInitialized, initialized);
    252     if (!initialized) {
    253         // Create empty vectors
    254         umtx_lock(&gZoneMetaLock);
    255         {
    256             if (!gCountryInfoVectorsInitialized) {
    257                 // No deleters for these UVectors, it's a reference to a resource bundle string.
    258                 gSingleZoneCountries = new UVector(NULL, uhash_compareUChars, status);
    259                 if (gSingleZoneCountries == NULL) {
    260                     status = U_MEMORY_ALLOCATION_ERROR;
    261                 }
    262                 gMultiZonesCountries = new UVector(NULL, uhash_compareUChars, status);
    263                 if (gMultiZonesCountries == NULL) {
    264                     status = U_MEMORY_ALLOCATION_ERROR;
    265                 }
    266 
    267                 if (U_SUCCESS(status)) {
    268                     gCountryInfoVectorsInitialized = TRUE;
    269                 } else {
    270                     delete gSingleZoneCountries;
    271                     delete gMultiZonesCountries;
    272                 }
    273             }
    274         }
    275         umtx_unlock(&gZoneMetaLock);
    276 
    277         if (U_FAILURE(status)) {
    278             country.remove();
    279             return country;
    280         }
    281     }
    282 
    283     // Check if it was already cached
    284     UBool cached = FALSE;
    285     UBool multiZones = FALSE;
    286     umtx_lock(&gZoneMetaLock);
    287     {
    288         multiZones = cached = gMultiZonesCountries->contains((void*)region);
    289         if (!multiZones) {
    290             cached = gSingleZoneCountries->contains((void*)region);
    291         }
    292     }
    293     umtx_unlock(&gZoneMetaLock);
    294 
    295     if (!cached) {
    296         // We need to go through all zones associated with the region.
    297         // This is relatively heavy operation.
    298 
    299         U_ASSERT(u_strlen(region) == 2);
    300 
    301         char buf[] = {0, 0, 0};
    302         u_UCharsToChars(region, buf, 2);
    303 
    304         StringEnumeration *ids = TimeZone::createEnumeration(buf);
    305         int32_t idsLen = ids->count(status);
    306         if (U_SUCCESS(status) && idsLen > 1) {
    307             // multiple zones are available for the region
    308             UnicodeString canonical, tmp;
    309             const UnicodeString *id = ids->snext(status);
    310             getCanonicalSystemID(*id, canonical, status);
    311             if (U_SUCCESS(status)) {
    312                 // check if there are any other canonical zone in the group
    313                 while (id = ids->snext(status)) {
    314                     getCanonicalSystemID(*id, tmp, status);
    315                     if (U_FAILURE(status)) {
    316                         break;
    317                     }
    318                     if (canonical != tmp) {
    319                         // another canonical zone was found
    320                         multiZones = TRUE;
    321                         break;
    322                     }
    323                 }
    324             }
    325         }
    326         if (U_FAILURE(status)) {
    327             // no single country by default for any error cases
    328             multiZones = TRUE;
    329         }
    330         delete ids;
    331 
    332         // Cache the result
    333         umtx_lock(&gZoneMetaLock);
    334         {
    335             UErrorCode ec = U_ZERO_ERROR;
    336             if (multiZones) {
    337                 if (!gMultiZonesCountries->contains((void*)region)) {
    338                     gMultiZonesCountries->addElement((void*)region, ec);
    339                 }
    340             } else {
    341                 if (!gSingleZoneCountries->contains((void*)region)) {
    342                     gSingleZoneCountries->addElement((void*)region, ec);
    343                 }
    344             }
    345         }
    346         umtx_unlock(&gZoneMetaLock);
    347     }
    348 
    349     if (multiZones) {
    350         country.remove();
    351     } else {
    352         country.setTo(region, -1);
    353     }
    354     return country;
    355 }
    356 
    357 UnicodeString& U_EXPORT2
    358 ZoneMeta::getMetazoneID(const UnicodeString &tzid, UDate date, UnicodeString &result) {
    359     UBool isSet = FALSE;
    360     const UVector *mappings = getMetazoneMappings(tzid);
    361     if (mappings != NULL) {
    362         for (int32_t i = 0; i < mappings->size(); i++) {
    363             OlsonToMetaMappingEntry *mzm = (OlsonToMetaMappingEntry*)mappings->elementAt(i);
    364             if (mzm->from <= date && mzm->to > date) {
    365                 result.setTo(mzm->mzid, -1);
    366                 isSet = TRUE;
    367                 break;
    368             }
    369         }
    370     }
    371     if (!isSet) {
    372         result.remove();
    373     }
    374     return result;
    375 }
    376 
    377 const UVector* U_EXPORT2
    378 ZoneMeta::getMetazoneMappings(const UnicodeString &tzid) {
    379     UErrorCode status = U_ZERO_ERROR;
    380     UChar tzidUChars[ZID_KEY_MAX];
    381     tzid.extract(tzidUChars, ZID_KEY_MAX, status);
    382     if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
    383         return NULL;
    384     }
    385 
    386     UBool initialized;
    387     UMTX_CHECK(&gZoneMetaLock, gOlsonToMetaInitialized, initialized);
    388     if (!initialized) {
    389         UHashtable *tmpOlsonToMeta = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status);
    390         if (U_FAILURE(status)) {
    391             return NULL;
    392         }
    393         uhash_setKeyDeleter(tmpOlsonToMeta, deleteUCharString);
    394         uhash_setValueDeleter(tmpOlsonToMeta, deleteUVector);
    395 
    396         umtx_lock(&gZoneMetaLock);
    397         {
    398             if (!gOlsonToMetaInitialized) {
    399                 gOlsonToMeta = tmpOlsonToMeta;
    400                 tmpOlsonToMeta = NULL;
    401                 gOlsonToMetaInitialized = TRUE;
    402             }
    403         }
    404         umtx_unlock(&gZoneMetaLock);
    405 
    406         // OK to call the following multiple times with the same function
    407         ucln_i18n_registerCleanup(UCLN_I18N_ZONEMETA, zoneMeta_cleanup);
    408         if (tmpOlsonToMeta != NULL) {
    409             uhash_close(tmpOlsonToMeta);
    410         }
    411     }
    412 
    413     // get the mapping from cache
    414     const UVector *result = NULL;
    415 
    416     umtx_lock(&gZoneMetaLock);
    417     {
    418         result = (UVector*) uhash_get(gOlsonToMeta, tzidUChars);
    419     }
    420     umtx_unlock(&gZoneMetaLock);
    421 
    422     if (result != NULL) {
    423         return result;
    424     }
    425 
    426     // miss the cache - create new one
    427     UVector *tmpResult = createMetazoneMappings(tzid);
    428     if (tmpResult == NULL) {
    429         // not available
    430         return NULL;
    431     }
    432 
    433     // put the new one into the cache
    434     umtx_lock(&gZoneMetaLock);
    435     {
    436         // make sure it's already created
    437         result = (UVector*) uhash_get(gOlsonToMeta, tzidUChars);
    438         if (result == NULL) {
    439             // add the one just created
    440             int32_t tzidLen = tzid.length() + 1;
    441             UChar *key = (UChar*)uprv_malloc(tzidLen * sizeof(UChar));
    442             if (key == NULL) {
    443                 // memory allocation error..  just return NULL
    444                 result = NULL;
    445                 delete tmpResult;
    446             } else {
    447                 tzid.extract(key, tzidLen, status);
    448                 uhash_put(gOlsonToMeta, key, tmpResult, &status);
    449                 if (U_FAILURE(status)) {
    450                     // delete the mapping
    451                     result = NULL;
    452                     delete tmpResult;
    453                 } else {
    454                     result = tmpResult;
    455                 }
    456             }
    457         } else {
    458             // another thread already put the one
    459             delete tmpResult;
    460         }
    461     }
    462     umtx_unlock(&gZoneMetaLock);
    463 
    464     return result;
    465 }
    466 
    467 UVector*
    468 ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) {
    469     UVector *mzMappings = NULL;
    470     UErrorCode status = U_ZERO_ERROR;
    471 
    472     UnicodeString canonicalID;
    473     UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status);
    474     ures_getByKey(rb, gMetazoneInfo, rb, &status);
    475     TimeZone::getCanonicalID(tzid, canonicalID, status);
    476 
    477     if (U_SUCCESS(status)) {
    478         char tzKey[ZID_KEY_MAX];
    479         canonicalID.extract(0, canonicalID.length(), tzKey, sizeof(tzKey), US_INV);
    480 
    481         // tzid keys are using ':' as separators
    482         char *p = tzKey;
    483         while (*p) {
    484             if (*p == '/') {
    485                 *p = ':';
    486             }
    487             p++;
    488         }
    489 
    490         ures_getByKey(rb, tzKey, rb, &status);
    491 
    492         if (U_SUCCESS(status)) {
    493             UResourceBundle *mz = NULL;
    494             while (ures_hasNext(rb)) {
    495                 mz = ures_getNextResource(rb, mz, &status);
    496 
    497                 const UChar *mz_name = ures_getStringByIndex(mz, 0, NULL, &status);
    498                 const UChar *mz_from = gDefaultFrom;
    499                 const UChar *mz_to = gDefaultTo;
    500 
    501                 if (ures_getSize(mz) == 3) {
    502                     mz_from = ures_getStringByIndex(mz, 1, NULL, &status);
    503                     mz_to   = ures_getStringByIndex(mz, 2, NULL, &status);
    504                 }
    505 
    506                 if(U_FAILURE(status)){
    507                     status = U_ZERO_ERROR;
    508                     continue;
    509                 }
    510                 // We do not want to use SimpleDateformat to parse boundary dates,
    511                 // because this code could be triggered by the initialization code
    512                 // used by SimpleDateFormat.
    513                 UDate from = parseDate(mz_from, status);
    514                 UDate to = parseDate(mz_to, status);
    515                 if (U_FAILURE(status)) {
    516                     status = U_ZERO_ERROR;
    517                     continue;
    518                 }
    519 
    520                 OlsonToMetaMappingEntry *entry = (OlsonToMetaMappingEntry*)uprv_malloc(sizeof(OlsonToMetaMappingEntry));
    521                 if (entry == NULL) {
    522                     status = U_MEMORY_ALLOCATION_ERROR;
    523                     break;
    524                 }
    525                 entry->mzid = mz_name;
    526                 entry->from = from;
    527                 entry->to = to;
    528 
    529                 if (mzMappings == NULL) {
    530                     mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status);
    531                     if (U_FAILURE(status)) {
    532                         delete mzMappings;
    533                         deleteOlsonToMetaMappingEntry(entry);
    534                         uprv_free(entry);
    535                         break;
    536                     }
    537                 }
    538 
    539                 mzMappings->addElement(entry, status);
    540                 if (U_FAILURE(status)) {
    541                     break;
    542                 }
    543             }
    544             ures_close(mz);
    545             if (U_FAILURE(status)) {
    546                 if (mzMappings != NULL) {
    547                     delete mzMappings;
    548                     mzMappings = NULL;
    549                 }
    550             }
    551         }
    552     }
    553     ures_close(rb);
    554     return mzMappings;
    555 }
    556 
    557 UnicodeString& U_EXPORT2
    558 ZoneMeta::getZoneIdByMetazone(const UnicodeString &mzid, const UnicodeString &region, UnicodeString &result) {
    559     UErrorCode status = U_ZERO_ERROR;
    560     const UChar *tzid = NULL;
    561     int32_t tzidLen = 0;
    562     char keyBuf[ZID_KEY_MAX + 1];
    563     int32_t keyLen = 0;
    564 
    565     if (mzid.length() >= ZID_KEY_MAX) {
    566         result.remove();
    567         return result;
    568     }
    569 
    570     keyLen = mzid.extract(0, mzid.length(), keyBuf, ZID_KEY_MAX, US_INV);
    571 
    572     UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status);
    573     ures_getByKey(rb, gMapTimezonesTag, rb, &status);
    574     ures_getByKey(rb, keyBuf, rb, &status);
    575 
    576     if (U_SUCCESS(status)) {
    577         // check region mapping
    578         if (region.length() == 2 || region.length() == 3) {
    579             region.extract(0, region.length(), keyBuf, ZID_KEY_MAX, US_INV);
    580             tzid = ures_getStringByKey(rb, keyBuf, &tzidLen, &status);
    581             if (status == U_MISSING_RESOURCE_ERROR) {
    582                 status = U_ZERO_ERROR;
    583             }
    584         }
    585         if (U_SUCCESS(status) && tzid == NULL) {
    586             // try "001"
    587             tzid = ures_getStringByKey(rb, gWorldTag, &tzidLen, &status);
    588         }
    589     }
    590     ures_close(rb);
    591 
    592     if (tzid == NULL) {
    593         result.remove();
    594     } else {
    595         result.setTo(tzid, tzidLen);
    596     }
    597 
    598     return result;
    599 }
    600 
    601 U_NAMESPACE_END
    602 
    603 #endif /* #if !UCONFIG_NO_FORMATTING */
    604