Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2007-2009, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 #ifndef ZSTRFMT_H
      8 #define ZSTRFMT_H
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_FORMATTING
     13 
     14 #include "unicode/unistr.h"
     15 #include "unicode/calendar.h"
     16 #include "uhash.h"
     17 #include "uvector.h"
     18 
     19 U_NAMESPACE_BEGIN
     20 
     21 /*
     22  * Character node used by TextTrieMap
     23  */
     24 struct CharacterNode {
     25     // No constructor or destructor.
     26     // We malloc and free an uninitalized array of CharacterNode objects
     27     // and clear and delete them ourselves.
     28 
     29     void clear();
     30     void deleteValues();
     31 
     32     void addValue(void *value, UErrorCode &status);
     33     inline UBool hasValues() const;
     34     inline int32_t countValues() const;
     35     inline const void *getValue(int32_t index) const;
     36 
     37     void     *fValues;      // Union of one single value vs. UVector of values.
     38     UChar    fCharacter;    // UTF-16 code unit.
     39     uint16_t fFirstChild;   // 0 if no children.
     40     uint16_t fNextSibling;  // 0 terminates the list.
     41     UBool    fHasValuesVector;
     42     UBool    fPadding;
     43 
     44     // No value:   fValues == NULL               and  fHasValuesVector == FALSE
     45     // One value:  fValues == value              and  fHasValuesVector == FALSE
     46     // >=2 values: fValues == UVector of values  and  fHasValuesVector == TRUE
     47 };
     48 
     49 inline UBool CharacterNode::hasValues() const {
     50     return (UBool)(fValues != NULL);
     51 }
     52 
     53 inline int32_t CharacterNode::countValues() const {
     54     return
     55         fValues == NULL ? 0 :
     56         !fHasValuesVector ? 1 :
     57         ((const UVector *)fValues)->size();
     58 }
     59 
     60 inline const void *CharacterNode::getValue(int32_t index) const {
     61     if (!fHasValuesVector) {
     62         return fValues;  // Assume index == 0.
     63     } else {
     64         return ((const UVector *)fValues)->elementAt(index);
     65     }
     66 }
     67 
     68 /*
     69  * Search result handler callback interface used by TextTrieMap search.
     70  */
     71 class TextTrieMapSearchResultHandler : public UMemory {
     72 public:
     73     virtual UBool handleMatch(int32_t matchLength,
     74                               const CharacterNode *node, UErrorCode& status) = 0;
     75     virtual ~TextTrieMapSearchResultHandler(); //added to avoid warning
     76 };
     77 
     78 
     79 /*
     80  * ZSFStringPool   Pool of (UChar *) strings.  Provides for sharing of repeated
     81  *                 strings within ZoneStringFormats.
     82  */
     83 class ZSFStringPoolChunk;
     84 class ZSFStringPool: public UMemory {
     85   public:
     86     ZSFStringPool(UErrorCode &status);
     87     ~ZSFStringPool();
     88 
     89     /* Get the pooled string that is equal to the supplied string s.
     90      * Copy the string into the pool if it is not already present.
     91      *
     92      * Life time of the returned string is that of the pool.
     93      */
     94     const UChar *get(const UChar *s, UErrorCode &status);
     95 
     96     /* Get the pooled string that is equal to the supplied string s.
     97      * Copy the string into the pool if it is not already present.
     98      */
     99     const UChar *get(const UnicodeString &s, UErrorCode &status);
    100 
    101     /* Adopt a string into the pool, without copying it.
    102      * Used for strings from resource bundles, which will persist without copying.
    103      */
    104     const UChar *adopt(const UChar *s, UErrorCode &status);
    105 
    106     /* Freeze the string pool.  Discards the hash table that is used
    107      * for looking up a string.  All pointers to pooled strings remain valid.
    108      */
    109     void freeze();
    110 
    111   private:
    112     ZSFStringPoolChunk   *fChunks;
    113     UHashtable           *fHash;
    114 };
    115 
    116 
    117 /**
    118  * TextTrieMap is a trie implementation for supporting
    119  * fast prefix match for the string key.
    120  */
    121 class TextTrieMap : public UMemory {
    122 public:
    123     TextTrieMap(UBool ignoreCase);
    124     virtual ~TextTrieMap();
    125 
    126     void put(const UnicodeString &key, void *value, ZSFStringPool &sp, UErrorCode &status);
    127     void search(const UnicodeString &text, int32_t start,
    128         TextTrieMapSearchResultHandler *handler, UErrorCode& status) const;
    129     int32_t isEmpty() const;
    130 
    131 private:
    132     UBool           fIgnoreCase;
    133     CharacterNode   *fNodes;
    134     int32_t         fNodesCapacity;
    135     int32_t         fNodesCount;
    136 
    137     UVector         *fLazyContents;
    138     UBool           fIsEmpty;
    139 
    140     UBool growNodes();
    141     CharacterNode* addChildNode(CharacterNode *parent, UChar c, UErrorCode &status);
    142     CharacterNode* getChildNode(CharacterNode *parent, UChar c) const;
    143 
    144     void putImpl(const UnicodeString &key, void *value, UErrorCode &status);
    145     void buildTrie(UErrorCode &status);
    146     void search(CharacterNode *node, const UnicodeString &text, int32_t start,
    147         int32_t index, TextTrieMapSearchResultHandler *handler, UErrorCode &status) const;
    148 };
    149 
    150 
    151 // Name types, these bit flag are used for zone string lookup
    152 enum TimeZoneTranslationType {
    153     LOCATION        = 0x0001,
    154     GENERIC_LONG    = 0x0002,
    155     GENERIC_SHORT   = 0x0004,
    156     STANDARD_LONG   = 0x0008,
    157     STANDARD_SHORT  = 0x0010,
    158     DAYLIGHT_LONG   = 0x0020,
    159     DAYLIGHT_SHORT  = 0x0040
    160 };
    161 
    162 // Name type index, these constants are used for index in the zone strings array.
    163 enum TimeZoneTranslationTypeIndex {
    164     ZSIDX_LOCATION = 0,
    165     ZSIDX_LONG_STANDARD,
    166     ZSIDX_SHORT_STANDARD,
    167     ZSIDX_LONG_DAYLIGHT,
    168     ZSIDX_SHORT_DAYLIGHT,
    169     ZSIDX_LONG_GENERIC,
    170     ZSIDX_SHORT_GENERIC,
    171 
    172     ZSIDX_COUNT
    173 };
    174 
    175 class MessageFormat;
    176 
    177 
    178 /*
    179  * ZoneStringInfo is a class holding a localized zone string
    180  * information.
    181  */
    182 class ZoneStringInfo : public UMemory {
    183 public:
    184     virtual ~ZoneStringInfo();
    185 
    186     inline UnicodeString& getID(UnicodeString &result) const;
    187     inline UnicodeString& getString(UnicodeString &result) const;
    188     inline UBool isStandard(void) const;
    189     inline UBool isDaylight(void) const;
    190     inline UBool isGeneric(void) const;
    191 
    192 private:
    193     friend class ZoneStringFormat;
    194     friend class ZoneStringSearchResultHandler;
    195 
    196     ZoneStringInfo(const UnicodeString &id, const UnicodeString &str,
    197                    TimeZoneTranslationType type, ZSFStringPool &sp, UErrorCode &status);
    198 
    199     const UChar   *fId;
    200     const UChar   *fStr;
    201     TimeZoneTranslationType fType;
    202 };
    203 
    204 inline UnicodeString& ZoneStringInfo::getID(UnicodeString &result) const {
    205     return result.setTo(fId, -1);
    206 }
    207 
    208 inline UnicodeString& ZoneStringInfo::getString(UnicodeString &result) const {
    209     return result.setTo(fStr, -1);
    210 }
    211 
    212 inline UBool ZoneStringInfo::isStandard(void) const {
    213     return (fType == STANDARD_LONG || fType == STANDARD_SHORT);
    214 }
    215 
    216 inline UBool ZoneStringInfo::isDaylight(void) const {
    217     return (fType == DAYLIGHT_LONG || fType == DAYLIGHT_SHORT);
    218 }
    219 
    220 inline UBool ZoneStringInfo::isGeneric(void) const {
    221     return (fType == LOCATION || fType == GENERIC_LONG || fType == GENERIC_SHORT);
    222 }
    223 
    224 class SafeZoneStringFormatPtr;
    225 
    226 class ZoneStringFormat : public UMemory {
    227 public:
    228     ZoneStringFormat(const UnicodeString* const* strings, int32_t rowCount, int32_t columnCount, UErrorCode &status);
    229     ZoneStringFormat(const Locale& locale, UErrorCode &status);
    230     virtual ~ZoneStringFormat();
    231 
    232     /* Gets zone string format from cache if available, create it if not cached. */
    233     static SafeZoneStringFormatPtr* getZoneStringFormat(const Locale& locale, UErrorCode &status);
    234 
    235     /*
    236      * Create a snapshot of old zone strings array for the given date
    237      */
    238     UnicodeString** createZoneStringsArray(UDate date, int32_t &rowCount, int32_t &colCount, UErrorCode &status) const;
    239 
    240     /* TODO:  There is no implementation for this function.  Delete declaration? */
    241     const UnicodeString** getZoneStrings(int32_t &rowCount, int32_t &columnCount) const;
    242 
    243     UnicodeString& getSpecificLongString(const Calendar &cal,
    244         UnicodeString &result, UErrorCode &status) const;
    245 
    246     UnicodeString& getSpecificShortString(const Calendar &cal,
    247         UBool commonlyUsedOnly, UnicodeString &result, UErrorCode &status) const;
    248 
    249     UnicodeString& getGenericLongString(const Calendar &cal,
    250         UnicodeString &result, UErrorCode &status) const;
    251 
    252     UnicodeString& getGenericShortString(const Calendar &cal,
    253         UBool commonlyUsedOnly, UnicodeString &result, UErrorCode &status) const;
    254 
    255     UnicodeString& getGenericLocationString(const Calendar &cal,
    256         UnicodeString &result, UErrorCode &status) const;
    257 
    258     const ZoneStringInfo* findSpecificLong(const UnicodeString &text, int32_t start,
    259         int32_t &matchLength, UErrorCode &status) const;
    260     const ZoneStringInfo* findSpecificShort(const UnicodeString &text, int32_t start,
    261         int32_t &matchLength, UErrorCode &status) const;
    262     const ZoneStringInfo* findGenericLong(const UnicodeString &text, int32_t start,
    263         int32_t &matchLength, UErrorCode &status) const;
    264     const ZoneStringInfo* findGenericShort(const UnicodeString &text, int32_t start,
    265         int32_t &matchLength, UErrorCode &status) const;
    266     const ZoneStringInfo* findGenericLocation(const UnicodeString &text, int32_t start,
    267         int32_t &matchLength, UErrorCode &status) const;
    268 
    269     // Following APIs are not used by SimpleDateFormat, but public for testing purpose
    270     inline UnicodeString& getLongStandard(const UnicodeString &tzid, UDate date,
    271         UnicodeString &result) const;
    272     inline UnicodeString& getLongDaylight(const UnicodeString &tzid, UDate date,
    273         UnicodeString &result) const;
    274     inline UnicodeString& getLongGenericNonLocation(const UnicodeString &tzid, UDate date,
    275         UnicodeString &result) const;
    276     inline UnicodeString& getLongGenericPartialLocation(const UnicodeString &tzid, UDate date,
    277         UnicodeString &result) const;
    278     inline UnicodeString& getShortStandard(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    279         UnicodeString &result) const;
    280     inline UnicodeString& getShortDaylight(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    281         UnicodeString &result) const;
    282     inline UnicodeString& getShortGenericNonLocation(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    283         UnicodeString &result) const;
    284     inline UnicodeString& getShortGenericPartialLocation(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    285         UnicodeString &result) const;
    286     inline UnicodeString& getGenericLocation(const UnicodeString &tzid, UnicodeString &result) const;
    287 
    288 private:
    289     Locale           fLocale;
    290     UHashtable      *fTzidToStrings;
    291     UHashtable      *fMzidToStrings;
    292 
    293     TextTrieMap      fZoneStringsTrie;
    294     ZSFStringPool    fStringPool;
    295 
    296     UResourceBundle *fZoneStringsArray;
    297     UResourceBundle *fMetazoneItem;
    298     UResourceBundle *fZoneItem;
    299 
    300     /*
    301      * Private method to get a zone string except generic partial location types.
    302      */
    303     UnicodeString& getString(const UnicodeString &tzid, TimeZoneTranslationTypeIndex typeIdx, UDate date,
    304         UBool commonlyUsedOnly, UnicodeString& result) const;
    305 
    306     /*
    307      * Private method to get a generic string, with fallback logic involved,
    308      * that is,
    309      *
    310      * 1. If a generic non-location string is avaiable for the zone, return it.
    311      * 2. If a generic non-location string is associated with a metazone and
    312      *    the zone never use daylight time around the given date, use the standard
    313      *    string (if available).
    314      *
    315      *    Note: In CLDR1.5.1, the same localization is used for generic and standard.
    316      *    In this case, we do not use the standard string and do the rest.
    317      *
    318      * 3. If a generic non-location string is associated with a metazone and
    319      *    the offset at the given time is different from the preferred zone for the
    320      *    current locale, then return the generic partial location string (if avaiable)
    321      * 4. If a generic non-location string is not available, use generic location
    322      *    string.
    323      */
    324     UnicodeString& getGenericString(const Calendar &cal, UBool isShort, UBool commonlyUsedOnly,
    325         UnicodeString &result, UErrorCode &status) const;
    326 
    327     /*
    328      * Private method to get a generic partial location string
    329      */
    330     UnicodeString& getGenericPartialLocationString(const UnicodeString &tzid, UBool isShort,
    331         UDate date, UBool commonlyUsedOnly, UnicodeString &result) const;
    332 
    333     /*
    334      * Find a prefix matching time zone for the given zone string types.
    335      * @param text The text contains a time zone string
    336      * @param start The start index within the text
    337      * @param types The bit mask representing a set of requested types
    338      * @param matchLength Receives the match length
    339      * @param status
    340      * @return If any zone string matched for the requested types, returns a
    341      * ZoneStringInfo for the longest match.  If no matches are found for
    342      * the requested types, returns a ZoneStringInfo for the longest match
    343      * for any other types.  If nothing matches at all, returns null.
    344      */
    345     const ZoneStringInfo* find(const UnicodeString &text, int32_t start, int32_t types,
    346         int32_t &matchLength, UErrorCode &status) const;
    347 
    348     UnicodeString& getRegion(UnicodeString &region) const;
    349 
    350     static MessageFormat* getFallbackFormat(const Locale &locale, UErrorCode &status);
    351     static MessageFormat* getRegionFormat(const Locale &locale, UErrorCode &status);
    352     const UChar* getZoneStringFromBundle(const UResourceBundle *zoneitem, const char *key);
    353     static UBool isCommonlyUsed(const UResourceBundle *zoneitem);
    354     static UnicodeString& getLocalizedCountry(const UnicodeString &countryCode, const Locale &locale,
    355         UnicodeString &displayCountry);
    356 };
    357 
    358 inline UnicodeString&
    359 ZoneStringFormat::getLongStandard(const UnicodeString &tzid, UDate date,
    360                                   UnicodeString &result) const {
    361     return getString(tzid, ZSIDX_LONG_STANDARD, date, FALSE /* not used */, result);
    362 }
    363 
    364 inline UnicodeString&
    365 ZoneStringFormat::getLongDaylight(const UnicodeString &tzid, UDate date,
    366                                   UnicodeString &result) const {
    367     return getString(tzid, ZSIDX_LONG_DAYLIGHT, date, FALSE /* not used */, result);
    368 }
    369 
    370 inline UnicodeString&
    371 ZoneStringFormat::getLongGenericNonLocation(const UnicodeString &tzid, UDate date,
    372                                             UnicodeString &result) const {
    373     return getString(tzid, ZSIDX_LONG_GENERIC, date, FALSE /* not used */, result);
    374 }
    375 
    376 inline UnicodeString&
    377 ZoneStringFormat::getLongGenericPartialLocation(const UnicodeString &tzid, UDate date,
    378                                                 UnicodeString &result) const {
    379     return getGenericPartialLocationString(tzid, FALSE, date, FALSE /* not used */, result);
    380 }
    381 
    382 inline UnicodeString&
    383 ZoneStringFormat::getShortStandard(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    384                                    UnicodeString &result) const {
    385     return getString(tzid, ZSIDX_SHORT_STANDARD, date, commonlyUsedOnly, result);
    386 }
    387 
    388 inline UnicodeString&
    389 ZoneStringFormat::getShortDaylight(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    390                                    UnicodeString &result) const {
    391     return getString(tzid, ZSIDX_SHORT_DAYLIGHT, date, commonlyUsedOnly, result);
    392 }
    393 
    394 inline UnicodeString&
    395 ZoneStringFormat::getShortGenericNonLocation(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    396                                              UnicodeString &result) const {
    397     return getString(tzid, ZSIDX_SHORT_GENERIC, date, commonlyUsedOnly, result);
    398 }
    399 
    400 inline UnicodeString&
    401 ZoneStringFormat::getShortGenericPartialLocation(const UnicodeString &tzid, UDate date, UBool commonlyUsedOnly,
    402                                                  UnicodeString &result) const {
    403     return getGenericPartialLocationString(tzid, TRUE, date, commonlyUsedOnly, result);
    404 }
    405 
    406 inline UnicodeString&
    407 ZoneStringFormat::getGenericLocation(const UnicodeString &tzid, UnicodeString &result) const {
    408     return getString(tzid, ZSIDX_LOCATION, 0 /*not used*/, FALSE /*not used*/, result);
    409 }
    410 
    411 
    412 /*
    413  * ZoneStrings is a container of localized zone strings used by ZoneStringFormat
    414  */
    415 class ZoneStrings : public UMemory {
    416 public:
    417     ZoneStrings(UnicodeString *strings,
    418                 int32_t        stringsCount,
    419                 UBool          commonlyUsed,
    420                 UnicodeString **genericPartialLocationStrings,
    421                 int32_t        genericRowCount,
    422                 int32_t        genericColCount,
    423                 ZSFStringPool &sp,
    424                 UErrorCode    &status);
    425     virtual         ~ZoneStrings();
    426 
    427     UnicodeString&   getString(int32_t typeIdx, UnicodeString &result) const;
    428     inline UBool     isShortFormatCommonlyUsed(void) const;
    429     UnicodeString&   getGenericPartialLocationString(const UnicodeString &mzid, UBool isShort,
    430                                         UBool commonlyUsedOnly, UnicodeString &result) const;
    431 
    432 private:
    433     const UChar   **fStrings;
    434     int32_t         fStringsCount;
    435     UBool           fIsCommonlyUsed;
    436     const UChar * **fGenericPartialLocationStrings;
    437     int32_t         fGenericPartialLocationRowCount;
    438     int32_t         fGenericPartialLocationColCount;
    439 };
    440 
    441 inline UBool
    442 ZoneStrings::isShortFormatCommonlyUsed(void) const {
    443     return fIsCommonlyUsed;
    444 }
    445 
    446 /*
    447  * ZoneStringSearchResultHandler is an implementation of
    448  * TextTrieMapSearchHandler.  This class is used by ZoneStringFormat
    449  * for collecting search results for localized zone strings.
    450  */
    451 class ZoneStringSearchResultHandler : public TextTrieMapSearchResultHandler {
    452 public:
    453     ZoneStringSearchResultHandler(UErrorCode &status);
    454     virtual ~ZoneStringSearchResultHandler();
    455 
    456     virtual UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status);
    457     int32_t countMatches(void);
    458     const ZoneStringInfo* getMatch(int32_t index, int32_t &matchLength);
    459     void clear(void);
    460 
    461 private:
    462     UVector fResults;
    463     int32_t fMatchLen[ZSIDX_COUNT];
    464 };
    465 
    466 
    467 /*
    468  * ZoneStringFormat cache implementation
    469  */
    470 class ZSFCacheEntry : public UMemory {
    471 public:
    472     ~ZSFCacheEntry();
    473 
    474     void delRef(void);
    475     const ZoneStringFormat* getZoneStringFormat(void);
    476 
    477 private:
    478     friend class ZSFCache;
    479 
    480     ZSFCacheEntry(const Locale &locale, ZoneStringFormat *zsf, ZSFCacheEntry *next);
    481 
    482     Locale              fLocale;
    483     ZoneStringFormat    *fZoneStringFormat;
    484     ZSFCacheEntry       *fNext;
    485     int32_t             fRefCount;
    486 };
    487 
    488 class SafeZoneStringFormatPtr : public UMemory {
    489 public:
    490     ~SafeZoneStringFormatPtr();
    491     const ZoneStringFormat* get() const;
    492 
    493 private:
    494     friend class ZSFCache;
    495 
    496     SafeZoneStringFormatPtr(ZSFCacheEntry *cacheEntry);
    497 
    498     ZSFCacheEntry   *fCacheEntry;
    499 };
    500 
    501 class ZSFCache : public UMemory {
    502 public:
    503     ZSFCache(int32_t capacity);
    504     ~ZSFCache();
    505 
    506     SafeZoneStringFormatPtr* get(const Locale &locale, UErrorCode &status);
    507 
    508 private:
    509     int32_t         fCapacity;
    510     ZSFCacheEntry   *fFirst;
    511 };
    512 
    513 U_NAMESPACE_END
    514 
    515 #endif /* #if !UCONFIG_NO_FORMATTING */
    516 
    517 #endif // ZSTRFMT_H
    518