Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1997-2013, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *
      9 * File resbund.cpp
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   02/05/97    aliu        Fixed bug in chopLocale.  Added scanForLocaleInFile
     15 *                           based on code taken from scanForLocale.  Added
     16 *                           constructor which attempts to read resource bundle
     17 *                           from a specific file, without searching other files.
     18 *   02/11/97    aliu        Added UErrorCode return values to constructors. Fixed
     19 *                           infinite loops in scanForFile and scanForLocale.
     20 *                           Modified getRawResourceData to not delete storage in
     21 *                           localeData and resourceData which it doesn't own.
     22 *                           Added Mac compatibility #ifdefs for tellp() and
     23 *                           ios::nocreate.
     24 *   03/04/97    aliu        Modified to use ExpandingDataSink objects instead of
     25 *                           the highly inefficient ostrstream objects.
     26 *   03/13/97    aliu        Rewrote to load in entire resource bundle and store
     27 *                           it as a Hashtable of ResourceBundleData objects.
     28 *                           Added state table to govern parsing of files.
     29 *                           Modified to load locale index out of new file distinct
     30 *                           from default.txt.
     31 *   03/25/97    aliu        Modified to support 2-d arrays, needed for timezone data.
     32 *                           Added support for custom file suffixes.  Again, needed
     33 *                           to support timezone data.  Improved error handling to
     34 *                           detect duplicate tags and subtags.
     35 *   04/07/97    aliu        Fixed bug in getHashtableForLocale().  Fixed handling
     36 *                           of failing UErrorCode values on entry to API methods.
     37 *                           Fixed bugs in getArrayItem() for negative indices.
     38 *   04/29/97    aliu        Update to use new Hashtable deletion protocol.
     39 *   05/06/97    aliu        Flattened kTransitionTable for HP compiler.
     40 *                           Fixed usage of CharString.
     41 * 06/11/99      stephen     Removed parsing of .txt files.
     42 *                           Reworked to use new binary format.
     43 *                           Cleaned up.
     44 * 06/14/99      stephen     Removed methods taking a filename suffix.
     45 * 06/22/99      stephen     Added missing T_FileStream_close in parse()
     46 * 11/09/99      weiv        Added getLocale(), rewritten constructForLocale()
     47 * March 2000    weiv        complete overhaul.
     48 ******************************************************************************
     49 */
     50 
     51 #include "unicode/utypes.h"
     52 #include "unicode/resbund.h"
     53 
     54 #include "mutex.h"
     55 #include "uassert.h"
     56 #include "umutex.h"
     57 
     58 #include "uresimp.h"
     59 
     60 U_NAMESPACE_BEGIN
     61 
     62 /*-----------------------------------------------------------------------------
     63  * Implementation Notes
     64  *
     65  * Resource bundles are read in once, and thereafter cached.
     66  * ResourceBundle statically keeps track of which files have been
     67  * read, so we are guaranteed that each file is read at most once.
     68  * Resource bundles can be loaded from different data directories and
     69  * will be treated as distinct, even if they are for the same locale.
     70  *
     71  * Resource bundles are lightweight objects, which have pointers to
     72  * one or more shared Hashtable objects containing all the data.
     73  * Copying would be cheap, but there is no copy constructor, since
     74  * there wasn't one in the original API.
     75  *
     76  * The ResourceBundle parsing mechanism is implemented as a transition
     77  * network, for easy maintenance and modification.  The network is
     78  * implemented as a matrix (instead of in code) to make this even
     79  * easier.  The matrix contains Transition objects.  Each Transition
     80  * object describes a destination node and an action to take before
     81  * moving to the destination node.  The source node is encoded by the
     82  * index of the object in the array that contains it.  The pieces
     83  * needed to understand the transition network are the enums for node
     84  * IDs and actions, the parse() method, which walks through the
     85  * network and implements the actions, and the network itself.  The
     86  * network guarantees certain conditions, for example, that a new
     87  * resource will not be closed until one has been opened first; or
     88  * that data will not be stored into a TaggedList until a TaggedList
     89  * has been created.  Nonetheless, the code in parse() does some
     90  * consistency checks as it runs the network, and fails with an
     91  * U_INTERNAL_PROGRAM_ERROR if one of these checks fails.  If the input
     92  * data has a bad format, an U_INVALID_FORMAT_ERROR is returned.  If you
     93  * see an U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in
     94  * it.
     95  *
     96  * Old functionality of multiple locales in a single file is still
     97  * supported.  For this reason, LOCALE names override FILE names.  If
     98  * data for en_US is located in the en.txt file, once it is loaded,
     99  * the code will not care where it came from (other than remembering
    100  * which directory it came from).  However, if there is an en_US
    101  * resource in en_US.txt, that will take precedence.  There is no
    102  * limit to the number or type of resources that can be stored in a
    103  * file, however, files are only searched in a specific way.  If
    104  * en_US_CA is requested, then first en_US_CA.txt is searched, then
    105  * en_US.txt, then en.txt, then default.txt.  So it only makes sense
    106  * to put certain locales in certain files.  In this example, it would
    107  * be logical to put en_US_CA, en_US, and en into the en.txt file,
    108  * since they would be found there if asked for.  The extreme example
    109  * is to place all locale resources into default.txt, which should
    110  * also work.
    111  *
    112  * Inheritance is implemented.  For example, xx_YY_zz inherits as
    113  * follows: xx_YY_zz, xx_YY, xx, default.  Inheritance is implemented
    114  * as an array of hashtables.  There will be from 1 to 4 hashtables in
    115  * the array.
    116  *
    117  * Fallback files are implemented.  The fallback pattern is Language
    118  * Country Variant (LCV) -> LC -> L.  Fallback is first done for the
    119  * requested locale.  Then it is done for the default locale, as
    120  * returned by Locale::getDefault().  Then the special file
    121  * default.txt is searched for the default locale.  The overall FILE
    122  * fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default.
    123  *
    124  * Note that although file name searching includes the default locale,
    125  * once a ResourceBundle object is constructed, the inheritance path
    126  * no longer includes the default locale.  The path is LCV -> LC -> L
    127  * -> default.
    128  *
    129  * File parsing is lazy.  Nothing is parsed unless it is called for by
    130  * someone.  So when a ResourceBundle for xx_YY_zz is constructed,
    131  * only that locale is parsed (along with anything else in the same
    132  * file).  Later, if the FooBar tag is asked for, and if it isn't
    133  * found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and
    134  * so forth, until the chain is exhausted or the tag is found.
    135  *
    136  * Thread-safety is implemented around caches, both the cache that
 * stores all the resource data, and the cache that stores flags
    138  * indicating whether or not a file has been visited.  These caches
    139  * delete their storage at static cleanup time, when the process
    140  * quits.
    141  *
    142  * ResourceBundle supports TableCollation as a special case.  This
    143  * involves having special ResourceBundle objects which DO own their
    144  * data, since we don't want large collation rule strings in the
    145  * ResourceBundle cache (these are already cached in the
    146  * TableCollation cache).  TableCollation files (.ctx files) have the
    147  * same format as normal resource data files, with a different
    148  * interpretation, from the standpoint of ResourceBundle.  .ctx files
    149  * are loaded into otherwise ordinary ResourceBundle objects.  They
    150  * don't inherit (that's implemented by TableCollation) and they own
    151  * their data (as mentioned above).  However, they still support
    152  * possible multiple locales in a single .ctx file.  (This is in
    153  * practice a bad idea, since you only want the one locale you're
    154  * looking for, and only one tag will be present
    155  * ("CollationElements"), so you don't need an inheritance chain of
    156  * multiple locales.)  Up to 4 locale resources will be loaded from a
    157  * .ctx file; everything after the first 4 is ignored (parsed and
    158  * deleted).  (Normal .txt files have no limit.)  Instead of being
    159  * loaded into the cache, and then looked up as needed, the locale
    160  * resources are read straight into the ResourceBundle object.
    161  *
    162  * The Index, which used to reside in default.txt, has been moved to a
    163  * new file, index.txt.  This file contains a slightly modified format
    164  * with the addition of the "InstalledLocales" tag; it looks like:
    165  *
    166  * Index {
    167  *   InstalledLocales {
    168  *     ar
    169  *     ..
    170  *     zh_TW
    171  *   }
    172  * }
    173  */
    174 //-----------------------------------------------------------------------------
    175 
// Instantiates ICU's UObject RTTI boilerplate for ResourceBundle.
// NOTE(review): the macro is defined outside this file; presumably it emits
// the class-ID accessors declared in the class header -- confirm in uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
    177 
    178 ResourceBundle::ResourceBundle(UErrorCode &err)
    179                                 :UObject(), fLocale(NULL)
    180 {
    181     fResource = ures_open(0, Locale::getDefault().getName(), &err);
    182 }
    183 
    184 ResourceBundle::ResourceBundle(const ResourceBundle &other)
    185                               :UObject(other), fLocale(NULL)
    186 {
    187     UErrorCode status = U_ZERO_ERROR;
    188 
    189     if (other.fResource) {
    190         fResource = ures_copyResb(0, other.fResource, &status);
    191     } else {
    192         /* Copying a bad resource bundle */
    193         fResource = NULL;
    194     }
    195 }
    196 
    197 ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
    198                                :UObject(), fLocale(NULL)
    199 {
    200     if (res) {
    201         fResource = ures_copyResb(0, res, &err);
    202     } else {
    203         /* Copying a bad resource bundle */
    204         fResource = NULL;
    205     }
    206 }
    207 
    208 ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err)
    209                                :UObject(), fLocale(NULL)
    210 {
    211     fResource = ures_open(path, locale.getName(), &err);
    212 }
    213 
    214 
    215 ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
    216 {
    217     if(this == &other) {
    218         return *this;
    219     }
    220     if(fResource != 0) {
    221         ures_close(fResource);
    222         fResource = NULL;
    223     }
    224     if (fLocale != NULL) {
    225         delete fLocale;
    226         fLocale = NULL;
    227     }
    228     UErrorCode status = U_ZERO_ERROR;
    229     if (other.fResource) {
    230         fResource = ures_copyResb(0, other.fResource, &status);
    231     } else {
    232         /* Copying a bad resource bundle */
    233         fResource = NULL;
    234     }
    235     return *this;
    236 }
    237 
    238 ResourceBundle::~ResourceBundle()
    239 {
    240     if(fResource != 0) {
    241         ures_close(fResource);
    242     }
    243     if(fLocale != NULL) {
    244       delete(fLocale);
    245     }
    246 }
    247 
    248 ResourceBundle *
    249 ResourceBundle::clone() const {
    250     return new ResourceBundle(*this);
    251 }
    252 
    253 UnicodeString ResourceBundle::getString(UErrorCode& status) const {
    254     int32_t len = 0;
    255     const UChar *r = ures_getString(fResource, &len, &status);
    256     return UnicodeString(TRUE, r, len);
    257 }
    258 
    259 const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const {
    260     return ures_getBinary(fResource, &len, &status);
    261 }
    262 
    263 const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const {
    264     return ures_getIntVector(fResource, &len, &status);
    265 }
    266 
    267 uint32_t ResourceBundle::getUInt(UErrorCode& status) const {
    268     return ures_getUInt(fResource, &status);
    269 }
    270 
    271 int32_t ResourceBundle::getInt(UErrorCode& status) const {
    272     return ures_getInt(fResource, &status);
    273 }
    274 
    275 const char *ResourceBundle::getName(void) const {
    276     return ures_getName(fResource);
    277 }
    278 
    279 const char *ResourceBundle::getKey(void) const {
    280     return ures_getKey(fResource);
    281 }
    282 
    283 UResType ResourceBundle::getType(void) const {
    284     return ures_getType(fResource);
    285 }
    286 
    287 int32_t ResourceBundle::getSize(void) const {
    288     return ures_getSize(fResource);
    289 }
    290 
    291 UBool ResourceBundle::hasNext(void) const {
    292     return ures_hasNext(fResource);
    293 }
    294 
/**
 * Resets iteration over this resource by forwarding to ures_resetIterator()
 * on the underlying resource.
 */
void ResourceBundle::resetIterator(void) {
    ures_resetIterator(fResource);
}
    298 
    299 ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
    300     UResourceBundle r;
    301 
    302     ures_initStackObject(&r);
    303     ures_getNextResource(fResource, &r, &status);
    304     ResourceBundle res(&r, status);
    305     if (U_SUCCESS(status)) {
    306         ures_close(&r);
    307     }
    308     return res;
    309 }
    310 
    311 UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
    312     int32_t len = 0;
    313     const UChar* r = ures_getNextString(fResource, &len, 0, &status);
    314     return UnicodeString(TRUE, r, len);
    315 }
    316 
    317 UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) {
    318     int32_t len = 0;
    319     const UChar* r = ures_getNextString(fResource, &len, key, &status);
    320     return UnicodeString(TRUE, r, len);
    321 }
    322 
    323 ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const {
    324     UResourceBundle r;
    325 
    326     ures_initStackObject(&r);
    327     ures_getByIndex(fResource, indexR, &r, &status);
    328     ResourceBundle res(&r, status);
    329     if (U_SUCCESS(status)) {
    330         ures_close(&r);
    331     }
    332     return res;
    333 }
    334 
    335 UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const {
    336     int32_t len = 0;
    337     const UChar* r = ures_getStringByIndex(fResource, indexS, &len, &status);
    338     return UnicodeString(TRUE, r, len);
    339 }
    340 
    341 ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const {
    342     UResourceBundle r;
    343 
    344     ures_initStackObject(&r);
    345     ures_getByKey(fResource, key, &r, &status);
    346     ResourceBundle res(&r, status);
    347     if (U_SUCCESS(status)) {
    348         ures_close(&r);
    349     }
    350     return res;
    351 }
    352 
    353 ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){
    354     UResourceBundle r;
    355     ures_initStackObject(&r);
    356     ures_getByKeyWithFallback(fResource, key, &r, &status);
    357     ResourceBundle res(&r, status);
    358     if(U_SUCCESS(status)){
    359         ures_close(&r);
    360     }
    361     return res;
    362 }
    363 UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const {
    364     int32_t len = 0;
    365     const UChar* r = ures_getStringByKey(fResource, key, &len, &status);
    366     return UnicodeString(TRUE, r, len);
    367 }
    368 
    369 const char*
    370 ResourceBundle::getVersionNumber()  const
    371 {
    372     return ures_getVersionNumberInternal(fResource);
    373 }
    374 
/**
 * Retrieves the version of the underlying resource by forwarding to
 * ures_getVersion().
 *
 * @param versionInfo  Handed to ures_getVersion(), which is expected to fill
 *                     it in.
 */
void ResourceBundle::getVersion(UVersionInfo versionInfo) const {
    ures_getVersion(fResource, versionInfo);
}
    378 
    379 static UMutex gLocaleLock = U_MUTEX_INITIALIZER;
    380 const Locale &ResourceBundle::getLocale(void) const {
    381     Mutex lock(&gLocaleLock);
    382     if (fLocale != NULL) {
    383         return *fLocale;
    384     }
    385     UErrorCode status = U_ZERO_ERROR;
    386     const char *localeName = ures_getLocaleInternal(fResource, &status);
    387     ResourceBundle *ncThis = const_cast<ResourceBundle *>(this);
    388     ncThis->fLocale = new Locale(localeName);
    389     return ncThis->fLocale != NULL ? *ncThis->fLocale : Locale::getDefault();
    390 }
    391 
    392 const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
    393 {
    394   return ures_getLocaleByType(fResource, type, &status);
    395 }
    396 
    397 U_NAMESPACE_END
    398 //eof
    399