Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1997-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *
      9 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
     10 *
     11 *   Date        Name        Description
     12 *   04/14/97    aliu        Creation.
     13 *   04/24/97    aliu        Added getDefaultDataDirectory() and
     14 *                            getDefaultLocaleID().
     15 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
     16 *                            for assumed case.  Non-UNIX platforms must be
     17 *                            special-cased.  Rewrote numeric methods dealing
     18 *                            with NaN and Infinity to be platform independent
     19 *                             over all IEEE 754 platforms.
     20 *   05/13/97    aliu        Restored sign of timezone
     21 *                            (semantics are hours West of GMT)
     22 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
     23 *                             nextDouble..
     24 *   07/22/98    stephen     Added remainder, max, min, trunc
     25 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
     26 *   08/24/98    stephen     Added longBitsFromDouble
     27 *   09/08/98    stephen     Minor changes for Mac Port
     28 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
     29 *                            Fixed EBCDIC tables
     30 *   04/15/99    stephen     Converted to C.
     31 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
     32 *   08/04/99    jeffrey R.  Added OS/2 changes
     33 *   11/15/99    helena      Integrated S/390 IEEE support.
     34 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
     35 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
     36 *   01/03/08    Steven L.   Fake Time Support
     37 ******************************************************************************
     38 */
     39 
     40 /* Define _XOPEN_SOURCE for Solaris and friends. */
     41 /* NetBSD needs it to be >= 4 */
     42 #if !defined(_XOPEN_SOURCE)
     43 #if __STDC_VERSION__ >= 199901L
     44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
     45 #define _XOPEN_SOURCE 600
     46 #else
     47 #define _XOPEN_SOURCE 4
     48 #endif
     49 #endif
     50 
     51 /* Make sure things like readlink and such functions work.
     52 Poorly upgraded Solaris machines can't have this defined.
     53 Cleanly installed Solaris can use this #define.
     54 */
     55 #if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L)
     56 #define _XOPEN_SOURCE_EXTENDED 1
     57 #endif
     58 
     59 /* include ICU headers */
     60 #include "unicode/utypes.h"
     61 #include "unicode/putil.h"
     62 #include "unicode/ustring.h"
     63 #include "putilimp.h"
     64 #include "uassert.h"
     65 #include "umutex.h"
     66 #include "cmemory.h"
     67 #include "cstring.h"
     68 #include "locmap.h"
     69 #include "ucln_cmn.h"
     70 
     71 /* Include standard headers. */
     72 #include <stdio.h>
     73 #include <stdlib.h>
     74 #include <string.h>
     75 #include <math.h>
     76 #include <locale.h>
     77 #include <float.h>
     78 #include <time.h>
     79 
     80 /* include system headers */
     81 #ifdef U_WINDOWS
     82 #   define WIN32_LEAN_AND_MEAN
     83 #   define VC_EXTRALEAN
     84 #   define NOUSER
     85 #   define NOSERVICE
     86 #   define NOIME
     87 #   define NOMCX
     88 #   include <windows.h>
     89 #   include "wintz.h"
     90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
     91 /* tzset isn't defined in strict ANSI on Cygwin. */
     92 #   undef __STRICT_ANSI__
     93 #elif defined(OS400)
     94 #   include <float.h>
     95 #   include <qusec.h>       /* error code structure */
     96 #   include <qusrjobi.h>
     97 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
     98 #   include <mih/testptr.h> /* For uprv_maximumPtr */
     99 #elif defined(XP_MAC)
    100 #   include <Files.h>
    101 #   include <IntlResources.h>
    102 #   include <Script.h>
    103 #   include <Folders.h>
    104 #   include <MacTypes.h>
    105 #   include <TextUtils.h>
    106 #   define ICU_NO_USER_DATA_OVERRIDE 1
    107 #elif defined(OS390)
    108 #include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
    109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
    110 #include <limits.h>
    111 #include <unistd.h>
    112 #elif defined(U_QNX)
    113 #include <sys/neutrino.h>
    114 #endif
    115 
    116 #ifndef U_WINDOWS
    117 #include <sys/time.h>
    118 #endif
    119 
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
    125 
    126 #if U_HAVE_NL_LANGINFO_CODESET
    127 #include <langinfo.h>
    128 #endif
    129 
    130 /* Define the extension for data files, again... */
    131 #define DATA_TYPE "dat"
    132 
    133 /* Leave this copyright notice here! */
    134 static const char copyright[] = U_COPYRIGHT_STRING;
    135 
    136 /* floating point implementations ------------------------------------------- */
    137 
    138 /* We return QNAN rather than SNAN*/
    139 #define SIGN 0x80000000U
    140 
    141 /* Make it easy to define certain types of constants */
    142 typedef union {
    143     int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    144     double d64;
    145 } BitPatternConversion;
    146 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
    147 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
    148 
    149 /*---------------------------------------------------------------------------
    150   Platform utilities
    151   Our general strategy is to assume we're on a POSIX platform.  Platforms which
    152   are non-POSIX must declare themselves so.  The default POSIX implementation
    153   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
    154   functions).
    155   ---------------------------------------------------------------------------*/
    156 
    157 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
    158 #   undef U_POSIX_LOCALE
    159 #else
    160 #   define U_POSIX_LOCALE    1
    161 #endif
    162 
    163 /*
    164     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
    165     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
    166 */
    167 #if !IEEE_754
    168 static char*
    169 u_topNBytesOfDouble(double* d, int n)
    170 {
    171 #if U_IS_BIG_ENDIAN
    172     return (char*)d;
    173 #else
    174     return (char*)(d + 1) - n;
    175 #endif
    176 }
    177 #endif
    178 
    179 static char*
    180 u_bottomNBytesOfDouble(double* d, int n)
    181 {
    182 #if U_IS_BIG_ENDIAN
    183     return (char*)(d + 1) - n;
    184 #else
    185     return (char*)d;
    186 #endif
    187 }
    188 
    189 #if defined (U_DEBUG_FAKETIME)
    190 /* Override the clock to test things without having to move the system clock.
    191  * Assumes POSIX gettimeofday() will function
    192  */
    193 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
    194 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
    195 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
    196 static UMTX fakeClockMutex = NULL;
    197 
    198 static UDate getUTCtime_real() {
    199     struct timeval posixTime;
    200     gettimeofday(&posixTime, NULL);
    201     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
    202 }
    203 
    204 static UDate getUTCtime_fake() {
    205     umtx_lock(&fakeClockMutex);
    206     if(!fakeClock_set) {
    207         UDate real = getUTCtime_real();
    208         const char *fake_start = getenv("U_FAKETIME_START");
    209         if(fake_start!=NULL) {
    210             sscanf(fake_start,"%lf",&fakeClock_t0);
    211         }
    212         fakeClock_dt = fakeClock_t0 - real;
    213         fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
    214                        "U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
    215                             fakeClock_t0, fake_start, fakeClock_dt, real);
    216         fakeClock_set = TRUE;
    217     }
    218     umtx_unlock(&fakeClockMutex);
    219 
    220     return getUTCtime_real() + fakeClock_dt;
    221 }
    222 #endif
    223 
    224 #if defined(U_WINDOWS)
    225 typedef union {
    226     int64_t int64;
    227     FILETIME fileTime;
    228 } FileTimeConversion;   /* This is like a ULARGE_INTEGER */
    229 
    230 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
    231 #define EPOCH_BIAS  INT64_C(116444736000000000)
    232 #define HECTONANOSECOND_PER_MILLISECOND   10000
    233 
    234 #endif
    235 
    236 /*---------------------------------------------------------------------------
    237   Universal Implementations
    238   These are designed to work on all platforms.  Try these, and if they
    239   don't work on your platform, then special case your platform with new
    240   implementations.
    241 ---------------------------------------------------------------------------*/
    242 
    243 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
    244 U_CAPI UDate U_EXPORT2
    245 uprv_getUTCtime()
    246 {
    247 #if defined(U_DEBUG_FAKETIME)
    248     return getUTCtime_fake(); /* Hook for overriding the clock */
    249 #elif defined(XP_MAC)
    250     time_t t, t1, t2;
    251     struct tm tmrec;
    252 
    253     uprv_memset( &tmrec, 0, sizeof(tmrec) );
    254     tmrec.tm_year = 70;
    255     tmrec.tm_mon = 0;
    256     tmrec.tm_mday = 1;
    257     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
    258 
    259     time(&t);
    260     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
    261     t2 = mktime(&tmrec);    /* seconds of current GMT*/
    262     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
    263 #elif defined(U_WINDOWS)
    264 
    265     FileTimeConversion winTime;
    266     GetSystemTimeAsFileTime(&winTime.fileTime);
    267     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
    268 #else
    269 /*
    270     struct timeval posixTime;
    271     gettimeofday(&posixTime, NULL);
    272     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
    273 */
    274     time_t epochtime;
    275     time(&epochtime);
    276     return (UDate)epochtime * U_MILLIS_PER_SECOND;
    277 #endif
    278 }
    279 
    280 /*-----------------------------------------------------------------------------
    281   IEEE 754
    282   These methods detect and return NaN and infinity values for doubles
    283   conforming to IEEE 754.  Platforms which support this standard include X86,
    284   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
    285   If this doesn't work on your platform, you have non-IEEE floating-point, and
    286   will need to code your own versions.  A naive implementation is to return 0.0
    287   for getNaN and getInfinity, and false for isNaN and isInfinite.
    288   ---------------------------------------------------------------------------*/
    289 
    290 U_CAPI UBool U_EXPORT2
    291 uprv_isNaN(double number)
    292 {
    293 #if IEEE_754
    294     BitPatternConversion convertedNumber;
    295     convertedNumber.d64 = number;
    296     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
    297     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
    298 
    299 #elif defined(OS390)
    300     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    301                         sizeof(uint32_t));
    302     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
    303                         sizeof(uint32_t));
    304 
    305     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
    306       (lowBits == 0x00000000L);
    307 
    308 #else
    309     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    310     /* you'll need to replace this default implementation with what's correct*/
    311     /* for your platform.*/
    312     return number != number;
    313 #endif
    314 }
    315 
    316 U_CAPI UBool U_EXPORT2
    317 uprv_isInfinite(double number)
    318 {
    319 #if IEEE_754
    320     BitPatternConversion convertedNumber;
    321     convertedNumber.d64 = number;
    322     /* Infinity is exactly 0x7FF0000000000000U. */
    323     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
    324 #elif defined(OS390)
    325     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    326                         sizeof(uint32_t));
    327     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
    328                         sizeof(uint32_t));
    329 
    330     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
    331 
    332 #else
    333     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    334     /* value, you'll need to replace this default implementation with what's*/
    335     /* correct for your platform.*/
    336     return number == (2.0 * number);
    337 #endif
    338 }
    339 
    340 U_CAPI UBool U_EXPORT2
    341 uprv_isPositiveInfinity(double number)
    342 {
    343 #if IEEE_754 || defined(OS390)
    344     return (UBool)(number > 0 && uprv_isInfinite(number));
    345 #else
    346     return uprv_isInfinite(number);
    347 #endif
    348 }
    349 
    350 U_CAPI UBool U_EXPORT2
    351 uprv_isNegativeInfinity(double number)
    352 {
    353 #if IEEE_754 || defined(OS390)
    354     return (UBool)(number < 0 && uprv_isInfinite(number));
    355 
    356 #else
    357     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    358                         sizeof(uint32_t));
    359     return((highBits & SIGN) && uprv_isInfinite(number));
    360 
    361 #endif
    362 }
    363 
    364 U_CAPI double U_EXPORT2
    365 uprv_getNaN()
    366 {
    367 #if IEEE_754 || defined(OS390)
    368     return gNan.d64;
    369 #else
    370     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    371     /* you'll need to replace this default implementation with what's correct*/
    372     /* for your platform.*/
    373     return 0.0;
    374 #endif
    375 }
    376 
    377 U_CAPI double U_EXPORT2
    378 uprv_getInfinity()
    379 {
    380 #if IEEE_754 || defined(OS390)
    381     return gInf.d64;
    382 #else
    383     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    384     /* value, you'll need to replace this default implementation with what's*/
    385     /* correct for your platform.*/
    386     return 0.0;
    387 #endif
    388 }
    389 
    390 U_CAPI double U_EXPORT2
    391 uprv_floor(double x)
    392 {
    393     return floor(x);
    394 }
    395 
    396 U_CAPI double U_EXPORT2
    397 uprv_ceil(double x)
    398 {
    399     return ceil(x);
    400 }
    401 
    402 U_CAPI double U_EXPORT2
    403 uprv_round(double x)
    404 {
    405     return uprv_floor(x + 0.5);
    406 }
    407 
    408 U_CAPI double U_EXPORT2
    409 uprv_fabs(double x)
    410 {
    411     return fabs(x);
    412 }
    413 
    414 U_CAPI double U_EXPORT2
    415 uprv_modf(double x, double* y)
    416 {
    417     return modf(x, y);
    418 }
    419 
    420 U_CAPI double U_EXPORT2
    421 uprv_fmod(double x, double y)
    422 {
    423     return fmod(x, y);
    424 }
    425 
    426 U_CAPI double U_EXPORT2
    427 uprv_pow(double x, double y)
    428 {
    429     /* This is declared as "double pow(double x, double y)" */
    430     return pow(x, y);
    431 }
    432 
    433 U_CAPI double U_EXPORT2
    434 uprv_pow10(int32_t x)
    435 {
    436     return pow(10.0, (double)x);
    437 }
    438 
    439 U_CAPI double U_EXPORT2
    440 uprv_fmax(double x, double y)
    441 {
    442 #if IEEE_754
    443     int32_t lowBits;
    444 
    445     /* first handle NaN*/
    446     if(uprv_isNaN(x) || uprv_isNaN(y))
    447         return uprv_getNaN();
    448 
    449     /* check for -0 and 0*/
    450     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
    451     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
    452         return y;
    453 
    454 #endif
    455 
    456     /* this should work for all flt point w/o NaN and Infpecial cases */
    457     return (x > y ? x : y);
    458 }
    459 
    460 U_CAPI double U_EXPORT2
    461 uprv_fmin(double x, double y)
    462 {
    463 #if IEEE_754
    464     int32_t lowBits;
    465 
    466     /* first handle NaN*/
    467     if(uprv_isNaN(x) || uprv_isNaN(y))
    468         return uprv_getNaN();
    469 
    470     /* check for -0 and 0*/
    471     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
    472     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
    473         return y;
    474 
    475 #endif
    476 
    477     /* this should work for all flt point w/o NaN and Inf special cases */
    478     return (x > y ? y : x);
    479 }
    480 
    481 /**
    482  * Truncates the given double.
    483  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
    484  * This is different than calling floor() or ceil():
    485  * floor(3.3) = 3, floor(-3.3) = -4
    486  * ceil(3.3) = 4, ceil(-3.3) = -3
    487  */
    488 U_CAPI double U_EXPORT2
    489 uprv_trunc(double d)
    490 {
    491 #if IEEE_754
    492     int32_t lowBits;
    493 
    494     /* handle error cases*/
    495     if(uprv_isNaN(d))
    496         return uprv_getNaN();
    497     if(uprv_isInfinite(d))
    498         return uprv_getInfinity();
    499 
    500     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
    501     if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
    502         return ceil(d);
    503     else
    504         return floor(d);
    505 
    506 #else
    507     return d >= 0 ? floor(d) : ceil(d);
    508 
    509 #endif
    510 }
    511 
    512 /**
    513  * Return the largest positive number that can be represented by an integer
    514  * type of arbitrary bit length.
    515  */
    516 U_CAPI double U_EXPORT2
    517 uprv_maxMantissa(void)
    518 {
    519     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
    520 }
    521 
    522 U_CAPI double U_EXPORT2
    523 uprv_log(double d)
    524 {
    525     return log(d);
    526 }
    527 
    528 U_CAPI void * U_EXPORT2
    529 uprv_maximumPtr(void * base)
    530 {
    531 #if defined(OS400)
    532     /*
    533      * With the provided function we should never be out of range of a given segment
    534      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
    535      * id and 3 bytes for the offset.  The key is that the casting takes care of
    536      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
    537      * seen in a program is x001000 and when casted to an int would be 0.
    538      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
    539      *
    540      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
    541      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
    542      * This function determines the activation based on the pointer that is passed in and
    543      * calculates the appropriate maximum available size for
    544      * each pointer type (TERASPACE and non-TERASPACE)
    545      *
    546      * Unlike other operating systems, the pointer model isn't determined at
    547      * compile time on i5/OS.
    548      */
    549     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
    550         /* if it is a TERASPACE pointer the max is 2GB - 4k */
    551         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
    552     }
    553     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
    554     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
    555 
    556 #else
    557     return U_MAX_PTR(base);
    558 #endif
    559 }
    560 
    561 /*---------------------------------------------------------------------------
    562   Platform-specific Implementations
    563   Try these, and if they don't work on your platform, then special case your
    564   platform with new implementations.
    565   ---------------------------------------------------------------------------*/
    566 
    567 /* Generic time zone layer -------------------------------------------------- */
    568 
    569 /* Time zone utilities */
    570 U_CAPI void U_EXPORT2
    571 uprv_tzset()
    572 {
    573 #ifdef U_TZSET
    574     U_TZSET();
    575 #else
    576     /* no initialization*/
    577 #endif
    578 }
    579 
    580 U_CAPI int32_t U_EXPORT2
    581 uprv_timezone()
    582 {
    583 #ifdef U_TIMEZONE
    584     return U_TIMEZONE;
    585 #else
    586     time_t t, t1, t2;
    587     struct tm tmrec;
    588     UBool dst_checked;
    589     int32_t tdiff = 0;
    590 
    591     time(&t);
    592     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
    593     dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
    594     t1 = mktime(&tmrec);                 /* local time in seconds*/
    595     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
    596     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
    597     tdiff = t2 - t1;
    598     /* imitate NT behaviour, which returns same timezone offset to GMT for
    599        winter and summer*/
    600     if (dst_checked)
    601         tdiff += 3600;
    602     return tdiff;
    603 #endif
    604 }
    605 
    606 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
    607    some platforms need to have it declared here. */
    608 
    609 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
    610 /* RS6000 and others reject char **tzname.  */
    611 extern U_IMPORT char *U_TZNAME[];
    612 #endif
    613 
    614 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
    615 /* These platforms are likely to use Olson timezone IDs. */
    616 #define CHECK_LOCALTIME_LINK 1
    617 #if defined(U_DARWIN)
    618 #include <tzfile.h>
    619 #define TZZONEINFO      (TZDIR "/")
    620 #else
    621 #define TZDEFAULT       "/etc/localtime"
    622 #define TZZONEINFO      "/usr/share/zoneinfo/"
    623 #endif
    624 #if U_HAVE_DIRENT_H
    625 #define SEARCH_TZFILE
    626 #include <dirent.h>  /* Needed to search through system timezone files */
    627 #endif
    628 static char gTimeZoneBuffer[PATH_MAX];
    629 static char *gTimeZoneBufferPtr = NULL;
    630 #endif
    631 
    632 #ifndef U_WINDOWS
    633 #define isNonDigit(ch) (ch < '0' || '9' < ch)
    634 static UBool isValidOlsonID(const char *id) {
    635     int32_t idx = 0;
    636 
    637     /* Determine if this is something like Iceland (Olson ID)
    638     or AST4ADT (non-Olson ID) */
    639     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
    640         idx++;
    641     }
    642 
    643     /* If we went through the whole string, then it might be okay.
    644     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
    645     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
    646     The rest of the time it could be an Olson ID. George */
    647     return (UBool)(id[idx] == 0
    648         || uprv_strcmp(id, "PST8PDT") == 0
    649         || uprv_strcmp(id, "MST7MDT") == 0
    650         || uprv_strcmp(id, "CST6CDT") == 0
    651         || uprv_strcmp(id, "EST5EDT") == 0);
    652 }
    653 #endif
    654 
    655 #if defined(U_TZNAME) && !defined(U_WINDOWS)
    656 
    657 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
    658 typedef struct OffsetZoneMapping {
    659     int32_t offsetSeconds;
    660     int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    661     const char *stdID;
    662     const char *dstID;
    663     const char *olsonID;
    664 } OffsetZoneMapping;
    665 
    666 /*
    667 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
    668 and maps it to an Olson ID.
    669 Before adding anything to this list, take a look at
    670 icu/source/tools/tzcode/tz.alias
    671 Sometimes no daylight savings (0) is important to define due to aliases.
    672 This list can be tested with icu/source/test/compat/tzone.pl
    673 More values could be added to daylightType to increase precision.
    674 */
    675 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    676     {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    677     {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    678     {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    679     {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    680     {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    681     {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    682     {-36000, 2, "EST", "EST", "Australia/Sydney"},
    683     {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    684     {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    685     {-34200, 2, "CST", "CST", "Australia/South"},
    686     {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    687     {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    688     {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    689     {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    690     {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    691     {-28800, 2, "WST", "WST", "Australia/West"},
    692     {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    693     {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    694     {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    695     {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    696     {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    697     {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    698     {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    699     {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    700     {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    701     {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    702     {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    703     {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    704     {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    705     {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    706     {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    707     {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    708     {0, 1, "GMT", "IST", "Europe/Dublin"},
    709     {0, 1, "GMT", "BST", "Europe/London"},
    710     {0, 0, "WET", "WEST", "Africa/Casablanca"},
    711     {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    712     {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    713     {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    714     {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    715     {10800, 2, "UYT", "UYST", "America/Montevideo"},
    716     {10800, 1, "WGT", "WGST", "America/Godthab"},
    717     {10800, 2, "BRT", "BRST", "Brazil/East"},
    718     {12600, 1, "NST", "NDT", "America/St_Johns"},
    719     {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    720     {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    721     {14400, 2, "CLT", "CLST", "Chile/Continental"},
    722     {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    723     {14400, 2, "PYT", "PYST", "America/Asuncion"},
    724     {18000, 1, "CST", "CDT", "America/Havana"},
    725     {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    726     {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    727     {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    728     {21600, 0, "CST", "CDT", "America/Guatemala"},
    729     {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    730     {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    731     {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    732     {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    733     {32400, 1, "AKST", "AKDT", "US/Alaska"},
    734     {36000, 1, "HAST", "HADT", "US/Aleutian"}
    735 };
    736 
    737 /*#define DEBUG_TZNAME*/
    738 
    739 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
    740 {
    741     int32_t idx;
    742 #ifdef DEBUG_TZNAME
    743     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
    744 #endif
    745     for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
    746     {
    747         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
    748             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
    749             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
    750             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
    751         {
    752             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
    753         }
    754     }
    755     return NULL;
    756 }
    757 #endif
    758 
    759 #ifdef SEARCH_TZFILE
    760 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
    761 #define MAX_READ_SIZE 512
    762 
    763 typedef struct DefaultTZInfo {
    764     char* defaultTZBuffer;
    765     int64_t defaultTZFileSize;
    766     FILE* defaultTZFilePtr;
    767     UBool defaultTZstatus;
    768     int32_t defaultTZPosition;
    769 } DefaultTZInfo;
    770 
    771 /*
    772  * This method compares the two files given to see if they are a match.
    773  * It is currently use to compare two TZ files.
    774  */
    775 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
    776     FILE* file;
    777     int64_t sizeFile;
    778     int64_t sizeFileLeft;
    779     int32_t sizeFileRead;
    780     int32_t sizeFileToRead;
    781     char bufferFile[MAX_READ_SIZE];
    782     UBool result = TRUE;
    783 
    784     if (tzInfo->defaultTZFilePtr == NULL) {
    785         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
    786     }
    787     file = fopen(TZFileName, "r");
    788 
    789     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
    790 
    791     if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
    792         /* First check that the file size are equal. */
    793         if (tzInfo->defaultTZFileSize == 0) {
    794             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
    795             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
    796         }
    797         fseek(file, 0, SEEK_END);
    798         sizeFile = ftell(file);
    799         sizeFileLeft = sizeFile;
    800 
    801         if (sizeFile != tzInfo->defaultTZFileSize) {
    802             result = FALSE;
    803         } else {
    804             /* Store the data from the files in seperate buffers and
    805              * compare each byte to determine equality.
    806              */
    807             if (tzInfo->defaultTZBuffer == NULL) {
    808                 rewind(tzInfo->defaultTZFilePtr);
    809                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
    810                 fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
    811             }
    812             rewind(file);
    813             while(sizeFileLeft > 0) {
    814                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
    815                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
    816 
    817                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
    818                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
    819                     result = FALSE;
    820                     break;
    821                 }
    822                 sizeFileLeft -= sizeFileRead;
    823                 tzInfo->defaultTZPosition += sizeFileRead;
    824             }
    825         }
    826     } else {
    827         result = FALSE;
    828     }
    829 
    830     if (file != NULL) {
    831         fclose(file);
    832     }
    833 
    834     return result;
    835 }
    836 /*
    837  * This method recursively traverses the directory given for a matching TZ file and returns the first match.
    838  */
    839 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
    840 #define SKIP1 "."
    841 #define SKIP2 ".."
    842 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
    843 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
    844     DIR* dirp = opendir(path);
    845     DIR* subDirp = NULL;
    846     struct dirent* dirEntry = NULL;
    847 
    848     char* result = NULL;
    849 
    850     /* Save the current path */
    851     char curpath[MAX_PATH_SIZE];
    852 
    853     if (dirp == NULL)
    854         return result;
    855 
    856     uprv_memset(curpath, 0, MAX_PATH_SIZE);
    857     uprv_strcpy(curpath, path);
    858 
    859     /* Check each entry in the directory. */
    860     while((dirEntry = readdir(dirp)) != NULL) {
    861         if (uprv_strcmp(dirEntry->d_name, SKIP1) != 0 && uprv_strcmp(dirEntry->d_name, SKIP2) != 0) {
    862             /* Create a newpath with the new entry to test each entry in the directory. */
    863             char newpath[MAX_PATH_SIZE];
    864             uprv_strcpy(newpath, curpath);
    865             uprv_strcat(newpath, dirEntry->d_name);
    866 
    867             if ((subDirp = opendir(newpath)) != NULL) {
    868                 /* If this new path is a directory, make a recursive call with the newpath. */
    869                 closedir(subDirp);
    870                 uprv_strcat(newpath, "/");
    871                 result = searchForTZFile(newpath, tzInfo);
    872             } else {
    873                 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
    874                     uprv_strcpy(SEARCH_TZFILE_RESULT, newpath + (sizeof(TZZONEINFO) - 1));
    875                     result = SEARCH_TZFILE_RESULT;
    876                     /* Get out after the first one found. */
    877                     break;
    878                 }
    879             }
    880         }
    881     }
    882     closedir(dirp);
    883     return result;
    884 }
    885 #endif
    886 U_CAPI const char* U_EXPORT2
    887 uprv_tzname(int n)
    888 {
    889     const char *tzid = NULL;
    890 #ifdef U_WINDOWS
    891     tzid = uprv_detectWindowsTimeZone();
    892 
    893     if (tzid != NULL) {
    894         return tzid;
    895     }
    896 #else
    897 
    898 /*#if defined(U_DARWIN)
    899     int ret;
    900 
    901     tzid = getenv("TZFILE");
    902     if (tzid != NULL) {
    903         return tzid;
    904     }
    905 #endif*/
    906 
    907 /* This code can be temporarily disabled to test tzname resolution later on. */
    908 #ifndef DEBUG_TZNAME
    909     tzid = getenv("TZ");
    910     if (tzid != NULL && isValidOlsonID(tzid))
    911     {
    912         /* This might be a good Olson ID. */
    913         if (uprv_strncmp(tzid, "posix/", 6) == 0
    914             || uprv_strncmp(tzid, "right/", 6) == 0)
    915         {
    916             /* Remove the posix/ or right/ prefix. */
    917             tzid += 6;
    918         }
    919         return tzid;
    920     }
    921     /* else U_TZNAME will give a better result. */
    922 #endif
    923 
    924 #if defined(CHECK_LOCALTIME_LINK)
    925     /* Caller must handle threading issues */
    926     if (gTimeZoneBufferPtr == NULL) {
    927         /*
    928         This is a trick to look at the name of the link to get the Olson ID
    929         because the tzfile contents is underspecified.
    930         This isn't guaranteed to work because it may not be a symlink.
    931         */
    932         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
    933         if (0 < ret) {
    934             int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
    935             gTimeZoneBuffer[ret] = 0;
    936             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
    937                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
    938             {
    939                 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
    940             }
    941         } else {
    942 #if defined(SEARCH_TZFILE)
    943             DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
    944             if (tzInfo != NULL) {
    945                 tzInfo->defaultTZBuffer = NULL;
    946                 tzInfo->defaultTZFileSize = 0;
    947                 tzInfo->defaultTZFilePtr = NULL;
    948                 tzInfo->defaultTZstatus = FALSE;
    949                 tzInfo->defaultTZPosition = 0;
    950 
    951                 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
    952 
    953                 /* Free previously allocated memory */
    954                 if (tzInfo->defaultTZBuffer != NULL) {
    955                     uprv_free(tzInfo->defaultTZBuffer);
    956                 }
    957                 if (tzInfo->defaultTZFilePtr != NULL) {
    958                     fclose(tzInfo->defaultTZFilePtr);
    959                 }
    960                 uprv_free(tzInfo);
    961             }
    962 
    963             if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
    964                 return gTimeZoneBufferPtr;
    965             }
    966 #endif
    967         }
    968     }
    969     else {
    970         return gTimeZoneBufferPtr;
    971     }
    972 #endif
    973 #endif
    974 
    975 #ifdef U_TZNAME
    976 #ifdef U_WINDOWS
    977     return uprv_strdup(U_TZNAME[n]);
    978 #else
    979     /*
    980     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    981     So we remap the abbreviation to an olson ID.
    982 
    983     Since Windows exposes a little more timezone information,
    984     we normally don't use this code on Windows because
    985     uprv_detectWindowsTimeZone should have already given the correct answer.
    986     */
    987     {
    988         struct tm juneSol, decemberSol;
    989         int daylightType;
    990         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
    991         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
    992 
    993         /* This probing will tell us when daylight savings occurs.  */
    994         localtime_r(&juneSolstice, &juneSol);
    995         localtime_r(&decemberSolstice, &decemberSol);
    996         daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
    997         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
    998         if (tzid != NULL) {
    999             return tzid;
   1000         }
   1001     }
   1002     return U_TZNAME[n];
   1003 #endif
   1004 #else
   1005     return "";
   1006 #endif
   1007 }
   1008 
   1009 /* Get and set the ICU data directory --------------------------------------- */
   1010 
   1011 static char *gDataDirectory = NULL;
   1012 #if U_POSIX_LOCALE
   1013  static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
   1014 #endif
   1015 
   1016 static UBool U_CALLCONV putil_cleanup(void)
   1017 {
   1018     if (gDataDirectory && *gDataDirectory) {
   1019         uprv_free(gDataDirectory);
   1020     }
   1021     gDataDirectory = NULL;
   1022 #if U_POSIX_LOCALE
   1023     if (gCorrectedPOSIXLocale) {
   1024         uprv_free(gCorrectedPOSIXLocale);
   1025         gCorrectedPOSIXLocale = NULL;
   1026     }
   1027 #endif
   1028     return TRUE;
   1029 }
   1030 
   1031 /*
   1032  * Set the data directory.
   1033  *    Make a copy of the passed string, and set the global data dir to point to it.
   1034  *    TODO:  see bug #2849, regarding thread safety.
   1035  */
   1036 U_CAPI void U_EXPORT2
   1037 u_setDataDirectory(const char *directory) {
   1038     char *newDataDir;
   1039     int32_t length;
   1040 
   1041     if(directory==NULL || *directory==0) {
   1042         /* A small optimization to prevent the malloc and copy when the
   1043         shared library is used, and this is a way to make sure that NULL
   1044         is never returned.
   1045         */
   1046         newDataDir = (char *)"";
   1047     }
   1048     else {
   1049         length=(int32_t)uprv_strlen(directory);
   1050         newDataDir = (char *)uprv_malloc(length + 2);
   1051         /* Exit out if newDataDir could not be created. */
   1052         if (newDataDir == NULL) {
   1053             return;
   1054         }
   1055         uprv_strcpy(newDataDir, directory);
   1056 
   1057 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1058         {
   1059             char *p;
   1060             while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
   1061                 *p = U_FILE_SEP_CHAR;
   1062             }
   1063         }
   1064 #endif
   1065     }
   1066 
   1067     umtx_lock(NULL);
   1068     if (gDataDirectory && *gDataDirectory) {
   1069         uprv_free(gDataDirectory);
   1070     }
   1071     gDataDirectory = newDataDir;
   1072     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1073     umtx_unlock(NULL);
   1074 }
   1075 
   1076 U_CAPI UBool U_EXPORT2
   1077 uprv_pathIsAbsolute(const char *path)
   1078 {
   1079   if(!path || !*path) {
   1080     return FALSE;
   1081   }
   1082 
   1083   if(*path == U_FILE_SEP_CHAR) {
   1084     return TRUE;
   1085   }
   1086 
   1087 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1088   if(*path == U_FILE_ALT_SEP_CHAR) {
   1089     return TRUE;
   1090   }
   1091 #endif
   1092 
   1093 #if defined(U_WINDOWS)
   1094   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
   1095        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
   1096       path[1] == ':' ) {
   1097     return TRUE;
   1098   }
   1099 #endif
   1100 
   1101   return FALSE;
   1102 }
   1103 
   1104 U_CAPI const char * U_EXPORT2
   1105 u_getDataDirectory(void) {
   1106     const char *path = NULL;
   1107 
   1108     /* if we have the directory, then return it immediately */
   1109     UMTX_CHECK(NULL, gDataDirectory, path);
   1110 
   1111     if(path) {
   1112         return path;
   1113     }
   1114 
   1115     /*
   1116     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
   1117     override ICU's data with the ICU_DATA environment variable. This prevents
   1118     problems where multiple custom copies of ICU's specific version of data
   1119     are installed on a system. Either the application must define the data
   1120     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
   1121     ICU, set the data with udata_setCommonData or trust that all of the
   1122     required data is contained in ICU's data library that contains
   1123     the entry point defined by U_ICUDATA_ENTRY_POINT.
   1124 
   1125     There may also be some platforms where environment variables
   1126     are not allowed.
   1127     */
   1128 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
   1129     /* First try to get the environment variable */
   1130     path=getenv("ICU_DATA");
   1131 #   endif
   1132 
   1133     /* ICU_DATA_DIR may be set as a compile option */
   1134 #   ifdef ICU_DATA_DIR
   1135     if(path==NULL || *path==0) {
   1136         path=ICU_DATA_DIR;
   1137     }
   1138 #   endif
   1139 
   1140     if(path==NULL) {
   1141         /* It looks really bad, set it to something. */
   1142         path = "";
   1143     }
   1144 
   1145     u_setDataDirectory(path);
   1146     return gDataDirectory;
   1147 }
   1148 
   1149 
   1150 
   1151 
   1152 
   1153 /* Macintosh-specific locale information ------------------------------------ */
   1154 #ifdef XP_MAC
   1155 
   1156 typedef struct {
   1157     int32_t script;
   1158     int32_t region;
   1159     int32_t lang;
   1160     int32_t date_region;
   1161     const char* posixID;
   1162 } mac_lc_rec;
   1163 
   1164 /* Todo: This will be updated with a newer version from www.unicode.org web
   1165    page when it's available.*/
   1166 #define MAC_LC_MAGIC_NUMBER -5
   1167 #define MAC_LC_INIT_NUMBER -9
   1168 
   1169 static const mac_lc_rec mac_lc_recs[] = {
   1170     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
   1171     /* United States*/
   1172     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
   1173     /* France*/
   1174     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
   1175     /* Great Britain*/
   1176     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
   1177     /* Germany*/
   1178     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
   1179     /* Italy*/
   1180     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
   1181     /* Metherlands*/
   1182     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
   1183     /* French for Belgium or Lxembourg*/
   1184     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
   1185     /* Sweden*/
   1186     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
   1187     /* Denmark*/
   1188     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
   1189     /* Portugal*/
   1190     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
   1191     /* French Canada*/
   1192     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
   1193     /* Israel*/
   1194     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
   1195     /* Japan*/
   1196     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
   1197     /* Australia*/
   1198     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
   1199     /* the Arabic world (?)*/
   1200     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
   1201     /* Finland*/
   1202     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
   1203     /* French for Switzerland*/
   1204     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
   1205     /* German for Switzerland*/
   1206     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
   1207     /* Greece*/
   1208     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
   1209     /* Iceland ===*/
   1210     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
   1211     /* Malta ===*/
   1212     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
   1213     /* Cyprus ===*/
   1214     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
   1215     /* Turkey ===*/
   1216     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
   1217     /* Croatian system for Yugoslavia*/
   1218     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
   1219     /* Hindi system for India*/
   1220     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
   1221     /* Pakistan*/
   1222     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
   1223     /* Lithuania*/
   1224     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
   1225     /* Poland*/
   1226     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
   1227     /* Hungary*/
   1228     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
   1229     /* Estonia*/
   1230     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
   1231     /* Latvia*/
   1232     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
   1233     /* Lapland  [Ask Rich for the data. HS]*/
   1234     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
   1235     /* Faeroe Islands*/
   1236     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
   1237     /* Iran*/
   1238     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
   1239     /* Russia*/
   1240     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
   1241     /* Ireland*/
   1242     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
   1243     /* Korea*/
   1244     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
   1245     /* People's Republic of China*/
   1246     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
   1247     /* Taiwan*/
   1248     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
   1249     /* Thailand*/
   1250 
   1251     /* fallback is en_US*/
   1252     MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
   1253     MAC_LC_MAGIC_NUMBER, "en_US"
   1254 };
   1255 
   1256 #endif
   1257 
   1258 #if U_POSIX_LOCALE
   1259 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
   1260  * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
   1261  * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
   1262  */
   1263 static const char *uprv_getPOSIXIDForCategory(int category)
   1264 {
   1265     const char* posixID = NULL;
   1266     if (category == LC_MESSAGES || category == LC_CTYPE) {
   1267         /*
   1268         * On Solaris two different calls to setlocale can result in
   1269         * different values. Only get this value once.
   1270         *
   1271         * We must check this first because an application can set this.
   1272         *
   1273         * LC_ALL can't be used because it's platform dependent. The LANG
   1274         * environment variable seems to affect LC_CTYPE variable by default.
   1275         * Here is what setlocale(LC_ALL, NULL) can return.
   1276         * HPUX can return 'C C C C C C C'
   1277         * Solaris can return /en_US/C/C/C/C/C on the second try.
   1278         * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
   1279         *
   1280         * The default codepage detection also needs to use LC_CTYPE.
   1281         *
   1282         * Do not call setlocale(LC_*, "")! Using an empty string instead
   1283         * of NULL, will modify the libc behavior.
   1284         */
   1285         posixID = setlocale(category, NULL);
   1286         if ((posixID == 0)
   1287             || (uprv_strcmp("C", posixID) == 0)
   1288             || (uprv_strcmp("POSIX", posixID) == 0))
   1289         {
   1290             /* Maybe we got some garbage.  Try something more reasonable */
   1291             posixID = getenv("LC_ALL");
   1292             if (posixID == 0) {
   1293                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
   1294                 if (posixID == 0) {
   1295                     posixID = getenv("LANG");
   1296                 }
   1297             }
   1298         }
   1299     }
   1300     if ((posixID==0)
   1301         || (uprv_strcmp("C", posixID) == 0)
   1302         || (uprv_strcmp("POSIX", posixID) == 0))
   1303     {
   1304         /* Nothing worked.  Give it a nice POSIX default value. */
   1305         posixID = "en_US_POSIX";
   1306     }
   1307     return posixID;
   1308 }
   1309 
   1310 /* Return just the POSIX id for the default locale, whatever happens to be in
   1311  * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
   1312  */
   1313 static const char *uprv_getPOSIXIDForDefaultLocale(void)
   1314 {
   1315     static const char* posixID = NULL;
   1316     if (posixID == 0) {
   1317         posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
   1318     }
   1319     return posixID;
   1320 }
   1321 
   1322 /* Return just the POSIX id for the default codepage, whatever happens to be in
   1323  * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
   1324  */
   1325 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
   1326 {
   1327     static const char* posixID = NULL;
   1328     if (posixID == 0) {
   1329         posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
   1330     }
   1331     return posixID;
   1332 }
   1333 #endif
   1334 
   1335 /* NOTE: The caller should handle thread safety */
   1336 U_CAPI const char* U_EXPORT2
   1337 uprv_getDefaultLocaleID()
   1338 {
   1339 #if U_POSIX_LOCALE
   1340 /*
   1341   Note that:  (a '!' means the ID is improper somehow)
   1342      LC_ALL  ---->     default_loc          codepage
   1343 --------------------------------------------------------
   1344      ab.CD             ab                   CD
   1345      ab@CD             ab__CD               -
   1346      ab (at) CD.EF          ab__CD               EF
   1347 
   1348      ab_CD.EF@GH       ab_CD_GH             EF
   1349 
   1350 Some 'improper' ways to do the same as above:
   1351   !  ab_CD (at) GH.EF       ab_CD_GH             EF
   1352   !  ab_CD.EF (at) GH.IJ    ab_CD_GH             EF
   1353   !  ab_CD (at) ZZ.EF@GH.IJ ab_CD_GH             EF
   1354 
   1355      _CD@GH            _CD_GH               -
   1356      _CD.EF@GH         _CD_GH               EF
   1357 
   1358 The variant cannot have dots in it.
   1359 The 'rightmost' variant (@xxx) wins.
   1360 The leftmost codepage (.xxx) wins.
   1361 */
   1362     char *correctedPOSIXLocale = 0;
   1363     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
   1364     const char *p;
   1365     const char *q;
   1366     int32_t len;
   1367 
   1368     /* Format: (no spaces)
   1369     ll [ _CC ] [ . MM ] [ @ VV]
   1370 
   1371       l = lang, C = ctry, M = charmap, V = variant
   1372     */
   1373 
   1374     if (gCorrectedPOSIXLocale != NULL) {
   1375         return gCorrectedPOSIXLocale;
   1376     }
   1377 
   1378     if ((p = uprv_strchr(posixID, '.')) != NULL) {
   1379         /* assume new locale can't be larger than old one? */
   1380         correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
   1381         /* Exit on memory allocation error. */
   1382         if (correctedPOSIXLocale == NULL) {
   1383             return NULL;
   1384         }
   1385         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
   1386         correctedPOSIXLocale[p-posixID] = 0;
   1387 
   1388         /* do not copy after the @ */
   1389         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
   1390             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
   1391         }
   1392     }
   1393 
   1394     /* Note that we scan the *uncorrected* ID. */
   1395     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
   1396         if (correctedPOSIXLocale == NULL) {
   1397             correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
   1398             /* Exit on memory allocation error. */
   1399             if (correctedPOSIXLocale == NULL) {
   1400                 return NULL;
   1401             }
   1402             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
   1403             correctedPOSIXLocale[p-posixID] = 0;
   1404         }
   1405         p++;
   1406 
   1407         /* Take care of any special cases here.. */
   1408         if (!uprv_strcmp(p, "nynorsk")) {
   1409             p = "NY";
   1410             /* Don't worry about no__NY. In practice, it won't appear. */
   1411         }
   1412 
   1413         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
   1414             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
   1415         }
   1416         else {
   1417             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
   1418         }
   1419 
   1420         if ((q = uprv_strchr(p, '.')) != NULL) {
   1421             /* How big will the resulting string be? */
   1422             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
   1423             uprv_strncat(correctedPOSIXLocale, p, q-p);
   1424             correctedPOSIXLocale[len] = 0;
   1425         }
   1426         else {
   1427             /* Anything following the @ sign */
   1428             uprv_strcat(correctedPOSIXLocale, p);
   1429         }
   1430 
   1431         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
   1432          * How about 'russian' -> 'ru'?
   1433          * Many of the other locales using ISO codes will be handled by the
   1434          * canonicalization functions in uloc_getDefault.
   1435          */
   1436     }
   1437 
   1438     /* Was a correction made? */
   1439     if (correctedPOSIXLocale != NULL) {
   1440         posixID = correctedPOSIXLocale;
   1441     }
   1442     else {
   1443         /* copy it, just in case the original pointer goes away.  See j2395 */
   1444         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
   1445         /* Exit on memory allocation error. */
   1446         if (correctedPOSIXLocale == NULL) {
   1447             return NULL;
   1448         }
   1449         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
   1450     }
   1451 
   1452     if (gCorrectedPOSIXLocale == NULL) {
   1453         gCorrectedPOSIXLocale = correctedPOSIXLocale;
   1454         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1455         correctedPOSIXLocale = NULL;
   1456     }
   1457 
   1458     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
   1459         uprv_free(correctedPOSIXLocale);
   1460     }
   1461 
   1462     return posixID;
   1463 
   1464 #elif defined(U_WINDOWS)
   1465     UErrorCode status = U_ZERO_ERROR;
   1466     LCID id = GetThreadLocale();
   1467     const char* locID = uprv_convertToPosix(id, &status);
   1468 
   1469     if (U_FAILURE(status)) {
   1470         locID = "en_US";
   1471     }
   1472     return locID;
   1473 
   1474 #elif defined(XP_MAC)
   1475     int32_t script = MAC_LC_INIT_NUMBER;
   1476     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
   1477     int32_t region = MAC_LC_INIT_NUMBER;
   1478     /* = GetScriptManagerVariable(smRegionCode);*/
   1479     int32_t lang = MAC_LC_INIT_NUMBER;
   1480     /* = GetScriptManagerVariable(smScriptLang);*/
   1481     int32_t date_region = MAC_LC_INIT_NUMBER;
   1482     const char* posixID = 0;
   1483     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
   1484     int32_t i;
   1485     Intl1Hndl ih;
   1486 
   1487     ih = (Intl1Hndl) GetIntlResource(1);
   1488     if (ih)
   1489         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
   1490 
   1491     for (i = 0; i < count; i++) {
   1492         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
   1493              || (mac_lc_recs[i].script == script))
   1494             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
   1495              || (mac_lc_recs[i].region == region))
   1496             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
   1497              || (mac_lc_recs[i].lang == lang))
   1498             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
   1499              || (mac_lc_recs[i].date_region == date_region))
   1500             )
   1501         {
   1502             posixID = mac_lc_recs[i].posixID;
   1503             break;
   1504         }
   1505     }
   1506 
   1507     return posixID;
   1508 
   1509 #elif defined(OS400)
   1510     /* locales are process scoped and are by definition thread safe */
   1511     static char correctedLocale[64];
   1512     const  char *localeID = getenv("LC_ALL");
   1513            char *p;
   1514 
   1515     if (localeID == NULL)
   1516         localeID = getenv("LANG");
   1517     if (localeID == NULL)
   1518         localeID = setlocale(LC_ALL, NULL);
   1519     /* Make sure we have something... */
   1520     if (localeID == NULL)
   1521         return "en_US_POSIX";
   1522 
   1523     /* Extract the locale name from the path. */
   1524     if((p = uprv_strrchr(localeID, '/')) != NULL)
   1525     {
   1526         /* Increment p to start of locale name. */
   1527         p++;
   1528         localeID = p;
   1529     }
   1530 
   1531     /* Copy to work location. */
   1532     uprv_strcpy(correctedLocale, localeID);
   1533 
   1534     /* Strip off the '.locale' extension. */
   1535     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
   1536         *p = 0;
   1537     }
   1538 
   1539     /* Upper case the locale name. */
   1540     T_CString_toUpperCase(correctedLocale);
   1541 
   1542     /* See if we are using the POSIX locale.  Any of the
   1543     * following are equivalent and use the same QLGPGCMA
   1544     * (POSIX) locale.
   1545     * QLGPGCMA2 means UCS2
   1546     * QLGPGCMA_4 means UTF-32
   1547     * QLGPGCMA_8 means UTF-8
   1548     */
   1549     if ((uprv_strcmp("C", correctedLocale) == 0) ||
   1550         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
   1551         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
   1552     {
   1553         uprv_strcpy(correctedLocale, "en_US_POSIX");
   1554     }
   1555     else
   1556     {
   1557         int16_t LocaleLen;
   1558 
   1559         /* Lower case the lang portion. */
   1560         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
   1561         {
   1562             *p = uprv_tolower(*p);
   1563         }
   1564 
   1565         /* Adjust for Euro.  After '_E' add 'URO'. */
   1566         LocaleLen = uprv_strlen(correctedLocale);
   1567         if (correctedLocale[LocaleLen - 2] == '_' &&
   1568             correctedLocale[LocaleLen - 1] == 'E')
   1569         {
   1570             uprv_strcat(correctedLocale, "URO");
   1571         }
   1572 
   1573         /* If using Lotus-based locale then convert to
   1574          * equivalent non Lotus.
   1575          */
   1576         else if (correctedLocale[LocaleLen - 2] == '_' &&
   1577             correctedLocale[LocaleLen - 1] == 'L')
   1578         {
   1579             correctedLocale[LocaleLen - 2] = 0;
   1580         }
   1581 
   1582         /* There are separate simplified and traditional
   1583          * locales called zh_HK_S and zh_HK_T.
   1584          */
   1585         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
   1586         {
   1587             uprv_strcpy(correctedLocale, "zh_HK");
   1588         }
   1589 
   1590         /* A special zh_CN_GBK locale...
   1591         */
   1592         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
   1593         {
   1594             uprv_strcpy(correctedLocale, "zh_CN");
   1595         }
   1596 
   1597     }
   1598 
   1599     return correctedLocale;
   1600 #endif
   1601 
   1602 }
   1603 
   1604 #if !U_CHARSET_IS_UTF8
   1605 #if U_POSIX_LOCALE
   /*
    * Map a charset name reported by the platform to the name ICU should use.
    *
    * Some platforms report one charset name when they really mean a different
    * charset. Only conflicting or ambiguous aliases are resolved here. Before
    * adding a case to this function, please consider adding a unique name to
    * the ICU alias table in the data directory instead.
    *
    * locale  Locale ID used to disambiguate underspecified names on some
    *         platforms. NULL and "" are treated the same way.
    * name    Charset name as reported by the platform. May be NULL.
    *
    * Returns NULL if name is NULL, or if the name is still empty after the
    * platform rules have been applied. Otherwise returns either a replacement
    * string literal or name itself.
    */
   static const char*
   remapPlatformDependentCodepage(const char *locale, const char *name) {
       if (name == NULL) {
           return NULL;
       }
       if (locale != NULL && locale[0] == 0) {
           /* Make sure that an empty locale is handled like a missing one. */
           locale = NULL;
       }

   #if defined(U_AIX)
       if (uprv_strcmp(name, "IBM-943") == 0) {
           /* Use the ASCII compatible ibm-943 */
           name = "Shift-JIS";
       }
       else if (uprv_strcmp(name, "IBM-1252") == 0) {
           /* Use the windows-1252 that contains the Euro */
           name = "IBM-5348";
       }
   #elif defined(U_SOLARIS)
       if (uprv_strcmp(name, "EUC") == 0) {
           /* Solaris underspecifies the "EUC" name; only the locale can tell which EUC is meant. */
           if (locale != NULL) {
               if (uprv_strcmp(locale, "zh_CN") == 0) {
                   name = "EUC-CN";
               }
               else if (uprv_strcmp(locale, "zh_TW") == 0) {
                   name = "EUC-TW";
               }
               else if (uprv_strcmp(locale, "ko_KR") == 0) {
                   name = "EUC-KR";
               }
           }
       }
       else if (uprv_strcmp(name, "eucJP") == 0) {
           /*
            * ibm-954 is the best match.
            * ibm-33722 is the default for eucJP (similar to Windows).
            */
           name = "eucjis";
       }
       else if (uprv_strcmp(name, "646") == 0) {
           /*
            * Solaris reports 646 as the default codepage, but its C library
            * routines treat it as ISO-8859-1 rather than US-ASCII (646).
            */
           name = "ISO-8859-1";
       }
   #elif defined(U_DARWIN)
       if (name[0] == 0) {
           /*
            * An empty name without a locale usually indicates that nl_langinfo
            * didn't return valid information. Mac OS X uses UTF-8 by default
            * (especially the locale data and console).
            */
           if (locale == NULL) {
               name = "UTF-8";
           }
       }
       else if (uprv_strcmp(name, "CP949") == 0) {
           /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
           name = "EUC-KR";
       }
   #elif defined(U_BSD)
       if (uprv_strcmp(name, "CP949") == 0) {
           /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
           name = "EUC-KR";
       }
   #elif defined(U_HPUX)
       if (uprv_strcmp(name, "big5") == 0) {
           /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
           /* zh_TW.big5 is not the same charset as zh_HK.big5! */
           if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0) {
               name = "hkbig5";
           }
       }
       else if (uprv_strcmp(name, "eucJP") == 0) {
           /*
            * ibm-1350 is the best match, but unavailable.
            * ibm-954 is mostly a superset of ibm-1350.
            * ibm-33722 is the default for eucJP (similar to Windows).
            */
           name = "eucjis";
       }
   #elif defined(U_LINUX)
       if (uprv_strcmp(name, "euc") == 0) {
           /* Linux underspecifies the "EUC" name; only the locale can tell which EUC is meant. */
           if (locale != NULL) {
               if (uprv_strcmp(locale, "korean") == 0) {
                   name = "EUC-KR";
               }
               else if (uprv_strcmp(locale, "japanese") == 0) {
                   /* Same reasoning as for "eucjp" below. */
                   name = "eucjis";
               }
           }
       }
       else if (uprv_strcmp(name, "eucjp") == 0) {
           /*
            * ibm-1350 is the best match, but unavailable.
            * ibm-954 is mostly a superset of ibm-1350.
            * ibm-33722 is the default for eucJP (similar to Windows).
            */
           name = "eucjis";
       }
       /*
        * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
        * it by falling back to 'US-ASCII' when NULL is returned from this
        * function. So, we don't have to worry about it here.
        */
   #endif

       /* An empty name carries no information: report it as NULL. */
       return (name[0] == 0) ? NULL : name;
   }
   1721 
   /*
    * Extract the codepage name from a POSIX locale ID such as
    * "locale.codepage" or "locale.codepage@variant".
    *
    * localeName    POSIX locale ID to inspect. May be NULL.
    * buffer        Caller-owned storage that receives the text after the first
    *               '.', with any "@variant" suffix cut off.
    * buffCapacity  Size of buffer in bytes. Longer names are truncated.
    *
    * Returns NULL if localeName is NULL or has no '.', or if buffer is unusable
    * (NULL or no capacity). Otherwise returns whatever
    * remapPlatformDependentCodepage() yields for the locale part (the text
    * before the '.') and the extracted codepage name.
    */
   static const char*
   getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
   {
       char localeBuf[100];
       const char *dot;
       char *variant;
       size_t localeCapacity;

       /*
        * Without at least one byte of output storage the termination below
        * would write to buffer[-1], and a negative capacity would be turned
        * into a huge unsigned copy length.
        */
       if (localeName == NULL || buffer == NULL || buffCapacity <= 0) {
           return NULL;
       }

       dot = uprv_strchr(localeName, '.');
       if (dot == NULL) {
           /* No codepage part present. */
           return NULL;
       }

       /* Copy the locale part (everything before the '.'), truncating if needed. */
       localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
       uprv_strncpy(localeBuf, localeName, localeCapacity);
       localeBuf[localeCapacity-1] = 0; /* ensure NUL termination */

       /* Copy the codepage part (everything after the '.'), truncating if needed. */
       uprv_strncpy(buffer, dot+1, buffCapacity);
       buffer[buffCapacity-1] = 0; /* ensure NUL termination */

       /* Drop a trailing "@variant"; search the writable copy, not a const view of it. */
       variant = uprv_strchr(buffer, '@');
       if (variant != NULL) {
           *variant = 0;
       }

       return remapPlatformDependentCodepage(localeBuf, buffer);
   }
   1742 #endif
   1743 
   /*
    * Work out the name of the platform's default codepage.
    *
    * Exactly one platform branch is compiled in. Branches that build the name
    * at run time store it in a function-local static buffer, so the returned
    * pointer stays valid after the call; the remaining branches return a string
    * literal. When no platform branch applies, "US-ASCII" is returned.
    */
   static const char*
   int_getDefaultCodepage()
   {
   #if defined(OS400)
       static char ccsidName[64];
       uint32_t jobCcsid = 37; /* Default to ibm-37 */
       Qwc_JOBI0400_t jobAttrs;
       Qus_EC_t apiError = { sizeof(Qus_EC_t) }; /* SPI error code */

       EPT_CALL(QUSRJOBI)(&jobAttrs, sizeof(jobAttrs), "JOBI0400",
           "*                         ", "                ", &apiError);

       /* Only look at the job attributes when the call reported no error. */
       if (apiError.Bytes_Available == 0) {
           /* Prefer the job CCSID, then the job's default CCSID; 0xFFFF is skipped. */
           if (jobAttrs.Coded_Char_Set_ID != 0xFFFF) {
               jobCcsid = (uint32_t)jobAttrs.Coded_Char_Set_ID;
           }
           else if (jobAttrs.Default_Coded_Char_Set_Id != 0xFFFF) {
               jobCcsid = (uint32_t)jobAttrs.Default_Coded_Char_Set_Id;
           }
           /* otherwise keep the ibm-37 default */
       }
       sprintf(ccsidName,"ibm-%d", jobCcsid);
       return ccsidName;

   #elif defined(OS390)
       static char swapLfnlName[64];

       /* Leave room for the option suffix that is appended right after the codeset name. */
       strncpy(swapLfnlName, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
       strcat(swapLfnlName,UCNV_SWAP_LFNL_OPTION_STRING);
       swapLfnlName[63] = 0; /* NUL terminate */

       return swapLfnlName;

   #elif defined(XP_MAC)
       return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

   #elif defined(U_WINDOWS)
       static char acpName[64];
       sprintf(acpName, "windows-%d", GetACP());
       return acpName;

   #elif U_POSIX_LOCALE
       static char codesetName[100];
       const char *posixID = NULL;
       const char *fromLocale = NULL;

       uprv_memset(codesetName, 0, sizeof(codesetName));
   #if U_HAVE_NL_LANGINFO_CODESET
       /* When available, check nl_langinfo first because it usually gives more
          useful names. It depends on LC_CTYPE.
          nl_langinfo may use the same buffer as setlocale, hence the copy. */
       {
           const char *remapped =
               remapPlatformDependentCodepage(NULL, nl_langinfo(U_NL_LANGINFO_CODESET));
           if (remapped != NULL) {
               uprv_strncpy(codesetName, remapped, sizeof(codesetName));
               codesetName[sizeof(codesetName)-1] = 0;
               return codesetName;
           }
       }
   #endif

       /* Use setlocale in a nice way, and then check some environment variables.
          Maybe the application used setlocale already.
       */
       uprv_memset(codesetName, 0, sizeof(codesetName));
       posixID = uprv_getPOSIXIDForDefaultCodepage();
       fromLocale = getCodepageFromPOSIXID(posixID, codesetName, sizeof(codesetName));
       if (fromLocale != NULL) {
           /* The locale ID carried a usable codeset name; return that. */
           return fromLocale;
       }

       if (codesetName[0] == 0)
       {
           /* Everything failed. Return US ASCII (ISO 646). */
           (void)uprv_strcpy(codesetName, "US-ASCII");
       }
       return codesetName;
   #else
       return "US-ASCII";
   #endif
   }
   1827 
   1828 
   1829 U_CAPI const char*  U_EXPORT2
   1830 uprv_getDefaultCodepage()
   1831 {
   1832     static char const  *name = NULL;
   1833     umtx_lock(NULL);
   1834     if (name == NULL) {
   1835         name = int_getDefaultCodepage();
   1836     }
   1837     umtx_unlock(NULL);
   1838     return name;
   1839 }
   1840 #endif  /* !U_CHARSET_IS_UTF8 */
   1841 
   1842 
   1843 /* end of platform-specific implementation -------------- */
   1844 
   1845 /* version handling --------------------------------------------------------- */
   1846 
   1847 U_CAPI void U_EXPORT2
   1848 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
   1849     char *end;
   1850     uint16_t part=0;
   1851 
   1852     if(versionArray==NULL) {
   1853         return;
   1854     }
   1855 
   1856     if(versionString!=NULL) {
   1857         for(;;) {
   1858             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
   1859             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
   1860                 break;
   1861             }
   1862             versionString=end+1;
   1863         }
   1864     }
   1865 
   1866     while(part<U_MAX_VERSION_LENGTH) {
   1867         versionArray[part++]=0;
   1868     }
   1869 }
   1870 
   1871 U_CAPI void U_EXPORT2
   1872 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
   1873     if(versionArray!=NULL && versionString!=NULL) {
   1874         char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
   1875         int32_t len = u_strlen(versionString);
   1876         if(len>U_MAX_VERSION_STRING_LENGTH) {
   1877             len = U_MAX_VERSION_STRING_LENGTH;
   1878         }
   1879         u_UCharsToChars(versionString, versionChars, len);
   1880         versionChars[U_MAX_VERSION_STRING_LENGTH]=0;
   1881         u_versionFromString(versionArray, versionChars);
   1882     }
   1883 }
   1884 
   1885 U_CAPI int32_t U_EXPORT2
   1886 u_compareVersions(UVersionInfo v1, UVersionInfo v2) {
   1887     int n;
   1888     if(v1==NULL||v2==NULL) return 0;
   1889     for(n=0;n<U_MAX_VERSION_LENGTH;n++) {
   1890       if(v1[n]<v2[n]) {
   1891         return -1;
   1892       } else if(v1[n]>v2[n]) {
   1893         return  1;
   1894       }
   1895     }
   1896     return 0; /* no difference */
   1897 }
   1898 
   1899 
   1900 U_CAPI void U_EXPORT2
   1901 u_versionToString(UVersionInfo versionArray, char *versionString) {
   1902     uint16_t count, part;
   1903     uint8_t field;
   1904 
   1905     if(versionString==NULL) {
   1906         return;
   1907     }
   1908 
   1909     if(versionArray==NULL) {
   1910         versionString[0]=0;
   1911         return;
   1912     }
   1913 
   1914     /* count how many fields need to be written */
   1915     for(count=4; count>0 && versionArray[count-1]==0; --count) {
   1916     }
   1917 
   1918     if(count <= 1) {
   1919         count = 2;
   1920     }
   1921 
   1922     /* write the first part */
   1923     /* write the decimal field value */
   1924     field=versionArray[0];
   1925     if(field>=100) {
   1926         *versionString++=(char)('0'+field/100);
   1927         field%=100;
   1928     }
   1929     if(field>=10) {
   1930         *versionString++=(char)('0'+field/10);
   1931         field%=10;
   1932     }
   1933     *versionString++=(char)('0'+field);
   1934 
   1935     /* write the following parts */
   1936     for(part=1; part<count; ++part) {
   1937         /* write a dot first */
   1938         *versionString++=U_VERSION_DELIMITER;
   1939 
   1940         /* write the decimal field value */
   1941         field=versionArray[part];
   1942         if(field>=100) {
   1943             *versionString++=(char)('0'+field/100);
   1944             field%=100;
   1945         }
   1946         if(field>=10) {
   1947             *versionString++=(char)('0'+field/10);
   1948             field%=10;
   1949         }
   1950         *versionString++=(char)('0'+field);
   1951     }
   1952 
   1953     /* NUL-terminate */
   1954     *versionString=0;
   1955 }
   1956 
   1957 U_CAPI void U_EXPORT2
   1958 u_getVersion(UVersionInfo versionArray) {
   1959     u_versionFromString(versionArray, U_ICU_VERSION);
   1960 }
   1961 
   1962 /*
   1963  * Hey, Emacs, please set the following:
   1964  *
   1965  * Local Variables:
   1966  * indent-tabs-mode: nil
   1967  * End:
   1968  *
   1969  */
   1970