Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1997-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
     12 *
     13 *   Date        Name        Description
     14 *   04/14/97    aliu        Creation.
     15 *   04/24/97    aliu        Added getDefaultDataDirectory() and
     16 *                            getDefaultLocaleID().
     17 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
     18 *                            for assumed case.  Non-UNIX platforms must be
     19 *                            special-cased.  Rewrote numeric methods dealing
     20 *                            with NaN and Infinity to be platform independent
     21 *                             over all IEEE 754 platforms.
     22 *   05/13/97    aliu        Restored sign of timezone
     23 *                            (semantics are hours West of GMT)
     24 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
     25 *                             nextDouble..
     26 *   07/22/98    stephen     Added remainder, max, min, trunc
     27 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
     28 *   08/24/98    stephen     Added longBitsFromDouble
     29 *   09/08/98    stephen     Minor changes for Mac Port
     30 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
     31 *                            Fixed EBCDIC tables
     32 *   04/15/99    stephen     Converted to C.
     33 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
     34 *   08/04/99    jeffrey R.  Added OS/2 changes
     35 *   11/15/99    helena      Integrated S/390 IEEE support.
     36 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
     37 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
     38 *   01/03/08    Steven L.   Fake Time Support
     39 ******************************************************************************
     40 */
     41 
     42 // Defines _XOPEN_SOURCE for access to POSIX functions.
     43 // Must be before any other #includes.
     44 #include "uposixdefs.h"
     45 
     46 // First, the platform type. Need this for U_PLATFORM.
     47 #include "unicode/platform.h"
     48 
     49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
     50 /* tzset isn't defined in strict ANSI on MinGW. */
     51 #undef __STRICT_ANSI__
     52 #endif
     53 
     54 /*
     55  * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
     56  */
     57 #include <time.h>
     58 
     59 #if !U_PLATFORM_USES_ONLY_WIN32_API
     60 #include <sys/time.h>
     61 #endif
     62 
     63 /* include the rest of the ICU headers */
     64 #include "unicode/putil.h"
     65 #include "unicode/ustring.h"
     66 #include "putilimp.h"
     67 #include "uassert.h"
     68 #include "umutex.h"
     69 #include "cmemory.h"
     70 #include "cstring.h"
     71 #include "locmap.h"
     72 #include "ucln_cmn.h"
     73 #include "charstr.h"
     74 
     75 /* Include standard headers. */
     76 #include <stdio.h>
     77 #include <stdlib.h>
     78 #include <string.h>
     79 #include <math.h>
     80 #include <locale.h>
     81 #include <float.h>
     82 
     83 #ifndef U_COMMON_IMPLEMENTATION
     84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
     85 #endif
     86 
     87 
     88 /* include system headers */
     89 #if U_PLATFORM_USES_ONLY_WIN32_API
     90     /*
     91      * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
     92      * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
     93      * to use native APIs as much as possible?
     94      */
     95 #ifndef WIN32_LEAN_AND_MEAN
     96 #   define WIN32_LEAN_AND_MEAN
     97 #endif
     98 #   define VC_EXTRALEAN
     99 #   define NOUSER
    100 #   define NOSERVICE
    101 #   define NOIME
    102 #   define NOMCX
    103 #   include <windows.h>
    104 #   include "unicode/uloc.h"
    105 #   include "wintz.h"
    106 #if U_PLATFORM_HAS_WINUWP_API
    107 typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef
    108 #include <Windows.Globalization.h>
    109 #include <windows.system.userprofile.h>
    110 #include <wrl/wrappers/corewrappers.h>
    111 #include <wrl/client.h>
    112 
    113 using namespace ABI::Windows::Foundation;
    114 using namespace Microsoft::WRL;
    115 using namespace Microsoft::WRL::Wrappers;
    116 #endif
    117 #elif U_PLATFORM == U_PF_OS400
    118 #   include <float.h>
    119 #   include <qusec.h>       /* error code structure */
    120 #   include <qusrjobi.h>
    121 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
    122 #   include <mih/testptr.h> /* For uprv_maximumPtr */
    123 #elif U_PLATFORM == U_PF_OS390
    124 #   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
    125 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
    126 #   include <limits.h>
    127 #   include <unistd.h>
    128 #   if U_PLATFORM == U_PF_SOLARIS
    129 #       ifndef _XPG4_2
    130 #           define _XPG4_2
    131 #       endif
    132 #   endif
    133 #elif U_PLATFORM == U_PF_QNX
    134 #   include <sys/neutrino.h>
    135 #endif
    136 
    137 /*
    138  * Only include langinfo.h if we have a way to get the codeset. If we later
    139  * depend on more features, we can test on U_HAVE_NL_LANGINFO.
    140  *
    141  */
    142 
    143 #if U_HAVE_NL_LANGINFO_CODESET
    144 #include <langinfo.h>
    145 #endif
    146 
    147 /**
    148  * Simple things (presence of functions, etc) should just go in configure.in and be added to
    149  * icucfg.h via autoheader.
    150  */
    151 #if U_PLATFORM_IMPLEMENTS_POSIX
    152 #   if U_PLATFORM == U_PF_OS400
    153 #    define HAVE_DLFCN_H 0
    154 #    define HAVE_DLOPEN 0
    155 #   else
    156 #   ifndef HAVE_DLFCN_H
    157 #    define HAVE_DLFCN_H 1
    158 #   endif
    159 #   ifndef HAVE_DLOPEN
    160 #    define HAVE_DLOPEN 1
    161 #   endif
    162 #   endif
    163 #   ifndef HAVE_GETTIMEOFDAY
    164 #    define HAVE_GETTIMEOFDAY 1
    165 #   endif
    166 #else
    167 #   define HAVE_DLFCN_H 0
    168 #   define HAVE_DLOPEN 0
    169 #   define HAVE_GETTIMEOFDAY 0
    170 #endif
    171 
    172 U_NAMESPACE_USE
    173 
    174 /* Define the extension for data files, again... */
    175 #define DATA_TYPE "dat"
    176 
    177 /* Leave this copyright notice here! */
    178 static const char copyright[] = U_COPYRIGHT_STRING;
    179 
    180 /* floating point implementations ------------------------------------------- */
    181 
    182 /* We return QNAN rather than SNAN*/
    183 #define SIGN 0x80000000U
    184 
    185 /* Make it easy to define certain types of constants */
    186 typedef union {
    187     int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    188     double d64;
    189 } BitPatternConversion;
    190 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
    191 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
    192 
    193 /*---------------------------------------------------------------------------
    194   Platform utilities
    195   Our general strategy is to assume we're on a POSIX platform.  Platforms which
    196   are non-POSIX must declare themselves so.  The default POSIX implementation
    197   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
    198   functions).
    199   ---------------------------------------------------------------------------*/
    200 
    201 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
    202 #   undef U_POSIX_LOCALE
    203 #else
    204 #   define U_POSIX_LOCALE    1
    205 #endif
    206 
    207 /*
    208     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
    209     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
    210 */
    211 #if !IEEE_754
    212 static char*
    213 u_topNBytesOfDouble(double* d, int n)
    214 {
    215 #if U_IS_BIG_ENDIAN
    216     return (char*)d;
    217 #else
    218     return (char*)(d + 1) - n;
    219 #endif
    220 }
    221 
    222 static char*
    223 u_bottomNBytesOfDouble(double* d, int n)
    224 {
    225 #if U_IS_BIG_ENDIAN
    226     return (char*)(d + 1) - n;
    227 #else
    228     return (char*)d;
    229 #endif
    230 }
    231 #endif   /* !IEEE_754 */
    232 
    233 #if IEEE_754
    234 static UBool
    235 u_signBit(double d) {
    236     uint8_t hiByte;
    237 #if U_IS_BIG_ENDIAN
    238     hiByte = *(uint8_t *)&d;
    239 #else
    240     hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
    241 #endif
    242     return (hiByte & 0x80) != 0;
    243 }
    244 #endif
    245 
    246 
    247 
    248 #if defined (U_DEBUG_FAKETIME)
    249 /* Override the clock to test things without having to move the system clock.
    250  * Assumes POSIX gettimeofday() will function
    251  */
    252 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
    253 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
    254 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
    255 static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
    256 
    257 static UDate getUTCtime_real() {
    258     struct timeval posixTime;
    259     gettimeofday(&posixTime, NULL);
    260     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
    261 }
    262 
    263 static UDate getUTCtime_fake() {
    264     umtx_lock(&fakeClockMutex);
    265     if(!fakeClock_set) {
    266         UDate real = getUTCtime_real();
    267         const char *fake_start = getenv("U_FAKETIME_START");
    268         if((fake_start!=NULL) && (fake_start[0]!=0)) {
    269             sscanf(fake_start,"%lf",&fakeClock_t0);
    270             fakeClock_dt = fakeClock_t0 - real;
    271             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
    272                     "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
    273                     fakeClock_t0, fake_start, fakeClock_dt, real);
    274         } else {
    275           fakeClock_dt = 0;
    276             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
    277                     "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
    278         }
    279         fakeClock_set = TRUE;
    280     }
    281     umtx_unlock(&fakeClockMutex);
    282 
    283     return getUTCtime_real() + fakeClock_dt;
    284 }
    285 #endif
    286 
    287 #if U_PLATFORM_USES_ONLY_WIN32_API
    288 typedef union {
    289     int64_t int64;
    290     FILETIME fileTime;
    291 } FileTimeConversion;   /* This is like a ULARGE_INTEGER */
    292 
    293 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
    294 #define EPOCH_BIAS  INT64_C(116444736000000000)
    295 #define HECTONANOSECOND_PER_MILLISECOND   10000
    296 
    297 #endif
    298 
    299 /*---------------------------------------------------------------------------
    300   Universal Implementations
    301   These are designed to work on all platforms.  Try these, and if they
    302   don't work on your platform, then special case your platform with new
    303   implementations.
    304 ---------------------------------------------------------------------------*/
    305 
    306 U_CAPI UDate U_EXPORT2
    307 uprv_getUTCtime()
    308 {
    309 #if defined(U_DEBUG_FAKETIME)
    310     return getUTCtime_fake(); /* Hook for overriding the clock */
    311 #else
    312     return uprv_getRawUTCtime();
    313 #endif
    314 }
    315 
    316 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
    317 U_CAPI UDate U_EXPORT2
    318 uprv_getRawUTCtime()
    319 {
    320 #if U_PLATFORM_USES_ONLY_WIN32_API
    321 
    322     FileTimeConversion winTime;
    323     GetSystemTimeAsFileTime(&winTime.fileTime);
    324     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
    325 #else
    326 
    327 #if HAVE_GETTIMEOFDAY
    328     struct timeval posixTime;
    329     gettimeofday(&posixTime, NULL);
    330     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
    331 #else
    332     time_t epochtime;
    333     time(&epochtime);
    334     return (UDate)epochtime * U_MILLIS_PER_SECOND;
    335 #endif
    336 
    337 #endif
    338 }
    339 
    340 /*-----------------------------------------------------------------------------
    341   IEEE 754
    342   These methods detect and return NaN and infinity values for doubles
    343   conforming to IEEE 754.  Platforms which support this standard include X86,
    344   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
    345   If this doesn't work on your platform, you have non-IEEE floating-point, and
    346   will need to code your own versions.  A naive implementation is to return 0.0
    347   for getNaN and getInfinity, and false for isNaN and isInfinite.
    348   ---------------------------------------------------------------------------*/
    349 
    350 U_CAPI UBool U_EXPORT2
    351 uprv_isNaN(double number)
    352 {
    353 #if IEEE_754
    354     BitPatternConversion convertedNumber;
    355     convertedNumber.d64 = number;
    356     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
    357     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
    358 
    359 #elif U_PLATFORM == U_PF_OS390
    360     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    361                         sizeof(uint32_t));
    362     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
    363                         sizeof(uint32_t));
    364 
    365     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
    366       (lowBits == 0x00000000L);
    367 
    368 #else
    369     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    370     /* you'll need to replace this default implementation with what's correct*/
    371     /* for your platform.*/
    372     return number != number;
    373 #endif
    374 }
    375 
    376 U_CAPI UBool U_EXPORT2
    377 uprv_isInfinite(double number)
    378 {
    379 #if IEEE_754
    380     BitPatternConversion convertedNumber;
    381     convertedNumber.d64 = number;
    382     /* Infinity is exactly 0x7FF0000000000000U. */
    383     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
    384 #elif U_PLATFORM == U_PF_OS390
    385     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    386                         sizeof(uint32_t));
    387     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
    388                         sizeof(uint32_t));
    389 
    390     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
    391 
    392 #else
    393     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    394     /* value, you'll need to replace this default implementation with what's*/
    395     /* correct for your platform.*/
    396     return number == (2.0 * number);
    397 #endif
    398 }
    399 
    400 U_CAPI UBool U_EXPORT2
    401 uprv_isPositiveInfinity(double number)
    402 {
    403 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    404     return (UBool)(number > 0 && uprv_isInfinite(number));
    405 #else
    406     return uprv_isInfinite(number);
    407 #endif
    408 }
    409 
    410 U_CAPI UBool U_EXPORT2
    411 uprv_isNegativeInfinity(double number)
    412 {
    413 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    414     return (UBool)(number < 0 && uprv_isInfinite(number));
    415 
    416 #else
    417     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
    418                         sizeof(uint32_t));
    419     return((highBits & SIGN) && uprv_isInfinite(number));
    420 
    421 #endif
    422 }
    423 
    424 U_CAPI double U_EXPORT2
    425 uprv_getNaN()
    426 {
    427 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    428     return gNan.d64;
    429 #else
    430     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    431     /* you'll need to replace this default implementation with what's correct*/
    432     /* for your platform.*/
    433     return 0.0;
    434 #endif
    435 }
    436 
    437 U_CAPI double U_EXPORT2
    438 uprv_getInfinity()
    439 {
    440 #if IEEE_754 || U_PLATFORM == U_PF_OS390
    441     return gInf.d64;
    442 #else
    443     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    444     /* value, you'll need to replace this default implementation with what's*/
    445     /* correct for your platform.*/
    446     return 0.0;
    447 #endif
    448 }
    449 
    450 U_CAPI double U_EXPORT2
    451 uprv_floor(double x)
    452 {
    453     return floor(x);
    454 }
    455 
    456 U_CAPI double U_EXPORT2
    457 uprv_ceil(double x)
    458 {
    459     return ceil(x);
    460 }
    461 
    462 U_CAPI double U_EXPORT2
    463 uprv_round(double x)
    464 {
    465     return uprv_floor(x + 0.5);
    466 }
    467 
    468 U_CAPI double U_EXPORT2
    469 uprv_fabs(double x)
    470 {
    471     return fabs(x);
    472 }
    473 
    474 U_CAPI double U_EXPORT2
    475 uprv_modf(double x, double* y)
    476 {
    477     return modf(x, y);
    478 }
    479 
    480 U_CAPI double U_EXPORT2
    481 uprv_fmod(double x, double y)
    482 {
    483     return fmod(x, y);
    484 }
    485 
    486 U_CAPI double U_EXPORT2
    487 uprv_pow(double x, double y)
    488 {
    489     /* This is declared as "double pow(double x, double y)" */
    490     return pow(x, y);
    491 }
    492 
    493 U_CAPI double U_EXPORT2
    494 uprv_pow10(int32_t x)
    495 {
    496     return pow(10.0, (double)x);
    497 }
    498 
    499 U_CAPI double U_EXPORT2
    500 uprv_fmax(double x, double y)
    501 {
    502 #if IEEE_754
    503     /* first handle NaN*/
    504     if(uprv_isNaN(x) || uprv_isNaN(y))
    505         return uprv_getNaN();
    506 
    507     /* check for -0 and 0*/
    508     if(x == 0.0 && y == 0.0 && u_signBit(x))
    509         return y;
    510 
    511 #endif
    512 
    513     /* this should work for all flt point w/o NaN and Inf special cases */
    514     return (x > y ? x : y);
    515 }
    516 
    517 U_CAPI double U_EXPORT2
    518 uprv_fmin(double x, double y)
    519 {
    520 #if IEEE_754
    521     /* first handle NaN*/
    522     if(uprv_isNaN(x) || uprv_isNaN(y))
    523         return uprv_getNaN();
    524 
    525     /* check for -0 and 0*/
    526     if(x == 0.0 && y == 0.0 && u_signBit(y))
    527         return y;
    528 
    529 #endif
    530 
    531     /* this should work for all flt point w/o NaN and Inf special cases */
    532     return (x > y ? y : x);
    533 }
    534 
    535 U_CAPI UBool U_EXPORT2
    536 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
    537     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
    538     // This function could be optimized by calling one of those primitives.
    539     auto a64 = static_cast<int64_t>(a);
    540     auto b64 = static_cast<int64_t>(b);
    541     int64_t res64 = a64 + b64;
    542     *res = static_cast<int32_t>(res64);
    543     return res64 != *res;
    544 }
    545 
    546 U_CAPI UBool U_EXPORT2
    547 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
    548     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
    549     // This function could be optimized by calling one of those primitives.
    550     auto a64 = static_cast<int64_t>(a);
    551     auto b64 = static_cast<int64_t>(b);
    552     int64_t res64 = a64 * b64;
    553     *res = static_cast<int32_t>(res64);
    554     return res64 != *res;
    555 }
    556 
    557 /**
    558  * Truncates the given double.
    559  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
    560  * This is different than calling floor() or ceil():
    561  * floor(3.3) = 3, floor(-3.3) = -4
    562  * ceil(3.3) = 4, ceil(-3.3) = -3
    563  */
    564 U_CAPI double U_EXPORT2
    565 uprv_trunc(double d)
    566 {
    567 #if IEEE_754
    568     /* handle error cases*/
    569     if(uprv_isNaN(d))
    570         return uprv_getNaN();
    571     if(uprv_isInfinite(d))
    572         return uprv_getInfinity();
    573 
    574     if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
    575         return ceil(d);
    576     else
    577         return floor(d);
    578 
    579 #else
    580     return d >= 0 ? floor(d) : ceil(d);
    581 
    582 #endif
    583 }
    584 
    585 /**
    586  * Return the largest positive number that can be represented by an integer
    587  * type of arbitrary bit length.
    588  */
    589 U_CAPI double U_EXPORT2
    590 uprv_maxMantissa(void)
    591 {
    592     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
    593 }
    594 
    595 U_CAPI double U_EXPORT2
    596 uprv_log(double d)
    597 {
    598     return log(d);
    599 }
    600 
    601 U_CAPI void * U_EXPORT2
    602 uprv_maximumPtr(void * base)
    603 {
    604 #if U_PLATFORM == U_PF_OS400
    605     /*
    606      * With the provided function we should never be out of range of a given segment
    607      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
    608      * id and 3 bytes for the offset.  The key is that the casting takes care of
    609      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
    610      * seen in a program is x001000 and when casted to an int would be 0.
    611      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
    612      *
    613      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
    614      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
    615      * This function determines the activation based on the pointer that is passed in and
    616      * calculates the appropriate maximum available size for
    617      * each pointer type (TERASPACE and non-TERASPACE)
    618      *
    619      * Unlike other operating systems, the pointer model isn't determined at
    620      * compile time on i5/OS.
    621      */
    622     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
    623         /* if it is a TERASPACE pointer the max is 2GB - 4k */
    624         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
    625     }
    626     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
    627     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
    628 
    629 #else
    630     return U_MAX_PTR(base);
    631 #endif
    632 }
    633 
    634 /*---------------------------------------------------------------------------
    635   Platform-specific Implementations
    636   Try these, and if they don't work on your platform, then special case your
    637   platform with new implementations.
    638   ---------------------------------------------------------------------------*/
    639 
    640 /* Generic time zone layer -------------------------------------------------- */
    641 
    642 /* Time zone utilities */
    643 U_CAPI void U_EXPORT2
    644 uprv_tzset()
    645 {
    646 #if defined(U_TZSET)
    647     U_TZSET();
    648 #else
    649     /* no initialization*/
    650 #endif
    651 }
    652 
    653 U_CAPI int32_t U_EXPORT2
    654 uprv_timezone()
    655 {
    656 #ifdef U_TIMEZONE
    657     return U_TIMEZONE;
    658 #else
    659     time_t t, t1, t2;
    660     struct tm tmrec;
    661     int32_t tdiff = 0;
    662 
    663     time(&t);
    664     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
    665 #if U_PLATFORM != U_PF_IPHONE
    666     UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
    667 #endif
    668     t1 = mktime(&tmrec);                 /* local time in seconds*/
    669     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
    670     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
    671     tdiff = t2 - t1;
    672 
    673 #if U_PLATFORM != U_PF_IPHONE
    674     /* imitate NT behaviour, which returns same timezone offset to GMT for
    675        winter and summer.
    676        This does not work on all platforms. For instance, on glibc on Linux
    677        and on Mac OS 10.5, tdiff calculated above remains the same
    678        regardless of whether DST is in effect or not. iOS is another
    679        platform where this does not work. Linux + glibc and Mac OS 10.5
    680        have U_TIMEZONE defined so that this code is not reached.
    681     */
    682     if (dst_checked)
    683         tdiff += 3600;
    684 #endif
    685     return tdiff;
    686 #endif
    687 }
    688 
    689 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
    690    some platforms need to have it declared here. */
    691 
    692 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
    693 /* RS6000 and others reject char **tzname.  */
    694 extern U_IMPORT char *U_TZNAME[];
    695 #endif
    696 
    697 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
    698 /* These platforms are likely to use Olson timezone IDs. */
    699 /* common targets of the symbolic link at TZDEFAULT are:
    700  * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
    701  * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
    702  * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
    703  * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
    704  * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
    705  * To avoid checking lots of paths, just check that the target path
    706  * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
    707  */
    708 
    709 #define CHECK_LOCALTIME_LINK 1
    710 #if U_PLATFORM_IS_DARWIN_BASED
    711 #include <tzfile.h>
    712 #define TZZONEINFO      (TZDIR "/")
    713 #elif U_PLATFORM == U_PF_SOLARIS
    714 #define TZDEFAULT       "/etc/localtime"
    715 #define TZZONEINFO      "/usr/share/lib/zoneinfo/"
    716 #define TZ_ENV_CHECK    "localtime"
    717 #else
    718 #define TZDEFAULT       "/etc/localtime"
    719 #define TZZONEINFO      "/usr/share/zoneinfo/"
    720 #endif
    721 #define TZZONEINFOTAIL  "/zoneinfo/"
    722 #if U_HAVE_DIRENT_H
    723 #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
    724 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
    725    symlinked to /etc/localtime, which makes searchForTZFile return
    726    'localtime' when it's the first match. */
    727 #define TZFILE_SKIP2    "localtime"
    728 #define SEARCH_TZFILE
    729 #include <dirent.h>  /* Needed to search through system timezone files */
    730 #endif
    731 static char gTimeZoneBuffer[PATH_MAX];
    732 static char *gTimeZoneBufferPtr = NULL;
    733 #endif
    734 
    735 #if !U_PLATFORM_USES_ONLY_WIN32_API
    736 #define isNonDigit(ch) (ch < '0' || '9' < ch)
    737 static UBool isValidOlsonID(const char *id) {
    738     int32_t idx = 0;
    739 
    740     /* Determine if this is something like Iceland (Olson ID)
    741     or AST4ADT (non-Olson ID) */
    742     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
    743         idx++;
    744     }
    745 
    746     /* If we went through the whole string, then it might be okay.
    747     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
    748     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
    749     The rest of the time it could be an Olson ID. George */
    750     return (UBool)(id[idx] == 0
    751         || uprv_strcmp(id, "PST8PDT") == 0
    752         || uprv_strcmp(id, "MST7MDT") == 0
    753         || uprv_strcmp(id, "CST6CDT") == 0
    754         || uprv_strcmp(id, "EST5EDT") == 0);
    755 }
    756 
    757 /* On some Unix-like OS, 'posix' subdirectory in
    758    /usr/share/zoneinfo replicates the top-level contents. 'right'
    759    subdirectory has the same set of files, but individual files
    760    are different from those in the top-level directory or 'posix'
    761    because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
    762    has files for UTC.
    763    When the first match for /etc/localtime is in either of them
    764    (usually in posix because 'right' has different file contents),
    765    or TZ environment variable points to one of them, createTimeZone
    766    fails because, say, 'posix/America/New_York' is not an Olson
    767    timezone id ('America/New_York' is). So, we have to skip
    768    'posix/' and 'right/' at the beginning. */
    769 static void skipZoneIDPrefix(const char** id) {
    770     if (uprv_strncmp(*id, "posix/", 6) == 0
    771         || uprv_strncmp(*id, "right/", 6) == 0)
    772     {
    773         *id += 6;
    774     }
    775 }
    776 #endif
    777 
    778 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
    779 
    780 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
    781 typedef struct OffsetZoneMapping {
    782     int32_t offsetSeconds;
    783     int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    784     const char *stdID;
    785     const char *dstID;
    786     const char *olsonID;
    787 } OffsetZoneMapping;
    788 
    789 enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
    790 
    791 /*
    792 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
    793 and maps it to an Olson ID.
    794 Before adding anything to this list, take a look at
    795 icu/source/tools/tzcode/tz.alias
    796 Sometimes no daylight savings (0) is important to define due to aliases.
    797 This list can be tested with icu/source/test/compat/tzone.pl
    798 More values could be added to daylightType to increase precision.
    799 */
    800 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    801     {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    802     {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    803     {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    804     {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    805     {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    806     {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    807     {-36000, 2, "EST", "EST", "Australia/Sydney"},
    808     {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    809     {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    810     {-34200, 2, "CST", "CST", "Australia/South"},
    811     {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    812     {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    813     {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    814     {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    815     {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    816     {-28800, 2, "WST", "WST", "Australia/West"},
    817     {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    818     {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    819     {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    820     {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    821     {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    822     {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    823     {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    824     {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    825     {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    826     {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    827     {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    828     {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    829     {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    830     {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    831     {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    832     {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    833     {0, 1, "GMT", "IST", "Europe/Dublin"},
    834     {0, 1, "GMT", "BST", "Europe/London"},
    835     {0, 0, "WET", "WEST", "Africa/Casablanca"},
    836     {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    837     {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    838     {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    839     {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    840     {10800, 2, "UYT", "UYST", "America/Montevideo"},
    841     {10800, 1, "WGT", "WGST", "America/Godthab"},
    842     {10800, 2, "BRT", "BRST", "Brazil/East"},
    843     {12600, 1, "NST", "NDT", "America/St_Johns"},
    844     {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    845     {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    846     {14400, 2, "CLT", "CLST", "Chile/Continental"},
    847     {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    848     {14400, 2, "PYT", "PYST", "America/Asuncion"},
    849     {18000, 1, "CST", "CDT", "America/Havana"},
    850     {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    851     {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    852     {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    853     {21600, 0, "CST", "CDT", "America/Guatemala"},
    854     {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    855     {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    856     {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    857     {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    858     {32400, 1, "AKST", "AKDT", "US/Alaska"},
    859     {36000, 1, "HAST", "HADT", "US/Aleutian"}
    860 };
    861 
    862 /*#define DEBUG_TZNAME*/
    863 
    864 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
    865 {
    866     int32_t idx;
    867 #ifdef DEBUG_TZNAME
    868     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
    869 #endif
    870     for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
    871     {
    872         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
    873             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
    874             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
    875             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
    876         {
    877             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
    878         }
    879     }
    880     return NULL;
    881 }
    882 #endif
    883 
    884 #ifdef SEARCH_TZFILE
    885 #define MAX_READ_SIZE 512
    886 
    887 typedef struct DefaultTZInfo {
    888     char* defaultTZBuffer;
    889     int64_t defaultTZFileSize;
    890     FILE* defaultTZFilePtr;
    891     UBool defaultTZstatus;
    892     int32_t defaultTZPosition;
    893 } DefaultTZInfo;
    894 
    895 /*
    896  * This method compares the two files given to see if they are a match.
    897  * It is currently use to compare two TZ files.
    898  */
    899 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
    900     FILE* file;
    901     int64_t sizeFile;
    902     int64_t sizeFileLeft;
    903     int32_t sizeFileRead;
    904     int32_t sizeFileToRead;
    905     char bufferFile[MAX_READ_SIZE];
    906     UBool result = TRUE;
    907 
    908     if (tzInfo->defaultTZFilePtr == NULL) {
    909         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
    910     }
    911     file = fopen(TZFileName, "r");
    912 
    913     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
    914 
    915     if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
    916         /* First check that the file size are equal. */
    917         if (tzInfo->defaultTZFileSize == 0) {
    918             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
    919             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
    920         }
    921         fseek(file, 0, SEEK_END);
    922         sizeFile = ftell(file);
    923         sizeFileLeft = sizeFile;
    924 
    925         if (sizeFile != tzInfo->defaultTZFileSize) {
    926             result = FALSE;
    927         } else {
    928             /* Store the data from the files in seperate buffers and
    929              * compare each byte to determine equality.
    930              */
    931             if (tzInfo->defaultTZBuffer == NULL) {
    932                 rewind(tzInfo->defaultTZFilePtr);
    933                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
    934                 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
    935             }
    936             rewind(file);
    937             while(sizeFileLeft > 0) {
    938                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
    939                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
    940 
    941                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
    942                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
    943                     result = FALSE;
    944                     break;
    945                 }
    946                 sizeFileLeft -= sizeFileRead;
    947                 tzInfo->defaultTZPosition += sizeFileRead;
    948             }
    949         }
    950     } else {
    951         result = FALSE;
    952     }
    953 
    954     if (file != NULL) {
    955         fclose(file);
    956     }
    957 
    958     return result;
    959 }
    960 
    961 
    962 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
    963 #define SKIP1 "."
    964 #define SKIP2 ".."
    965 static UBool U_CALLCONV putil_cleanup(void);
    966 static CharString *gSearchTZFileResult = NULL;
    967 
    968 /*
    969  * This method recursively traverses the directory given for a matching TZ file and returns the first match.
    970  * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
    971  */
    972 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
    973     DIR* dirp = NULL;
    974     struct dirent* dirEntry = NULL;
    975     char* result = NULL;
    976     UErrorCode status = U_ZERO_ERROR;
    977 
    978     /* Save the current path */
    979     CharString curpath(path, -1, status);
    980     if (U_FAILURE(status)) {
    981         goto cleanupAndReturn;
    982     }
    983 
    984     dirp = opendir(path);
    985     if (dirp == NULL) {
    986         goto cleanupAndReturn;
    987     }
    988 
    989     if (gSearchTZFileResult == NULL) {
    990         gSearchTZFileResult = new CharString;
    991         if (gSearchTZFileResult == NULL) {
    992             goto cleanupAndReturn;
    993         }
    994         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    995     }
    996 
    997     /* Check each entry in the directory. */
    998     while((dirEntry = readdir(dirp)) != NULL) {
    999         const char* dirName = dirEntry->d_name;
   1000         if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
   1001             /* Create a newpath with the new entry to test each entry in the directory. */
   1002             CharString newpath(curpath, status);
   1003             newpath.append(dirName, -1, status);
   1004             if (U_FAILURE(status)) {
   1005                 break;
   1006             }
   1007 
   1008             DIR* subDirp = NULL;
   1009             if ((subDirp = opendir(newpath.data())) != NULL) {
   1010                 /* If this new path is a directory, make a recursive call with the newpath. */
   1011                 closedir(subDirp);
   1012                 newpath.append('/', status);
   1013                 if (U_FAILURE(status)) {
   1014                     break;
   1015                 }
   1016                 result = searchForTZFile(newpath.data(), tzInfo);
   1017                 /*
   1018                  Have to get out here. Otherwise, we'd keep looking
   1019                  and return the first match in the top-level directory
   1020                  if there's a match in the top-level. If not, this function
   1021                  would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
   1022                  It worked without this in most cases because we have a fallback of calling
   1023                  localtime_r to figure out the default timezone.
   1024                 */
   1025                 if (result != NULL)
   1026                     break;
   1027             } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
   1028                 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
   1029                     int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
   1030                     if (amountToSkip > newpath.length()) {
   1031                         amountToSkip = newpath.length();
   1032                     }
   1033                     const char* zoneid = newpath.data() + amountToSkip;
   1034                     skipZoneIDPrefix(&zoneid);
   1035                     gSearchTZFileResult->clear();
   1036                     gSearchTZFileResult->append(zoneid, -1, status);
   1037                     if (U_FAILURE(status)) {
   1038                         break;
   1039                     }
   1040                     result = gSearchTZFileResult->data();
   1041                     /* Get out after the first one found. */
   1042                     break;
   1043                 }
   1044             }
   1045         }
   1046     }
   1047 
   1048   cleanupAndReturn:
   1049     if (dirp) {
   1050         closedir(dirp);
   1051     }
   1052     return result;
   1053 }
   1054 #endif
   1055 
   1056 U_CAPI void U_EXPORT2
   1057 uprv_tzname_clear_cache()
   1058 {
   1059 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
   1060     gTimeZoneBufferPtr = NULL;
   1061 #endif
   1062 }
   1063 
   1064 U_CAPI const char* U_EXPORT2
   1065 uprv_tzname(int n)
   1066 {
   1067     (void)n; // Avoid unreferenced parameter warning.
   1068     const char *tzid = NULL;
   1069 #if U_PLATFORM_USES_ONLY_WIN32_API
   1070     tzid = uprv_detectWindowsTimeZone();
   1071 
   1072     if (tzid != NULL) {
   1073         return tzid;
   1074     }
   1075 
   1076 #ifndef U_TZNAME
   1077     // The return value is free'd in timezone.cpp on Windows because
   1078     // the other code path returns a pointer to a heap location.
   1079     // If we don't have a name already, then tzname wouldn't be any
   1080     // better, so just fall back.
   1081     return uprv_strdup("Etc/UTC");
   1082 #endif // !U_TZNAME
   1083 
   1084 #else
   1085 
   1086 /*#if U_PLATFORM_IS_DARWIN_BASED
   1087     int ret;
   1088 
   1089     tzid = getenv("TZFILE");
   1090     if (tzid != NULL) {
   1091         return tzid;
   1092     }
   1093 #endif*/
   1094 
   1095 /* This code can be temporarily disabled to test tzname resolution later on. */
   1096 #ifndef DEBUG_TZNAME
   1097     tzid = getenv("TZ");
   1098     if (tzid != NULL && isValidOlsonID(tzid)
   1099 #if U_PLATFORM == U_PF_SOLARIS
   1100     /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
   1101         && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
   1102 #endif
   1103     ) {
   1104         /* The colon forces tzset() to treat the remainder as zoneinfo path */
   1105         if (tzid[0] == ':') {
   1106             tzid++;
   1107         }
   1108         /* This might be a good Olson ID. */
   1109         skipZoneIDPrefix(&tzid);
   1110         return tzid;
   1111     }
   1112     /* else U_TZNAME will give a better result. */
   1113 #endif
   1114 
   1115 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
   1116     /* Caller must handle threading issues */
   1117     if (gTimeZoneBufferPtr == NULL) {
   1118         /*
   1119         This is a trick to look at the name of the link to get the Olson ID
   1120         because the tzfile contents is underspecified.
   1121         This isn't guaranteed to work because it may not be a symlink.
   1122         */
   1123         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
   1124         if (0 < ret) {
   1125             int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
   1126             gTimeZoneBuffer[ret] = 0;
   1127             char *  tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
   1128 
   1129             if (tzZoneInfoTailPtr != NULL
   1130                 && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
   1131             {
   1132                 return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
   1133             }
   1134         } else {
   1135 #if defined(SEARCH_TZFILE)
   1136             DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
   1137             if (tzInfo != NULL) {
   1138                 tzInfo->defaultTZBuffer = NULL;
   1139                 tzInfo->defaultTZFileSize = 0;
   1140                 tzInfo->defaultTZFilePtr = NULL;
   1141                 tzInfo->defaultTZstatus = FALSE;
   1142                 tzInfo->defaultTZPosition = 0;
   1143 
   1144                 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
   1145 
   1146                 /* Free previously allocated memory */
   1147                 if (tzInfo->defaultTZBuffer != NULL) {
   1148                     uprv_free(tzInfo->defaultTZBuffer);
   1149                 }
   1150                 if (tzInfo->defaultTZFilePtr != NULL) {
   1151                     fclose(tzInfo->defaultTZFilePtr);
   1152                 }
   1153                 uprv_free(tzInfo);
   1154             }
   1155 
   1156             if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
   1157                 return gTimeZoneBufferPtr;
   1158             }
   1159 #endif
   1160         }
   1161     }
   1162     else {
   1163         return gTimeZoneBufferPtr;
   1164     }
   1165 #endif
   1166 #endif
   1167 
   1168 #ifdef U_TZNAME
   1169 #if U_PLATFORM_USES_ONLY_WIN32_API
   1170     /* The return value is free'd in timezone.cpp on Windows because
   1171      * the other code path returns a pointer to a heap location. */
   1172     return uprv_strdup(U_TZNAME[n]);
   1173 #else
   1174     /*
   1175     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
   1176     So we remap the abbreviation to an olson ID.
   1177 
   1178     Since Windows exposes a little more timezone information,
   1179     we normally don't use this code on Windows because
   1180     uprv_detectWindowsTimeZone should have already given the correct answer.
   1181     */
   1182     {
   1183         struct tm juneSol, decemberSol;
   1184         int daylightType;
   1185         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
   1186         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
   1187 
   1188         /* This probing will tell us when daylight savings occurs.  */
   1189         localtime_r(&juneSolstice, &juneSol);
   1190         localtime_r(&decemberSolstice, &decemberSol);
   1191         if(decemberSol.tm_isdst > 0) {
   1192           daylightType = U_DAYLIGHT_DECEMBER;
   1193         } else if(juneSol.tm_isdst > 0) {
   1194           daylightType = U_DAYLIGHT_JUNE;
   1195         } else {
   1196           daylightType = U_DAYLIGHT_NONE;
   1197         }
   1198         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
   1199         if (tzid != NULL) {
   1200             return tzid;
   1201         }
   1202     }
   1203     return U_TZNAME[n];
   1204 #endif
   1205 #else
   1206     return "";
   1207 #endif
   1208 }
   1209 
   1210 /* Get and set the ICU data directory --------------------------------------- */
   1211 
   1212 static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
   1213 static char *gDataDirectory = NULL;
   1214 
   1215 UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
   1216 static CharString *gTimeZoneFilesDirectory = NULL;
   1217 
   1218 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
   1219  static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
   1220  static bool gCorrectedPOSIXLocaleHeapAllocated = false;
   1221 #endif
   1222 
   1223 static UBool U_CALLCONV putil_cleanup(void)
   1224 {
   1225     if (gDataDirectory && *gDataDirectory) {
   1226         uprv_free(gDataDirectory);
   1227     }
   1228     gDataDirectory = NULL;
   1229     gDataDirInitOnce.reset();
   1230 
   1231     delete gTimeZoneFilesDirectory;
   1232     gTimeZoneFilesDirectory = NULL;
   1233     gTimeZoneFilesInitOnce.reset();
   1234 
   1235 #ifdef SEARCH_TZFILE
   1236     delete gSearchTZFileResult;
   1237     gSearchTZFileResult = NULL;
   1238 #endif
   1239 
   1240 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
   1241     if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
   1242         uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
   1243         gCorrectedPOSIXLocale = NULL;
   1244         gCorrectedPOSIXLocaleHeapAllocated = false;
   1245     }
   1246 #endif
   1247     return TRUE;
   1248 }
   1249 
   1250 /*
   1251  * Set the data directory.
   1252  *    Make a copy of the passed string, and set the global data dir to point to it.
   1253  */
   1254 U_CAPI void U_EXPORT2
   1255 u_setDataDirectory(const char *directory) {
   1256     char *newDataDir;
   1257     int32_t length;
   1258 
   1259     if(directory==NULL || *directory==0) {
   1260         /* A small optimization to prevent the malloc and copy when the
   1261         shared library is used, and this is a way to make sure that NULL
   1262         is never returned.
   1263         */
   1264         newDataDir = (char *)"";
   1265     }
   1266     else {
   1267         length=(int32_t)uprv_strlen(directory);
   1268         newDataDir = (char *)uprv_malloc(length + 2);
   1269         /* Exit out if newDataDir could not be created. */
   1270         if (newDataDir == NULL) {
   1271             return;
   1272         }
   1273         uprv_strcpy(newDataDir, directory);
   1274 
   1275 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1276         {
   1277             char *p;
   1278             while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
   1279                 *p = U_FILE_SEP_CHAR;
   1280             }
   1281         }
   1282 #endif
   1283     }
   1284 
   1285     if (gDataDirectory && *gDataDirectory) {
   1286         uprv_free(gDataDirectory);
   1287     }
   1288     gDataDirectory = newDataDir;
   1289     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1290 }
   1291 
   1292 U_CAPI UBool U_EXPORT2
   1293 uprv_pathIsAbsolute(const char *path)
   1294 {
   1295   if(!path || !*path) {
   1296     return FALSE;
   1297   }
   1298 
   1299   if(*path == U_FILE_SEP_CHAR) {
   1300     return TRUE;
   1301   }
   1302 
   1303 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1304   if(*path == U_FILE_ALT_SEP_CHAR) {
   1305     return TRUE;
   1306   }
   1307 #endif
   1308 
   1309 #if U_PLATFORM_USES_ONLY_WIN32_API
   1310   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
   1311        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
   1312       path[1] == ':' ) {
   1313     return TRUE;
   1314   }
   1315 #endif
   1316 
   1317   return FALSE;
   1318 }
   1319 
   /* Temporary fallback definition of ICU_DATA_DIR_PREFIX_ENV_VAR for the
      iPhone simulator, until some client wrapper makefiles are updated.
      An existing definition always takes precedence. */
   #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR && !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
   #endif
   1327 
   1328 #if U_PLATFORM_HAS_WINUWP_API != 0
   1329 // Helper function to get the ICU Data Directory under the Windows directory location.
   1330 static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
   1331 {
   1332 #if defined(ICU_DATA_DIR_WINDOWS)
   1333     wchar_t windowsPath[MAX_PATH];
   1334     char windowsPathUtf8[MAX_PATH];
   1335 
   1336     UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
   1337     if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
   1338         // Convert UTF-16 to a UTF-8 string.
   1339         UErrorCode status = U_ZERO_ERROR;
   1340         int32_t windowsPathUtf8Len = 0;
   1341         u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
   1342             &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
   1343 
   1344         if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
   1345             (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
   1346             // Ensure it always has a separator, so we can append the ICU data path.
   1347             if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
   1348                 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
   1349                 windowsPathUtf8[windowsPathUtf8Len] = '\0';
   1350             }
   1351             // Check if the concatenated string will fit.
   1352             if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
   1353                 uprv_strcpy(directoryBuffer, windowsPathUtf8);
   1354                 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
   1355                 return TRUE;
   1356             }
   1357         }
   1358     }
   1359 #endif
   1360 
   1361     return FALSE;
   1362 }
   1363 #endif
   1364 
   1365 static void U_CALLCONV dataDirectoryInitFn() {
   1366     /* If we already have the directory, then return immediately. Will happen if user called
   1367      * u_setDataDirectory().
   1368      */
   1369     if (gDataDirectory) {
   1370         return;
   1371     }
   1372 
   1373     const char *path = NULL;
   1374 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   1375     char datadir_path_buffer[PATH_MAX];
   1376 #endif
   1377 
   1378     /*
   1379     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
   1380     override ICU's data with the ICU_DATA environment variable. This prevents
   1381     problems where multiple custom copies of ICU's specific version of data
   1382     are installed on a system. Either the application must define the data
   1383     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
   1384     ICU, set the data with udata_setCommonData or trust that all of the
   1385     required data is contained in ICU's data library that contains
   1386     the entry point defined by U_ICUDATA_ENTRY_POINT.
   1387 
   1388     There may also be some platforms where environment variables
   1389     are not allowed.
   1390     */
   1391 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
   1392     /* First try to get the environment variable */
   1393 #       if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
   1394         path=getenv("ICU_DATA");
   1395 #       endif
   1396 #   endif
   1397 
   1398     /* ICU_DATA_DIR may be set as a compile option.
   1399      * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
   1400      * and is used only when data is built in archive mode eliminating the need
   1401      * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
   1402      * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
   1403      * set their own path.
   1404      */
   1405 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
   1406     if(path==NULL || *path==0) {
   1407 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   1408         const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
   1409 # endif
   1410 # ifdef ICU_DATA_DIR
   1411         path=ICU_DATA_DIR;
   1412 # else
   1413         path=U_ICU_DATA_DEFAULT_DIR;
   1414 # endif
   1415 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
   1416         if (prefix != NULL) {
   1417             snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
   1418             path=datadir_path_buffer;
   1419         }
   1420 # endif
   1421     }
   1422 #endif
   1423 
   1424 #if U_PLATFORM_HAS_WINUWP_API != 0  && defined(ICU_DATA_DIR_WINDOWS)
   1425     char datadir_path_buffer[MAX_PATH];
   1426     if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
   1427         path = datadir_path_buffer;
   1428     }
   1429 #endif
   1430 
   1431     if(path==NULL) {
   1432         /* It looks really bad, set it to something. */
   1433 #if U_PLATFORM_HAS_WIN32_API
   1434         // Windows UWP will require icudtl.dat file in same directory as icuuc.dll
   1435         path = ".\\";
   1436 #else
   1437         path = "";
   1438 #endif
   1439     }
   1440 
   1441     u_setDataDirectory(path);
   1442     return;
   1443 }
   1444 
   1445 U_CAPI const char * U_EXPORT2
   1446 u_getDataDirectory(void) {
   1447     umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
   1448     return gDataDirectory;
   1449 }
   1450 
   1451 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
   1452     if (U_FAILURE(status)) {
   1453         return;
   1454     }
   1455     gTimeZoneFilesDirectory->clear();
   1456     gTimeZoneFilesDirectory->append(path, status);
   1457 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   1458     char *p = gTimeZoneFilesDirectory->data();
   1459     while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
   1460         *p = U_FILE_SEP_CHAR;
   1461     }
   1462 #endif
   1463 }
   1464 
   /* TO_STRING_2 stringizes its argument as written; TO_STRING goes through it
      so that a macro argument is expanded before it is stringized. */
   #define TO_STRING_2(text) #text
   #define TO_STRING(text) TO_STRING_2(text)
   1467 
   1468 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
   1469     U_ASSERT(gTimeZoneFilesDirectory == NULL);
   1470     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1471     gTimeZoneFilesDirectory = new CharString();
   1472     if (gTimeZoneFilesDirectory == NULL) {
   1473         status = U_MEMORY_ALLOCATION_ERROR;
   1474         return;
   1475     }
   1476 
   1477     const char *dir = "";
   1478 
   1479 #if U_PLATFORM_HAS_WINUWP_API != 0
   1480     // The UWP version does not support the environment variable setting, but can possibly pick them up from the Windows directory.
   1481     char datadir_path_buffer[MAX_PATH];
   1482     if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
   1483         dir = datadir_path_buffer;
   1484     }
   1485 #else
   1486     dir = getenv("ICU_TIMEZONE_FILES_DIR");
   1487 #endif // U_PLATFORM_HAS_WINUWP_API
   1488 
   1489 #if defined(U_TIMEZONE_FILES_DIR)
   1490     if (dir == NULL) {
   1491         // Build time configuration setting.
   1492         dir = TO_STRING(U_TIMEZONE_FILES_DIR);
   1493     }
   1494 #endif
   1495 
   1496     if (dir == NULL) {
   1497         dir = "";
   1498     }
   1499 
   1500     setTimeZoneFilesDir(dir, status);
   1501 }
   1502 
   1503 
   1504 U_CAPI const char * U_EXPORT2
   1505 u_getTimeZoneFilesDirectory(UErrorCode *status) {
   1506     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
   1507     return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
   1508 }
   1509 
   1510 U_CAPI void U_EXPORT2
   1511 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
   1512     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
   1513     setTimeZoneFilesDir(path, *status);
   1514 
   1515     // Note: this function does some extra churn, first setting based on the
   1516     //       environment, then immediately replacing with the value passed in.
   1517     //       The logic is simpler that way, and performance shouldn't be an issue.
   1518 }
   1519 
   1520 
   1521 #if U_POSIX_LOCALE
   1522 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
   1523  * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
   1524  * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
   1525  */
   1526 static const char *uprv_getPOSIXIDForCategory(int category)
   1527 {
   1528     const char* posixID = NULL;
   1529     if (category == LC_MESSAGES || category == LC_CTYPE) {
   1530         /*
   1531         * On Solaris two different calls to setlocale can result in
   1532         * different values. Only get this value once.
   1533         *
   1534         * We must check this first because an application can set this.
   1535         *
   1536         * LC_ALL can't be used because it's platform dependent. The LANG
   1537         * environment variable seems to affect LC_CTYPE variable by default.
   1538         * Here is what setlocale(LC_ALL, NULL) can return.
   1539         * HPUX can return 'C C C C C C C'
   1540         * Solaris can return /en_US/C/C/C/C/C on the second try.
   1541         * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
   1542         *
   1543         * The default codepage detection also needs to use LC_CTYPE.
   1544         *
   1545         * Do not call setlocale(LC_*, "")! Using an empty string instead
   1546         * of NULL, will modify the libc behavior.
   1547         */
   1548         posixID = setlocale(category, NULL);
   1549         if ((posixID == 0)
   1550             || (uprv_strcmp("C", posixID) == 0)
   1551             || (uprv_strcmp("POSIX", posixID) == 0))
   1552         {
   1553             /* Maybe we got some garbage.  Try something more reasonable */
   1554             posixID = getenv("LC_ALL");
   1555             /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
   1556              * This is needed to properly handle empty env. variables
   1557              */
   1558 #if U_PLATFORM == U_PF_SOLARIS
   1559             if ((posixID == 0) || (posixID[0] == '\0')) {
   1560                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
   1561                 if ((posixID == 0) || (posixID[0] == '\0')) {
   1562 #else
   1563             if (posixID == 0) {
   1564                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
   1565                 if (posixID == 0) {
   1566 #endif
   1567                     posixID = getenv("LANG");
   1568                 }
   1569             }
   1570         }
   1571     }
   1572     if ((posixID==0)
   1573         || (uprv_strcmp("C", posixID) == 0)
   1574         || (uprv_strcmp("POSIX", posixID) == 0))
   1575     {
   1576         /* Nothing worked.  Give it a nice POSIX default value. */
   1577         posixID = "en_US_POSIX";
   1578     }
   1579     return posixID;
   1580 }
   1581 
   1582 /* Return just the POSIX id for the default locale, whatever happens to be in
   1583  * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
   1584  */
   1585 static const char *uprv_getPOSIXIDForDefaultLocale(void)
   1586 {
   1587     static const char* posixID = NULL;
   1588     if (posixID == 0) {
   1589         posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
   1590     }
   1591     return posixID;
   1592 }
   1593 
   1594 #if !U_CHARSET_IS_UTF8
   1595 /* Return just the POSIX id for the default codepage, whatever happens to be in
   1596  * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
   1597  */
   1598 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
   1599 {
   1600     static const char* posixID = NULL;
   1601     if (posixID == 0) {
   1602         posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
   1603     }
   1604     return posixID;
   1605 }
   1606 #endif
   1607 #endif
   1608 
   1609 /* NOTE: The caller should handle thread safety */
   1610 U_CAPI const char* U_EXPORT2
   1611 uprv_getDefaultLocaleID()
   1612 {
   1613 #if U_POSIX_LOCALE
   1614 /*
   1615   Note that:  (a '!' means the ID is improper somehow)
   1616      LC_ALL  ---->     default_loc          codepage
   1617 --------------------------------------------------------
   1618      ab.CD             ab                   CD
   1619      ab@CD             ab__CD               -
   1620      ab (at) CD.EF          ab__CD               EF
   1621 
   1622      ab_CD.EF@GH       ab_CD_GH             EF
   1623 
   1624 Some 'improper' ways to do the same as above:
   1625   !  ab_CD (at) GH.EF       ab_CD_GH             EF
   1626   !  ab_CD.EF (at) GH.IJ    ab_CD_GH             EF
   1627   !  ab_CD (at) ZZ.EF@GH.IJ ab_CD_GH             EF
   1628 
   1629      _CD@GH            _CD_GH               -
   1630      _CD.EF@GH         _CD_GH               EF
   1631 
   1632 The variant cannot have dots in it.
   1633 The 'rightmost' variant (@xxx) wins.
   1634 The leftmost codepage (.xxx) wins.
   1635 */
   1636     char *correctedPOSIXLocale = 0;
   1637     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
   1638     const char *p;
   1639     const char *q;
   1640     int32_t len;
   1641 
   1642     /* Format: (no spaces)
   1643     ll [ _CC ] [ . MM ] [ @ VV]
   1644 
   1645       l = lang, C = ctry, M = charmap, V = variant
   1646     */
   1647 
   1648     if (gCorrectedPOSIXLocale != NULL) {
   1649         return gCorrectedPOSIXLocale;
   1650     }
   1651 
   1652     if ((p = uprv_strchr(posixID, '.')) != NULL) {
   1653         /* assume new locale can't be larger than old one? */
   1654         correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
   1655         /* Exit on memory allocation error. */
   1656         if (correctedPOSIXLocale == NULL) {
   1657             return NULL;
   1658         }
   1659         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
   1660         correctedPOSIXLocale[p-posixID] = 0;
   1661 
   1662         /* do not copy after the @ */
   1663         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
   1664             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
   1665         }
   1666     }
   1667 
   1668     /* Note that we scan the *uncorrected* ID. */
   1669     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
   1670         if (correctedPOSIXLocale == NULL) {
   1671             /* new locale can be 1 char longer than old one if @ -> __ */
   1672             correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+2));
   1673             /* Exit on memory allocation error. */
   1674             if (correctedPOSIXLocale == NULL) {
   1675                 return NULL;
   1676             }
   1677             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
   1678             correctedPOSIXLocale[p-posixID] = 0;
   1679         }
   1680         p++;
   1681 
   1682         /* Take care of any special cases here.. */
   1683         if (!uprv_strcmp(p, "nynorsk")) {
   1684             p = "NY";
   1685             /* Don't worry about no__NY. In practice, it won't appear. */
   1686         }
   1687 
   1688         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
   1689             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
   1690         }
   1691         else {
   1692             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
   1693         }
   1694 
   1695         if ((q = uprv_strchr(p, '.')) != NULL) {
   1696             /* How big will the resulting string be? */
   1697             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
   1698             uprv_strncat(correctedPOSIXLocale, p, q-p);
   1699             correctedPOSIXLocale[len] = 0;
   1700         }
   1701         else {
   1702             /* Anything following the @ sign */
   1703             uprv_strcat(correctedPOSIXLocale, p);
   1704         }
   1705 
   1706         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
   1707          * How about 'russian' -> 'ru'?
   1708          * Many of the other locales using ISO codes will be handled by the
   1709          * canonicalization functions in uloc_getDefault.
   1710          */
   1711     }
   1712 
   1713     /* Was a correction made? */
   1714     if (correctedPOSIXLocale != NULL) {
   1715         posixID = correctedPOSIXLocale;
   1716     }
   1717     else {
   1718         /* copy it, just in case the original pointer goes away.  See j2395 */
   1719         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
   1720         /* Exit on memory allocation error. */
   1721         if (correctedPOSIXLocale == NULL) {
   1722             return NULL;
   1723         }
   1724         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
   1725     }
   1726 
   1727     if (gCorrectedPOSIXLocale == NULL) {
   1728         gCorrectedPOSIXLocale = correctedPOSIXLocale;
   1729         gCorrectedPOSIXLocaleHeapAllocated = true;
   1730         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1731         correctedPOSIXLocale = NULL;
   1732     }
   1733 
   1734     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
   1735         uprv_free(correctedPOSIXLocale);
   1736     }
   1737 
   1738     return posixID;
   1739 
   1740 #elif U_PLATFORM_USES_ONLY_WIN32_API
   1741 #define POSIX_LOCALE_CAPACITY 64
   1742     UErrorCode status = U_ZERO_ERROR;
   1743     char *correctedPOSIXLocale = nullptr;
   1744 
   1745     // If we have already figured this out just use the cached value
   1746     if (gCorrectedPOSIXLocale != nullptr) {
   1747         return gCorrectedPOSIXLocale;
   1748     }
   1749 
   1750     // No cached value, need to determine the current value
   1751     static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
   1752     int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
   1753 
   1754     // Now we should have a Windows locale name that needs converted to the POSIX style.
   1755     if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
   1756     {
   1757         // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
   1758         char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
   1759 
   1760         int32_t i;
   1761         for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
   1762         {
   1763             if (windowsLocale[i] == '_')
   1764             {
   1765                 modifiedWindowsLocale[i] = '-';
   1766             }
   1767             else
   1768             {
   1769                 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
   1770             }
   1771 
   1772             if (modifiedWindowsLocale[i] == '\0')
   1773             {
   1774                 break;
   1775             }
   1776         }
   1777 
   1778         if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
   1779         {
   1780             // Ran out of room, can't really happen, maybe we'll be lucky about a matching
   1781             // locale when tags are dropped
   1782             modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
   1783         }
   1784 
   1785         // Now normalize the resulting name
   1786         correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
   1787         /* TODO: Should we just exit on memory allocation failure? */
   1788         if (correctedPOSIXLocale)
   1789         {
   1790             int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
   1791             if (U_SUCCESS(status))
   1792             {
   1793                 *(correctedPOSIXLocale + posixLen) = 0;
   1794                 gCorrectedPOSIXLocale = correctedPOSIXLocale;
   1795                 gCorrectedPOSIXLocaleHeapAllocated = true;
   1796                 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
   1797             }
   1798             else
   1799             {
   1800                 uprv_free(correctedPOSIXLocale);
   1801             }
   1802         }
   1803     }
   1804 
   1805     // If unable to find a locale we can agree upon, use en-US by default
   1806     if (gCorrectedPOSIXLocale == nullptr) {
   1807         gCorrectedPOSIXLocale = "en_US";
   1808     }
   1809     return gCorrectedPOSIXLocale;
   1810 
   1811 #elif U_PLATFORM == U_PF_OS400
   1812     /* locales are process scoped and are by definition thread safe */
   1813     static char correctedLocale[64];
   1814     const  char *localeID = getenv("LC_ALL");
   1815            char *p;
   1816 
   1817     if (localeID == NULL)
   1818         localeID = getenv("LANG");
   1819     if (localeID == NULL)
   1820         localeID = setlocale(LC_ALL, NULL);
   1821     /* Make sure we have something... */
   1822     if (localeID == NULL)
   1823         return "en_US_POSIX";
   1824 
   1825     /* Extract the locale name from the path. */
   1826     if((p = uprv_strrchr(localeID, '/')) != NULL)
   1827     {
   1828         /* Increment p to start of locale name. */
   1829         p++;
   1830         localeID = p;
   1831     }
   1832 
   1833     /* Copy to work location. */
   1834     uprv_strcpy(correctedLocale, localeID);
   1835 
   1836     /* Strip off the '.locale' extension. */
   1837     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
   1838         *p = 0;
   1839     }
   1840 
   1841     /* Upper case the locale name. */
   1842     T_CString_toUpperCase(correctedLocale);
   1843 
   1844     /* See if we are using the POSIX locale.  Any of the
   1845     * following are equivalent and use the same QLGPGCMA
   1846     * (POSIX) locale.
   1847     * QLGPGCMA2 means UCS2
   1848     * QLGPGCMA_4 means UTF-32
   1849     * QLGPGCMA_8 means UTF-8
   1850     */
   1851     if ((uprv_strcmp("C", correctedLocale) == 0) ||
   1852         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
   1853         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
   1854     {
   1855         uprv_strcpy(correctedLocale, "en_US_POSIX");
   1856     }
   1857     else
   1858     {
   1859         int16_t LocaleLen;
   1860 
   1861         /* Lower case the lang portion. */
   1862         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
   1863         {
   1864             *p = uprv_tolower(*p);
   1865         }
   1866 
   1867         /* Adjust for Euro.  After '_E' add 'URO'. */
   1868         LocaleLen = uprv_strlen(correctedLocale);
   1869         if (correctedLocale[LocaleLen - 2] == '_' &&
   1870             correctedLocale[LocaleLen - 1] == 'E')
   1871         {
   1872             uprv_strcat(correctedLocale, "URO");
   1873         }
   1874 
   1875         /* If using Lotus-based locale then convert to
   1876          * equivalent non Lotus.
   1877          */
   1878         else if (correctedLocale[LocaleLen - 2] == '_' &&
   1879             correctedLocale[LocaleLen - 1] == 'L')
   1880         {
   1881             correctedLocale[LocaleLen - 2] = 0;
   1882         }
   1883 
   1884         /* There are separate simplified and traditional
   1885          * locales called zh_HK_S and zh_HK_T.
   1886          */
   1887         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
   1888         {
   1889             uprv_strcpy(correctedLocale, "zh_HK");
   1890         }
   1891 
   1892         /* A special zh_CN_GBK locale...
   1893         */
   1894         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
   1895         {
   1896             uprv_strcpy(correctedLocale, "zh_CN");
   1897         }
   1898 
   1899     }
   1900 
   1901     return correctedLocale;
   1902 #endif
   1903 
   1904 }
   1905 
   1906 #if !U_CHARSET_IS_UTF8
   1907 #if U_POSIX_LOCALE
   1908 /*
   1909 Due to various platform differences, one platform may specify a charset,
   1910 when they really mean a different charset. Remap the names so that they are
   1911 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
   1912 here. Before adding anything to this function, please consider adding unique
   1913 names to the ICU alias table in the data directory.
   1914 */
   1915 static const char*
   1916 remapPlatformDependentCodepage(const char *locale, const char *name) {
   1917     if (locale != NULL && *locale == 0) {
   1918         /* Make sure that an empty locale is handled the same way. */
   1919         locale = NULL;
   1920     }
   1921     if (name == NULL) {
   1922         return NULL;
   1923     }
   1924 #if U_PLATFORM == U_PF_AIX
   1925     if (uprv_strcmp(name, "IBM-943") == 0) {
   1926         /* Use the ASCII compatible ibm-943 */
   1927         name = "Shift-JIS";
   1928     }
   1929     else if (uprv_strcmp(name, "IBM-1252") == 0) {
   1930         /* Use the windows-1252 that contains the Euro */
   1931         name = "IBM-5348";
   1932     }
   1933 #elif U_PLATFORM == U_PF_SOLARIS
   1934     if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
   1935         /* Solaris underspecifies the "EUC" name. */
   1936         if (uprv_strcmp(locale, "zh_CN") == 0) {
   1937             name = "EUC-CN";
   1938         }
   1939         else if (uprv_strcmp(locale, "zh_TW") == 0) {
   1940             name = "EUC-TW";
   1941         }
   1942         else if (uprv_strcmp(locale, "ko_KR") == 0) {
   1943             name = "EUC-KR";
   1944         }
   1945     }
   1946     else if (uprv_strcmp(name, "eucJP") == 0) {
   1947         /*
   1948         ibm-954 is the best match.
   1949         ibm-33722 is the default for eucJP (similar to Windows).
   1950         */
   1951         name = "eucjis";
   1952     }
   1953     else if (uprv_strcmp(name, "646") == 0) {
   1954         /*
   1955          * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
   1956          * ISO-8859-1 instead of US-ASCII(646).
   1957          */
   1958         name = "ISO-8859-1";
   1959     }
   1960 #elif U_PLATFORM_IS_DARWIN_BASED
   1961     if (locale == NULL && *name == 0) {
   1962         /*
   1963         No locale was specified, and an empty name was passed in.
   1964         This usually indicates that nl_langinfo didn't return valid information.
   1965         Mac OS X uses UTF-8 by default (especially the locale data and console).
   1966         */
   1967         name = "UTF-8";
   1968     }
   1969     else if (uprv_strcmp(name, "CP949") == 0) {
   1970         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
   1971         name = "EUC-KR";
   1972     }
   1973     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
   1974         /*
   1975          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
   1976          */
   1977         name = "UTF-8";
   1978     }
   1979 #elif U_PLATFORM == U_PF_BSD
   1980     if (uprv_strcmp(name, "CP949") == 0) {
   1981         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
   1982         name = "EUC-KR";
   1983     }
   1984 #elif U_PLATFORM == U_PF_HPUX
   1985     if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
   1986         /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
   1987         /* zh_TW.big5 is not the same charset as zh_HK.big5! */
   1988         name = "hkbig5";
   1989     }
   1990     else if (uprv_strcmp(name, "eucJP") == 0) {
   1991         /*
   1992         ibm-1350 is the best match, but unavailable.
   1993         ibm-954 is mostly a superset of ibm-1350.
   1994         ibm-33722 is the default for eucJP (similar to Windows).
   1995         */
   1996         name = "eucjis";
   1997     }
   1998 #elif U_PLATFORM == U_PF_LINUX
   1999     if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
   2000         /* Linux underspecifies the "EUC" name. */
   2001         if (uprv_strcmp(locale, "korean") == 0) {
   2002             name = "EUC-KR";
   2003         }
   2004         else if (uprv_strcmp(locale, "japanese") == 0) {
   2005             /* See comment below about eucJP */
   2006             name = "eucjis";
   2007         }
   2008     }
   2009     else if (uprv_strcmp(name, "eucjp") == 0) {
   2010         /*
   2011         ibm-1350 is the best match, but unavailable.
   2012         ibm-954 is mostly a superset of ibm-1350.
   2013         ibm-33722 is the default for eucJP (similar to Windows).
   2014         */
   2015         name = "eucjis";
   2016     }
   2017     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
   2018             (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
   2019         /*
   2020          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
   2021          */
   2022         name = "UTF-8";
   2023     }
   2024     /*
   2025      * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
   2026      * it by falling back to 'US-ASCII' when NULL is returned from this
   2027      * function. So, we don't have to worry about it here.
   2028      */
   2029 #endif
   2030     /* return NULL when "" is passed in */
   2031     if (*name == 0) {
   2032         name = NULL;
   2033     }
   2034     return name;
   2035 }
   2036 
   2037 static const char*
   2038 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
   2039 {
   2040     char localeBuf[100];
   2041     const char *name = NULL;
   2042     char *variant = NULL;
   2043 
   2044     if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
   2045         size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
   2046         uprv_strncpy(localeBuf, localeName, localeCapacity);
   2047         localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
   2048         name = uprv_strncpy(buffer, name+1, buffCapacity);
   2049         buffer[buffCapacity-1] = 0; /* ensure NULL termination */
   2050         if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
   2051             *variant = 0;
   2052         }
   2053         name = remapPlatformDependentCodepage(localeBuf, name);
   2054     }
   2055     return name;
   2056 }
   2057 #endif
   2058 
   2059 static const char*
   2060 int_getDefaultCodepage()
   2061 {
   2062 #if U_PLATFORM == U_PF_OS400
   2063     uint32_t ccsid = 37; /* Default to ibm-37 */
   2064     static char codepage[64];
   2065     Qwc_JOBI0400_t jobinfo;
   2066     Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
   2067 
   2068     EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
   2069         "*                         ", "                ", &error);
   2070 
   2071     if (error.Bytes_Available == 0) {
   2072         if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
   2073             ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
   2074         }
   2075         else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
   2076             ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
   2077         }
   2078         /* else use the default */
   2079     }
   2080     sprintf(codepage,"ibm-%d", ccsid);
   2081     return codepage;
   2082 
   2083 #elif U_PLATFORM == U_PF_OS390
   2084     static char codepage[64];
   2085 
   2086     strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
   2087     strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
   2088     codepage[63] = 0; /* NULL terminate */
   2089 
   2090     return codepage;
   2091 
   2092 #elif U_PLATFORM_USES_ONLY_WIN32_API
   2093     static char codepage[64];
   2094     DWORD codepageNumber = 0;
   2095 
   2096 #if U_PLATFORM_HAS_WINUWP_API > 0
   2097     // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
   2098     // have folks use Unicode than a "system" code page, however this is the same
   2099     // codepage as the system default locale codepage.  (FWIW, the system locale is
   2100     // ONLY used for codepage, it should never be used for anything else)
   2101     GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
   2102         (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
   2103 #else
   2104     // Win32 apps can call GetACP
   2105     codepageNumber = GetACP();
   2106 #endif
   2107     // Special case for UTF-8
   2108     if (codepageNumber == 65001)
   2109     {
   2110         return "UTF-8";
   2111     }
   2112     // Windows codepages can look like windows-1252, so format the found number
   2113     // the numbers are eclectic, however all valid system code pages, besides UTF-8
   2114     // are between 3 and 19999
   2115     if (codepageNumber > 0 && codepageNumber < 20000)
   2116     {
   2117         sprintf(codepage, "windows-%ld", codepageNumber);
   2118         return codepage;
   2119     }
   2120     // If the codepage number call failed then return UTF-8
   2121     return "UTF-8";
   2122 
   2123 #elif U_POSIX_LOCALE
   2124     static char codesetName[100];
   2125     const char *localeName = NULL;
   2126     const char *name = NULL;
   2127 
   2128     localeName = uprv_getPOSIXIDForDefaultCodepage();
   2129     uprv_memset(codesetName, 0, sizeof(codesetName));
   2130     /* On Solaris nl_langinfo returns C locale values unless setlocale
   2131      * was called earlier.
   2132      */
   2133 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
   2134     /* When available, check nl_langinfo first because it usually gives more
   2135        useful names. It depends on LC_CTYPE.
   2136        nl_langinfo may use the same buffer as setlocale. */
   2137     {
   2138         const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
   2139 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
   2140         /*
   2141          * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
   2142          * instead of ASCII.
   2143          */
   2144         if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
   2145             codeset = remapPlatformDependentCodepage(localeName, codeset);
   2146         } else
   2147 #endif
   2148         {
   2149             codeset = remapPlatformDependentCodepage(NULL, codeset);
   2150         }
   2151 
   2152         if (codeset != NULL) {
   2153             uprv_strncpy(codesetName, codeset, sizeof(codesetName));
   2154             codesetName[sizeof(codesetName)-1] = 0;
   2155             return codesetName;
   2156         }
   2157     }
   2158 #endif
   2159 
   2160     /* Use setlocale in a nice way, and then check some environment variables.
   2161        Maybe the application used setlocale already.
   2162     */
   2163     uprv_memset(codesetName, 0, sizeof(codesetName));
   2164     name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
   2165     if (name) {
   2166         /* if we can find the codeset name from setlocale, return that. */
   2167         return name;
   2168     }
   2169 
   2170     if (*codesetName == 0)
   2171     {
   2172         /* Everything failed. Return US ASCII (ISO 646). */
   2173         (void)uprv_strcpy(codesetName, "US-ASCII");
   2174     }
   2175     return codesetName;
   2176 #else
   2177     return "US-ASCII";
   2178 #endif
   2179 }
   2180 
   2181 
   2182 U_CAPI const char*  U_EXPORT2
   2183 uprv_getDefaultCodepage()
   2184 {
   2185     static char const  *name = NULL;
   2186     umtx_lock(NULL);
   2187     if (name == NULL) {
   2188         name = int_getDefaultCodepage();
   2189     }
   2190     umtx_unlock(NULL);
   2191     return name;
   2192 }
   2193 #endif  /* !U_CHARSET_IS_UTF8 */
   2194 
   2195 
   2196 /* end of platform-specific implementation -------------- */
   2197 
   2198 /* version handling --------------------------------------------------------- */
   2199 
   2200 U_CAPI void U_EXPORT2
   2201 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
   2202     char *end;
   2203     uint16_t part=0;
   2204 
   2205     if(versionArray==NULL) {
   2206         return;
   2207     }
   2208 
   2209     if(versionString!=NULL) {
   2210         for(;;) {
   2211             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
   2212             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
   2213                 break;
   2214             }
   2215             versionString=end+1;
   2216         }
   2217     }
   2218 
   2219     while(part<U_MAX_VERSION_LENGTH) {
   2220         versionArray[part++]=0;
   2221     }
   2222 }
   2223 
   2224 U_CAPI void U_EXPORT2
   2225 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
   2226     if(versionArray!=NULL && versionString!=NULL) {
   2227         char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
   2228         int32_t len = u_strlen(versionString);
   2229         if(len>U_MAX_VERSION_STRING_LENGTH) {
   2230             len = U_MAX_VERSION_STRING_LENGTH;
   2231         }
   2232         u_UCharsToChars(versionString, versionChars, len);
   2233         versionChars[len]=0;
   2234         u_versionFromString(versionArray, versionChars);
   2235     }
   2236 }
   2237 
   2238 U_CAPI void U_EXPORT2
   2239 u_versionToString(const UVersionInfo versionArray, char *versionString) {
   2240     uint16_t count, part;
   2241     uint8_t field;
   2242 
   2243     if(versionString==NULL) {
   2244         return;
   2245     }
   2246 
   2247     if(versionArray==NULL) {
   2248         versionString[0]=0;
   2249         return;
   2250     }
   2251 
   2252     /* count how many fields need to be written */
   2253     for(count=4; count>0 && versionArray[count-1]==0; --count) {
   2254     }
   2255 
   2256     if(count <= 1) {
   2257         count = 2;
   2258     }
   2259 
   2260     /* write the first part */
   2261     /* write the decimal field value */
   2262     field=versionArray[0];
   2263     if(field>=100) {
   2264         *versionString++=(char)('0'+field/100);
   2265         field%=100;
   2266     }
   2267     if(field>=10) {
   2268         *versionString++=(char)('0'+field/10);
   2269         field%=10;
   2270     }
   2271     *versionString++=(char)('0'+field);
   2272 
   2273     /* write the following parts */
   2274     for(part=1; part<count; ++part) {
   2275         /* write a dot first */
   2276         *versionString++=U_VERSION_DELIMITER;
   2277 
   2278         /* write the decimal field value */
   2279         field=versionArray[part];
   2280         if(field>=100) {
   2281             *versionString++=(char)('0'+field/100);
   2282             field%=100;
   2283         }
   2284         if(field>=10) {
   2285             *versionString++=(char)('0'+field/10);
   2286             field%=10;
   2287         }
   2288         *versionString++=(char)('0'+field);
   2289     }
   2290 
   2291     /* NUL-terminate */
   2292     *versionString=0;
   2293 }
   2294 
   2295 U_CAPI void U_EXPORT2
   2296 u_getVersion(UVersionInfo versionArray) {
   2297     (void)copyright;   // Suppress unused variable warning from clang.
   2298     u_versionFromString(versionArray, U_ICU_VERSION);
   2299 }
   2300 
   2301 /**
   2302  * icucfg.h dependent code
   2303  */
   2304 
   2305 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
   2306 
   2307 #if HAVE_DLFCN_H
   2308 #ifdef __MVS__
   2309 #ifndef __SUSV3
   2310 #define __SUSV3 1
   2311 #endif
   2312 #endif
   2313 #include <dlfcn.h>
   2314 #endif /* HAVE_DLFCN_H */
   2315 
   2316 U_INTERNAL void * U_EXPORT2
   2317 uprv_dl_open(const char *libName, UErrorCode *status) {
   2318   void *ret = NULL;
   2319   if(U_FAILURE(*status)) return ret;
   2320   ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
   2321   if(ret==NULL) {
   2322 #ifdef U_TRACE_DYLOAD
   2323     printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
   2324 #endif
   2325     *status = U_MISSING_RESOURCE_ERROR;
   2326   }
   2327   return ret;
   2328 }
   2329 
   2330 U_INTERNAL void U_EXPORT2
   2331 uprv_dl_close(void *lib, UErrorCode *status) {
   2332   if(U_FAILURE(*status)) return;
   2333   dlclose(lib);
   2334 }
   2335 
   2336 U_INTERNAL UVoidFunction* U_EXPORT2
   2337 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
   2338   union {
   2339       UVoidFunction *fp;
   2340       void *vp;
   2341   } uret;
   2342   uret.fp = NULL;
   2343   if(U_FAILURE(*status)) return uret.fp;
   2344   uret.vp = dlsym(lib, sym);
   2345   if(uret.vp == NULL) {
   2346 #ifdef U_TRACE_DYLOAD
   2347     printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
   2348 #endif
   2349     *status = U_MISSING_RESOURCE_ERROR;
   2350   }
   2351   return uret.fp;
   2352 }
   2353 
   2354 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
   2355 
   2356 /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
   2358 
   2359 U_INTERNAL void * U_EXPORT2
   2360 uprv_dl_open(const char *libName, UErrorCode *status) {
   2361   HMODULE lib = NULL;
   2362 
   2363   if(U_FAILURE(*status)) return NULL;
   2364 
   2365   lib = LoadLibraryA(libName);
   2366 
   2367   if(lib==NULL) {
   2368     *status = U_MISSING_RESOURCE_ERROR;
   2369   }
   2370 
   2371   return (void*)lib;
   2372 }
   2373 
   2374 U_INTERNAL void U_EXPORT2
   2375 uprv_dl_close(void *lib, UErrorCode *status) {
   2376   HMODULE handle = (HMODULE)lib;
   2377   if(U_FAILURE(*status)) return;
   2378 
   2379   FreeLibrary(handle);
   2380 
   2381   return;
   2382 }
   2383 
   2384 U_INTERNAL UVoidFunction* U_EXPORT2
   2385 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
   2386   HMODULE handle = (HMODULE)lib;
   2387   UVoidFunction* addr = NULL;
   2388 
   2389   if(U_FAILURE(*status) || lib==NULL) return NULL;
   2390 
   2391   addr = (UVoidFunction*)GetProcAddress(handle, sym);
   2392 
   2393   if(addr==NULL) {
   2394     DWORD lastError = GetLastError();
   2395     if(lastError == ERROR_PROC_NOT_FOUND) {
   2396       *status = U_MISSING_RESOURCE_ERROR;
   2397     } else {
   2398       *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
   2399     }
   2400   }
   2401 
   2402   return addr;
   2403 }
   2404 
   2405 #else
   2406 
   2407 /* No dynamic loading, null (nonexistent) implementation. */
   2408 
   2409 U_INTERNAL void * U_EXPORT2
   2410 uprv_dl_open(const char *libName, UErrorCode *status) {
   2411     (void)libName;
   2412     if(U_FAILURE(*status)) return NULL;
   2413     *status = U_UNSUPPORTED_ERROR;
   2414     return NULL;
   2415 }
   2416 
   2417 U_INTERNAL void U_EXPORT2
   2418 uprv_dl_close(void *lib, UErrorCode *status) {
   2419     (void)lib;
   2420     if(U_FAILURE(*status)) return;
   2421     *status = U_UNSUPPORTED_ERROR;
   2422     return;
   2423 }
   2424 
   2425 U_INTERNAL UVoidFunction* U_EXPORT2
   2426 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
   2427   (void)lib;
   2428   (void)sym;
   2429   if(U_SUCCESS(*status)) {
   2430     *status = U_UNSUPPORTED_ERROR;
   2431   }
   2432   return (UVoidFunction*)NULL;
   2433 }
   2434 
   2435 #endif
   2436 
   2437 /*
   2438  * Hey, Emacs, please set the following:
   2439  *
   2440  * Local Variables:
   2441  * indent-tabs-mode: nil
   2442  * End:
   2443  *
   2444  */
   2445