1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1997-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10 * 11 * Date Name Description 12 * 04/14/97 aliu Creation. 13 * 04/24/97 aliu Added getDefaultDataDirectory() and 14 * getDefaultLocaleID(). 15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods 16 * for assumed case. Non-UNIX platforms must be 17 * special-cased. Rewrote numeric methods dealing 18 * with NaN and Infinity to be platform independent 19 * over all IEEE 754 platforms. 20 * 05/13/97 aliu Restored sign of timezone 21 * (semantics are hours West of GMT) 22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23 * nextDouble.. 24 * 07/22/98 stephen Added remainder, max, min, trunc 25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26 * 08/24/98 stephen Added longBitsFromDouble 27 * 09/08/98 stephen Minor changes for Mac Port 28 * 03/02/99 stephen Removed openFile(). Added AS400 support. 29 * Fixed EBCDIC tables 30 * 04/15/99 stephen Converted to C. 31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32 * 08/04/99 jeffrey R. Added OS/2 changes 33 * 11/15/99 helena Integrated S/390 IEEE support. 34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36 * 01/03/08 Steven L. Fake Time Support 37 ****************************************************************************** 38 */ 39 40 // Defines _XOPEN_SOURCE for access to POSIX functions. 41 // Must be before any other #includes. 42 #include "uposixdefs.h" 43 44 /* include ICU headers */ 45 #include "unicode/utypes.h" 46 #include "unicode/putil.h" 47 #include "unicode/ustring.h" 48 #include "putilimp.h" 49 #include "uassert.h" 50 #include "umutex.h" 51 #include "cmemory.h" 52 #include "cstring.h" 53 #include "locmap.h" 54 #include "ucln_cmn.h" 55 #include "charstr.h" 56 57 /* Include standard headers. */ 58 #include <stdio.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <math.h> 62 #include <locale.h> 63 #include <float.h> 64 65 #ifndef U_COMMON_IMPLEMENTATION 66 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu 67 #endif 68 69 70 /* include system headers */ 71 #if U_PLATFORM_USES_ONLY_WIN32_API 72 /* 73 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. 74 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) 75 * to use native APIs as much as possible? 76 */ 77 # define WIN32_LEAN_AND_MEAN 78 # define VC_EXTRALEAN 79 # define NOUSER 80 # define NOSERVICE 81 # define NOIME 82 # define NOMCX 83 # include <windows.h> 84 # include "wintz.h" 85 #elif U_PLATFORM == U_PF_OS400 86 # include <float.h> 87 # include <qusec.h> /* error code structure */ 88 # include <qusrjobi.h> 89 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 90 # include <mih/testptr.h> /* For uprv_maximumPtr */ 91 #elif U_PLATFORM == U_PF_OS390 92 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 93 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS 94 # include <limits.h> 95 # include <unistd.h> 96 # if U_PLATFORM == U_PF_SOLARIS 97 # ifndef _XPG4_2 98 # define _XPG4_2 99 # endif 100 # endif 101 #elif U_PLATFORM == U_PF_QNX 102 # include <sys/neutrino.h> 103 #endif 104 105 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) 106 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */ 107 #undef __STRICT_ANSI__ 108 #endif 109 110 /* 111 * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. 112 */ 113 #include <time.h> 114 115 #if !U_PLATFORM_USES_ONLY_WIN32_API 116 #include <sys/time.h> 117 #endif 118 119 /* 120 * Only include langinfo.h if we have a way to get the codeset. If we later 121 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 122 * 123 */ 124 125 #if U_HAVE_NL_LANGINFO_CODESET 126 #include <langinfo.h> 127 #endif 128 129 /** 130 * Simple things (presence of functions, etc) should just go in configure.in and be added to 131 * icucfg.h via autoheader. 132 */ 133 #if U_PLATFORM_IMPLEMENTS_POSIX 134 # if U_PLATFORM == U_PF_OS400 135 # define HAVE_DLFCN_H 0 136 # define HAVE_DLOPEN 0 137 # else 138 # ifndef HAVE_DLFCN_H 139 # define HAVE_DLFCN_H 1 140 # endif 141 # ifndef HAVE_DLOPEN 142 # define HAVE_DLOPEN 1 143 # endif 144 # endif 145 # ifndef HAVE_GETTIMEOFDAY 146 # define HAVE_GETTIMEOFDAY 1 147 # endif 148 #else 149 # define HAVE_DLFCN_H 0 150 # define HAVE_DLOPEN 0 151 # define HAVE_GETTIMEOFDAY 0 152 #endif 153 154 U_NAMESPACE_USE 155 156 /* Define the extension for data files, again... */ 157 #define DATA_TYPE "dat" 158 159 /* Leave this copyright notice here! */ 160 static const char copyright[] = U_COPYRIGHT_STRING; 161 162 /* floating point implementations ------------------------------------------- */ 163 164 /* We return QNAN rather than SNAN*/ 165 #define SIGN 0x80000000U 166 167 /* Make it easy to define certain types of constants */ 168 typedef union { 169 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ 170 double d64; 171 } BitPatternConversion; 172 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 173 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 174 175 /*--------------------------------------------------------------------------- 176 Platform utilities 177 Our general strategy is to assume we're on a POSIX platform. Platforms which 178 are non-POSIX must declare themselves so. The default POSIX implementation 179 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 180 functions). 181 ---------------------------------------------------------------------------*/ 182 183 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400 184 # undef U_POSIX_LOCALE 185 #else 186 # define U_POSIX_LOCALE 1 187 #endif 188 189 /* 190 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 191 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 192 */ 193 #if !IEEE_754 194 static char* 195 u_topNBytesOfDouble(double* d, int n) 196 { 197 #if U_IS_BIG_ENDIAN 198 return (char*)d; 199 #else 200 return (char*)(d + 1) - n; 201 #endif 202 } 203 204 static char* 205 u_bottomNBytesOfDouble(double* d, int n) 206 { 207 #if U_IS_BIG_ENDIAN 208 return (char*)(d + 1) - n; 209 #else 210 return (char*)d; 211 #endif 212 } 213 #endif /* !IEEE_754 */ 214 215 #if IEEE_754 216 static UBool 217 u_signBit(double d) { 218 uint8_t hiByte; 219 #if U_IS_BIG_ENDIAN 220 hiByte = *(uint8_t *)&d; 221 #else 222 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 223 #endif 224 return (hiByte & 0x80) != 0; 225 } 226 #endif 227 228 229 230 #if defined (U_DEBUG_FAKETIME) 231 /* Override the clock to test things without having to move the system clock. 232 * Assumes POSIX gettimeofday() will function 233 */ 234 UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 235 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 236 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 237 static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; 238 239 static UDate getUTCtime_real() { 240 struct timeval posixTime; 241 gettimeofday(&posixTime, NULL); 242 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 243 } 244 245 static UDate getUTCtime_fake() { 246 umtx_lock(&fakeClockMutex); 247 if(!fakeClock_set) { 248 UDate real = getUTCtime_real(); 249 const char *fake_start = getenv("U_FAKETIME_START"); 250 if((fake_start!=NULL) && (fake_start[0]!=0)) { 251 sscanf(fake_start,"%lf",&fakeClock_t0); 252 fakeClock_dt = fakeClock_t0 - real; 253 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 254 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 255 fakeClock_t0, fake_start, fakeClock_dt, real); 256 } else { 257 fakeClock_dt = 0; 258 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 259 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 260 } 261 fakeClock_set = TRUE; 262 } 263 umtx_unlock(&fakeClockMutex); 264 265 return getUTCtime_real() + fakeClock_dt; 266 } 267 #endif 268 269 #if U_PLATFORM_USES_ONLY_WIN32_API 270 typedef union { 271 int64_t int64; 272 FILETIME fileTime; 273 } FileTimeConversion; /* This is like a ULARGE_INTEGER */ 274 275 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 276 #define EPOCH_BIAS INT64_C(116444736000000000) 277 #define HECTONANOSECOND_PER_MILLISECOND 10000 278 279 #endif 280 281 /*--------------------------------------------------------------------------- 282 Universal Implementations 283 These are designed to work on all platforms. Try these, and if they 284 don't work on your platform, then special case your platform with new 285 implementations. 286 ---------------------------------------------------------------------------*/ 287 288 U_CAPI UDate U_EXPORT2 289 uprv_getUTCtime() 290 { 291 #if defined(U_DEBUG_FAKETIME) 292 return getUTCtime_fake(); /* Hook for overriding the clock */ 293 #else 294 return uprv_getRawUTCtime(); 295 #endif 296 } 297 298 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 299 U_CAPI UDate U_EXPORT2 300 uprv_getRawUTCtime() 301 { 302 #if U_PLATFORM_USES_ONLY_WIN32_API 303 304 FileTimeConversion winTime; 305 GetSystemTimeAsFileTime(&winTime.fileTime); 306 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 307 #else 308 309 #if HAVE_GETTIMEOFDAY 310 struct timeval posixTime; 311 gettimeofday(&posixTime, NULL); 312 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 313 #else 314 time_t epochtime; 315 time(&epochtime); 316 return (UDate)epochtime * U_MILLIS_PER_SECOND; 317 #endif 318 319 #endif 320 } 321 322 /*----------------------------------------------------------------------------- 323 IEEE 754 324 These methods detect and return NaN and infinity values for doubles 325 conforming to IEEE 754. Platforms which support this standard include X86, 326 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 327 If this doesn't work on your platform, you have non-IEEE floating-point, and 328 will need to code your own versions. A naive implementation is to return 0.0 329 for getNaN and getInfinity, and false for isNaN and isInfinite. 330 ---------------------------------------------------------------------------*/ 331 332 U_CAPI UBool U_EXPORT2 333 uprv_isNaN(double number) 334 { 335 #if IEEE_754 336 BitPatternConversion convertedNumber; 337 convertedNumber.d64 = number; 338 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 339 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 340 341 #elif U_PLATFORM == U_PF_OS390 342 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 343 sizeof(uint32_t)); 344 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 345 sizeof(uint32_t)); 346 347 return ((highBits & 0x7F080000L) == 0x7F080000L) && 348 (lowBits == 0x00000000L); 349 350 #else 351 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 352 /* you'll need to replace this default implementation with what's correct*/ 353 /* for your platform.*/ 354 return number != number; 355 #endif 356 } 357 358 U_CAPI UBool U_EXPORT2 359 uprv_isInfinite(double number) 360 { 361 #if IEEE_754 362 BitPatternConversion convertedNumber; 363 convertedNumber.d64 = number; 364 /* Infinity is exactly 0x7FF0000000000000U. */ 365 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 366 #elif U_PLATFORM == U_PF_OS390 367 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 368 sizeof(uint32_t)); 369 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 370 sizeof(uint32_t)); 371 372 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 373 374 #else 375 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 376 /* value, you'll need to replace this default implementation with what's*/ 377 /* correct for your platform.*/ 378 return number == (2.0 * number); 379 #endif 380 } 381 382 U_CAPI UBool U_EXPORT2 383 uprv_isPositiveInfinity(double number) 384 { 385 #if IEEE_754 || U_PLATFORM == U_PF_OS390 386 return (UBool)(number > 0 && uprv_isInfinite(number)); 387 #else 388 return uprv_isInfinite(number); 389 #endif 390 } 391 392 U_CAPI UBool U_EXPORT2 393 uprv_isNegativeInfinity(double number) 394 { 395 #if IEEE_754 || U_PLATFORM == U_PF_OS390 396 return (UBool)(number < 0 && uprv_isInfinite(number)); 397 398 #else 399 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 400 sizeof(uint32_t)); 401 return((highBits & SIGN) && uprv_isInfinite(number)); 402 403 #endif 404 } 405 406 U_CAPI double U_EXPORT2 407 uprv_getNaN() 408 { 409 #if IEEE_754 || U_PLATFORM == U_PF_OS390 410 return gNan.d64; 411 #else 412 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 413 /* you'll need to replace this default implementation with what's correct*/ 414 /* for your platform.*/ 415 return 0.0; 416 #endif 417 } 418 419 U_CAPI double U_EXPORT2 420 uprv_getInfinity() 421 { 422 #if IEEE_754 || U_PLATFORM == U_PF_OS390 423 return gInf.d64; 424 #else 425 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 426 /* value, you'll need to replace this default implementation with what's*/ 427 /* correct for your platform.*/ 428 return 0.0; 429 #endif 430 } 431 432 U_CAPI double U_EXPORT2 433 uprv_floor(double x) 434 { 435 return floor(x); 436 } 437 438 U_CAPI double U_EXPORT2 439 uprv_ceil(double x) 440 { 441 return ceil(x); 442 } 443 444 U_CAPI double U_EXPORT2 445 uprv_round(double x) 446 { 447 return uprv_floor(x + 0.5); 448 } 449 450 U_CAPI double U_EXPORT2 451 uprv_fabs(double x) 452 { 453 return fabs(x); 454 } 455 456 U_CAPI double U_EXPORT2 457 uprv_modf(double x, double* y) 458 { 459 return modf(x, y); 460 } 461 462 U_CAPI double U_EXPORT2 463 uprv_fmod(double x, double y) 464 { 465 return fmod(x, y); 466 } 467 468 U_CAPI double U_EXPORT2 469 uprv_pow(double x, double y) 470 { 471 /* This is declared as "double pow(double x, double y)" */ 472 return pow(x, y); 473 } 474 475 U_CAPI double U_EXPORT2 476 uprv_pow10(int32_t x) 477 { 478 return pow(10.0, (double)x); 479 } 480 481 U_CAPI double U_EXPORT2 482 uprv_fmax(double x, double y) 483 { 484 #if IEEE_754 485 /* first handle NaN*/ 486 if(uprv_isNaN(x) || uprv_isNaN(y)) 487 return uprv_getNaN(); 488 489 /* check for -0 and 0*/ 490 if(x == 0.0 && y == 0.0 && u_signBit(x)) 491 return y; 492 493 #endif 494 495 /* this should work for all flt point w/o NaN and Inf special cases */ 496 return (x > y ? x : y); 497 } 498 499 U_CAPI double U_EXPORT2 500 uprv_fmin(double x, double y) 501 { 502 #if IEEE_754 503 /* first handle NaN*/ 504 if(uprv_isNaN(x) || uprv_isNaN(y)) 505 return uprv_getNaN(); 506 507 /* check for -0 and 0*/ 508 if(x == 0.0 && y == 0.0 && u_signBit(y)) 509 return y; 510 511 #endif 512 513 /* this should work for all flt point w/o NaN and Inf special cases */ 514 return (x > y ? y : x); 515 } 516 517 /** 518 * Truncates the given double. 519 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 520 * This is different than calling floor() or ceil(): 521 * floor(3.3) = 3, floor(-3.3) = -4 522 * ceil(3.3) = 4, ceil(-3.3) = -3 523 */ 524 U_CAPI double U_EXPORT2 525 uprv_trunc(double d) 526 { 527 #if IEEE_754 528 /* handle error cases*/ 529 if(uprv_isNaN(d)) 530 return uprv_getNaN(); 531 if(uprv_isInfinite(d)) 532 return uprv_getInfinity(); 533 534 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ 535 return ceil(d); 536 else 537 return floor(d); 538 539 #else 540 return d >= 0 ? floor(d) : ceil(d); 541 542 #endif 543 } 544 545 /** 546 * Return the largest positive number that can be represented by an integer 547 * type of arbitrary bit length. 548 */ 549 U_CAPI double U_EXPORT2 550 uprv_maxMantissa(void) 551 { 552 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 553 } 554 555 U_CAPI double U_EXPORT2 556 uprv_log(double d) 557 { 558 return log(d); 559 } 560 561 U_CAPI void * U_EXPORT2 562 uprv_maximumPtr(void * base) 563 { 564 #if U_PLATFORM == U_PF_OS400 565 /* 566 * With the provided function we should never be out of range of a given segment 567 * (a traditional/typical segment that is). Our segments have 5 bytes for the 568 * id and 3 bytes for the offset. The key is that the casting takes care of 569 * only retrieving the offset portion minus x1000. Hence, the smallest offset 570 * seen in a program is x001000 and when casted to an int would be 0. 571 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 572 * 573 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 574 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 575 * This function determines the activation based on the pointer that is passed in and 576 * calculates the appropriate maximum available size for 577 * each pointer type (TERASPACE and non-TERASPACE) 578 * 579 * Unlike other operating systems, the pointer model isn't determined at 580 * compile time on i5/OS. 581 */ 582 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 583 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 584 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 585 } 586 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 587 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 588 589 #else 590 return U_MAX_PTR(base); 591 #endif 592 } 593 594 /*--------------------------------------------------------------------------- 595 Platform-specific Implementations 596 Try these, and if they don't work on your platform, then special case your 597 platform with new implementations. 598 ---------------------------------------------------------------------------*/ 599 600 /* Generic time zone layer -------------------------------------------------- */ 601 602 /* Time zone utilities */ 603 U_CAPI void U_EXPORT2 604 uprv_tzset() 605 { 606 #if defined(U_TZSET) 607 U_TZSET(); 608 #else 609 /* no initialization*/ 610 #endif 611 } 612 613 U_CAPI int32_t U_EXPORT2 614 uprv_timezone() 615 { 616 #ifdef U_TIMEZONE 617 return U_TIMEZONE; 618 #else 619 time_t t, t1, t2; 620 struct tm tmrec; 621 int32_t tdiff = 0; 622 623 time(&t); 624 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 625 #if U_PLATFORM != U_PF_IPHONE 626 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 627 #endif 628 t1 = mktime(&tmrec); /* local time in seconds*/ 629 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 630 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 631 tdiff = t2 - t1; 632 633 #if U_PLATFORM != U_PF_IPHONE 634 /* imitate NT behaviour, which returns same timezone offset to GMT for 635 winter and summer. 636 This does not work on all platforms. For instance, on glibc on Linux 637 and on Mac OS 10.5, tdiff calculated above remains the same 638 regardless of whether DST is in effect or not. iOS is another 639 platform where this does not work. Linux + glibc and Mac OS 10.5 640 have U_TIMEZONE defined so that this code is not reached. 641 */ 642 if (dst_checked) 643 tdiff += 3600; 644 #endif 645 return tdiff; 646 #endif 647 } 648 649 /* Note that U_TZNAME does *not* have to be tzname, but if it is, 650 some platforms need to have it declared here. */ 651 652 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API)) 653 /* RS6000 and others reject char **tzname. */ 654 extern U_IMPORT char *U_TZNAME[]; 655 #endif 656 657 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) 658 /* These platforms are likely to use Olson timezone IDs. */ 659 #define CHECK_LOCALTIME_LINK 1 660 #if U_PLATFORM_IS_DARWIN_BASED 661 #include <tzfile.h> 662 #define TZZONEINFO (TZDIR "/") 663 #elif U_PLATFORM == U_PF_SOLARIS 664 #define TZDEFAULT "/etc/localtime" 665 #define TZZONEINFO "/usr/share/lib/zoneinfo/" 666 #define TZZONEINFO2 "../usr/share/lib/zoneinfo/" 667 #define TZ_ENV_CHECK "localtime" 668 #else 669 #define TZDEFAULT "/etc/localtime" 670 #define TZZONEINFO "/usr/share/zoneinfo/" 671 #endif 672 #if U_HAVE_DIRENT_H 673 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 674 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 675 symlinked to /etc/localtime, which makes searchForTZFile return 676 'localtime' when it's the first match. */ 677 #define TZFILE_SKIP2 "localtime" 678 #define SEARCH_TZFILE 679 #include <dirent.h> /* Needed to search through system timezone files */ 680 #endif 681 static char gTimeZoneBuffer[PATH_MAX]; 682 static char *gTimeZoneBufferPtr = NULL; 683 #endif 684 685 #if !U_PLATFORM_USES_ONLY_WIN32_API 686 #define isNonDigit(ch) (ch < '0' || '9' < ch) 687 static UBool isValidOlsonID(const char *id) { 688 int32_t idx = 0; 689 690 /* Determine if this is something like Iceland (Olson ID) 691 or AST4ADT (non-Olson ID) */ 692 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 693 idx++; 694 } 695 696 /* If we went through the whole string, then it might be okay. 697 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 698 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 699 The rest of the time it could be an Olson ID. George */ 700 return (UBool)(id[idx] == 0 701 || uprv_strcmp(id, "PST8PDT") == 0 702 || uprv_strcmp(id, "MST7MDT") == 0 703 || uprv_strcmp(id, "CST6CDT") == 0 704 || uprv_strcmp(id, "EST5EDT") == 0); 705 } 706 707 /* On some Unix-like OS, 'posix' subdirectory in 708 /usr/share/zoneinfo replicates the top-level contents. 'right' 709 subdirectory has the same set of files, but individual files 710 are different from those in the top-level directory or 'posix' 711 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 712 has files for UTC. 713 When the first match for /etc/localtime is in either of them 714 (usually in posix because 'right' has different file contents), 715 or TZ environment variable points to one of them, createTimeZone 716 fails because, say, 'posix/America/New_York' is not an Olson 717 timezone id ('America/New_York' is). So, we have to skip 718 'posix/' and 'right/' at the beginning. */ 719 static void skipZoneIDPrefix(const char** id) { 720 if (uprv_strncmp(*id, "posix/", 6) == 0 721 || uprv_strncmp(*id, "right/", 6) == 0) 722 { 723 *id += 6; 724 } 725 } 726 #endif 727 728 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API 729 730 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 731 typedef struct OffsetZoneMapping { 732 int32_t offsetSeconds; 733 int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ 734 const char *stdID; 735 const char *dstID; 736 const char *olsonID; 737 } OffsetZoneMapping; 738 739 enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; 740 741 /* 742 This list tries to disambiguate a set of abbreviated timezone IDs and offsets 743 and maps it to an Olson ID. 744 Before adding anything to this list, take a look at 745 icu/source/tools/tzcode/tz.alias 746 Sometimes no daylight savings (0) is important to define due to aliases. 747 This list can be tested with icu/source/test/compat/tzone.pl 748 More values could be added to daylightType to increase precision. 749 */ 750 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 751 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 752 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 753 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 754 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 755 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 756 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 757 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 758 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 759 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 760 {-34200, 2, "CST", "CST", "Australia/South"}, 761 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 762 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 763 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 764 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 765 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 766 {-28800, 2, "WST", "WST", "Australia/West"}, 767 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 768 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 769 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 770 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 771 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 772 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 773 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 774 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 775 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 776 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 777 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 778 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 779 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 780 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 781 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 782 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 783 {0, 1, "GMT", "IST", "Europe/Dublin"}, 784 {0, 1, "GMT", "BST", "Europe/London"}, 785 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 786 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 787 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 788 {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, 789 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 790 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 791 {10800, 1, "WGT", "WGST", "America/Godthab"}, 792 {10800, 2, "BRT", "BRST", "Brazil/East"}, 793 {12600, 1, "NST", "NDT", "America/St_Johns"}, 794 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 795 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 796 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 797 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 798 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 799 {18000, 1, "CST", "CDT", "America/Havana"}, 800 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 801 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 802 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 803 {21600, 0, "CST", "CDT", "America/Guatemala"}, 804 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 805 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 806 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 807 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 808 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 809 {36000, 1, "HAST", "HADT", "US/Aleutian"} 810 }; 811 812 /*#define DEBUG_TZNAME*/ 813 814 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 815 { 816 int32_t idx; 817 #ifdef DEBUG_TZNAME 818 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 819 #endif 820 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) 821 { 822 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 823 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 824 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 825 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 826 { 827 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 828 } 829 } 830 return NULL; 831 } 832 #endif 833 834 #ifdef SEARCH_TZFILE 835 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 836 #define MAX_READ_SIZE 512 837 838 typedef struct DefaultTZInfo { 839 char* defaultTZBuffer; 840 int64_t defaultTZFileSize; 841 FILE* defaultTZFilePtr; 842 UBool defaultTZstatus; 843 int32_t defaultTZPosition; 844 } DefaultTZInfo; 845 846 /* 847 * This method compares the two files given to see if they are a match. 848 * It is currently use to compare two TZ files. 849 */ 850 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 851 FILE* file; 852 int64_t sizeFile; 853 int64_t sizeFileLeft; 854 int32_t sizeFileRead; 855 int32_t sizeFileToRead; 856 char bufferFile[MAX_READ_SIZE]; 857 UBool result = TRUE; 858 859 if (tzInfo->defaultTZFilePtr == NULL) { 860 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 861 } 862 file = fopen(TZFileName, "r"); 863 864 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 865 866 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 867 /* First check that the file size are equal. */ 868 if (tzInfo->defaultTZFileSize == 0) { 869 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 870 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 871 } 872 fseek(file, 0, SEEK_END); 873 sizeFile = ftell(file); 874 sizeFileLeft = sizeFile; 875 876 if (sizeFile != tzInfo->defaultTZFileSize) { 877 result = FALSE; 878 } else { 879 /* Store the data from the files in seperate buffers and 880 * compare each byte to determine equality. 881 */ 882 if (tzInfo->defaultTZBuffer == NULL) { 883 rewind(tzInfo->defaultTZFilePtr); 884 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 885 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 886 } 887 rewind(file); 888 while(sizeFileLeft > 0) { 889 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 890 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; 891 892 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 893 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 894 result = FALSE; 895 break; 896 } 897 sizeFileLeft -= sizeFileRead; 898 tzInfo->defaultTZPosition += sizeFileRead; 899 } 900 } 901 } else { 902 result = FALSE; 903 } 904 905 if (file != NULL) { 906 fclose(file); 907 } 908 909 return result; 910 } 911 /* 912 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 913 */ 914 /* dirent also lists two entries: "." and ".." that we can safely ignore. */ 915 #define SKIP1 "." 916 #define SKIP2 ".." 917 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 918 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 919 char curpath[MAX_PATH_SIZE]; 920 DIR* dirp = opendir(path); 921 DIR* subDirp = NULL; 922 struct dirent* dirEntry = NULL; 923 924 char* result = NULL; 925 if (dirp == NULL) { 926 return result; 927 } 928 929 /* Save the current path */ 930 uprv_memset(curpath, 0, MAX_PATH_SIZE); 931 uprv_strcpy(curpath, path); 932 933 /* Check each entry in the directory. */ 934 while((dirEntry = readdir(dirp)) != NULL) { 935 const char* dirName = dirEntry->d_name; 936 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 937 /* Create a newpath with the new entry to test each entry in the directory. */ 938 char newpath[MAX_PATH_SIZE]; 939 uprv_strcpy(newpath, curpath); 940 uprv_strcat(newpath, dirName); 941 942 if ((subDirp = opendir(newpath)) != NULL) { 943 /* If this new path is a directory, make a recursive call with the newpath. */ 944 closedir(subDirp); 945 uprv_strcat(newpath, "/"); 946 result = searchForTZFile(newpath, tzInfo); 947 /* 948 Have to get out here. Otherwise, we'd keep looking 949 and return the first match in the top-level directory 950 if there's a match in the top-level. If not, this function 951 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 952 It worked without this in most cases because we have a fallback of calling 953 localtime_r to figure out the default timezone. 954 */ 955 if (result != NULL) 956 break; 957 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 958 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 959 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1; 960 skipZoneIDPrefix(&zoneid); 961 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid); 962 result = SEARCH_TZFILE_RESULT; 963 /* Get out after the first one found. */ 964 break; 965 } 966 } 967 } 968 } 969 closedir(dirp); 970 return result; 971 } 972 #endif 973 U_CAPI const char* U_EXPORT2 974 uprv_tzname(int n) 975 { 976 const char *tzid = NULL; 977 #if U_PLATFORM_USES_ONLY_WIN32_API 978 tzid = uprv_detectWindowsTimeZone(); 979 980 if (tzid != NULL) { 981 return tzid; 982 } 983 #else 984 985 /*#if U_PLATFORM_IS_DARWIN_BASED 986 int ret; 987 988 tzid = getenv("TZFILE"); 989 if (tzid != NULL) { 990 return tzid; 991 } 992 #endif*/ 993 994 /* This code can be temporarily disabled to test tzname resolution later on. */ 995 #ifndef DEBUG_TZNAME 996 tzid = getenv("TZ"); 997 if (tzid != NULL && isValidOlsonID(tzid) 998 #if U_PLATFORM == U_PF_SOLARIS 999 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ 1000 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 1001 #endif 1002 ) { 1003 /* This might be a good Olson ID. */ 1004 skipZoneIDPrefix(&tzid); 1005 return tzid; 1006 } 1007 /* else U_TZNAME will give a better result. */ 1008 #endif 1009 1010 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) 1011 /* Caller must handle threading issues */ 1012 if (gTimeZoneBufferPtr == NULL) { 1013 /* 1014 This is a trick to look at the name of the link to get the Olson ID 1015 because the tzfile contents is underspecified. 1016 This isn't guaranteed to work because it may not be a symlink. 1017 */ 1018 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 1019 if (0 < ret) { 1020 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1021 gTimeZoneBuffer[ret] = 0; 1022 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1023 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1024 { 1025 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1026 } 1027 #if U_PLATFORM == U_PF_SOLARIS 1028 else 1029 { 1030 tzZoneInfoLen = uprv_strlen(TZZONEINFO2); 1031 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0 1032 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1033 { 1034 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1035 } 1036 } 1037 #endif 1038 } else { 1039 #if defined(SEARCH_TZFILE) 1040 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1041 if (tzInfo != NULL) { 1042 tzInfo->defaultTZBuffer = NULL; 1043 tzInfo->defaultTZFileSize = 0; 1044 tzInfo->defaultTZFilePtr = NULL; 1045 tzInfo->defaultTZstatus = FALSE; 1046 tzInfo->defaultTZPosition = 0; 1047 1048 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1049 1050 /* Free previously allocated memory */ 1051 if (tzInfo->defaultTZBuffer != NULL) { 1052 uprv_free(tzInfo->defaultTZBuffer); 1053 } 1054 if (tzInfo->defaultTZFilePtr != NULL) { 1055 fclose(tzInfo->defaultTZFilePtr); 1056 } 1057 uprv_free(tzInfo); 1058 } 1059 1060 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1061 return gTimeZoneBufferPtr; 1062 } 1063 #endif 1064 } 1065 } 1066 else { 1067 return gTimeZoneBufferPtr; 1068 } 1069 #endif 1070 #endif 1071 1072 #ifdef U_TZNAME 1073 #if U_PLATFORM_USES_ONLY_WIN32_API 1074 /* The return value is free'd in timezone.cpp on Windows because 1075 * the other code path returns a pointer to a heap location. */ 1076 return uprv_strdup(U_TZNAME[n]); 1077 #else 1078 /* 1079 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1080 So we remap the abbreviation to an olson ID. 1081 1082 Since Windows exposes a little more timezone information, 1083 we normally don't use this code on Windows because 1084 uprv_detectWindowsTimeZone should have already given the correct answer. 1085 */ 1086 { 1087 struct tm juneSol, decemberSol; 1088 int daylightType; 1089 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1090 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1091 1092 /* This probing will tell us when daylight savings occurs. */ 1093 localtime_r(&juneSolstice, &juneSol); 1094 localtime_r(&decemberSolstice, &decemberSol); 1095 if(decemberSol.tm_isdst > 0) { 1096 daylightType = U_DAYLIGHT_DECEMBER; 1097 } else if(juneSol.tm_isdst > 0) { 1098 daylightType = U_DAYLIGHT_JUNE; 1099 } else { 1100 daylightType = U_DAYLIGHT_NONE; 1101 } 1102 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1103 if (tzid != NULL) { 1104 return tzid; 1105 } 1106 } 1107 return U_TZNAME[n]; 1108 #endif 1109 #else 1110 return ""; 1111 #endif 1112 } 1113 1114 /* Get and set the ICU data directory --------------------------------------- */ 1115 1116 static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; 1117 static char *gDataDirectory = NULL; 1118 1119 UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; 1120 static CharString *gTimeZoneFilesDirectory = NULL; 1121 1122 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API 1123 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1124 #endif 1125 1126 static UBool U_CALLCONV putil_cleanup(void) 1127 { 1128 if (gDataDirectory && *gDataDirectory) { 1129 uprv_free(gDataDirectory); 1130 } 1131 gDataDirectory = NULL; 1132 gDataDirInitOnce.reset(); 1133 1134 delete gTimeZoneFilesDirectory; 1135 gTimeZoneFilesDirectory = NULL; 1136 gTimeZoneFilesInitOnce.reset(); 1137 1138 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API 1139 if (gCorrectedPOSIXLocale) { 1140 uprv_free(gCorrectedPOSIXLocale); 1141 gCorrectedPOSIXLocale = NULL; 1142 } 1143 #endif 1144 return TRUE; 1145 } 1146 1147 /* 1148 * Set the data directory. 1149 * Make a copy of the passed string, and set the global data dir to point to it. 1150 */ 1151 U_CAPI void U_EXPORT2 1152 u_setDataDirectory(const char *directory) { 1153 char *newDataDir; 1154 int32_t length; 1155 1156 if(directory==NULL || *directory==0) { 1157 /* A small optimization to prevent the malloc and copy when the 1158 shared library is used, and this is a way to make sure that NULL 1159 is never returned. 1160 */ 1161 newDataDir = (char *)""; 1162 } 1163 else { 1164 length=(int32_t)uprv_strlen(directory); 1165 newDataDir = (char *)uprv_malloc(length + 2); 1166 /* Exit out if newDataDir could not be created. */ 1167 if (newDataDir == NULL) { 1168 return; 1169 } 1170 uprv_strcpy(newDataDir, directory); 1171 1172 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1173 { 1174 char *p; 1175 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1176 *p = U_FILE_SEP_CHAR; 1177 } 1178 } 1179 #endif 1180 } 1181 1182 if (gDataDirectory && *gDataDirectory) { 1183 uprv_free(gDataDirectory); 1184 } 1185 gDataDirectory = newDataDir; 1186 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1187 } 1188 1189 U_CAPI UBool U_EXPORT2 1190 uprv_pathIsAbsolute(const char *path) 1191 { 1192 if(!path || !*path) { 1193 return FALSE; 1194 } 1195 1196 if(*path == U_FILE_SEP_CHAR) { 1197 return TRUE; 1198 } 1199 1200 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1201 if(*path == U_FILE_ALT_SEP_CHAR) { 1202 return TRUE; 1203 } 1204 #endif 1205 1206 #if U_PLATFORM_USES_ONLY_WIN32_API 1207 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1208 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1209 path[1] == ':' ) { 1210 return TRUE; 1211 } 1212 #endif 1213 1214 return FALSE; 1215 } 1216 1217 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1218 until some client wrapper makefiles are updated */ 1219 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR 1220 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1221 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1222 # endif 1223 #endif 1224 1225 static void U_CALLCONV dataDirectoryInitFn() { 1226 /* If we already have the directory, then return immediately. Will happen if user called 1227 * u_setDataDirectory(). 1228 */ 1229 if (gDataDirectory) { 1230 return; 1231 } 1232 1233 const char *path = NULL; 1234 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1235 char datadir_path_buffer[PATH_MAX]; 1236 #endif 1237 1238 /* 1239 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1240 override ICU's data with the ICU_DATA environment variable. This prevents 1241 problems where multiple custom copies of ICU's specific version of data 1242 are installed on a system. Either the application must define the data 1243 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1244 ICU, set the data with udata_setCommonData or trust that all of the 1245 required data is contained in ICU's data library that contains 1246 the entry point defined by U_ICUDATA_ENTRY_POINT. 1247 1248 There may also be some platforms where environment variables 1249 are not allowed. 1250 */ 1251 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1252 /* First try to get the environment variable */ 1253 path=getenv("ICU_DATA"); 1254 # endif 1255 1256 /* ICU_DATA_DIR may be set as a compile option. 1257 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1258 * and is used only when data is built in archive mode eliminating the need 1259 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1260 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1261 * set their own path. 1262 */ 1263 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1264 if(path==NULL || *path==0) { 1265 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1266 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1267 # endif 1268 # ifdef ICU_DATA_DIR 1269 path=ICU_DATA_DIR; 1270 # else 1271 path=U_ICU_DATA_DEFAULT_DIR; 1272 # endif 1273 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1274 if (prefix != NULL) { 1275 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1276 path=datadir_path_buffer; 1277 } 1278 # endif 1279 } 1280 #endif 1281 1282 if(path==NULL) { 1283 /* It looks really bad, set it to something. */ 1284 path = ""; 1285 } 1286 1287 u_setDataDirectory(path); 1288 return; 1289 } 1290 1291 U_CAPI const char * U_EXPORT2 1292 u_getDataDirectory(void) { 1293 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn); 1294 return gDataDirectory; 1295 } 1296 1297 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { 1298 if (U_FAILURE(status)) { 1299 return; 1300 } 1301 gTimeZoneFilesDirectory->clear(); 1302 gTimeZoneFilesDirectory->append(path, status); 1303 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1304 char *p = gTimeZoneFilesDirectory->data(); 1305 while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) { 1306 *p = U_FILE_SEP_CHAR; 1307 } 1308 #endif 1309 } 1310 1311 #define TO_STRING(x) TO_STRING_2(x) 1312 #define TO_STRING_2(x) #x 1313 1314 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { 1315 U_ASSERT(gTimeZoneFilesDirectory == NULL); 1316 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1317 gTimeZoneFilesDirectory = new CharString(); 1318 if (gTimeZoneFilesDirectory == NULL) { 1319 status = U_MEMORY_ALLOCATION_ERROR; 1320 return; 1321 } 1322 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR"); 1323 #if defined(U_TIMEZONE_FILES_DIR) 1324 if (dir == NULL) { 1325 dir = TO_STRING(U_TIMEZONE_FILES_DIR); 1326 } 1327 #endif 1328 if (dir == NULL) { 1329 dir = ""; 1330 } 1331 setTimeZoneFilesDir(dir, status); 1332 } 1333 1334 1335 U_CAPI const char * U_EXPORT2 1336 u_getTimeZoneFilesDirectory(UErrorCode *status) { 1337 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); 1338 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : ""; 1339 } 1340 1341 U_CAPI void U_EXPORT2 1342 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) { 1343 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); 1344 setTimeZoneFilesDir(path, *status); 1345 1346 // Note: this function does some extra churn, first setting based on the 1347 // environment, then immediately replacing with the value passed in. 1348 // The logic is simpler that way, and performance shouldn't be an issue. 1349 } 1350 1351 1352 #if U_POSIX_LOCALE 1353 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1354 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1355 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1356 */ 1357 static const char *uprv_getPOSIXIDForCategory(int category) 1358 { 1359 const char* posixID = NULL; 1360 if (category == LC_MESSAGES || category == LC_CTYPE) { 1361 /* 1362 * On Solaris two different calls to setlocale can result in 1363 * different values. Only get this value once. 1364 * 1365 * We must check this first because an application can set this. 1366 * 1367 * LC_ALL can't be used because it's platform dependent. The LANG 1368 * environment variable seems to affect LC_CTYPE variable by default. 1369 * Here is what setlocale(LC_ALL, NULL) can return. 1370 * HPUX can return 'C C C C C C C' 1371 * Solaris can return /en_US/C/C/C/C/C on the second try. 1372 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 1373 * 1374 * The default codepage detection also needs to use LC_CTYPE. 1375 * 1376 * Do not call setlocale(LC_*, "")! Using an empty string instead 1377 * of NULL, will modify the libc behavior. 1378 */ 1379 posixID = setlocale(category, NULL); 1380 if ((posixID == 0) 1381 || (uprv_strcmp("C", posixID) == 0) 1382 || (uprv_strcmp("POSIX", posixID) == 0)) 1383 { 1384 /* Maybe we got some garbage. Try something more reasonable */ 1385 posixID = getenv("LC_ALL"); 1386 if (posixID == 0) { 1387 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1388 if (posixID == 0) { 1389 posixID = getenv("LANG"); 1390 } 1391 } 1392 } 1393 } 1394 if ((posixID==0) 1395 || (uprv_strcmp("C", posixID) == 0) 1396 || (uprv_strcmp("POSIX", posixID) == 0)) 1397 { 1398 /* Nothing worked. Give it a nice POSIX default value. */ 1399 posixID = "en_US_POSIX"; 1400 } 1401 return posixID; 1402 } 1403 1404 /* Return just the POSIX id for the default locale, whatever happens to be in 1405 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1406 */ 1407 static const char *uprv_getPOSIXIDForDefaultLocale(void) 1408 { 1409 static const char* posixID = NULL; 1410 if (posixID == 0) { 1411 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1412 } 1413 return posixID; 1414 } 1415 1416 #if !U_CHARSET_IS_UTF8 1417 /* Return just the POSIX id for the default codepage, whatever happens to be in 1418 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1419 */ 1420 static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1421 { 1422 static const char* posixID = NULL; 1423 if (posixID == 0) { 1424 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1425 } 1426 return posixID; 1427 } 1428 #endif 1429 #endif 1430 1431 /* NOTE: The caller should handle thread safety */ 1432 U_CAPI const char* U_EXPORT2 1433 uprv_getDefaultLocaleID() 1434 { 1435 #if U_POSIX_LOCALE 1436 /* 1437 Note that: (a '!' means the ID is improper somehow) 1438 LC_ALL ----> default_loc codepage 1439 -------------------------------------------------------- 1440 ab.CD ab CD 1441 ab@CD ab__CD - 1442 ab (at) CD.EF ab__CD EF 1443 1444 ab_CD.EF@GH ab_CD_GH EF 1445 1446 Some 'improper' ways to do the same as above: 1447 ! ab_CD (at) GH.EF ab_CD_GH EF 1448 ! ab_CD.EF (at) GH.IJ ab_CD_GH EF 1449 ! ab_CD (at) ZZ.EF@GH.IJ ab_CD_GH EF 1450 1451 _CD@GH _CD_GH - 1452 _CD.EF@GH _CD_GH EF 1453 1454 The variant cannot have dots in it. 1455 The 'rightmost' variant (@xxx) wins. 1456 The leftmost codepage (.xxx) wins. 1457 */ 1458 char *correctedPOSIXLocale = 0; 1459 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1460 const char *p; 1461 const char *q; 1462 int32_t len; 1463 1464 /* Format: (no spaces) 1465 ll [ _CC ] [ . MM ] [ @ VV] 1466 1467 l = lang, C = ctry, M = charmap, V = variant 1468 */ 1469 1470 if (gCorrectedPOSIXLocale != NULL) { 1471 return gCorrectedPOSIXLocale; 1472 } 1473 1474 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1475 /* assume new locale can't be larger than old one? */ 1476 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1477 /* Exit on memory allocation error. */ 1478 if (correctedPOSIXLocale == NULL) { 1479 return NULL; 1480 } 1481 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1482 correctedPOSIXLocale[p-posixID] = 0; 1483 1484 /* do not copy after the @ */ 1485 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1486 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1487 } 1488 } 1489 1490 /* Note that we scan the *uncorrected* ID. */ 1491 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1492 if (correctedPOSIXLocale == NULL) { 1493 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1494 /* Exit on memory allocation error. */ 1495 if (correctedPOSIXLocale == NULL) { 1496 return NULL; 1497 } 1498 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1499 correctedPOSIXLocale[p-posixID] = 0; 1500 } 1501 p++; 1502 1503 /* Take care of any special cases here.. */ 1504 if (!uprv_strcmp(p, "nynorsk")) { 1505 p = "NY"; 1506 /* Don't worry about no__NY. In practice, it won't appear. */ 1507 } 1508 1509 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1510 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1511 } 1512 else { 1513 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1514 } 1515 1516 if ((q = uprv_strchr(p, '.')) != NULL) { 1517 /* How big will the resulting string be? */ 1518 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1519 uprv_strncat(correctedPOSIXLocale, p, q-p); 1520 correctedPOSIXLocale[len] = 0; 1521 } 1522 else { 1523 /* Anything following the @ sign */ 1524 uprv_strcat(correctedPOSIXLocale, p); 1525 } 1526 1527 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1528 * How about 'russian' -> 'ru'? 1529 * Many of the other locales using ISO codes will be handled by the 1530 * canonicalization functions in uloc_getDefault. 1531 */ 1532 } 1533 1534 /* Was a correction made? */ 1535 if (correctedPOSIXLocale != NULL) { 1536 posixID = correctedPOSIXLocale; 1537 } 1538 else { 1539 /* copy it, just in case the original pointer goes away. See j2395 */ 1540 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1541 /* Exit on memory allocation error. */ 1542 if (correctedPOSIXLocale == NULL) { 1543 return NULL; 1544 } 1545 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1546 } 1547 1548 if (gCorrectedPOSIXLocale == NULL) { 1549 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1550 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1551 correctedPOSIXLocale = NULL; 1552 } 1553 1554 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */ 1555 uprv_free(correctedPOSIXLocale); 1556 } 1557 1558 return posixID; 1559 1560 #elif U_PLATFORM_USES_ONLY_WIN32_API 1561 #define POSIX_LOCALE_CAPACITY 64 1562 UErrorCode status = U_ZERO_ERROR; 1563 char *correctedPOSIXLocale = 0; 1564 1565 if (gCorrectedPOSIXLocale != NULL) { 1566 return gCorrectedPOSIXLocale; 1567 } 1568 1569 LCID id = GetThreadLocale(); 1570 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); 1571 if (correctedPOSIXLocale) { 1572 int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); 1573 if (U_SUCCESS(status)) { 1574 *(correctedPOSIXLocale + posixLen) = 0; 1575 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1576 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1577 } else { 1578 uprv_free(correctedPOSIXLocale); 1579 } 1580 } 1581 1582 if (gCorrectedPOSIXLocale == NULL) { 1583 return "en_US"; 1584 } 1585 return gCorrectedPOSIXLocale; 1586 1587 #elif U_PLATFORM == U_PF_OS400 1588 /* locales are process scoped and are by definition thread safe */ 1589 static char correctedLocale[64]; 1590 const char *localeID = getenv("LC_ALL"); 1591 char *p; 1592 1593 if (localeID == NULL) 1594 localeID = getenv("LANG"); 1595 if (localeID == NULL) 1596 localeID = setlocale(LC_ALL, NULL); 1597 /* Make sure we have something... */ 1598 if (localeID == NULL) 1599 return "en_US_POSIX"; 1600 1601 /* Extract the locale name from the path. */ 1602 if((p = uprv_strrchr(localeID, '/')) != NULL) 1603 { 1604 /* Increment p to start of locale name. */ 1605 p++; 1606 localeID = p; 1607 } 1608 1609 /* Copy to work location. */ 1610 uprv_strcpy(correctedLocale, localeID); 1611 1612 /* Strip off the '.locale' extension. */ 1613 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1614 *p = 0; 1615 } 1616 1617 /* Upper case the locale name. */ 1618 T_CString_toUpperCase(correctedLocale); 1619 1620 /* See if we are using the POSIX locale. Any of the 1621 * following are equivalent and use the same QLGPGCMA 1622 * (POSIX) locale. 1623 * QLGPGCMA2 means UCS2 1624 * QLGPGCMA_4 means UTF-32 1625 * QLGPGCMA_8 means UTF-8 1626 */ 1627 if ((uprv_strcmp("C", correctedLocale) == 0) || 1628 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1629 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1630 { 1631 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1632 } 1633 else 1634 { 1635 int16_t LocaleLen; 1636 1637 /* Lower case the lang portion. */ 1638 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1639 { 1640 *p = uprv_tolower(*p); 1641 } 1642 1643 /* Adjust for Euro. After '_E' add 'URO'. */ 1644 LocaleLen = uprv_strlen(correctedLocale); 1645 if (correctedLocale[LocaleLen - 2] == '_' && 1646 correctedLocale[LocaleLen - 1] == 'E') 1647 { 1648 uprv_strcat(correctedLocale, "URO"); 1649 } 1650 1651 /* If using Lotus-based locale then convert to 1652 * equivalent non Lotus. 1653 */ 1654 else if (correctedLocale[LocaleLen - 2] == '_' && 1655 correctedLocale[LocaleLen - 1] == 'L') 1656 { 1657 correctedLocale[LocaleLen - 2] = 0; 1658 } 1659 1660 /* There are separate simplified and traditional 1661 * locales called zh_HK_S and zh_HK_T. 1662 */ 1663 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1664 { 1665 uprv_strcpy(correctedLocale, "zh_HK"); 1666 } 1667 1668 /* A special zh_CN_GBK locale... 1669 */ 1670 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1671 { 1672 uprv_strcpy(correctedLocale, "zh_CN"); 1673 } 1674 1675 } 1676 1677 return correctedLocale; 1678 #endif 1679 1680 } 1681 1682 #if !U_CHARSET_IS_UTF8 1683 #if U_POSIX_LOCALE 1684 /* 1685 Due to various platform differences, one platform may specify a charset, 1686 when they really mean a different charset. Remap the names so that they are 1687 compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1688 here. Before adding anything to this function, please consider adding unique 1689 names to the ICU alias table in the data directory. 1690 */ 1691 static const char* 1692 remapPlatformDependentCodepage(const char *locale, const char *name) { 1693 if (locale != NULL && *locale == 0) { 1694 /* Make sure that an empty locale is handled the same way. */ 1695 locale = NULL; 1696 } 1697 if (name == NULL) { 1698 return NULL; 1699 } 1700 #if U_PLATFORM == U_PF_AIX 1701 if (uprv_strcmp(name, "IBM-943") == 0) { 1702 /* Use the ASCII compatible ibm-943 */ 1703 name = "Shift-JIS"; 1704 } 1705 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1706 /* Use the windows-1252 that contains the Euro */ 1707 name = "IBM-5348"; 1708 } 1709 #elif U_PLATFORM == U_PF_SOLARIS 1710 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1711 /* Solaris underspecifies the "EUC" name. */ 1712 if (uprv_strcmp(locale, "zh_CN") == 0) { 1713 name = "EUC-CN"; 1714 } 1715 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1716 name = "EUC-TW"; 1717 } 1718 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1719 name = "EUC-KR"; 1720 } 1721 } 1722 else if (uprv_strcmp(name, "eucJP") == 0) { 1723 /* 1724 ibm-954 is the best match. 1725 ibm-33722 is the default for eucJP (similar to Windows). 1726 */ 1727 name = "eucjis"; 1728 } 1729 else if (uprv_strcmp(name, "646") == 0) { 1730 /* 1731 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1732 * ISO-8859-1 instead of US-ASCII(646). 1733 */ 1734 name = "ISO-8859-1"; 1735 } 1736 #elif U_PLATFORM_IS_DARWIN_BASED 1737 if (locale == NULL && *name == 0) { 1738 /* 1739 No locale was specified, and an empty name was passed in. 1740 This usually indicates that nl_langinfo didn't return valid information. 1741 Mac OS X uses UTF-8 by default (especially the locale data and console). 1742 */ 1743 name = "UTF-8"; 1744 } 1745 else if (uprv_strcmp(name, "CP949") == 0) { 1746 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1747 name = "EUC-KR"; 1748 } 1749 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1750 /* 1751 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1752 */ 1753 name = "UTF-8"; 1754 } 1755 #elif U_PLATFORM == U_PF_BSD 1756 if (uprv_strcmp(name, "CP949") == 0) { 1757 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1758 name = "EUC-KR"; 1759 } 1760 #elif U_PLATFORM == U_PF_HPUX 1761 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1762 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1763 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1764 name = "hkbig5"; 1765 } 1766 else if (uprv_strcmp(name, "eucJP") == 0) { 1767 /* 1768 ibm-1350 is the best match, but unavailable. 1769 ibm-954 is mostly a superset of ibm-1350. 1770 ibm-33722 is the default for eucJP (similar to Windows). 1771 */ 1772 name = "eucjis"; 1773 } 1774 #elif U_PLATFORM == U_PF_LINUX 1775 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1776 /* Linux underspecifies the "EUC" name. */ 1777 if (uprv_strcmp(locale, "korean") == 0) { 1778 name = "EUC-KR"; 1779 } 1780 else if (uprv_strcmp(locale, "japanese") == 0) { 1781 /* See comment below about eucJP */ 1782 name = "eucjis"; 1783 } 1784 } 1785 else if (uprv_strcmp(name, "eucjp") == 0) { 1786 /* 1787 ibm-1350 is the best match, but unavailable. 1788 ibm-954 is mostly a superset of ibm-1350. 1789 ibm-33722 is the default for eucJP (similar to Windows). 1790 */ 1791 name = "eucjis"; 1792 } 1793 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && 1794 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { 1795 /* 1796 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1797 */ 1798 name = "UTF-8"; 1799 } 1800 /* 1801 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1802 * it by falling back to 'US-ASCII' when NULL is returned from this 1803 * function. So, we don't have to worry about it here. 1804 */ 1805 #endif 1806 /* return NULL when "" is passed in */ 1807 if (*name == 0) { 1808 name = NULL; 1809 } 1810 return name; 1811 } 1812 1813 static const char* 1814 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1815 { 1816 char localeBuf[100]; 1817 const char *name = NULL; 1818 char *variant = NULL; 1819 1820 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1821 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1822 uprv_strncpy(localeBuf, localeName, localeCapacity); 1823 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1824 name = uprv_strncpy(buffer, name+1, buffCapacity); 1825 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1826 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { 1827 *variant = 0; 1828 } 1829 name = remapPlatformDependentCodepage(localeBuf, name); 1830 } 1831 return name; 1832 } 1833 #endif 1834 1835 static const char* 1836 int_getDefaultCodepage() 1837 { 1838 #if U_PLATFORM == U_PF_OS400 1839 uint32_t ccsid = 37; /* Default to ibm-37 */ 1840 static char codepage[64]; 1841 Qwc_JOBI0400_t jobinfo; 1842 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1843 1844 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1845 "* ", " ", &error); 1846 1847 if (error.Bytes_Available == 0) { 1848 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1849 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1850 } 1851 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1852 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1853 } 1854 /* else use the default */ 1855 } 1856 sprintf(codepage,"ibm-%d", ccsid); 1857 return codepage; 1858 1859 #elif U_PLATFORM == U_PF_OS390 1860 static char codepage[64]; 1861 1862 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1863 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1864 codepage[63] = 0; /* NULL terminate */ 1865 1866 return codepage; 1867 1868 #elif U_PLATFORM_USES_ONLY_WIN32_API 1869 static char codepage[64]; 1870 sprintf(codepage, "windows-%d", GetACP()); 1871 return codepage; 1872 1873 #elif U_POSIX_LOCALE 1874 static char codesetName[100]; 1875 const char *localeName = NULL; 1876 const char *name = NULL; 1877 1878 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1879 uprv_memset(codesetName, 0, sizeof(codesetName)); 1880 #if U_HAVE_NL_LANGINFO_CODESET 1881 /* When available, check nl_langinfo first because it usually gives more 1882 useful names. It depends on LC_CTYPE. 1883 nl_langinfo may use the same buffer as setlocale. */ 1884 { 1885 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1886 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED 1887 /* 1888 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1889 * instead of ASCII. 1890 */ 1891 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1892 codeset = remapPlatformDependentCodepage(localeName, codeset); 1893 } else 1894 #endif 1895 { 1896 codeset = remapPlatformDependentCodepage(NULL, codeset); 1897 } 1898 1899 if (codeset != NULL) { 1900 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1901 codesetName[sizeof(codesetName)-1] = 0; 1902 return codesetName; 1903 } 1904 } 1905 #endif 1906 1907 /* Use setlocale in a nice way, and then check some environment variables. 1908 Maybe the application used setlocale already. 1909 */ 1910 uprv_memset(codesetName, 0, sizeof(codesetName)); 1911 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1912 if (name) { 1913 /* if we can find the codeset name from setlocale, return that. */ 1914 return name; 1915 } 1916 1917 if (*codesetName == 0) 1918 { 1919 /* Everything failed. Return US ASCII (ISO 646). */ 1920 (void)uprv_strcpy(codesetName, "US-ASCII"); 1921 } 1922 return codesetName; 1923 #else 1924 return "US-ASCII"; 1925 #endif 1926 } 1927 1928 1929 U_CAPI const char* U_EXPORT2 1930 uprv_getDefaultCodepage() 1931 { 1932 static char const *name = NULL; 1933 umtx_lock(NULL); 1934 if (name == NULL) { 1935 name = int_getDefaultCodepage(); 1936 } 1937 umtx_unlock(NULL); 1938 return name; 1939 } 1940 #endif /* !U_CHARSET_IS_UTF8 */ 1941 1942 1943 /* end of platform-specific implementation -------------- */ 1944 1945 /* version handling --------------------------------------------------------- */ 1946 1947 U_CAPI void U_EXPORT2 1948 u_versionFromString(UVersionInfo versionArray, const char *versionString) { 1949 char *end; 1950 uint16_t part=0; 1951 1952 if(versionArray==NULL) { 1953 return; 1954 } 1955 1956 if(versionString!=NULL) { 1957 for(;;) { 1958 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 1959 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 1960 break; 1961 } 1962 versionString=end+1; 1963 } 1964 } 1965 1966 while(part<U_MAX_VERSION_LENGTH) { 1967 versionArray[part++]=0; 1968 } 1969 } 1970 1971 U_CAPI void U_EXPORT2 1972 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 1973 if(versionArray!=NULL && versionString!=NULL) { 1974 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 1975 int32_t len = u_strlen(versionString); 1976 if(len>U_MAX_VERSION_STRING_LENGTH) { 1977 len = U_MAX_VERSION_STRING_LENGTH; 1978 } 1979 u_UCharsToChars(versionString, versionChars, len); 1980 versionChars[len]=0; 1981 u_versionFromString(versionArray, versionChars); 1982 } 1983 } 1984 1985 U_CAPI void U_EXPORT2 1986 u_versionToString(const UVersionInfo versionArray, char *versionString) { 1987 uint16_t count, part; 1988 uint8_t field; 1989 1990 if(versionString==NULL) { 1991 return; 1992 } 1993 1994 if(versionArray==NULL) { 1995 versionString[0]=0; 1996 return; 1997 } 1998 1999 /* count how many fields need to be written */ 2000 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2001 } 2002 2003 if(count <= 1) { 2004 count = 2; 2005 } 2006 2007 /* write the first part */ 2008 /* write the decimal field value */ 2009 field=versionArray[0]; 2010 if(field>=100) { 2011 *versionString++=(char)('0'+field/100); 2012 field%=100; 2013 } 2014 if(field>=10) { 2015 *versionString++=(char)('0'+field/10); 2016 field%=10; 2017 } 2018 *versionString++=(char)('0'+field); 2019 2020 /* write the following parts */ 2021 for(part=1; part<count; ++part) { 2022 /* write a dot first */ 2023 *versionString++=U_VERSION_DELIMITER; 2024 2025 /* write the decimal field value */ 2026 field=versionArray[part]; 2027 if(field>=100) { 2028 *versionString++=(char)('0'+field/100); 2029 field%=100; 2030 } 2031 if(field>=10) { 2032 *versionString++=(char)('0'+field/10); 2033 field%=10; 2034 } 2035 *versionString++=(char)('0'+field); 2036 } 2037 2038 /* NUL-terminate */ 2039 *versionString=0; 2040 } 2041 2042 U_CAPI void U_EXPORT2 2043 u_getVersion(UVersionInfo versionArray) { 2044 (void)copyright; // Suppress unused variable warning from clang. 2045 u_versionFromString(versionArray, U_ICU_VERSION); 2046 } 2047 2048 /** 2049 * icucfg.h dependent code 2050 */ 2051 2052 #if U_ENABLE_DYLOAD 2053 2054 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API 2055 2056 #if HAVE_DLFCN_H 2057 2058 #ifdef __MVS__ 2059 #ifndef __SUSV3 2060 #define __SUSV3 1 2061 #endif 2062 #endif 2063 #include <dlfcn.h> 2064 #endif 2065 2066 U_INTERNAL void * U_EXPORT2 2067 uprv_dl_open(const char *libName, UErrorCode *status) { 2068 void *ret = NULL; 2069 if(U_FAILURE(*status)) return ret; 2070 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2071 if(ret==NULL) { 2072 #ifdef U_TRACE_DYLOAD 2073 printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); 2074 #endif 2075 *status = U_MISSING_RESOURCE_ERROR; 2076 } 2077 return ret; 2078 } 2079 2080 U_INTERNAL void U_EXPORT2 2081 uprv_dl_close(void *lib, UErrorCode *status) { 2082 if(U_FAILURE(*status)) return; 2083 dlclose(lib); 2084 } 2085 2086 U_INTERNAL UVoidFunction* U_EXPORT2 2087 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2088 union { 2089 UVoidFunction *fp; 2090 void *vp; 2091 } uret; 2092 uret.fp = NULL; 2093 if(U_FAILURE(*status)) return uret.fp; 2094 uret.vp = dlsym(lib, sym); 2095 if(uret.vp == NULL) { 2096 #ifdef U_TRACE_DYLOAD 2097 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); 2098 #endif 2099 *status = U_MISSING_RESOURCE_ERROR; 2100 } 2101 return uret.fp; 2102 } 2103 2104 #else 2105 2106 /* null (nonexistent) implementation. */ 2107 2108 U_INTERNAL void * U_EXPORT2 2109 uprv_dl_open(const char *libName, UErrorCode *status) { 2110 if(U_FAILURE(*status)) return NULL; 2111 *status = U_UNSUPPORTED_ERROR; 2112 return NULL; 2113 } 2114 2115 U_INTERNAL void U_EXPORT2 2116 uprv_dl_close(void *lib, UErrorCode *status) { 2117 if(U_FAILURE(*status)) return; 2118 *status = U_UNSUPPORTED_ERROR; 2119 return; 2120 } 2121 2122 2123 U_INTERNAL UVoidFunction* U_EXPORT2 2124 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2125 if(U_SUCCESS(*status)) { 2126 *status = U_UNSUPPORTED_ERROR; 2127 } 2128 return (UVoidFunction*)NULL; 2129 } 2130 2131 2132 2133 #endif 2134 2135 #elif U_PLATFORM_USES_ONLY_WIN32_API 2136 2137 U_INTERNAL void * U_EXPORT2 2138 uprv_dl_open(const char *libName, UErrorCode *status) { 2139 HMODULE lib = NULL; 2140 2141 if(U_FAILURE(*status)) return NULL; 2142 2143 lib = LoadLibraryA(libName); 2144 2145 if(lib==NULL) { 2146 *status = U_MISSING_RESOURCE_ERROR; 2147 } 2148 2149 return (void*)lib; 2150 } 2151 2152 U_INTERNAL void U_EXPORT2 2153 uprv_dl_close(void *lib, UErrorCode *status) { 2154 HMODULE handle = (HMODULE)lib; 2155 if(U_FAILURE(*status)) return; 2156 2157 FreeLibrary(handle); 2158 2159 return; 2160 } 2161 2162 2163 U_INTERNAL UVoidFunction* U_EXPORT2 2164 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2165 HMODULE handle = (HMODULE)lib; 2166 UVoidFunction* addr = NULL; 2167 2168 if(U_FAILURE(*status) || lib==NULL) return NULL; 2169 2170 addr = (UVoidFunction*)GetProcAddress(handle, sym); 2171 2172 if(addr==NULL) { 2173 DWORD lastError = GetLastError(); 2174 if(lastError == ERROR_PROC_NOT_FOUND) { 2175 *status = U_MISSING_RESOURCE_ERROR; 2176 } else { 2177 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2178 } 2179 } 2180 2181 return addr; 2182 } 2183 2184 2185 #else 2186 2187 /* No dynamic loading set. */ 2188 2189 U_INTERNAL void * U_EXPORT2 2190 uprv_dl_open(const char *libName, UErrorCode *status) { 2191 if(U_FAILURE(*status)) return NULL; 2192 *status = U_UNSUPPORTED_ERROR; 2193 return NULL; 2194 } 2195 2196 U_INTERNAL void U_EXPORT2 2197 uprv_dl_close(void *lib, UErrorCode *status) { 2198 if(U_FAILURE(*status)) return; 2199 *status = U_UNSUPPORTED_ERROR; 2200 return; 2201 } 2202 2203 2204 U_INTERNAL UVoidFunction* U_EXPORT2 2205 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2206 if(U_SUCCESS(*status)) { 2207 *status = U_UNSUPPORTED_ERROR; 2208 } 2209 return (UVoidFunction*)NULL; 2210 } 2211 2212 #endif /* U_ENABLE_DYLOAD */ 2213 2214 /* 2215 * Hey, Emacs, please set the following: 2216 * 2217 * Local Variables: 2218 * indent-tabs-mode: nil 2219 * End: 2220 * 2221 */ 2222