1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1997-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10 * 11 * Date Name Description 12 * 04/14/97 aliu Creation. 13 * 04/24/97 aliu Added getDefaultDataDirectory() and 14 * getDefaultLocaleID(). 15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods 16 * for assumed case. Non-UNIX platforms must be 17 * special-cased. Rewrote numeric methods dealing 18 * with NaN and Infinity to be platform independent 19 * over all IEEE 754 platforms. 20 * 05/13/97 aliu Restored sign of timezone 21 * (semantics are hours West of GMT) 22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23 * nextDouble.. 24 * 07/22/98 stephen Added remainder, max, min, trunc 25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26 * 08/24/98 stephen Added longBitsFromDouble 27 * 09/08/98 stephen Minor changes for Mac Port 28 * 03/02/99 stephen Removed openFile(). Added AS400 support. 29 * Fixed EBCDIC tables 30 * 04/15/99 stephen Converted to C. 31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32 * 08/04/99 jeffrey R. Added OS/2 changes 33 * 11/15/99 helena Integrated S/390 IEEE support. 34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36 * 01/03/08 Steven L. Fake Time Support 37 ****************************************************************************** 38 */ 39 40 // Defines _XOPEN_SOURCE for access to POSIX functions. 41 // Must be before any other #includes. 42 #include "uposixdefs.h" 43 44 /* include ICU headers */ 45 #include "unicode/utypes.h" 46 #include "unicode/putil.h" 47 #include "unicode/ustring.h" 48 #include "putilimp.h" 49 #include "uassert.h" 50 #include "umutex.h" 51 #include "cmemory.h" 52 #include "cstring.h" 53 #include "locmap.h" 54 #include "ucln_cmn.h" 55 56 /* Include standard headers. */ 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <math.h> 61 #include <locale.h> 62 #include <float.h> 63 64 #ifndef U_COMMON_IMPLEMENTATION 65 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu 66 #endif 67 68 69 /* include system headers */ 70 #if U_PLATFORM_USES_ONLY_WIN32_API 71 /* 72 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. 73 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) 74 * to use native APIs as much as possible? 75 */ 76 # define WIN32_LEAN_AND_MEAN 77 # define VC_EXTRALEAN 78 # define NOUSER 79 # define NOSERVICE 80 # define NOIME 81 # define NOMCX 82 # include <windows.h> 83 # include "wintz.h" 84 #elif U_PLATFORM == U_PF_OS400 85 # include <float.h> 86 # include <qusec.h> /* error code structure */ 87 # include <qusrjobi.h> 88 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 89 # include <mih/testptr.h> /* For uprv_maximumPtr */ 90 #elif U_PLATFORM == U_PF_CLASSIC_MACOS 91 # include <Files.h> 92 # include <IntlResources.h> 93 # include <Script.h> 94 # include <Folders.h> 95 # include <MacTypes.h> 96 # include <TextUtils.h> 97 # define ICU_NO_USER_DATA_OVERRIDE 1 98 #elif U_PLATFORM == U_PF_OS390 99 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 100 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD 101 # include <limits.h> 102 # include <unistd.h> 103 #elif U_PLATFORM == U_PF_QNX 104 # include <sys/neutrino.h> 105 #elif U_PLATFORM == U_PF_SOLARIS 106 # ifndef _XPG4_2 107 # define _XPG4_2 108 # endif 109 #endif 110 111 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) 112 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */ 113 #undef __STRICT_ANSI__ 114 #endif 115 116 /* 117 * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. 118 */ 119 #include <time.h> 120 121 #if !U_PLATFORM_USES_ONLY_WIN32_API 122 #include <sys/time.h> 123 #endif 124 125 /* 126 * Only include langinfo.h if we have a way to get the codeset. If we later 127 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 128 * 129 */ 130 131 #if U_HAVE_NL_LANGINFO_CODESET 132 #include <langinfo.h> 133 #endif 134 135 /** 136 * Simple things (presence of functions, etc) should just go in configure.in and be added to 137 * icucfg.h via autoheader. 138 */ 139 #if U_PLATFORM_IMPLEMENTS_POSIX 140 # if U_PLATFORM == U_PF_OS400 141 # define HAVE_DLFCN_H 0 142 # define HAVE_DLOPEN 0 143 # else 144 # ifndef HAVE_DLFCN_H 145 # define HAVE_DLFCN_H 1 146 # endif 147 # ifndef HAVE_DLOPEN 148 # define HAVE_DLOPEN 1 149 # endif 150 # endif 151 # ifndef HAVE_GETTIMEOFDAY 152 # define HAVE_GETTIMEOFDAY 1 153 # endif 154 #else 155 # define HAVE_DLFCN_H 0 156 # define HAVE_DLOPEN 0 157 # define HAVE_GETTIMEOFDAY 0 158 #endif 159 160 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 161 162 /* Define the extension for data files, again... */ 163 #define DATA_TYPE "dat" 164 165 /* Leave this copyright notice here! */ 166 static const char copyright[] = U_COPYRIGHT_STRING; 167 168 /* floating point implementations ------------------------------------------- */ 169 170 /* We return QNAN rather than SNAN*/ 171 #define SIGN 0x80000000U 172 173 /* Make it easy to define certain types of constants */ 174 typedef union { 175 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ 176 double d64; 177 } BitPatternConversion; 178 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 179 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 180 181 /*--------------------------------------------------------------------------- 182 Platform utilities 183 Our general strategy is to assume we're on a POSIX platform. Platforms which 184 are non-POSIX must declare themselves so. The default POSIX implementation 185 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 186 functions). 187 ---------------------------------------------------------------------------*/ 188 189 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400 190 # undef U_POSIX_LOCALE 191 #else 192 # define U_POSIX_LOCALE 1 193 #endif 194 195 /* 196 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 197 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 198 */ 199 #if !IEEE_754 200 static char* 201 u_topNBytesOfDouble(double* d, int n) 202 { 203 #if U_IS_BIG_ENDIAN 204 return (char*)d; 205 #else 206 return (char*)(d + 1) - n; 207 #endif 208 } 209 210 static char* 211 u_bottomNBytesOfDouble(double* d, int n) 212 { 213 #if U_IS_BIG_ENDIAN 214 return (char*)(d + 1) - n; 215 #else 216 return (char*)d; 217 #endif 218 } 219 #endif /* !IEEE_754 */ 220 221 #if IEEE_754 222 static UBool 223 u_signBit(double d) { 224 uint8_t hiByte; 225 #if U_IS_BIG_ENDIAN 226 hiByte = *(uint8_t *)&d; 227 #else 228 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 229 #endif 230 return (hiByte & 0x80) != 0; 231 } 232 #endif 233 234 235 236 #if defined (U_DEBUG_FAKETIME) 237 /* Override the clock to test things without having to move the system clock. 238 * Assumes POSIX gettimeofday() will function 239 */ 240 UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 241 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 242 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 243 static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; 244 245 static UDate getUTCtime_real() { 246 struct timeval posixTime; 247 gettimeofday(&posixTime, NULL); 248 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 249 } 250 251 static UDate getUTCtime_fake() { 252 umtx_lock(&fakeClockMutex); 253 if(!fakeClock_set) { 254 UDate real = getUTCtime_real(); 255 const char *fake_start = getenv("U_FAKETIME_START"); 256 if((fake_start!=NULL) && (fake_start[0]!=0)) { 257 sscanf(fake_start,"%lf",&fakeClock_t0); 258 fakeClock_dt = fakeClock_t0 - real; 259 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 260 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 261 fakeClock_t0, fake_start, fakeClock_dt, real); 262 } else { 263 fakeClock_dt = 0; 264 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 265 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 266 } 267 fakeClock_set = TRUE; 268 } 269 umtx_unlock(&fakeClockMutex); 270 271 return getUTCtime_real() + fakeClock_dt; 272 } 273 #endif 274 275 #if U_PLATFORM_USES_ONLY_WIN32_API 276 typedef union { 277 int64_t int64; 278 FILETIME fileTime; 279 } FileTimeConversion; /* This is like a ULARGE_INTEGER */ 280 281 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 282 #define EPOCH_BIAS INT64_C(116444736000000000) 283 #define HECTONANOSECOND_PER_MILLISECOND 10000 284 285 #endif 286 287 /*--------------------------------------------------------------------------- 288 Universal Implementations 289 These are designed to work on all platforms. Try these, and if they 290 don't work on your platform, then special case your platform with new 291 implementations. 292 ---------------------------------------------------------------------------*/ 293 294 U_CAPI UDate U_EXPORT2 295 uprv_getUTCtime() 296 { 297 #if defined(U_DEBUG_FAKETIME) 298 return getUTCtime_fake(); /* Hook for overriding the clock */ 299 #else 300 return uprv_getRawUTCtime(); 301 #endif 302 } 303 304 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 305 U_CAPI UDate U_EXPORT2 306 uprv_getRawUTCtime() 307 { 308 #if U_PLATFORM == U_PF_CLASSIC_MACOS 309 time_t t, t1, t2; 310 struct tm tmrec; 311 312 uprv_memset( &tmrec, 0, sizeof(tmrec) ); 313 tmrec.tm_year = 70; 314 tmrec.tm_mon = 0; 315 tmrec.tm_mday = 1; 316 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ 317 318 time(&t); 319 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 320 t2 = mktime(&tmrec); /* seconds of current GMT*/ 321 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ 322 #elif U_PLATFORM_USES_ONLY_WIN32_API 323 324 FileTimeConversion winTime; 325 GetSystemTimeAsFileTime(&winTime.fileTime); 326 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 327 #else 328 329 #if HAVE_GETTIMEOFDAY 330 struct timeval posixTime; 331 gettimeofday(&posixTime, NULL); 332 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 333 #else 334 time_t epochtime; 335 time(&epochtime); 336 return (UDate)epochtime * U_MILLIS_PER_SECOND; 337 #endif 338 339 #endif 340 } 341 342 /*----------------------------------------------------------------------------- 343 IEEE 754 344 These methods detect and return NaN and infinity values for doubles 345 conforming to IEEE 754. Platforms which support this standard include X86, 346 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 347 If this doesn't work on your platform, you have non-IEEE floating-point, and 348 will need to code your own versions. A naive implementation is to return 0.0 349 for getNaN and getInfinity, and false for isNaN and isInfinite. 350 ---------------------------------------------------------------------------*/ 351 352 U_CAPI UBool U_EXPORT2 353 uprv_isNaN(double number) 354 { 355 #if IEEE_754 356 BitPatternConversion convertedNumber; 357 convertedNumber.d64 = number; 358 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 359 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 360 361 #elif U_PLATFORM == U_PF_OS390 362 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 363 sizeof(uint32_t)); 364 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 365 sizeof(uint32_t)); 366 367 return ((highBits & 0x7F080000L) == 0x7F080000L) && 368 (lowBits == 0x00000000L); 369 370 #else 371 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 372 /* you'll need to replace this default implementation with what's correct*/ 373 /* for your platform.*/ 374 return number != number; 375 #endif 376 } 377 378 U_CAPI UBool U_EXPORT2 379 uprv_isInfinite(double number) 380 { 381 #if IEEE_754 382 BitPatternConversion convertedNumber; 383 convertedNumber.d64 = number; 384 /* Infinity is exactly 0x7FF0000000000000U. */ 385 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 386 #elif U_PLATFORM == U_PF_OS390 387 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 388 sizeof(uint32_t)); 389 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 390 sizeof(uint32_t)); 391 392 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 393 394 #else 395 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 396 /* value, you'll need to replace this default implementation with what's*/ 397 /* correct for your platform.*/ 398 return number == (2.0 * number); 399 #endif 400 } 401 402 U_CAPI UBool U_EXPORT2 403 uprv_isPositiveInfinity(double number) 404 { 405 #if IEEE_754 || U_PLATFORM == U_PF_OS390 406 return (UBool)(number > 0 && uprv_isInfinite(number)); 407 #else 408 return uprv_isInfinite(number); 409 #endif 410 } 411 412 U_CAPI UBool U_EXPORT2 413 uprv_isNegativeInfinity(double number) 414 { 415 #if IEEE_754 || U_PLATFORM == U_PF_OS390 416 return (UBool)(number < 0 && uprv_isInfinite(number)); 417 418 #else 419 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 420 sizeof(uint32_t)); 421 return((highBits & SIGN) && uprv_isInfinite(number)); 422 423 #endif 424 } 425 426 U_CAPI double U_EXPORT2 427 uprv_getNaN() 428 { 429 #if IEEE_754 || U_PLATFORM == U_PF_OS390 430 return gNan.d64; 431 #else 432 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 433 /* you'll need to replace this default implementation with what's correct*/ 434 /* for your platform.*/ 435 return 0.0; 436 #endif 437 } 438 439 U_CAPI double U_EXPORT2 440 uprv_getInfinity() 441 { 442 #if IEEE_754 || U_PLATFORM == U_PF_OS390 443 return gInf.d64; 444 #else 445 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 446 /* value, you'll need to replace this default implementation with what's*/ 447 /* correct for your platform.*/ 448 return 0.0; 449 #endif 450 } 451 452 U_CAPI double U_EXPORT2 453 uprv_floor(double x) 454 { 455 return floor(x); 456 } 457 458 U_CAPI double U_EXPORT2 459 uprv_ceil(double x) 460 { 461 return ceil(x); 462 } 463 464 U_CAPI double U_EXPORT2 465 uprv_round(double x) 466 { 467 return uprv_floor(x + 0.5); 468 } 469 470 U_CAPI double U_EXPORT2 471 uprv_fabs(double x) 472 { 473 return fabs(x); 474 } 475 476 U_CAPI double U_EXPORT2 477 uprv_modf(double x, double* y) 478 { 479 return modf(x, y); 480 } 481 482 U_CAPI double U_EXPORT2 483 uprv_fmod(double x, double y) 484 { 485 return fmod(x, y); 486 } 487 488 U_CAPI double U_EXPORT2 489 uprv_pow(double x, double y) 490 { 491 /* This is declared as "double pow(double x, double y)" */ 492 return pow(x, y); 493 } 494 495 U_CAPI double U_EXPORT2 496 uprv_pow10(int32_t x) 497 { 498 return pow(10.0, (double)x); 499 } 500 501 U_CAPI double U_EXPORT2 502 uprv_fmax(double x, double y) 503 { 504 #if IEEE_754 505 /* first handle NaN*/ 506 if(uprv_isNaN(x) || uprv_isNaN(y)) 507 return uprv_getNaN(); 508 509 /* check for -0 and 0*/ 510 if(x == 0.0 && y == 0.0 && u_signBit(x)) 511 return y; 512 513 #endif 514 515 /* this should work for all flt point w/o NaN and Inf special cases */ 516 return (x > y ? x : y); 517 } 518 519 U_CAPI double U_EXPORT2 520 uprv_fmin(double x, double y) 521 { 522 #if IEEE_754 523 /* first handle NaN*/ 524 if(uprv_isNaN(x) || uprv_isNaN(y)) 525 return uprv_getNaN(); 526 527 /* check for -0 and 0*/ 528 if(x == 0.0 && y == 0.0 && u_signBit(y)) 529 return y; 530 531 #endif 532 533 /* this should work for all flt point w/o NaN and Inf special cases */ 534 return (x > y ? y : x); 535 } 536 537 /** 538 * Truncates the given double. 539 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 540 * This is different than calling floor() or ceil(): 541 * floor(3.3) = 3, floor(-3.3) = -4 542 * ceil(3.3) = 4, ceil(-3.3) = -3 543 */ 544 U_CAPI double U_EXPORT2 545 uprv_trunc(double d) 546 { 547 #if IEEE_754 548 /* handle error cases*/ 549 if(uprv_isNaN(d)) 550 return uprv_getNaN(); 551 if(uprv_isInfinite(d)) 552 return uprv_getInfinity(); 553 554 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ 555 return ceil(d); 556 else 557 return floor(d); 558 559 #else 560 return d >= 0 ? floor(d) : ceil(d); 561 562 #endif 563 } 564 565 /** 566 * Return the largest positive number that can be represented by an integer 567 * type of arbitrary bit length. 568 */ 569 U_CAPI double U_EXPORT2 570 uprv_maxMantissa(void) 571 { 572 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 573 } 574 575 U_CAPI double U_EXPORT2 576 uprv_log(double d) 577 { 578 return log(d); 579 } 580 581 U_CAPI void * U_EXPORT2 582 uprv_maximumPtr(void * base) 583 { 584 #if U_PLATFORM == U_PF_OS400 585 /* 586 * With the provided function we should never be out of range of a given segment 587 * (a traditional/typical segment that is). Our segments have 5 bytes for the 588 * id and 3 bytes for the offset. The key is that the casting takes care of 589 * only retrieving the offset portion minus x1000. Hence, the smallest offset 590 * seen in a program is x001000 and when casted to an int would be 0. 591 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 592 * 593 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 594 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 595 * This function determines the activation based on the pointer that is passed in and 596 * calculates the appropriate maximum available size for 597 * each pointer type (TERASPACE and non-TERASPACE) 598 * 599 * Unlike other operating systems, the pointer model isn't determined at 600 * compile time on i5/OS. 601 */ 602 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 603 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 604 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 605 } 606 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 607 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 608 609 #else 610 return U_MAX_PTR(base); 611 #endif 612 } 613 614 /*--------------------------------------------------------------------------- 615 Platform-specific Implementations 616 Try these, and if they don't work on your platform, then special case your 617 platform with new implementations. 618 ---------------------------------------------------------------------------*/ 619 620 /* Generic time zone layer -------------------------------------------------- */ 621 622 /* Time zone utilities */ 623 U_CAPI void U_EXPORT2 624 uprv_tzset() 625 { 626 #if defined(U_TZSET) 627 U_TZSET(); 628 #else 629 /* no initialization*/ 630 #endif 631 } 632 633 U_CAPI int32_t U_EXPORT2 634 uprv_timezone() 635 { 636 #ifdef U_TIMEZONE 637 return U_TIMEZONE; 638 #else 639 time_t t, t1, t2; 640 struct tm tmrec; 641 UBool dst_checked; 642 int32_t tdiff = 0; 643 644 time(&t); 645 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 646 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 647 t1 = mktime(&tmrec); /* local time in seconds*/ 648 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 649 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 650 tdiff = t2 - t1; 651 /* imitate NT behaviour, which returns same timezone offset to GMT for 652 winter and summer. 653 This does not work on all platforms. For instance, on glibc on Linux 654 and on Mac OS 10.5, tdiff calculated above remains the same 655 regardless of whether DST is in effect or not. However, U_TIMEZONE 656 is defined on those platforms and this code is not reached so that 657 we can leave this alone. If there's a platform behaving 658 like glibc that uses this code, we need to add platform-dependent 659 preprocessor here. */ 660 if (dst_checked) 661 tdiff += 3600; 662 return tdiff; 663 #endif 664 } 665 666 /* Note that U_TZNAME does *not* have to be tzname, but if it is, 667 some platforms need to have it declared here. */ 668 669 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API)) 670 /* RS6000 and others reject char **tzname. */ 671 extern U_IMPORT char *U_TZNAME[]; 672 #endif 673 674 #if !UCONFIG_NO_FILE_IO && (U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD) 675 /* These platforms are likely to use Olson timezone IDs. */ 676 #define CHECK_LOCALTIME_LINK 1 677 #if U_PLATFORM_IS_DARWIN_BASED 678 #include <tzfile.h> 679 #define TZZONEINFO (TZDIR "/") 680 #else 681 #define TZDEFAULT "/etc/localtime" 682 #define TZZONEINFO "/usr/share/zoneinfo/" 683 #endif 684 #if U_HAVE_DIRENT_H 685 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 686 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 687 symlinked to /etc/localtime, which makes searchForTZFile return 688 'localtime' when it's the first match. */ 689 #define TZFILE_SKIP2 "localtime" 690 #define SEARCH_TZFILE 691 #include <dirent.h> /* Needed to search through system timezone files */ 692 #endif 693 static char gTimeZoneBuffer[PATH_MAX]; 694 static char *gTimeZoneBufferPtr = NULL; 695 #endif 696 697 #if !U_PLATFORM_USES_ONLY_WIN32_API 698 #define isNonDigit(ch) (ch < '0' || '9' < ch) 699 static UBool isValidOlsonID(const char *id) { 700 int32_t idx = 0; 701 702 /* Determine if this is something like Iceland (Olson ID) 703 or AST4ADT (non-Olson ID) */ 704 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 705 idx++; 706 } 707 708 /* If we went through the whole string, then it might be okay. 709 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 710 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 711 The rest of the time it could be an Olson ID. George */ 712 return (UBool)(id[idx] == 0 713 || uprv_strcmp(id, "PST8PDT") == 0 714 || uprv_strcmp(id, "MST7MDT") == 0 715 || uprv_strcmp(id, "CST6CDT") == 0 716 || uprv_strcmp(id, "EST5EDT") == 0); 717 } 718 719 /* On some Unix-like OS, 'posix' subdirectory in 720 /usr/share/zoneinfo replicates the top-level contents. 'right' 721 subdirectory has the same set of files, but individual files 722 are different from those in the top-level directory or 'posix' 723 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 724 has files for UTC. 725 When the first match for /etc/localtime is in either of them 726 (usually in posix because 'right' has different file contents), 727 or TZ environment variable points to one of them, createTimeZone 728 fails because, say, 'posix/America/New_York' is not an Olson 729 timezone id ('America/New_York' is). So, we have to skip 730 'posix/' and 'right/' at the beginning. */ 731 static void skipZoneIDPrefix(const char** id) { 732 if (uprv_strncmp(*id, "posix/", 6) == 0 733 || uprv_strncmp(*id, "right/", 6) == 0) 734 { 735 *id += 6; 736 } 737 } 738 #endif 739 740 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API 741 742 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 743 typedef struct OffsetZoneMapping { 744 int32_t offsetSeconds; 745 int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ 746 const char *stdID; 747 const char *dstID; 748 const char *olsonID; 749 } OffsetZoneMapping; 750 751 enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; 752 753 /* 754 This list tries to disambiguate a set of abbreviated timezone IDs and offsets 755 and maps it to an Olson ID. 756 Before adding anything to this list, take a look at 757 icu/source/tools/tzcode/tz.alias 758 Sometimes no daylight savings (0) is important to define due to aliases. 759 This list can be tested with icu/source/test/compat/tzone.pl 760 More values could be added to daylightType to increase precision. 761 */ 762 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 763 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 764 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 765 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 766 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 767 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 768 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 769 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 770 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 771 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 772 {-34200, 2, "CST", "CST", "Australia/South"}, 773 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 774 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 775 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 776 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 777 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 778 {-28800, 2, "WST", "WST", "Australia/West"}, 779 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 780 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 781 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 782 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 783 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 784 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 785 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 786 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 787 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 788 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 789 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 790 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 791 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 792 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 793 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 794 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 795 {0, 1, "GMT", "IST", "Europe/Dublin"}, 796 {0, 1, "GMT", "BST", "Europe/London"}, 797 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 798 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 799 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 800 {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, 801 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 802 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 803 {10800, 1, "WGT", "WGST", "America/Godthab"}, 804 {10800, 2, "BRT", "BRST", "Brazil/East"}, 805 {12600, 1, "NST", "NDT", "America/St_Johns"}, 806 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 807 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 808 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 809 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 810 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 811 {18000, 1, "CST", "CDT", "America/Havana"}, 812 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 813 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 814 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 815 {21600, 0, "CST", "CDT", "America/Guatemala"}, 816 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 817 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 818 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 819 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 820 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 821 {36000, 1, "HAST", "HADT", "US/Aleutian"} 822 }; 823 824 /*#define DEBUG_TZNAME*/ 825 826 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 827 { 828 int32_t idx; 829 #ifdef DEBUG_TZNAME 830 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 831 #endif 832 for (idx = 0; idx < LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) 833 { 834 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 835 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 836 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 837 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 838 { 839 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 840 } 841 } 842 return NULL; 843 } 844 #endif 845 846 #ifdef SEARCH_TZFILE 847 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 848 #define MAX_READ_SIZE 512 849 850 typedef struct DefaultTZInfo { 851 char* defaultTZBuffer; 852 int64_t defaultTZFileSize; 853 FILE* defaultTZFilePtr; 854 UBool defaultTZstatus; 855 int32_t defaultTZPosition; 856 } DefaultTZInfo; 857 858 /* 859 * This method compares the two files given to see if they are a match. 860 * It is currently use to compare two TZ files. 861 */ 862 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 863 FILE* file; 864 int64_t sizeFile; 865 int64_t sizeFileLeft; 866 int32_t sizeFileRead; 867 int32_t sizeFileToRead; 868 char bufferFile[MAX_READ_SIZE]; 869 UBool result = TRUE; 870 871 if (tzInfo->defaultTZFilePtr == NULL) { 872 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 873 } 874 file = fopen(TZFileName, "r"); 875 876 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 877 878 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 879 /* First check that the file size are equal. */ 880 if (tzInfo->defaultTZFileSize == 0) { 881 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 882 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 883 } 884 fseek(file, 0, SEEK_END); 885 sizeFile = ftell(file); 886 sizeFileLeft = sizeFile; 887 888 if (sizeFile != tzInfo->defaultTZFileSize) { 889 result = FALSE; 890 } else { 891 /* Store the data from the files in seperate buffers and 892 * compare each byte to determine equality. 893 */ 894 if (tzInfo->defaultTZBuffer == NULL) { 895 rewind(tzInfo->defaultTZFilePtr); 896 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 897 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 898 } 899 rewind(file); 900 while(sizeFileLeft > 0) { 901 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 902 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; 903 904 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 905 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 906 result = FALSE; 907 break; 908 } 909 sizeFileLeft -= sizeFileRead; 910 tzInfo->defaultTZPosition += sizeFileRead; 911 } 912 } 913 } else { 914 result = FALSE; 915 } 916 917 if (file != NULL) { 918 fclose(file); 919 } 920 921 return result; 922 } 923 /* 924 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 925 */ 926 /* dirent also lists two entries: "." and ".." that we can safely ignore. */ 927 #define SKIP1 "." 928 #define SKIP2 ".." 929 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 930 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 931 char curpath[MAX_PATH_SIZE]; 932 DIR* dirp = opendir(path); 933 DIR* subDirp = NULL; 934 struct dirent* dirEntry = NULL; 935 936 char* result = NULL; 937 if (dirp == NULL) { 938 return result; 939 } 940 941 /* Save the current path */ 942 uprv_memset(curpath, 0, MAX_PATH_SIZE); 943 uprv_strcpy(curpath, path); 944 945 /* Check each entry in the directory. */ 946 while((dirEntry = readdir(dirp)) != NULL) { 947 const char* dirName = dirEntry->d_name; 948 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 949 /* Create a newpath with the new entry to test each entry in the directory. */ 950 char newpath[MAX_PATH_SIZE]; 951 uprv_strcpy(newpath, curpath); 952 uprv_strcat(newpath, dirName); 953 954 if ((subDirp = opendir(newpath)) != NULL) { 955 /* If this new path is a directory, make a recursive call with the newpath. */ 956 closedir(subDirp); 957 uprv_strcat(newpath, "/"); 958 result = searchForTZFile(newpath, tzInfo); 959 /* 960 Have to get out here. Otherwise, we'd keep looking 961 and return the first match in the top-level directory 962 if there's a match in the top-level. If not, this function 963 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 964 It worked without this in most cases because we have a fallback of calling 965 localtime_r to figure out the default timezone. 966 */ 967 if (result != NULL) 968 break; 969 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 970 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 971 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1; 972 skipZoneIDPrefix(&zoneid); 973 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid); 974 result = SEARCH_TZFILE_RESULT; 975 /* Get out after the first one found. */ 976 break; 977 } 978 } 979 } 980 } 981 closedir(dirp); 982 return result; 983 } 984 #endif 985 U_CAPI const char* U_EXPORT2 986 uprv_tzname(int n) 987 { 988 const char *tzid = NULL; 989 #if U_PLATFORM_USES_ONLY_WIN32_API 990 tzid = uprv_detectWindowsTimeZone(); 991 992 if (tzid != NULL) { 993 return tzid; 994 } 995 #else 996 997 /*#if U_PLATFORM_IS_DARWIN_BASED 998 int ret; 999 1000 tzid = getenv("TZFILE"); 1001 if (tzid != NULL) { 1002 return tzid; 1003 } 1004 #endif*/ 1005 1006 /* This code can be temporarily disabled to test tzname resolution later on. */ 1007 #ifndef DEBUG_TZNAME 1008 tzid = getenv("TZ"); 1009 if (tzid != NULL && isValidOlsonID(tzid)) 1010 { 1011 /* This might be a good Olson ID. */ 1012 skipZoneIDPrefix(&tzid); 1013 return tzid; 1014 } 1015 /* else U_TZNAME will give a better result. */ 1016 #endif 1017 1018 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) 1019 /* Caller must handle threading issues */ 1020 if (gTimeZoneBufferPtr == NULL) { 1021 /* 1022 This is a trick to look at the name of the link to get the Olson ID 1023 because the tzfile contents is underspecified. 1024 This isn't guaranteed to work because it may not be a symlink. 1025 */ 1026 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 1027 if (0 < ret) { 1028 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1029 gTimeZoneBuffer[ret] = 0; 1030 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1031 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1032 { 1033 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1034 } 1035 } else { 1036 #if defined(SEARCH_TZFILE) 1037 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1038 if (tzInfo != NULL) { 1039 tzInfo->defaultTZBuffer = NULL; 1040 tzInfo->defaultTZFileSize = 0; 1041 tzInfo->defaultTZFilePtr = NULL; 1042 tzInfo->defaultTZstatus = FALSE; 1043 tzInfo->defaultTZPosition = 0; 1044 1045 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1046 1047 /* Free previously allocated memory */ 1048 if (tzInfo->defaultTZBuffer != NULL) { 1049 uprv_free(tzInfo->defaultTZBuffer); 1050 } 1051 if (tzInfo->defaultTZFilePtr != NULL) { 1052 fclose(tzInfo->defaultTZFilePtr); 1053 } 1054 uprv_free(tzInfo); 1055 } 1056 1057 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1058 return gTimeZoneBufferPtr; 1059 } 1060 #endif 1061 } 1062 } 1063 else { 1064 return gTimeZoneBufferPtr; 1065 } 1066 #endif 1067 #endif 1068 1069 #ifdef U_TZNAME 1070 #if U_PLATFORM_USES_ONLY_WIN32_API 1071 /* The return value is free'd in timezone.cpp on Windows because 1072 * the other code path returns a pointer to a heap location. */ 1073 return uprv_strdup(U_TZNAME[n]); 1074 #else 1075 /* 1076 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1077 So we remap the abbreviation to an olson ID. 1078 1079 Since Windows exposes a little more timezone information, 1080 we normally don't use this code on Windows because 1081 uprv_detectWindowsTimeZone should have already given the correct answer. 1082 */ 1083 { 1084 struct tm juneSol, decemberSol; 1085 int daylightType; 1086 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1087 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1088 1089 /* This probing will tell us when daylight savings occurs. */ 1090 localtime_r(&juneSolstice, &juneSol); 1091 localtime_r(&decemberSolstice, &decemberSol); 1092 if(decemberSol.tm_isdst > 0) { 1093 daylightType = U_DAYLIGHT_DECEMBER; 1094 } else if(juneSol.tm_isdst > 0) { 1095 daylightType = U_DAYLIGHT_JUNE; 1096 } else { 1097 daylightType = U_DAYLIGHT_NONE; 1098 } 1099 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1100 if (tzid != NULL) { 1101 return tzid; 1102 } 1103 } 1104 return U_TZNAME[n]; 1105 #endif 1106 #else 1107 return ""; 1108 #endif 1109 } 1110 1111 /* Get and set the ICU data directory --------------------------------------- */ 1112 1113 static char *gDataDirectory = NULL; 1114 #if U_POSIX_LOCALE 1115 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1116 #endif 1117 1118 static UBool U_CALLCONV putil_cleanup(void) 1119 { 1120 if (gDataDirectory && *gDataDirectory) { 1121 uprv_free(gDataDirectory); 1122 } 1123 gDataDirectory = NULL; 1124 #if U_POSIX_LOCALE 1125 if (gCorrectedPOSIXLocale) { 1126 uprv_free(gCorrectedPOSIXLocale); 1127 gCorrectedPOSIXLocale = NULL; 1128 } 1129 #endif 1130 return TRUE; 1131 } 1132 1133 /* 1134 * Set the data directory. 1135 * Make a copy of the passed string, and set the global data dir to point to it. 1136 * TODO: see bug #2849, regarding thread safety. 1137 */ 1138 U_CAPI void U_EXPORT2 1139 u_setDataDirectory(const char *directory) { 1140 char *newDataDir; 1141 int32_t length; 1142 1143 if(directory==NULL || *directory==0) { 1144 /* A small optimization to prevent the malloc and copy when the 1145 shared library is used, and this is a way to make sure that NULL 1146 is never returned. 1147 */ 1148 newDataDir = (char *)""; 1149 } 1150 else { 1151 length=(int32_t)uprv_strlen(directory); 1152 newDataDir = (char *)uprv_malloc(length + 2); 1153 /* Exit out if newDataDir could not be created. */ 1154 if (newDataDir == NULL) { 1155 return; 1156 } 1157 uprv_strcpy(newDataDir, directory); 1158 1159 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1160 { 1161 char *p; 1162 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1163 *p = U_FILE_SEP_CHAR; 1164 } 1165 } 1166 #endif 1167 } 1168 1169 umtx_lock(NULL); 1170 if (gDataDirectory && *gDataDirectory) { 1171 uprv_free(gDataDirectory); 1172 } 1173 gDataDirectory = newDataDir; 1174 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1175 umtx_unlock(NULL); 1176 } 1177 1178 U_CAPI UBool U_EXPORT2 1179 uprv_pathIsAbsolute(const char *path) 1180 { 1181 if(!path || !*path) { 1182 return FALSE; 1183 } 1184 1185 if(*path == U_FILE_SEP_CHAR) { 1186 return TRUE; 1187 } 1188 1189 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1190 if(*path == U_FILE_ALT_SEP_CHAR) { 1191 return TRUE; 1192 } 1193 #endif 1194 1195 #if U_PLATFORM_USES_ONLY_WIN32_API 1196 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1197 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1198 path[1] == ':' ) { 1199 return TRUE; 1200 } 1201 #endif 1202 1203 return FALSE; 1204 } 1205 1206 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1207 until some client wrapper makefiles are updated */ 1208 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR 1209 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1210 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1211 # endif 1212 #endif 1213 1214 U_CAPI const char * U_EXPORT2 1215 u_getDataDirectory(void) { 1216 const char *path = NULL; 1217 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1218 char datadir_path_buffer[PATH_MAX]; 1219 #endif 1220 1221 /* if we have the directory, then return it immediately */ 1222 UMTX_CHECK(NULL, gDataDirectory, path); 1223 1224 if(path) { 1225 return path; 1226 } 1227 1228 /* 1229 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1230 override ICU's data with the ICU_DATA environment variable. This prevents 1231 problems where multiple custom copies of ICU's specific version of data 1232 are installed on a system. Either the application must define the data 1233 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1234 ICU, set the data with udata_setCommonData or trust that all of the 1235 required data is contained in ICU's data library that contains 1236 the entry point defined by U_ICUDATA_ENTRY_POINT. 1237 1238 There may also be some platforms where environment variables 1239 are not allowed. 1240 */ 1241 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1242 /* First try to get the environment variable */ 1243 path=getenv("ICU_DATA"); 1244 # endif 1245 1246 /* ICU_DATA_DIR may be set as a compile option. 1247 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1248 * and is used only when data is built in archive mode eliminating the need 1249 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1250 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1251 * set their own path. 1252 */ 1253 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1254 if(path==NULL || *path==0) { 1255 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1256 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1257 # endif 1258 # ifdef ICU_DATA_DIR 1259 path=ICU_DATA_DIR; 1260 # else 1261 path=U_ICU_DATA_DEFAULT_DIR; 1262 # endif 1263 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1264 if (prefix != NULL) { 1265 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1266 path=datadir_path_buffer; 1267 } 1268 # endif 1269 } 1270 #endif 1271 1272 if(path==NULL) { 1273 /* It looks really bad, set it to something. */ 1274 path = ""; 1275 } 1276 1277 u_setDataDirectory(path); 1278 return gDataDirectory; 1279 } 1280 1281 1282 1283 1284 1285 /* Macintosh-specific locale information ------------------------------------ */ 1286 #if U_PLATFORM == U_PF_CLASSIC_MACOS 1287 1288 typedef struct { 1289 int32_t script; 1290 int32_t region; 1291 int32_t lang; 1292 int32_t date_region; 1293 const char* posixID; 1294 } mac_lc_rec; 1295 1296 /* Todo: This will be updated with a newer version from www.unicode.org web 1297 page when it's available.*/ 1298 #define MAC_LC_MAGIC_NUMBER -5 1299 #define MAC_LC_INIT_NUMBER -9 1300 1301 static const mac_lc_rec mac_lc_recs[] = { 1302 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", 1303 /* United States*/ 1304 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", 1305 /* France*/ 1306 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", 1307 /* Great Britain*/ 1308 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", 1309 /* Germany*/ 1310 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", 1311 /* Italy*/ 1312 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", 1313 /* Metherlands*/ 1314 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", 1315 /* French for Belgium or Lxembourg*/ 1316 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", 1317 /* Sweden*/ 1318 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", 1319 /* Denmark*/ 1320 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", 1321 /* Portugal*/ 1322 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", 1323 /* French Canada*/ 1324 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", 1325 /* Israel*/ 1326 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP", 1327 /* Japan*/ 1328 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU", 1329 /* Australia*/ 1330 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE", 1331 /* the Arabic world (?)*/ 1332 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI", 1333 /* Finland*/ 1334 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH", 1335 /* French for Switzerland*/ 1336 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH", 1337 /* German for Switzerland*/ 1338 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR", 1339 /* Greece*/ 1340 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS", 1341 /* Iceland ===*/ 1342 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/ 1343 /* Malta ===*/ 1344 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/ 1345 /* Cyprus ===*/ 1346 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR", 1347 /* Turkey ===*/ 1348 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU", 1349 /* Croatian system for Yugoslavia*/ 1350 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/ 1351 /* Hindi system for India*/ 1352 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/ 1353 /* Pakistan*/ 1354 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT", 1355 /* Lithuania*/ 1356 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL", 1357 /* Poland*/ 1358 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU", 1359 /* Hungary*/ 1360 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE", 1361 /* Estonia*/ 1362 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV", 1363 /* Latvia*/ 1364 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/ 1365 /* Lapland [Ask Rich for the data. HS]*/ 1366 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ 1367 /* Faeroe Islands*/ 1368 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", 1369 /* Iran*/ 1370 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", 1371 /* Russia*/ 1372 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", 1373 /* Ireland*/ 1374 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", 1375 /* Korea*/ 1376 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", 1377 /* People's Republic of China*/ 1378 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", 1379 /* Taiwan*/ 1380 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", 1381 /* Thailand*/ 1382 1383 /* fallback is en_US*/ 1384 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1385 MAC_LC_MAGIC_NUMBER, "en_US" 1386 }; 1387 1388 #endif 1389 1390 #if U_POSIX_LOCALE 1391 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1392 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1393 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1394 */ 1395 static const char *uprv_getPOSIXIDForCategory(int category) 1396 { 1397 const char* posixID = NULL; 1398 if (category == LC_MESSAGES || category == LC_CTYPE) { 1399 /* 1400 * On Solaris two different calls to setlocale can result in 1401 * different values. Only get this value once. 1402 * 1403 * We must check this first because an application can set this. 1404 * 1405 * LC_ALL can't be used because it's platform dependent. The LANG 1406 * environment variable seems to affect LC_CTYPE variable by default. 1407 * Here is what setlocale(LC_ALL, NULL) can return. 1408 * HPUX can return 'C C C C C C C' 1409 * Solaris can return /en_US/C/C/C/C/C on the second try. 1410 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 1411 * 1412 * The default codepage detection also needs to use LC_CTYPE. 1413 * 1414 * Do not call setlocale(LC_*, "")! Using an empty string instead 1415 * of NULL, will modify the libc behavior. 1416 */ 1417 posixID = setlocale(category, NULL); 1418 if ((posixID == 0) 1419 || (uprv_strcmp("C", posixID) == 0) 1420 || (uprv_strcmp("POSIX", posixID) == 0)) 1421 { 1422 /* Maybe we got some garbage. Try something more reasonable */ 1423 posixID = getenv("LC_ALL"); 1424 if (posixID == 0) { 1425 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1426 if (posixID == 0) { 1427 posixID = getenv("LANG"); 1428 } 1429 } 1430 } 1431 } 1432 if ((posixID==0) 1433 || (uprv_strcmp("C", posixID) == 0) 1434 || (uprv_strcmp("POSIX", posixID) == 0)) 1435 { 1436 /* Nothing worked. Give it a nice POSIX default value. */ 1437 posixID = "en_US_POSIX"; 1438 } 1439 return posixID; 1440 } 1441 1442 /* Return just the POSIX id for the default locale, whatever happens to be in 1443 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1444 */ 1445 static const char *uprv_getPOSIXIDForDefaultLocale(void) 1446 { 1447 static const char* posixID = NULL; 1448 if (posixID == 0) { 1449 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1450 } 1451 return posixID; 1452 } 1453 1454 #if !U_CHARSET_IS_UTF8 1455 /* Return just the POSIX id for the default codepage, whatever happens to be in 1456 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1457 */ 1458 static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1459 { 1460 static const char* posixID = NULL; 1461 if (posixID == 0) { 1462 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1463 } 1464 return posixID; 1465 } 1466 #endif 1467 #endif 1468 1469 /* NOTE: The caller should handle thread safety */ 1470 U_CAPI const char* U_EXPORT2 1471 uprv_getDefaultLocaleID() 1472 { 1473 #if U_POSIX_LOCALE 1474 /* 1475 Note that: (a '!' means the ID is improper somehow) 1476 LC_ALL ----> default_loc codepage 1477 -------------------------------------------------------- 1478 ab.CD ab CD 1479 ab@CD ab__CD - 1480 ab (at) CD.EF ab__CD EF 1481 1482 ab_CD.EF@GH ab_CD_GH EF 1483 1484 Some 'improper' ways to do the same as above: 1485 ! ab_CD (at) GH.EF ab_CD_GH EF 1486 ! ab_CD.EF (at) GH.IJ ab_CD_GH EF 1487 ! ab_CD (at) ZZ.EF@GH.IJ ab_CD_GH EF 1488 1489 _CD@GH _CD_GH - 1490 _CD.EF@GH _CD_GH EF 1491 1492 The variant cannot have dots in it. 1493 The 'rightmost' variant (@xxx) wins. 1494 The leftmost codepage (.xxx) wins. 1495 */ 1496 char *correctedPOSIXLocale = 0; 1497 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1498 const char *p; 1499 const char *q; 1500 int32_t len; 1501 1502 /* Format: (no spaces) 1503 ll [ _CC ] [ . MM ] [ @ VV] 1504 1505 l = lang, C = ctry, M = charmap, V = variant 1506 */ 1507 1508 if (gCorrectedPOSIXLocale != NULL) { 1509 return gCorrectedPOSIXLocale; 1510 } 1511 1512 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1513 /* assume new locale can't be larger than old one? */ 1514 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1515 /* Exit on memory allocation error. */ 1516 if (correctedPOSIXLocale == NULL) { 1517 return NULL; 1518 } 1519 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1520 correctedPOSIXLocale[p-posixID] = 0; 1521 1522 /* do not copy after the @ */ 1523 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1524 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1525 } 1526 } 1527 1528 /* Note that we scan the *uncorrected* ID. */ 1529 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1530 if (correctedPOSIXLocale == NULL) { 1531 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1532 /* Exit on memory allocation error. */ 1533 if (correctedPOSIXLocale == NULL) { 1534 return NULL; 1535 } 1536 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1537 correctedPOSIXLocale[p-posixID] = 0; 1538 } 1539 p++; 1540 1541 /* Take care of any special cases here.. */ 1542 if (!uprv_strcmp(p, "nynorsk")) { 1543 p = "NY"; 1544 /* Don't worry about no__NY. In practice, it won't appear. */ 1545 } 1546 1547 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1548 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1549 } 1550 else { 1551 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1552 } 1553 1554 if ((q = uprv_strchr(p, '.')) != NULL) { 1555 /* How big will the resulting string be? */ 1556 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1557 uprv_strncat(correctedPOSIXLocale, p, q-p); 1558 correctedPOSIXLocale[len] = 0; 1559 } 1560 else { 1561 /* Anything following the @ sign */ 1562 uprv_strcat(correctedPOSIXLocale, p); 1563 } 1564 1565 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1566 * How about 'russian' -> 'ru'? 1567 * Many of the other locales using ISO codes will be handled by the 1568 * canonicalization functions in uloc_getDefault. 1569 */ 1570 } 1571 1572 /* Was a correction made? */ 1573 if (correctedPOSIXLocale != NULL) { 1574 posixID = correctedPOSIXLocale; 1575 } 1576 else { 1577 /* copy it, just in case the original pointer goes away. See j2395 */ 1578 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1579 /* Exit on memory allocation error. */ 1580 if (correctedPOSIXLocale == NULL) { 1581 return NULL; 1582 } 1583 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1584 } 1585 1586 if (gCorrectedPOSIXLocale == NULL) { 1587 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1588 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1589 correctedPOSIXLocale = NULL; 1590 } 1591 1592 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */ 1593 uprv_free(correctedPOSIXLocale); 1594 } 1595 1596 return posixID; 1597 1598 #elif U_PLATFORM_USES_ONLY_WIN32_API 1599 UErrorCode status = U_ZERO_ERROR; 1600 LCID id = GetThreadLocale(); 1601 const char* locID = uprv_convertToPosix(id, &status); 1602 1603 if (U_FAILURE(status)) { 1604 locID = "en_US"; 1605 } 1606 return locID; 1607 1608 #elif U_PLATFORM == U_PF_CLASSIC_MACOS 1609 int32_t script = MAC_LC_INIT_NUMBER; 1610 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ 1611 int32_t region = MAC_LC_INIT_NUMBER; 1612 /* = GetScriptManagerVariable(smRegionCode);*/ 1613 int32_t lang = MAC_LC_INIT_NUMBER; 1614 /* = GetScriptManagerVariable(smScriptLang);*/ 1615 int32_t date_region = MAC_LC_INIT_NUMBER; 1616 const char* posixID = 0; 1617 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); 1618 int32_t i; 1619 Intl1Hndl ih; 1620 1621 ih = (Intl1Hndl) GetIntlResource(1); 1622 if (ih) 1623 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; 1624 1625 for (i = 0; i < count; i++) { 1626 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) 1627 || (mac_lc_recs[i].script == script)) 1628 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) 1629 || (mac_lc_recs[i].region == region)) 1630 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) 1631 || (mac_lc_recs[i].lang == lang)) 1632 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) 1633 || (mac_lc_recs[i].date_region == date_region)) 1634 ) 1635 { 1636 posixID = mac_lc_recs[i].posixID; 1637 break; 1638 } 1639 } 1640 1641 return posixID; 1642 1643 #elif U_PLATFORM == U_PF_OS400 1644 /* locales are process scoped and are by definition thread safe */ 1645 static char correctedLocale[64]; 1646 const char *localeID = getenv("LC_ALL"); 1647 char *p; 1648 1649 if (localeID == NULL) 1650 localeID = getenv("LANG"); 1651 if (localeID == NULL) 1652 localeID = setlocale(LC_ALL, NULL); 1653 /* Make sure we have something... */ 1654 if (localeID == NULL) 1655 return "en_US_POSIX"; 1656 1657 /* Extract the locale name from the path. */ 1658 if((p = uprv_strrchr(localeID, '/')) != NULL) 1659 { 1660 /* Increment p to start of locale name. */ 1661 p++; 1662 localeID = p; 1663 } 1664 1665 /* Copy to work location. */ 1666 uprv_strcpy(correctedLocale, localeID); 1667 1668 /* Strip off the '.locale' extension. */ 1669 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1670 *p = 0; 1671 } 1672 1673 /* Upper case the locale name. */ 1674 T_CString_toUpperCase(correctedLocale); 1675 1676 /* See if we are using the POSIX locale. Any of the 1677 * following are equivalent and use the same QLGPGCMA 1678 * (POSIX) locale. 1679 * QLGPGCMA2 means UCS2 1680 * QLGPGCMA_4 means UTF-32 1681 * QLGPGCMA_8 means UTF-8 1682 */ 1683 if ((uprv_strcmp("C", correctedLocale) == 0) || 1684 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1685 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1686 { 1687 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1688 } 1689 else 1690 { 1691 int16_t LocaleLen; 1692 1693 /* Lower case the lang portion. */ 1694 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1695 { 1696 *p = uprv_tolower(*p); 1697 } 1698 1699 /* Adjust for Euro. After '_E' add 'URO'. */ 1700 LocaleLen = uprv_strlen(correctedLocale); 1701 if (correctedLocale[LocaleLen - 2] == '_' && 1702 correctedLocale[LocaleLen - 1] == 'E') 1703 { 1704 uprv_strcat(correctedLocale, "URO"); 1705 } 1706 1707 /* If using Lotus-based locale then convert to 1708 * equivalent non Lotus. 1709 */ 1710 else if (correctedLocale[LocaleLen - 2] == '_' && 1711 correctedLocale[LocaleLen - 1] == 'L') 1712 { 1713 correctedLocale[LocaleLen - 2] = 0; 1714 } 1715 1716 /* There are separate simplified and traditional 1717 * locales called zh_HK_S and zh_HK_T. 1718 */ 1719 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1720 { 1721 uprv_strcpy(correctedLocale, "zh_HK"); 1722 } 1723 1724 /* A special zh_CN_GBK locale... 1725 */ 1726 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1727 { 1728 uprv_strcpy(correctedLocale, "zh_CN"); 1729 } 1730 1731 } 1732 1733 return correctedLocale; 1734 #endif 1735 1736 } 1737 1738 #if !U_CHARSET_IS_UTF8 1739 #if U_POSIX_LOCALE 1740 /* 1741 Due to various platform differences, one platform may specify a charset, 1742 when they really mean a different charset. Remap the names so that they are 1743 compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1744 here. Before adding anything to this function, please consider adding unique 1745 names to the ICU alias table in the data directory. 1746 */ 1747 static const char* 1748 remapPlatformDependentCodepage(const char *locale, const char *name) { 1749 if (locale != NULL && *locale == 0) { 1750 /* Make sure that an empty locale is handled the same way. */ 1751 locale = NULL; 1752 } 1753 if (name == NULL) { 1754 return NULL; 1755 } 1756 #if U_PLATFORM == U_PF_AIX 1757 if (uprv_strcmp(name, "IBM-943") == 0) { 1758 /* Use the ASCII compatible ibm-943 */ 1759 name = "Shift-JIS"; 1760 } 1761 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1762 /* Use the windows-1252 that contains the Euro */ 1763 name = "IBM-5348"; 1764 } 1765 #elif U_PLATFORM == U_PF_SOLARIS 1766 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1767 /* Solaris underspecifies the "EUC" name. */ 1768 if (uprv_strcmp(locale, "zh_CN") == 0) { 1769 name = "EUC-CN"; 1770 } 1771 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1772 name = "EUC-TW"; 1773 } 1774 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1775 name = "EUC-KR"; 1776 } 1777 } 1778 else if (uprv_strcmp(name, "eucJP") == 0) { 1779 /* 1780 ibm-954 is the best match. 1781 ibm-33722 is the default for eucJP (similar to Windows). 1782 */ 1783 name = "eucjis"; 1784 } 1785 else if (uprv_strcmp(name, "646") == 0) { 1786 /* 1787 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1788 * ISO-8859-1 instead of US-ASCII(646). 1789 */ 1790 name = "ISO-8859-1"; 1791 } 1792 #elif U_PLATFORM_IS_DARWIN_BASED 1793 if (locale == NULL && *name == 0) { 1794 /* 1795 No locale was specified, and an empty name was passed in. 1796 This usually indicates that nl_langinfo didn't return valid information. 1797 Mac OS X uses UTF-8 by default (especially the locale data and console). 1798 */ 1799 name = "UTF-8"; 1800 } 1801 else if (uprv_strcmp(name, "CP949") == 0) { 1802 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1803 name = "EUC-KR"; 1804 } 1805 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1806 /* 1807 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1808 */ 1809 name = "UTF-8"; 1810 } 1811 #elif U_PLATFORM == U_PF_BSD 1812 if (uprv_strcmp(name, "CP949") == 0) { 1813 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1814 name = "EUC-KR"; 1815 } 1816 #elif U_PLATFORM == U_PF_HPUX 1817 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1818 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1819 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1820 name = "hkbig5"; 1821 } 1822 else if (uprv_strcmp(name, "eucJP") == 0) { 1823 /* 1824 ibm-1350 is the best match, but unavailable. 1825 ibm-954 is mostly a superset of ibm-1350. 1826 ibm-33722 is the default for eucJP (similar to Windows). 1827 */ 1828 name = "eucjis"; 1829 } 1830 #elif U_PLATFORM == U_PF_LINUX 1831 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1832 /* Linux underspecifies the "EUC" name. */ 1833 if (uprv_strcmp(locale, "korean") == 0) { 1834 name = "EUC-KR"; 1835 } 1836 else if (uprv_strcmp(locale, "japanese") == 0) { 1837 /* See comment below about eucJP */ 1838 name = "eucjis"; 1839 } 1840 } 1841 else if (uprv_strcmp(name, "eucjp") == 0) { 1842 /* 1843 ibm-1350 is the best match, but unavailable. 1844 ibm-954 is mostly a superset of ibm-1350. 1845 ibm-33722 is the default for eucJP (similar to Windows). 1846 */ 1847 name = "eucjis"; 1848 } 1849 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && 1850 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { 1851 /* 1852 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1853 */ 1854 name = "UTF-8"; 1855 } 1856 /* 1857 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1858 * it by falling back to 'US-ASCII' when NULL is returned from this 1859 * function. So, we don't have to worry about it here. 1860 */ 1861 #endif 1862 /* return NULL when "" is passed in */ 1863 if (*name == 0) { 1864 name = NULL; 1865 } 1866 return name; 1867 } 1868 1869 static const char* 1870 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1871 { 1872 char localeBuf[100]; 1873 const char *name = NULL; 1874 char *variant = NULL; 1875 1876 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1877 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1878 uprv_strncpy(localeBuf, localeName, localeCapacity); 1879 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1880 name = uprv_strncpy(buffer, name+1, buffCapacity); 1881 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1882 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { 1883 *variant = 0; 1884 } 1885 name = remapPlatformDependentCodepage(localeBuf, name); 1886 } 1887 return name; 1888 } 1889 #endif 1890 1891 static const char* 1892 int_getDefaultCodepage() 1893 { 1894 #if U_PLATFORM == U_PF_OS400 1895 uint32_t ccsid = 37; /* Default to ibm-37 */ 1896 static char codepage[64]; 1897 Qwc_JOBI0400_t jobinfo; 1898 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1899 1900 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1901 "* ", " ", &error); 1902 1903 if (error.Bytes_Available == 0) { 1904 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1905 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1906 } 1907 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1908 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1909 } 1910 /* else use the default */ 1911 } 1912 sprintf(codepage,"ibm-%d", ccsid); 1913 return codepage; 1914 1915 #elif U_PLATFORM == U_PF_OS390 1916 static char codepage[64]; 1917 1918 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1919 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1920 codepage[63] = 0; /* NULL terminate */ 1921 1922 return codepage; 1923 1924 #elif U_PLATFORM == U_PF_CLASSIC_MACOS 1925 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ 1926 1927 #elif U_PLATFORM_USES_ONLY_WIN32_API 1928 static char codepage[64]; 1929 sprintf(codepage, "windows-%d", GetACP()); 1930 return codepage; 1931 1932 #elif U_POSIX_LOCALE 1933 static char codesetName[100]; 1934 const char *localeName = NULL; 1935 const char *name = NULL; 1936 1937 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1938 uprv_memset(codesetName, 0, sizeof(codesetName)); 1939 #if U_HAVE_NL_LANGINFO_CODESET 1940 /* When available, check nl_langinfo first because it usually gives more 1941 useful names. It depends on LC_CTYPE. 1942 nl_langinfo may use the same buffer as setlocale. */ 1943 { 1944 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1945 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED 1946 /* 1947 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1948 * instead of ASCII. 1949 */ 1950 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1951 codeset = remapPlatformDependentCodepage(localeName, codeset); 1952 } else 1953 #endif 1954 { 1955 codeset = remapPlatformDependentCodepage(NULL, codeset); 1956 } 1957 1958 if (codeset != NULL) { 1959 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1960 codesetName[sizeof(codesetName)-1] = 0; 1961 return codesetName; 1962 } 1963 } 1964 #endif 1965 1966 /* Use setlocale in a nice way, and then check some environment variables. 1967 Maybe the application used setlocale already. 1968 */ 1969 uprv_memset(codesetName, 0, sizeof(codesetName)); 1970 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1971 if (name) { 1972 /* if we can find the codeset name from setlocale, return that. */ 1973 return name; 1974 } 1975 1976 if (*codesetName == 0) 1977 { 1978 /* Everything failed. Return US ASCII (ISO 646). */ 1979 (void)uprv_strcpy(codesetName, "US-ASCII"); 1980 } 1981 return codesetName; 1982 #else 1983 return "US-ASCII"; 1984 #endif 1985 } 1986 1987 1988 U_CAPI const char* U_EXPORT2 1989 uprv_getDefaultCodepage() 1990 { 1991 static char const *name = NULL; 1992 umtx_lock(NULL); 1993 if (name == NULL) { 1994 name = int_getDefaultCodepage(); 1995 } 1996 umtx_unlock(NULL); 1997 return name; 1998 } 1999 #endif /* !U_CHARSET_IS_UTF8 */ 2000 2001 2002 /* end of platform-specific implementation -------------- */ 2003 2004 /* version handling --------------------------------------------------------- */ 2005 2006 U_CAPI void U_EXPORT2 2007 u_versionFromString(UVersionInfo versionArray, const char *versionString) { 2008 char *end; 2009 uint16_t part=0; 2010 2011 if(versionArray==NULL) { 2012 return; 2013 } 2014 2015 if(versionString!=NULL) { 2016 for(;;) { 2017 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 2018 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 2019 break; 2020 } 2021 versionString=end+1; 2022 } 2023 } 2024 2025 while(part<U_MAX_VERSION_LENGTH) { 2026 versionArray[part++]=0; 2027 } 2028 } 2029 2030 U_CAPI void U_EXPORT2 2031 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 2032 if(versionArray!=NULL && versionString!=NULL) { 2033 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 2034 int32_t len = u_strlen(versionString); 2035 if(len>U_MAX_VERSION_STRING_LENGTH) { 2036 len = U_MAX_VERSION_STRING_LENGTH; 2037 } 2038 u_UCharsToChars(versionString, versionChars, len); 2039 versionChars[len]=0; 2040 u_versionFromString(versionArray, versionChars); 2041 } 2042 } 2043 2044 U_CAPI void U_EXPORT2 2045 u_versionToString(const UVersionInfo versionArray, char *versionString) { 2046 uint16_t count, part; 2047 uint8_t field; 2048 2049 if(versionString==NULL) { 2050 return; 2051 } 2052 2053 if(versionArray==NULL) { 2054 versionString[0]=0; 2055 return; 2056 } 2057 2058 /* count how many fields need to be written */ 2059 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2060 } 2061 2062 if(count <= 1) { 2063 count = 2; 2064 } 2065 2066 /* write the first part */ 2067 /* write the decimal field value */ 2068 field=versionArray[0]; 2069 if(field>=100) { 2070 *versionString++=(char)('0'+field/100); 2071 field%=100; 2072 } 2073 if(field>=10) { 2074 *versionString++=(char)('0'+field/10); 2075 field%=10; 2076 } 2077 *versionString++=(char)('0'+field); 2078 2079 /* write the following parts */ 2080 for(part=1; part<count; ++part) { 2081 /* write a dot first */ 2082 *versionString++=U_VERSION_DELIMITER; 2083 2084 /* write the decimal field value */ 2085 field=versionArray[part]; 2086 if(field>=100) { 2087 *versionString++=(char)('0'+field/100); 2088 field%=100; 2089 } 2090 if(field>=10) { 2091 *versionString++=(char)('0'+field/10); 2092 field%=10; 2093 } 2094 *versionString++=(char)('0'+field); 2095 } 2096 2097 /* NUL-terminate */ 2098 *versionString=0; 2099 } 2100 2101 U_CAPI void U_EXPORT2 2102 u_getVersion(UVersionInfo versionArray) { 2103 u_versionFromString(versionArray, U_ICU_VERSION); 2104 } 2105 2106 /** 2107 * icucfg.h dependent code 2108 */ 2109 2110 #if U_ENABLE_DYLOAD 2111 2112 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API 2113 2114 #if HAVE_DLFCN_H 2115 2116 #ifdef __MVS__ 2117 #ifndef __SUSV3 2118 #define __SUSV3 1 2119 #endif 2120 #endif 2121 #include <dlfcn.h> 2122 #endif 2123 2124 U_INTERNAL void * U_EXPORT2 2125 uprv_dl_open(const char *libName, UErrorCode *status) { 2126 void *ret = NULL; 2127 if(U_FAILURE(*status)) return ret; 2128 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2129 if(ret==NULL) { 2130 #ifdef U_TRACE_DYLOAD 2131 printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); 2132 #endif 2133 *status = U_MISSING_RESOURCE_ERROR; 2134 } 2135 return ret; 2136 } 2137 2138 U_INTERNAL void U_EXPORT2 2139 uprv_dl_close(void *lib, UErrorCode *status) { 2140 if(U_FAILURE(*status)) return; 2141 dlclose(lib); 2142 } 2143 2144 U_INTERNAL UVoidFunction* U_EXPORT2 2145 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2146 union { 2147 UVoidFunction *fp; 2148 void *vp; 2149 } uret; 2150 uret.fp = NULL; 2151 if(U_FAILURE(*status)) return uret.fp; 2152 uret.vp = dlsym(lib, sym); 2153 if(uret.vp == NULL) { 2154 #ifdef U_TRACE_DYLOAD 2155 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); 2156 #endif 2157 *status = U_MISSING_RESOURCE_ERROR; 2158 } 2159 return uret.fp; 2160 } 2161 2162 #else 2163 2164 /* null (nonexistent) implementation. */ 2165 2166 U_INTERNAL void * U_EXPORT2 2167 uprv_dl_open(const char *libName, UErrorCode *status) { 2168 if(U_FAILURE(*status)) return NULL; 2169 *status = U_UNSUPPORTED_ERROR; 2170 return NULL; 2171 } 2172 2173 U_INTERNAL void U_EXPORT2 2174 uprv_dl_close(void *lib, UErrorCode *status) { 2175 if(U_FAILURE(*status)) return; 2176 *status = U_UNSUPPORTED_ERROR; 2177 return; 2178 } 2179 2180 2181 U_INTERNAL UVoidFunction* U_EXPORT2 2182 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2183 if(U_SUCCESS(*status)) { 2184 *status = U_UNSUPPORTED_ERROR; 2185 } 2186 return (UVoidFunction*)NULL; 2187 } 2188 2189 2190 2191 #endif 2192 2193 #elif U_PLATFORM_USES_ONLY_WIN32_API 2194 2195 U_INTERNAL void * U_EXPORT2 2196 uprv_dl_open(const char *libName, UErrorCode *status) { 2197 HMODULE lib = NULL; 2198 2199 if(U_FAILURE(*status)) return NULL; 2200 2201 lib = LoadLibraryA(libName); 2202 2203 if(lib==NULL) { 2204 *status = U_MISSING_RESOURCE_ERROR; 2205 } 2206 2207 return (void*)lib; 2208 } 2209 2210 U_INTERNAL void U_EXPORT2 2211 uprv_dl_close(void *lib, UErrorCode *status) { 2212 HMODULE handle = (HMODULE)lib; 2213 if(U_FAILURE(*status)) return; 2214 2215 FreeLibrary(handle); 2216 2217 return; 2218 } 2219 2220 2221 U_INTERNAL UVoidFunction* U_EXPORT2 2222 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2223 HMODULE handle = (HMODULE)lib; 2224 UVoidFunction* addr = NULL; 2225 2226 if(U_FAILURE(*status) || lib==NULL) return NULL; 2227 2228 addr = (UVoidFunction*)GetProcAddress(handle, sym); 2229 2230 if(addr==NULL) { 2231 DWORD lastError = GetLastError(); 2232 if(lastError == ERROR_PROC_NOT_FOUND) { 2233 *status = U_MISSING_RESOURCE_ERROR; 2234 } else { 2235 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2236 } 2237 } 2238 2239 return addr; 2240 } 2241 2242 2243 #else 2244 2245 /* No dynamic loading set. */ 2246 2247 U_INTERNAL void * U_EXPORT2 2248 uprv_dl_open(const char *libName, UErrorCode *status) { 2249 if(U_FAILURE(*status)) return NULL; 2250 *status = U_UNSUPPORTED_ERROR; 2251 return NULL; 2252 } 2253 2254 U_INTERNAL void U_EXPORT2 2255 uprv_dl_close(void *lib, UErrorCode *status) { 2256 if(U_FAILURE(*status)) return; 2257 *status = U_UNSUPPORTED_ERROR; 2258 return; 2259 } 2260 2261 2262 U_INTERNAL UVoidFunction* U_EXPORT2 2263 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2264 if(U_SUCCESS(*status)) { 2265 *status = U_UNSUPPORTED_ERROR; 2266 } 2267 return (UVoidFunction*)NULL; 2268 } 2269 2270 #endif /* U_ENABLE_DYLOAD */ 2271 2272 /* 2273 * Hey, Emacs, please set the following: 2274 * 2275 * Local Variables: 2276 * indent-tabs-mode: nil 2277 * End: 2278 * 2279 */ 2280