1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1997-2013, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10 * 11 * Date Name Description 12 * 04/14/97 aliu Creation. 13 * 04/24/97 aliu Added getDefaultDataDirectory() and 14 * getDefaultLocaleID(). 15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods 16 * for assumed case. Non-UNIX platforms must be 17 * special-cased. Rewrote numeric methods dealing 18 * with NaN and Infinity to be platform independent 19 * over all IEEE 754 platforms. 20 * 05/13/97 aliu Restored sign of timezone 21 * (semantics are hours West of GMT) 22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23 * nextDouble.. 24 * 07/22/98 stephen Added remainder, max, min, trunc 25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26 * 08/24/98 stephen Added longBitsFromDouble 27 * 09/08/98 stephen Minor changes for Mac Port 28 * 03/02/99 stephen Removed openFile(). Added AS400 support. 29 * Fixed EBCDIC tables 30 * 04/15/99 stephen Converted to C. 31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32 * 08/04/99 jeffrey R. Added OS/2 changes 33 * 11/15/99 helena Integrated S/390 IEEE support. 34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36 * 01/03/08 Steven L. Fake Time Support 37 ****************************************************************************** 38 */ 39 40 // Defines _XOPEN_SOURCE for access to POSIX functions. 41 // Must be before any other #includes. 42 #include "uposixdefs.h" 43 44 /* include ICU headers */ 45 #include "unicode/utypes.h" 46 #include "unicode/putil.h" 47 #include "unicode/ustring.h" 48 #include "putilimp.h" 49 #include "uassert.h" 50 #include "umutex.h" 51 #include "cmemory.h" 52 #include "cstring.h" 53 #include "locmap.h" 54 #include "ucln_cmn.h" 55 56 /* Include standard headers. */ 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <math.h> 61 #include <locale.h> 62 #include <float.h> 63 64 #ifndef U_COMMON_IMPLEMENTATION 65 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu 66 #endif 67 68 69 /* include system headers */ 70 #if U_PLATFORM_USES_ONLY_WIN32_API 71 /* 72 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. 73 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) 74 * to use native APIs as much as possible? 75 */ 76 # define WIN32_LEAN_AND_MEAN 77 # define VC_EXTRALEAN 78 # define NOUSER 79 # define NOSERVICE 80 # define NOIME 81 # define NOMCX 82 # include <windows.h> 83 # include "wintz.h" 84 #elif U_PLATFORM == U_PF_OS400 85 # include <float.h> 86 # include <qusec.h> /* error code structure */ 87 # include <qusrjobi.h> 88 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 89 # include <mih/testptr.h> /* For uprv_maximumPtr */ 90 #elif U_PLATFORM == U_PF_CLASSIC_MACOS 91 # include <Files.h> 92 # include <IntlResources.h> 93 # include <Script.h> 94 # include <Folders.h> 95 # include <MacTypes.h> 96 # include <TextUtils.h> 97 # define ICU_NO_USER_DATA_OVERRIDE 1 98 #elif U_PLATFORM == U_PF_OS390 99 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 100 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS 101 # include <limits.h> 102 # include <unistd.h> 103 # if U_PLATFORM == U_PF_SOLARIS 104 # ifndef _XPG4_2 105 # define _XPG4_2 106 # endif 107 # endif 108 #elif U_PLATFORM == U_PF_QNX 109 # include <sys/neutrino.h> 110 #endif 111 112 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) 113 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */ 114 #undef __STRICT_ANSI__ 115 #endif 116 117 /* 118 * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. 119 */ 120 #include <time.h> 121 122 #if !U_PLATFORM_USES_ONLY_WIN32_API 123 #include <sys/time.h> 124 #endif 125 126 /* 127 * Only include langinfo.h if we have a way to get the codeset. If we later 128 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 129 * 130 */ 131 132 #if U_HAVE_NL_LANGINFO_CODESET 133 #include <langinfo.h> 134 #endif 135 136 /** 137 * Simple things (presence of functions, etc) should just go in configure.in and be added to 138 * icucfg.h via autoheader. 139 */ 140 #if U_PLATFORM_IMPLEMENTS_POSIX 141 # if U_PLATFORM == U_PF_OS400 142 # define HAVE_DLFCN_H 0 143 # define HAVE_DLOPEN 0 144 # else 145 # ifndef HAVE_DLFCN_H 146 # define HAVE_DLFCN_H 1 147 # endif 148 # ifndef HAVE_DLOPEN 149 # define HAVE_DLOPEN 1 150 # endif 151 # endif 152 # ifndef HAVE_GETTIMEOFDAY 153 # define HAVE_GETTIMEOFDAY 1 154 # endif 155 #else 156 # define HAVE_DLFCN_H 0 157 # define HAVE_DLOPEN 0 158 # define HAVE_GETTIMEOFDAY 0 159 #endif 160 161 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 162 163 /* Define the extension for data files, again... */ 164 #define DATA_TYPE "dat" 165 166 /* Leave this copyright notice here! */ 167 static const char copyright[] = U_COPYRIGHT_STRING; 168 169 /* floating point implementations ------------------------------------------- */ 170 171 /* We return QNAN rather than SNAN*/ 172 #define SIGN 0x80000000U 173 174 /* Make it easy to define certain types of constants */ 175 typedef union { 176 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ 177 double d64; 178 } BitPatternConversion; 179 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 180 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 181 182 /*--------------------------------------------------------------------------- 183 Platform utilities 184 Our general strategy is to assume we're on a POSIX platform. Platforms which 185 are non-POSIX must declare themselves so. The default POSIX implementation 186 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 187 functions). 188 ---------------------------------------------------------------------------*/ 189 190 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400 191 # undef U_POSIX_LOCALE 192 #else 193 # define U_POSIX_LOCALE 1 194 #endif 195 196 /* 197 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 198 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 199 */ 200 #if !IEEE_754 201 static char* 202 u_topNBytesOfDouble(double* d, int n) 203 { 204 #if U_IS_BIG_ENDIAN 205 return (char*)d; 206 #else 207 return (char*)(d + 1) - n; 208 #endif 209 } 210 211 static char* 212 u_bottomNBytesOfDouble(double* d, int n) 213 { 214 #if U_IS_BIG_ENDIAN 215 return (char*)(d + 1) - n; 216 #else 217 return (char*)d; 218 #endif 219 } 220 #endif /* !IEEE_754 */ 221 222 #if IEEE_754 223 static UBool 224 u_signBit(double d) { 225 uint8_t hiByte; 226 #if U_IS_BIG_ENDIAN 227 hiByte = *(uint8_t *)&d; 228 #else 229 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 230 #endif 231 return (hiByte & 0x80) != 0; 232 } 233 #endif 234 235 236 237 #if defined (U_DEBUG_FAKETIME) 238 /* Override the clock to test things without having to move the system clock. 239 * Assumes POSIX gettimeofday() will function 240 */ 241 UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 242 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 243 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 244 static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; 245 246 static UDate getUTCtime_real() { 247 struct timeval posixTime; 248 gettimeofday(&posixTime, NULL); 249 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 250 } 251 252 static UDate getUTCtime_fake() { 253 umtx_lock(&fakeClockMutex); 254 if(!fakeClock_set) { 255 UDate real = getUTCtime_real(); 256 const char *fake_start = getenv("U_FAKETIME_START"); 257 if((fake_start!=NULL) && (fake_start[0]!=0)) { 258 sscanf(fake_start,"%lf",&fakeClock_t0); 259 fakeClock_dt = fakeClock_t0 - real; 260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 261 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 262 fakeClock_t0, fake_start, fakeClock_dt, real); 263 } else { 264 fakeClock_dt = 0; 265 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 266 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 267 } 268 fakeClock_set = TRUE; 269 } 270 umtx_unlock(&fakeClockMutex); 271 272 return getUTCtime_real() + fakeClock_dt; 273 } 274 #endif 275 276 #if U_PLATFORM_USES_ONLY_WIN32_API 277 typedef union { 278 int64_t int64; 279 FILETIME fileTime; 280 } FileTimeConversion; /* This is like a ULARGE_INTEGER */ 281 282 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 283 #define EPOCH_BIAS INT64_C(116444736000000000) 284 #define HECTONANOSECOND_PER_MILLISECOND 10000 285 286 #endif 287 288 /*--------------------------------------------------------------------------- 289 Universal Implementations 290 These are designed to work on all platforms. Try these, and if they 291 don't work on your platform, then special case your platform with new 292 implementations. 293 ---------------------------------------------------------------------------*/ 294 295 U_CAPI UDate U_EXPORT2 296 uprv_getUTCtime() 297 { 298 #if defined(U_DEBUG_FAKETIME) 299 return getUTCtime_fake(); /* Hook for overriding the clock */ 300 #else 301 return uprv_getRawUTCtime(); 302 #endif 303 } 304 305 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 306 U_CAPI UDate U_EXPORT2 307 uprv_getRawUTCtime() 308 { 309 #if U_PLATFORM == U_PF_CLASSIC_MACOS 310 time_t t, t1, t2; 311 struct tm tmrec; 312 313 uprv_memset( &tmrec, 0, sizeof(tmrec) ); 314 tmrec.tm_year = 70; 315 tmrec.tm_mon = 0; 316 tmrec.tm_mday = 1; 317 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ 318 319 time(&t); 320 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 321 t2 = mktime(&tmrec); /* seconds of current GMT*/ 322 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ 323 #elif U_PLATFORM_USES_ONLY_WIN32_API 324 325 FileTimeConversion winTime; 326 GetSystemTimeAsFileTime(&winTime.fileTime); 327 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 328 #else 329 330 #if HAVE_GETTIMEOFDAY 331 struct timeval posixTime; 332 gettimeofday(&posixTime, NULL); 333 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 334 #else 335 time_t epochtime; 336 time(&epochtime); 337 return (UDate)epochtime * U_MILLIS_PER_SECOND; 338 #endif 339 340 #endif 341 } 342 343 /*----------------------------------------------------------------------------- 344 IEEE 754 345 These methods detect and return NaN and infinity values for doubles 346 conforming to IEEE 754. Platforms which support this standard include X86, 347 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 348 If this doesn't work on your platform, you have non-IEEE floating-point, and 349 will need to code your own versions. A naive implementation is to return 0.0 350 for getNaN and getInfinity, and false for isNaN and isInfinite. 351 ---------------------------------------------------------------------------*/ 352 353 U_CAPI UBool U_EXPORT2 354 uprv_isNaN(double number) 355 { 356 #if IEEE_754 357 BitPatternConversion convertedNumber; 358 convertedNumber.d64 = number; 359 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 360 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 361 362 #elif U_PLATFORM == U_PF_OS390 363 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 364 sizeof(uint32_t)); 365 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 366 sizeof(uint32_t)); 367 368 return ((highBits & 0x7F080000L) == 0x7F080000L) && 369 (lowBits == 0x00000000L); 370 371 #else 372 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 373 /* you'll need to replace this default implementation with what's correct*/ 374 /* for your platform.*/ 375 return number != number; 376 #endif 377 } 378 379 U_CAPI UBool U_EXPORT2 380 uprv_isInfinite(double number) 381 { 382 #if IEEE_754 383 BitPatternConversion convertedNumber; 384 convertedNumber.d64 = number; 385 /* Infinity is exactly 0x7FF0000000000000U. */ 386 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 387 #elif U_PLATFORM == U_PF_OS390 388 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 389 sizeof(uint32_t)); 390 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 391 sizeof(uint32_t)); 392 393 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 394 395 #else 396 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 397 /* value, you'll need to replace this default implementation with what's*/ 398 /* correct for your platform.*/ 399 return number == (2.0 * number); 400 #endif 401 } 402 403 U_CAPI UBool U_EXPORT2 404 uprv_isPositiveInfinity(double number) 405 { 406 #if IEEE_754 || U_PLATFORM == U_PF_OS390 407 return (UBool)(number > 0 && uprv_isInfinite(number)); 408 #else 409 return uprv_isInfinite(number); 410 #endif 411 } 412 413 U_CAPI UBool U_EXPORT2 414 uprv_isNegativeInfinity(double number) 415 { 416 #if IEEE_754 || U_PLATFORM == U_PF_OS390 417 return (UBool)(number < 0 && uprv_isInfinite(number)); 418 419 #else 420 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 421 sizeof(uint32_t)); 422 return((highBits & SIGN) && uprv_isInfinite(number)); 423 424 #endif 425 } 426 427 U_CAPI double U_EXPORT2 428 uprv_getNaN() 429 { 430 #if IEEE_754 || U_PLATFORM == U_PF_OS390 431 return gNan.d64; 432 #else 433 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 434 /* you'll need to replace this default implementation with what's correct*/ 435 /* for your platform.*/ 436 return 0.0; 437 #endif 438 } 439 440 U_CAPI double U_EXPORT2 441 uprv_getInfinity() 442 { 443 #if IEEE_754 || U_PLATFORM == U_PF_OS390 444 return gInf.d64; 445 #else 446 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 447 /* value, you'll need to replace this default implementation with what's*/ 448 /* correct for your platform.*/ 449 return 0.0; 450 #endif 451 } 452 453 U_CAPI double U_EXPORT2 454 uprv_floor(double x) 455 { 456 return floor(x); 457 } 458 459 U_CAPI double U_EXPORT2 460 uprv_ceil(double x) 461 { 462 return ceil(x); 463 } 464 465 U_CAPI double U_EXPORT2 466 uprv_round(double x) 467 { 468 return uprv_floor(x + 0.5); 469 } 470 471 U_CAPI double U_EXPORT2 472 uprv_fabs(double x) 473 { 474 return fabs(x); 475 } 476 477 U_CAPI double U_EXPORT2 478 uprv_modf(double x, double* y) 479 { 480 return modf(x, y); 481 } 482 483 U_CAPI double U_EXPORT2 484 uprv_fmod(double x, double y) 485 { 486 return fmod(x, y); 487 } 488 489 U_CAPI double U_EXPORT2 490 uprv_pow(double x, double y) 491 { 492 /* This is declared as "double pow(double x, double y)" */ 493 return pow(x, y); 494 } 495 496 U_CAPI double U_EXPORT2 497 uprv_pow10(int32_t x) 498 { 499 return pow(10.0, (double)x); 500 } 501 502 U_CAPI double U_EXPORT2 503 uprv_fmax(double x, double y) 504 { 505 #if IEEE_754 506 /* first handle NaN*/ 507 if(uprv_isNaN(x) || uprv_isNaN(y)) 508 return uprv_getNaN(); 509 510 /* check for -0 and 0*/ 511 if(x == 0.0 && y == 0.0 && u_signBit(x)) 512 return y; 513 514 #endif 515 516 /* this should work for all flt point w/o NaN and Inf special cases */ 517 return (x > y ? x : y); 518 } 519 520 U_CAPI double U_EXPORT2 521 uprv_fmin(double x, double y) 522 { 523 #if IEEE_754 524 /* first handle NaN*/ 525 if(uprv_isNaN(x) || uprv_isNaN(y)) 526 return uprv_getNaN(); 527 528 /* check for -0 and 0*/ 529 if(x == 0.0 && y == 0.0 && u_signBit(y)) 530 return y; 531 532 #endif 533 534 /* this should work for all flt point w/o NaN and Inf special cases */ 535 return (x > y ? y : x); 536 } 537 538 /** 539 * Truncates the given double. 540 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 541 * This is different than calling floor() or ceil(): 542 * floor(3.3) = 3, floor(-3.3) = -4 543 * ceil(3.3) = 4, ceil(-3.3) = -3 544 */ 545 U_CAPI double U_EXPORT2 546 uprv_trunc(double d) 547 { 548 #if IEEE_754 549 /* handle error cases*/ 550 if(uprv_isNaN(d)) 551 return uprv_getNaN(); 552 if(uprv_isInfinite(d)) 553 return uprv_getInfinity(); 554 555 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ 556 return ceil(d); 557 else 558 return floor(d); 559 560 #else 561 return d >= 0 ? floor(d) : ceil(d); 562 563 #endif 564 } 565 566 /** 567 * Return the largest positive number that can be represented by an integer 568 * type of arbitrary bit length. 569 */ 570 U_CAPI double U_EXPORT2 571 uprv_maxMantissa(void) 572 { 573 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 574 } 575 576 U_CAPI double U_EXPORT2 577 uprv_log(double d) 578 { 579 return log(d); 580 } 581 582 U_CAPI void * U_EXPORT2 583 uprv_maximumPtr(void * base) 584 { 585 #if U_PLATFORM == U_PF_OS400 586 /* 587 * With the provided function we should never be out of range of a given segment 588 * (a traditional/typical segment that is). Our segments have 5 bytes for the 589 * id and 3 bytes for the offset. The key is that the casting takes care of 590 * only retrieving the offset portion minus x1000. Hence, the smallest offset 591 * seen in a program is x001000 and when casted to an int would be 0. 592 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 593 * 594 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 595 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 596 * This function determines the activation based on the pointer that is passed in and 597 * calculates the appropriate maximum available size for 598 * each pointer type (TERASPACE and non-TERASPACE) 599 * 600 * Unlike other operating systems, the pointer model isn't determined at 601 * compile time on i5/OS. 602 */ 603 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 604 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 605 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 606 } 607 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 608 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 609 610 #else 611 return U_MAX_PTR(base); 612 #endif 613 } 614 615 /*--------------------------------------------------------------------------- 616 Platform-specific Implementations 617 Try these, and if they don't work on your platform, then special case your 618 platform with new implementations. 619 ---------------------------------------------------------------------------*/ 620 621 /* Generic time zone layer -------------------------------------------------- */ 622 623 /* Time zone utilities */ 624 U_CAPI void U_EXPORT2 625 uprv_tzset() 626 { 627 #if defined(U_TZSET) 628 U_TZSET(); 629 #else 630 /* no initialization*/ 631 #endif 632 } 633 634 U_CAPI int32_t U_EXPORT2 635 uprv_timezone() 636 { 637 #ifdef U_TIMEZONE 638 return U_TIMEZONE; 639 #else 640 time_t t, t1, t2; 641 struct tm tmrec; 642 UBool dst_checked; 643 int32_t tdiff = 0; 644 645 time(&t); 646 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 647 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 648 t1 = mktime(&tmrec); /* local time in seconds*/ 649 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 650 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 651 tdiff = t2 - t1; 652 /* imitate NT behaviour, which returns same timezone offset to GMT for 653 winter and summer. 654 This does not work on all platforms. For instance, on glibc on Linux 655 and on Mac OS 10.5, tdiff calculated above remains the same 656 regardless of whether DST is in effect or not. However, U_TIMEZONE 657 is defined on those platforms and this code is not reached so that 658 we can leave this alone. If there's a platform behaving 659 like glibc that uses this code, we need to add platform-dependent 660 preprocessor here. */ 661 if (dst_checked) 662 tdiff += 3600; 663 return tdiff; 664 #endif 665 } 666 667 /* Note that U_TZNAME does *not* have to be tzname, but if it is, 668 some platforms need to have it declared here. */ 669 670 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API)) 671 /* RS6000 and others reject char **tzname. */ 672 extern U_IMPORT char *U_TZNAME[]; 673 #endif 674 675 #if !UCONFIG_NO_FILE_IO && (U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) 676 /* These platforms are likely to use Olson timezone IDs. */ 677 #define CHECK_LOCALTIME_LINK 1 678 #if U_PLATFORM_IS_DARWIN_BASED 679 #include <tzfile.h> 680 #define TZZONEINFO (TZDIR "/") 681 #elif U_PLATFORM == U_PF_SOLARIS 682 #define TZDEFAULT "/etc/localtime" 683 #define TZZONEINFO "/usr/share/lib/zoneinfo/" 684 #define TZ_ENV_CHECK "localtime" 685 #else 686 #define TZDEFAULT "/etc/localtime" 687 #define TZZONEINFO "/usr/share/zoneinfo/" 688 #endif 689 #if U_HAVE_DIRENT_H 690 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 691 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 692 symlinked to /etc/localtime, which makes searchForTZFile return 693 'localtime' when it's the first match. */ 694 #define TZFILE_SKIP2 "localtime" 695 #define SEARCH_TZFILE 696 #include <dirent.h> /* Needed to search through system timezone files */ 697 #endif 698 static char gTimeZoneBuffer[PATH_MAX]; 699 static char *gTimeZoneBufferPtr = NULL; 700 #endif 701 702 #if !U_PLATFORM_USES_ONLY_WIN32_API 703 #define isNonDigit(ch) (ch < '0' || '9' < ch) 704 static UBool isValidOlsonID(const char *id) { 705 int32_t idx = 0; 706 707 /* Determine if this is something like Iceland (Olson ID) 708 or AST4ADT (non-Olson ID) */ 709 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 710 idx++; 711 } 712 713 /* If we went through the whole string, then it might be okay. 714 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 715 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 716 The rest of the time it could be an Olson ID. George */ 717 return (UBool)(id[idx] == 0 718 || uprv_strcmp(id, "PST8PDT") == 0 719 || uprv_strcmp(id, "MST7MDT") == 0 720 || uprv_strcmp(id, "CST6CDT") == 0 721 || uprv_strcmp(id, "EST5EDT") == 0); 722 } 723 724 /* On some Unix-like OS, 'posix' subdirectory in 725 /usr/share/zoneinfo replicates the top-level contents. 'right' 726 subdirectory has the same set of files, but individual files 727 are different from those in the top-level directory or 'posix' 728 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 729 has files for UTC. 730 When the first match for /etc/localtime is in either of them 731 (usually in posix because 'right' has different file contents), 732 or TZ environment variable points to one of them, createTimeZone 733 fails because, say, 'posix/America/New_York' is not an Olson 734 timezone id ('America/New_York' is). So, we have to skip 735 'posix/' and 'right/' at the beginning. */ 736 static void skipZoneIDPrefix(const char** id) { 737 if (uprv_strncmp(*id, "posix/", 6) == 0 738 || uprv_strncmp(*id, "right/", 6) == 0) 739 { 740 *id += 6; 741 } 742 } 743 #endif 744 745 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API 746 747 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 748 typedef struct OffsetZoneMapping { 749 int32_t offsetSeconds; 750 int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ 751 const char *stdID; 752 const char *dstID; 753 const char *olsonID; 754 } OffsetZoneMapping; 755 756 enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; 757 758 /* 759 This list tries to disambiguate a set of abbreviated timezone IDs and offsets 760 and maps it to an Olson ID. 761 Before adding anything to this list, take a look at 762 icu/source/tools/tzcode/tz.alias 763 Sometimes no daylight savings (0) is important to define due to aliases. 764 This list can be tested with icu/source/test/compat/tzone.pl 765 More values could be added to daylightType to increase precision. 766 */ 767 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 768 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 769 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 770 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 771 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 772 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 773 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 774 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 775 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 776 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 777 {-34200, 2, "CST", "CST", "Australia/South"}, 778 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 779 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 780 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 781 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 782 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 783 {-28800, 2, "WST", "WST", "Australia/West"}, 784 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 785 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 786 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 787 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 788 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 789 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 790 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 791 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 792 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 793 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 794 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 795 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 796 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 797 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 798 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 799 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 800 {0, 1, "GMT", "IST", "Europe/Dublin"}, 801 {0, 1, "GMT", "BST", "Europe/London"}, 802 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 803 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 804 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 805 {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, 806 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 807 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 808 {10800, 1, "WGT", "WGST", "America/Godthab"}, 809 {10800, 2, "BRT", "BRST", "Brazil/East"}, 810 {12600, 1, "NST", "NDT", "America/St_Johns"}, 811 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 812 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 813 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 814 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 815 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 816 {18000, 1, "CST", "CDT", "America/Havana"}, 817 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 818 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 819 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 820 {21600, 0, "CST", "CDT", "America/Guatemala"}, 821 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 822 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 823 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 824 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 825 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 826 {36000, 1, "HAST", "HADT", "US/Aleutian"} 827 }; 828 829 /*#define DEBUG_TZNAME*/ 830 831 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 832 { 833 int32_t idx; 834 #ifdef DEBUG_TZNAME 835 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 836 #endif 837 for (idx = 0; idx < LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) 838 { 839 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 840 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 841 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 842 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 843 { 844 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 845 } 846 } 847 return NULL; 848 } 849 #endif 850 851 #ifdef SEARCH_TZFILE 852 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 853 #define MAX_READ_SIZE 512 854 855 typedef struct DefaultTZInfo { 856 char* defaultTZBuffer; 857 int64_t defaultTZFileSize; 858 FILE* defaultTZFilePtr; 859 UBool defaultTZstatus; 860 int32_t defaultTZPosition; 861 } DefaultTZInfo; 862 863 /* 864 * This method compares the two files given to see if they are a match. 865 * It is currently use to compare two TZ files. 866 */ 867 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 868 FILE* file; 869 int64_t sizeFile; 870 int64_t sizeFileLeft; 871 int32_t sizeFileRead; 872 int32_t sizeFileToRead; 873 char bufferFile[MAX_READ_SIZE]; 874 UBool result = TRUE; 875 876 if (tzInfo->defaultTZFilePtr == NULL) { 877 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 878 } 879 file = fopen(TZFileName, "r"); 880 881 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 882 883 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 884 /* First check that the file size are equal. */ 885 if (tzInfo->defaultTZFileSize == 0) { 886 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 887 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 888 } 889 fseek(file, 0, SEEK_END); 890 sizeFile = ftell(file); 891 sizeFileLeft = sizeFile; 892 893 if (sizeFile != tzInfo->defaultTZFileSize) { 894 result = FALSE; 895 } else { 896 /* Store the data from the files in seperate buffers and 897 * compare each byte to determine equality. 898 */ 899 if (tzInfo->defaultTZBuffer == NULL) { 900 rewind(tzInfo->defaultTZFilePtr); 901 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 902 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 903 } 904 rewind(file); 905 while(sizeFileLeft > 0) { 906 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 907 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; 908 909 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 910 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 911 result = FALSE; 912 break; 913 } 914 sizeFileLeft -= sizeFileRead; 915 tzInfo->defaultTZPosition += sizeFileRead; 916 } 917 } 918 } else { 919 result = FALSE; 920 } 921 922 if (file != NULL) { 923 fclose(file); 924 } 925 926 return result; 927 } 928 /* 929 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 930 */ 931 /* dirent also lists two entries: "." and ".." that we can safely ignore. */ 932 #define SKIP1 "." 933 #define SKIP2 ".." 934 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 935 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 936 char curpath[MAX_PATH_SIZE]; 937 DIR* dirp = opendir(path); 938 DIR* subDirp = NULL; 939 struct dirent* dirEntry = NULL; 940 941 char* result = NULL; 942 if (dirp == NULL) { 943 return result; 944 } 945 946 /* Save the current path */ 947 uprv_memset(curpath, 0, MAX_PATH_SIZE); 948 uprv_strcpy(curpath, path); 949 950 /* Check each entry in the directory. */ 951 while((dirEntry = readdir(dirp)) != NULL) { 952 const char* dirName = dirEntry->d_name; 953 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 954 /* Create a newpath with the new entry to test each entry in the directory. */ 955 char newpath[MAX_PATH_SIZE]; 956 uprv_strcpy(newpath, curpath); 957 uprv_strcat(newpath, dirName); 958 959 if ((subDirp = opendir(newpath)) != NULL) { 960 /* If this new path is a directory, make a recursive call with the newpath. */ 961 closedir(subDirp); 962 uprv_strcat(newpath, "/"); 963 result = searchForTZFile(newpath, tzInfo); 964 /* 965 Have to get out here. Otherwise, we'd keep looking 966 and return the first match in the top-level directory 967 if there's a match in the top-level. If not, this function 968 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 969 It worked without this in most cases because we have a fallback of calling 970 localtime_r to figure out the default timezone. 971 */ 972 if (result != NULL) 973 break; 974 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 975 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 976 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1; 977 skipZoneIDPrefix(&zoneid); 978 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid); 979 result = SEARCH_TZFILE_RESULT; 980 /* Get out after the first one found. */ 981 break; 982 } 983 } 984 } 985 } 986 closedir(dirp); 987 return result; 988 } 989 #endif 990 U_CAPI const char* U_EXPORT2 991 uprv_tzname(int n) 992 { 993 const char *tzid = NULL; 994 #if U_PLATFORM_USES_ONLY_WIN32_API 995 tzid = uprv_detectWindowsTimeZone(); 996 997 if (tzid != NULL) { 998 return tzid; 999 } 1000 #else 1001 1002 /*#if U_PLATFORM_IS_DARWIN_BASED 1003 int ret; 1004 1005 tzid = getenv("TZFILE"); 1006 if (tzid != NULL) { 1007 return tzid; 1008 } 1009 #endif*/ 1010 1011 /* This code can be temporarily disabled to test tzname resolution later on. */ 1012 #ifndef DEBUG_TZNAME 1013 tzid = getenv("TZ"); 1014 if (tzid != NULL && isValidOlsonID(tzid) 1015 #if U_PLATFORM == U_PF_SOLARIS 1016 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ 1017 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 1018 #endif 1019 ) { 1020 /* This might be a good Olson ID. */ 1021 skipZoneIDPrefix(&tzid); 1022 return tzid; 1023 } 1024 /* else U_TZNAME will give a better result. */ 1025 #endif 1026 1027 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) 1028 /* Caller must handle threading issues */ 1029 if (gTimeZoneBufferPtr == NULL) { 1030 /* 1031 This is a trick to look at the name of the link to get the Olson ID 1032 because the tzfile contents is underspecified. 1033 This isn't guaranteed to work because it may not be a symlink. 1034 */ 1035 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 1036 if (0 < ret) { 1037 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1038 gTimeZoneBuffer[ret] = 0; 1039 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1040 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1041 { 1042 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1043 } 1044 } else { 1045 #if defined(SEARCH_TZFILE) 1046 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1047 if (tzInfo != NULL) { 1048 tzInfo->defaultTZBuffer = NULL; 1049 tzInfo->defaultTZFileSize = 0; 1050 tzInfo->defaultTZFilePtr = NULL; 1051 tzInfo->defaultTZstatus = FALSE; 1052 tzInfo->defaultTZPosition = 0; 1053 1054 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1055 1056 /* Free previously allocated memory */ 1057 if (tzInfo->defaultTZBuffer != NULL) { 1058 uprv_free(tzInfo->defaultTZBuffer); 1059 } 1060 if (tzInfo->defaultTZFilePtr != NULL) { 1061 fclose(tzInfo->defaultTZFilePtr); 1062 } 1063 uprv_free(tzInfo); 1064 } 1065 1066 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1067 return gTimeZoneBufferPtr; 1068 } 1069 #endif 1070 } 1071 } 1072 else { 1073 return gTimeZoneBufferPtr; 1074 } 1075 #endif 1076 #endif 1077 1078 #ifdef U_TZNAME 1079 #if U_PLATFORM_USES_ONLY_WIN32_API 1080 /* The return value is free'd in timezone.cpp on Windows because 1081 * the other code path returns a pointer to a heap location. */ 1082 return uprv_strdup(U_TZNAME[n]); 1083 #else 1084 /* 1085 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1086 So we remap the abbreviation to an olson ID. 1087 1088 Since Windows exposes a little more timezone information, 1089 we normally don't use this code on Windows because 1090 uprv_detectWindowsTimeZone should have already given the correct answer. 1091 */ 1092 { 1093 struct tm juneSol, decemberSol; 1094 int daylightType; 1095 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1096 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1097 1098 /* This probing will tell us when daylight savings occurs. */ 1099 localtime_r(&juneSolstice, &juneSol); 1100 localtime_r(&decemberSolstice, &decemberSol); 1101 if(decemberSol.tm_isdst > 0) { 1102 daylightType = U_DAYLIGHT_DECEMBER; 1103 } else if(juneSol.tm_isdst > 0) { 1104 daylightType = U_DAYLIGHT_JUNE; 1105 } else { 1106 daylightType = U_DAYLIGHT_NONE; 1107 } 1108 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1109 if (tzid != NULL) { 1110 return tzid; 1111 } 1112 } 1113 return U_TZNAME[n]; 1114 #endif 1115 #else 1116 return ""; 1117 #endif 1118 } 1119 1120 /* Get and set the ICU data directory --------------------------------------- */ 1121 1122 static char *gDataDirectory = NULL; 1123 #if U_POSIX_LOCALE 1124 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1125 #endif 1126 1127 static UBool U_CALLCONV putil_cleanup(void) 1128 { 1129 if (gDataDirectory && *gDataDirectory) { 1130 uprv_free(gDataDirectory); 1131 } 1132 gDataDirectory = NULL; 1133 #if U_POSIX_LOCALE 1134 if (gCorrectedPOSIXLocale) { 1135 uprv_free(gCorrectedPOSIXLocale); 1136 gCorrectedPOSIXLocale = NULL; 1137 } 1138 #endif 1139 return TRUE; 1140 } 1141 1142 /* 1143 * Set the data directory. 1144 * Make a copy of the passed string, and set the global data dir to point to it. 1145 * TODO: see bug #2849, regarding thread safety. 1146 */ 1147 U_CAPI void U_EXPORT2 1148 u_setDataDirectory(const char *directory) { 1149 char *newDataDir; 1150 int32_t length; 1151 1152 if(directory==NULL || *directory==0) { 1153 /* A small optimization to prevent the malloc and copy when the 1154 shared library is used, and this is a way to make sure that NULL 1155 is never returned. 1156 */ 1157 newDataDir = (char *)""; 1158 } 1159 else { 1160 length=(int32_t)uprv_strlen(directory); 1161 newDataDir = (char *)uprv_malloc(length + 2); 1162 /* Exit out if newDataDir could not be created. */ 1163 if (newDataDir == NULL) { 1164 return; 1165 } 1166 uprv_strcpy(newDataDir, directory); 1167 1168 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1169 { 1170 char *p; 1171 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1172 *p = U_FILE_SEP_CHAR; 1173 } 1174 } 1175 #endif 1176 } 1177 1178 umtx_lock(NULL); 1179 if (gDataDirectory && *gDataDirectory) { 1180 uprv_free(gDataDirectory); 1181 } 1182 gDataDirectory = newDataDir; 1183 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1184 umtx_unlock(NULL); 1185 } 1186 1187 U_CAPI UBool U_EXPORT2 1188 uprv_pathIsAbsolute(const char *path) 1189 { 1190 if(!path || !*path) { 1191 return FALSE; 1192 } 1193 1194 if(*path == U_FILE_SEP_CHAR) { 1195 return TRUE; 1196 } 1197 1198 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1199 if(*path == U_FILE_ALT_SEP_CHAR) { 1200 return TRUE; 1201 } 1202 #endif 1203 1204 #if U_PLATFORM_USES_ONLY_WIN32_API 1205 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1206 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1207 path[1] == ':' ) { 1208 return TRUE; 1209 } 1210 #endif 1211 1212 return FALSE; 1213 } 1214 1215 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1216 until some client wrapper makefiles are updated */ 1217 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR 1218 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1219 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1220 # endif 1221 #endif 1222 1223 U_CAPI const char * U_EXPORT2 1224 u_getDataDirectory(void) { 1225 const char *path = NULL; 1226 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1227 char datadir_path_buffer[PATH_MAX]; 1228 #endif 1229 1230 /* if we have the directory, then return it immediately */ 1231 UMTX_CHECK(NULL, gDataDirectory, path); 1232 1233 if(path) { 1234 return path; 1235 } 1236 1237 /* 1238 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1239 override ICU's data with the ICU_DATA environment variable. This prevents 1240 problems where multiple custom copies of ICU's specific version of data 1241 are installed on a system. Either the application must define the data 1242 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1243 ICU, set the data with udata_setCommonData or trust that all of the 1244 required data is contained in ICU's data library that contains 1245 the entry point defined by U_ICUDATA_ENTRY_POINT. 1246 1247 There may also be some platforms where environment variables 1248 are not allowed. 1249 */ 1250 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1251 /* First try to get the environment variable */ 1252 path=getenv("ICU_DATA"); 1253 # endif 1254 1255 /* ICU_DATA_DIR may be set as a compile option. 1256 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1257 * and is used only when data is built in archive mode eliminating the need 1258 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1259 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1260 * set their own path. 1261 */ 1262 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1263 if(path==NULL || *path==0) { 1264 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1265 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1266 # endif 1267 # ifdef ICU_DATA_DIR 1268 path=ICU_DATA_DIR; 1269 # else 1270 path=U_ICU_DATA_DEFAULT_DIR; 1271 # endif 1272 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1273 if (prefix != NULL) { 1274 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1275 path=datadir_path_buffer; 1276 } 1277 # endif 1278 } 1279 #endif 1280 1281 if(path==NULL) { 1282 /* It looks really bad, set it to something. */ 1283 path = ""; 1284 } 1285 1286 u_setDataDirectory(path); 1287 return gDataDirectory; 1288 } 1289 1290 1291 1292 1293 1294 /* Macintosh-specific locale information ------------------------------------ */ 1295 #if U_PLATFORM == U_PF_CLASSIC_MACOS 1296 1297 typedef struct { 1298 int32_t script; 1299 int32_t region; 1300 int32_t lang; 1301 int32_t date_region; 1302 const char* posixID; 1303 } mac_lc_rec; 1304 1305 /* Todo: This will be updated with a newer version from www.unicode.org web 1306 page when it's available.*/ 1307 #define MAC_LC_MAGIC_NUMBER -5 1308 #define MAC_LC_INIT_NUMBER -9 1309 1310 static const mac_lc_rec mac_lc_recs[] = { 1311 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", 1312 /* United States*/ 1313 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", 1314 /* France*/ 1315 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", 1316 /* Great Britain*/ 1317 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", 1318 /* Germany*/ 1319 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", 1320 /* Italy*/ 1321 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", 1322 /* Metherlands*/ 1323 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", 1324 /* French for Belgium or Lxembourg*/ 1325 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", 1326 /* Sweden*/ 1327 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", 1328 /* Denmark*/ 1329 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", 1330 /* Portugal*/ 1331 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", 1332 /* French Canada*/ 1333 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", 1334 /* Israel*/ 1335 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP", 1336 /* Japan*/ 1337 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU", 1338 /* Australia*/ 1339 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE", 1340 /* the Arabic world (?)*/ 1341 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI", 1342 /* Finland*/ 1343 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH", 1344 /* French for Switzerland*/ 1345 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH", 1346 /* German for Switzerland*/ 1347 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR", 1348 /* Greece*/ 1349 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS", 1350 /* Iceland ===*/ 1351 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/ 1352 /* Malta ===*/ 1353 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/ 1354 /* Cyprus ===*/ 1355 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR", 1356 /* Turkey ===*/ 1357 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU", 1358 /* Croatian system for Yugoslavia*/ 1359 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/ 1360 /* Hindi system for India*/ 1361 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/ 1362 /* Pakistan*/ 1363 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT", 1364 /* Lithuania*/ 1365 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL", 1366 /* Poland*/ 1367 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU", 1368 /* Hungary*/ 1369 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE", 1370 /* Estonia*/ 1371 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV", 1372 /* Latvia*/ 1373 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/ 1374 /* Lapland [Ask Rich for the data. HS]*/ 1375 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ 1376 /* Faeroe Islands*/ 1377 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", 1378 /* Iran*/ 1379 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", 1380 /* Russia*/ 1381 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", 1382 /* Ireland*/ 1383 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", 1384 /* Korea*/ 1385 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", 1386 /* People's Republic of China*/ 1387 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", 1388 /* Taiwan*/ 1389 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", 1390 /* Thailand*/ 1391 1392 /* fallback is en_US*/ 1393 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1394 MAC_LC_MAGIC_NUMBER, "en_US" 1395 }; 1396 1397 #endif 1398 1399 #if U_POSIX_LOCALE 1400 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1401 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1402 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1403 */ 1404 static const char *uprv_getPOSIXIDForCategory(int category) 1405 { 1406 const char* posixID = NULL; 1407 if (category == LC_MESSAGES || category == LC_CTYPE) { 1408 /* 1409 * On Solaris two different calls to setlocale can result in 1410 * different values. Only get this value once. 1411 * 1412 * We must check this first because an application can set this. 1413 * 1414 * LC_ALL can't be used because it's platform dependent. The LANG 1415 * environment variable seems to affect LC_CTYPE variable by default. 1416 * Here is what setlocale(LC_ALL, NULL) can return. 1417 * HPUX can return 'C C C C C C C' 1418 * Solaris can return /en_US/C/C/C/C/C on the second try. 1419 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 1420 * 1421 * The default codepage detection also needs to use LC_CTYPE. 1422 * 1423 * Do not call setlocale(LC_*, "")! Using an empty string instead 1424 * of NULL, will modify the libc behavior. 1425 */ 1426 posixID = setlocale(category, NULL); 1427 if ((posixID == 0) 1428 || (uprv_strcmp("C", posixID) == 0) 1429 || (uprv_strcmp("POSIX", posixID) == 0)) 1430 { 1431 /* Maybe we got some garbage. Try something more reasonable */ 1432 posixID = getenv("LC_ALL"); 1433 if (posixID == 0) { 1434 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1435 if (posixID == 0) { 1436 posixID = getenv("LANG"); 1437 } 1438 } 1439 } 1440 } 1441 if ((posixID==0) 1442 || (uprv_strcmp("C", posixID) == 0) 1443 || (uprv_strcmp("POSIX", posixID) == 0)) 1444 { 1445 /* Nothing worked. Give it a nice POSIX default value. */ 1446 posixID = "en_US_POSIX"; 1447 } 1448 return posixID; 1449 } 1450 1451 /* Return just the POSIX id for the default locale, whatever happens to be in 1452 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1453 */ 1454 static const char *uprv_getPOSIXIDForDefaultLocale(void) 1455 { 1456 static const char* posixID = NULL; 1457 if (posixID == 0) { 1458 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1459 } 1460 return posixID; 1461 } 1462 1463 #if !U_CHARSET_IS_UTF8 1464 /* Return just the POSIX id for the default codepage, whatever happens to be in 1465 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1466 */ 1467 static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1468 { 1469 static const char* posixID = NULL; 1470 if (posixID == 0) { 1471 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1472 } 1473 return posixID; 1474 } 1475 #endif 1476 #endif 1477 1478 /* NOTE: The caller should handle thread safety */ 1479 U_CAPI const char* U_EXPORT2 1480 uprv_getDefaultLocaleID() 1481 { 1482 #if U_POSIX_LOCALE 1483 /* 1484 Note that: (a '!' means the ID is improper somehow) 1485 LC_ALL ----> default_loc codepage 1486 -------------------------------------------------------- 1487 ab.CD ab CD 1488 ab@CD ab__CD - 1489 ab (at) CD.EF ab__CD EF 1490 1491 ab_CD.EF@GH ab_CD_GH EF 1492 1493 Some 'improper' ways to do the same as above: 1494 ! ab_CD (at) GH.EF ab_CD_GH EF 1495 ! ab_CD.EF (at) GH.IJ ab_CD_GH EF 1496 ! ab_CD (at) ZZ.EF@GH.IJ ab_CD_GH EF 1497 1498 _CD@GH _CD_GH - 1499 _CD.EF@GH _CD_GH EF 1500 1501 The variant cannot have dots in it. 1502 The 'rightmost' variant (@xxx) wins. 1503 The leftmost codepage (.xxx) wins. 1504 */ 1505 char *correctedPOSIXLocale = 0; 1506 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1507 const char *p; 1508 const char *q; 1509 int32_t len; 1510 1511 /* Format: (no spaces) 1512 ll [ _CC ] [ . MM ] [ @ VV] 1513 1514 l = lang, C = ctry, M = charmap, V = variant 1515 */ 1516 1517 if (gCorrectedPOSIXLocale != NULL) { 1518 return gCorrectedPOSIXLocale; 1519 } 1520 1521 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1522 /* assume new locale can't be larger than old one? */ 1523 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1524 /* Exit on memory allocation error. */ 1525 if (correctedPOSIXLocale == NULL) { 1526 return NULL; 1527 } 1528 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1529 correctedPOSIXLocale[p-posixID] = 0; 1530 1531 /* do not copy after the @ */ 1532 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1533 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1534 } 1535 } 1536 1537 /* Note that we scan the *uncorrected* ID. */ 1538 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1539 if (correctedPOSIXLocale == NULL) { 1540 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1541 /* Exit on memory allocation error. */ 1542 if (correctedPOSIXLocale == NULL) { 1543 return NULL; 1544 } 1545 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1546 correctedPOSIXLocale[p-posixID] = 0; 1547 } 1548 p++; 1549 1550 /* Take care of any special cases here.. */ 1551 if (!uprv_strcmp(p, "nynorsk")) { 1552 p = "NY"; 1553 /* Don't worry about no__NY. In practice, it won't appear. */ 1554 } 1555 1556 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1557 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1558 } 1559 else { 1560 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1561 } 1562 1563 if ((q = uprv_strchr(p, '.')) != NULL) { 1564 /* How big will the resulting string be? */ 1565 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1566 uprv_strncat(correctedPOSIXLocale, p, q-p); 1567 correctedPOSIXLocale[len] = 0; 1568 } 1569 else { 1570 /* Anything following the @ sign */ 1571 uprv_strcat(correctedPOSIXLocale, p); 1572 } 1573 1574 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1575 * How about 'russian' -> 'ru'? 1576 * Many of the other locales using ISO codes will be handled by the 1577 * canonicalization functions in uloc_getDefault. 1578 */ 1579 } 1580 1581 /* Was a correction made? */ 1582 if (correctedPOSIXLocale != NULL) { 1583 posixID = correctedPOSIXLocale; 1584 } 1585 else { 1586 /* copy it, just in case the original pointer goes away. See j2395 */ 1587 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1588 /* Exit on memory allocation error. */ 1589 if (correctedPOSIXLocale == NULL) { 1590 return NULL; 1591 } 1592 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1593 } 1594 1595 if (gCorrectedPOSIXLocale == NULL) { 1596 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1597 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1598 correctedPOSIXLocale = NULL; 1599 } 1600 1601 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */ 1602 uprv_free(correctedPOSIXLocale); 1603 } 1604 1605 return posixID; 1606 1607 #elif U_PLATFORM_USES_ONLY_WIN32_API 1608 UErrorCode status = U_ZERO_ERROR; 1609 LCID id = GetThreadLocale(); 1610 const char* locID = uprv_convertToPosix(id, &status); 1611 1612 if (U_FAILURE(status)) { 1613 locID = "en_US"; 1614 } 1615 return locID; 1616 1617 #elif U_PLATFORM == U_PF_CLASSIC_MACOS 1618 int32_t script = MAC_LC_INIT_NUMBER; 1619 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ 1620 int32_t region = MAC_LC_INIT_NUMBER; 1621 /* = GetScriptManagerVariable(smRegionCode);*/ 1622 int32_t lang = MAC_LC_INIT_NUMBER; 1623 /* = GetScriptManagerVariable(smScriptLang);*/ 1624 int32_t date_region = MAC_LC_INIT_NUMBER; 1625 const char* posixID = 0; 1626 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); 1627 int32_t i; 1628 Intl1Hndl ih; 1629 1630 ih = (Intl1Hndl) GetIntlResource(1); 1631 if (ih) 1632 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; 1633 1634 for (i = 0; i < count; i++) { 1635 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) 1636 || (mac_lc_recs[i].script == script)) 1637 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) 1638 || (mac_lc_recs[i].region == region)) 1639 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) 1640 || (mac_lc_recs[i].lang == lang)) 1641 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) 1642 || (mac_lc_recs[i].date_region == date_region)) 1643 ) 1644 { 1645 posixID = mac_lc_recs[i].posixID; 1646 break; 1647 } 1648 } 1649 1650 return posixID; 1651 1652 #elif U_PLATFORM == U_PF_OS400 1653 /* locales are process scoped and are by definition thread safe */ 1654 static char correctedLocale[64]; 1655 const char *localeID = getenv("LC_ALL"); 1656 char *p; 1657 1658 if (localeID == NULL) 1659 localeID = getenv("LANG"); 1660 if (localeID == NULL) 1661 localeID = setlocale(LC_ALL, NULL); 1662 /* Make sure we have something... */ 1663 if (localeID == NULL) 1664 return "en_US_POSIX"; 1665 1666 /* Extract the locale name from the path. */ 1667 if((p = uprv_strrchr(localeID, '/')) != NULL) 1668 { 1669 /* Increment p to start of locale name. */ 1670 p++; 1671 localeID = p; 1672 } 1673 1674 /* Copy to work location. */ 1675 uprv_strcpy(correctedLocale, localeID); 1676 1677 /* Strip off the '.locale' extension. */ 1678 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1679 *p = 0; 1680 } 1681 1682 /* Upper case the locale name. */ 1683 T_CString_toUpperCase(correctedLocale); 1684 1685 /* See if we are using the POSIX locale. Any of the 1686 * following are equivalent and use the same QLGPGCMA 1687 * (POSIX) locale. 1688 * QLGPGCMA2 means UCS2 1689 * QLGPGCMA_4 means UTF-32 1690 * QLGPGCMA_8 means UTF-8 1691 */ 1692 if ((uprv_strcmp("C", correctedLocale) == 0) || 1693 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1694 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1695 { 1696 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1697 } 1698 else 1699 { 1700 int16_t LocaleLen; 1701 1702 /* Lower case the lang portion. */ 1703 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1704 { 1705 *p = uprv_tolower(*p); 1706 } 1707 1708 /* Adjust for Euro. After '_E' add 'URO'. */ 1709 LocaleLen = uprv_strlen(correctedLocale); 1710 if (correctedLocale[LocaleLen - 2] == '_' && 1711 correctedLocale[LocaleLen - 1] == 'E') 1712 { 1713 uprv_strcat(correctedLocale, "URO"); 1714 } 1715 1716 /* If using Lotus-based locale then convert to 1717 * equivalent non Lotus. 1718 */ 1719 else if (correctedLocale[LocaleLen - 2] == '_' && 1720 correctedLocale[LocaleLen - 1] == 'L') 1721 { 1722 correctedLocale[LocaleLen - 2] = 0; 1723 } 1724 1725 /* There are separate simplified and traditional 1726 * locales called zh_HK_S and zh_HK_T. 1727 */ 1728 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1729 { 1730 uprv_strcpy(correctedLocale, "zh_HK"); 1731 } 1732 1733 /* A special zh_CN_GBK locale... 1734 */ 1735 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1736 { 1737 uprv_strcpy(correctedLocale, "zh_CN"); 1738 } 1739 1740 } 1741 1742 return correctedLocale; 1743 #endif 1744 1745 } 1746 1747 #if !U_CHARSET_IS_UTF8 1748 #if U_POSIX_LOCALE 1749 /* 1750 Due to various platform differences, one platform may specify a charset, 1751 when they really mean a different charset. Remap the names so that they are 1752 compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1753 here. Before adding anything to this function, please consider adding unique 1754 names to the ICU alias table in the data directory. 1755 */ 1756 static const char* 1757 remapPlatformDependentCodepage(const char *locale, const char *name) { 1758 if (locale != NULL && *locale == 0) { 1759 /* Make sure that an empty locale is handled the same way. */ 1760 locale = NULL; 1761 } 1762 if (name == NULL) { 1763 return NULL; 1764 } 1765 #if U_PLATFORM == U_PF_AIX 1766 if (uprv_strcmp(name, "IBM-943") == 0) { 1767 /* Use the ASCII compatible ibm-943 */ 1768 name = "Shift-JIS"; 1769 } 1770 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1771 /* Use the windows-1252 that contains the Euro */ 1772 name = "IBM-5348"; 1773 } 1774 #elif U_PLATFORM == U_PF_SOLARIS 1775 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1776 /* Solaris underspecifies the "EUC" name. */ 1777 if (uprv_strcmp(locale, "zh_CN") == 0) { 1778 name = "EUC-CN"; 1779 } 1780 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1781 name = "EUC-TW"; 1782 } 1783 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1784 name = "EUC-KR"; 1785 } 1786 } 1787 else if (uprv_strcmp(name, "eucJP") == 0) { 1788 /* 1789 ibm-954 is the best match. 1790 ibm-33722 is the default for eucJP (similar to Windows). 1791 */ 1792 name = "eucjis"; 1793 } 1794 else if (uprv_strcmp(name, "646") == 0) { 1795 /* 1796 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1797 * ISO-8859-1 instead of US-ASCII(646). 1798 */ 1799 name = "ISO-8859-1"; 1800 } 1801 #elif U_PLATFORM_IS_DARWIN_BASED 1802 if (locale == NULL && *name == 0) { 1803 /* 1804 No locale was specified, and an empty name was passed in. 1805 This usually indicates that nl_langinfo didn't return valid information. 1806 Mac OS X uses UTF-8 by default (especially the locale data and console). 1807 */ 1808 name = "UTF-8"; 1809 } 1810 else if (uprv_strcmp(name, "CP949") == 0) { 1811 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1812 name = "EUC-KR"; 1813 } 1814 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1815 /* 1816 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1817 */ 1818 name = "UTF-8"; 1819 } 1820 #elif U_PLATFORM == U_PF_BSD 1821 if (uprv_strcmp(name, "CP949") == 0) { 1822 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1823 name = "EUC-KR"; 1824 } 1825 #elif U_PLATFORM == U_PF_HPUX 1826 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1827 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1828 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1829 name = "hkbig5"; 1830 } 1831 else if (uprv_strcmp(name, "eucJP") == 0) { 1832 /* 1833 ibm-1350 is the best match, but unavailable. 1834 ibm-954 is mostly a superset of ibm-1350. 1835 ibm-33722 is the default for eucJP (similar to Windows). 1836 */ 1837 name = "eucjis"; 1838 } 1839 #elif U_PLATFORM == U_PF_LINUX 1840 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1841 /* Linux underspecifies the "EUC" name. */ 1842 if (uprv_strcmp(locale, "korean") == 0) { 1843 name = "EUC-KR"; 1844 } 1845 else if (uprv_strcmp(locale, "japanese") == 0) { 1846 /* See comment below about eucJP */ 1847 name = "eucjis"; 1848 } 1849 } 1850 else if (uprv_strcmp(name, "eucjp") == 0) { 1851 /* 1852 ibm-1350 is the best match, but unavailable. 1853 ibm-954 is mostly a superset of ibm-1350. 1854 ibm-33722 is the default for eucJP (similar to Windows). 1855 */ 1856 name = "eucjis"; 1857 } 1858 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && 1859 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { 1860 /* 1861 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1862 */ 1863 name = "UTF-8"; 1864 } 1865 /* 1866 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1867 * it by falling back to 'US-ASCII' when NULL is returned from this 1868 * function. So, we don't have to worry about it here. 1869 */ 1870 #endif 1871 /* return NULL when "" is passed in */ 1872 if (*name == 0) { 1873 name = NULL; 1874 } 1875 return name; 1876 } 1877 1878 static const char* 1879 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1880 { 1881 char localeBuf[100]; 1882 const char *name = NULL; 1883 char *variant = NULL; 1884 1885 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1886 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1887 uprv_strncpy(localeBuf, localeName, localeCapacity); 1888 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1889 name = uprv_strncpy(buffer, name+1, buffCapacity); 1890 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1891 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { 1892 *variant = 0; 1893 } 1894 name = remapPlatformDependentCodepage(localeBuf, name); 1895 } 1896 return name; 1897 } 1898 #endif 1899 1900 static const char* 1901 int_getDefaultCodepage() 1902 { 1903 #if U_PLATFORM == U_PF_OS400 1904 uint32_t ccsid = 37; /* Default to ibm-37 */ 1905 static char codepage[64]; 1906 Qwc_JOBI0400_t jobinfo; 1907 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1908 1909 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1910 "* ", " ", &error); 1911 1912 if (error.Bytes_Available == 0) { 1913 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1914 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1915 } 1916 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1917 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1918 } 1919 /* else use the default */ 1920 } 1921 sprintf(codepage,"ibm-%d", ccsid); 1922 return codepage; 1923 1924 #elif U_PLATFORM == U_PF_OS390 1925 static char codepage[64]; 1926 1927 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1928 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1929 codepage[63] = 0; /* NULL terminate */ 1930 1931 return codepage; 1932 1933 #elif U_PLATFORM == U_PF_CLASSIC_MACOS 1934 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ 1935 1936 #elif U_PLATFORM_USES_ONLY_WIN32_API 1937 static char codepage[64]; 1938 sprintf(codepage, "windows-%d", GetACP()); 1939 return codepage; 1940 1941 #elif U_POSIX_LOCALE 1942 static char codesetName[100]; 1943 const char *localeName = NULL; 1944 const char *name = NULL; 1945 1946 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1947 uprv_memset(codesetName, 0, sizeof(codesetName)); 1948 #if U_HAVE_NL_LANGINFO_CODESET 1949 /* When available, check nl_langinfo first because it usually gives more 1950 useful names. It depends on LC_CTYPE. 1951 nl_langinfo may use the same buffer as setlocale. */ 1952 { 1953 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1954 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED 1955 /* 1956 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1957 * instead of ASCII. 1958 */ 1959 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1960 codeset = remapPlatformDependentCodepage(localeName, codeset); 1961 } else 1962 #endif 1963 { 1964 codeset = remapPlatformDependentCodepage(NULL, codeset); 1965 } 1966 1967 if (codeset != NULL) { 1968 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1969 codesetName[sizeof(codesetName)-1] = 0; 1970 return codesetName; 1971 } 1972 } 1973 #endif 1974 1975 /* Use setlocale in a nice way, and then check some environment variables. 1976 Maybe the application used setlocale already. 1977 */ 1978 uprv_memset(codesetName, 0, sizeof(codesetName)); 1979 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1980 if (name) { 1981 /* if we can find the codeset name from setlocale, return that. */ 1982 return name; 1983 } 1984 1985 if (*codesetName == 0) 1986 { 1987 /* Everything failed. Return US ASCII (ISO 646). */ 1988 (void)uprv_strcpy(codesetName, "US-ASCII"); 1989 } 1990 return codesetName; 1991 #else 1992 return "US-ASCII"; 1993 #endif 1994 } 1995 1996 1997 U_CAPI const char* U_EXPORT2 1998 uprv_getDefaultCodepage() 1999 { 2000 static char const *name = NULL; 2001 umtx_lock(NULL); 2002 if (name == NULL) { 2003 name = int_getDefaultCodepage(); 2004 } 2005 umtx_unlock(NULL); 2006 return name; 2007 } 2008 #endif /* !U_CHARSET_IS_UTF8 */ 2009 2010 2011 /* end of platform-specific implementation -------------- */ 2012 2013 /* version handling --------------------------------------------------------- */ 2014 2015 U_CAPI void U_EXPORT2 2016 u_versionFromString(UVersionInfo versionArray, const char *versionString) { 2017 char *end; 2018 uint16_t part=0; 2019 2020 if(versionArray==NULL) { 2021 return; 2022 } 2023 2024 if(versionString!=NULL) { 2025 for(;;) { 2026 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 2027 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 2028 break; 2029 } 2030 versionString=end+1; 2031 } 2032 } 2033 2034 while(part<U_MAX_VERSION_LENGTH) { 2035 versionArray[part++]=0; 2036 } 2037 } 2038 2039 U_CAPI void U_EXPORT2 2040 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 2041 if(versionArray!=NULL && versionString!=NULL) { 2042 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 2043 int32_t len = u_strlen(versionString); 2044 if(len>U_MAX_VERSION_STRING_LENGTH) { 2045 len = U_MAX_VERSION_STRING_LENGTH; 2046 } 2047 u_UCharsToChars(versionString, versionChars, len); 2048 versionChars[len]=0; 2049 u_versionFromString(versionArray, versionChars); 2050 } 2051 } 2052 2053 U_CAPI void U_EXPORT2 2054 u_versionToString(const UVersionInfo versionArray, char *versionString) { 2055 uint16_t count, part; 2056 uint8_t field; 2057 2058 if(versionString==NULL) { 2059 return; 2060 } 2061 2062 if(versionArray==NULL) { 2063 versionString[0]=0; 2064 return; 2065 } 2066 2067 /* count how many fields need to be written */ 2068 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2069 } 2070 2071 if(count <= 1) { 2072 count = 2; 2073 } 2074 2075 /* write the first part */ 2076 /* write the decimal field value */ 2077 field=versionArray[0]; 2078 if(field>=100) { 2079 *versionString++=(char)('0'+field/100); 2080 field%=100; 2081 } 2082 if(field>=10) { 2083 *versionString++=(char)('0'+field/10); 2084 field%=10; 2085 } 2086 *versionString++=(char)('0'+field); 2087 2088 /* write the following parts */ 2089 for(part=1; part<count; ++part) { 2090 /* write a dot first */ 2091 *versionString++=U_VERSION_DELIMITER; 2092 2093 /* write the decimal field value */ 2094 field=versionArray[part]; 2095 if(field>=100) { 2096 *versionString++=(char)('0'+field/100); 2097 field%=100; 2098 } 2099 if(field>=10) { 2100 *versionString++=(char)('0'+field/10); 2101 field%=10; 2102 } 2103 *versionString++=(char)('0'+field); 2104 } 2105 2106 /* NUL-terminate */ 2107 *versionString=0; 2108 } 2109 2110 U_CAPI void U_EXPORT2 2111 u_getVersion(UVersionInfo versionArray) { 2112 u_versionFromString(versionArray, U_ICU_VERSION); 2113 } 2114 2115 /** 2116 * icucfg.h dependent code 2117 */ 2118 2119 #if U_ENABLE_DYLOAD 2120 2121 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API 2122 2123 #if HAVE_DLFCN_H 2124 2125 #ifdef __MVS__ 2126 #ifndef __SUSV3 2127 #define __SUSV3 1 2128 #endif 2129 #endif 2130 #include <dlfcn.h> 2131 #endif 2132 2133 U_INTERNAL void * U_EXPORT2 2134 uprv_dl_open(const char *libName, UErrorCode *status) { 2135 void *ret = NULL; 2136 if(U_FAILURE(*status)) return ret; 2137 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2138 if(ret==NULL) { 2139 #ifdef U_TRACE_DYLOAD 2140 printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); 2141 #endif 2142 *status = U_MISSING_RESOURCE_ERROR; 2143 } 2144 return ret; 2145 } 2146 2147 U_INTERNAL void U_EXPORT2 2148 uprv_dl_close(void *lib, UErrorCode *status) { 2149 if(U_FAILURE(*status)) return; 2150 dlclose(lib); 2151 } 2152 2153 U_INTERNAL UVoidFunction* U_EXPORT2 2154 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2155 union { 2156 UVoidFunction *fp; 2157 void *vp; 2158 } uret; 2159 uret.fp = NULL; 2160 if(U_FAILURE(*status)) return uret.fp; 2161 uret.vp = dlsym(lib, sym); 2162 if(uret.vp == NULL) { 2163 #ifdef U_TRACE_DYLOAD 2164 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); 2165 #endif 2166 *status = U_MISSING_RESOURCE_ERROR; 2167 } 2168 return uret.fp; 2169 } 2170 2171 #else 2172 2173 /* null (nonexistent) implementation. */ 2174 2175 U_INTERNAL void * U_EXPORT2 2176 uprv_dl_open(const char *libName, UErrorCode *status) { 2177 if(U_FAILURE(*status)) return NULL; 2178 *status = U_UNSUPPORTED_ERROR; 2179 return NULL; 2180 } 2181 2182 U_INTERNAL void U_EXPORT2 2183 uprv_dl_close(void *lib, UErrorCode *status) { 2184 if(U_FAILURE(*status)) return; 2185 *status = U_UNSUPPORTED_ERROR; 2186 return; 2187 } 2188 2189 2190 U_INTERNAL UVoidFunction* U_EXPORT2 2191 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2192 if(U_SUCCESS(*status)) { 2193 *status = U_UNSUPPORTED_ERROR; 2194 } 2195 return (UVoidFunction*)NULL; 2196 } 2197 2198 2199 2200 #endif 2201 2202 #elif U_PLATFORM_USES_ONLY_WIN32_API 2203 2204 U_INTERNAL void * U_EXPORT2 2205 uprv_dl_open(const char *libName, UErrorCode *status) { 2206 HMODULE lib = NULL; 2207 2208 if(U_FAILURE(*status)) return NULL; 2209 2210 lib = LoadLibraryA(libName); 2211 2212 if(lib==NULL) { 2213 *status = U_MISSING_RESOURCE_ERROR; 2214 } 2215 2216 return (void*)lib; 2217 } 2218 2219 U_INTERNAL void U_EXPORT2 2220 uprv_dl_close(void *lib, UErrorCode *status) { 2221 HMODULE handle = (HMODULE)lib; 2222 if(U_FAILURE(*status)) return; 2223 2224 FreeLibrary(handle); 2225 2226 return; 2227 } 2228 2229 2230 U_INTERNAL UVoidFunction* U_EXPORT2 2231 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2232 HMODULE handle = (HMODULE)lib; 2233 UVoidFunction* addr = NULL; 2234 2235 if(U_FAILURE(*status) || lib==NULL) return NULL; 2236 2237 addr = (UVoidFunction*)GetProcAddress(handle, sym); 2238 2239 if(addr==NULL) { 2240 DWORD lastError = GetLastError(); 2241 if(lastError == ERROR_PROC_NOT_FOUND) { 2242 *status = U_MISSING_RESOURCE_ERROR; 2243 } else { 2244 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2245 } 2246 } 2247 2248 return addr; 2249 } 2250 2251 2252 #else 2253 2254 /* No dynamic loading set. */ 2255 2256 U_INTERNAL void * U_EXPORT2 2257 uprv_dl_open(const char *libName, UErrorCode *status) { 2258 if(U_FAILURE(*status)) return NULL; 2259 *status = U_UNSUPPORTED_ERROR; 2260 return NULL; 2261 } 2262 2263 U_INTERNAL void U_EXPORT2 2264 uprv_dl_close(void *lib, UErrorCode *status) { 2265 if(U_FAILURE(*status)) return; 2266 *status = U_UNSUPPORTED_ERROR; 2267 return; 2268 } 2269 2270 2271 U_INTERNAL UVoidFunction* U_EXPORT2 2272 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2273 if(U_SUCCESS(*status)) { 2274 *status = U_UNSUPPORTED_ERROR; 2275 } 2276 return (UVoidFunction*)NULL; 2277 } 2278 2279 #endif /* U_ENABLE_DYLOAD */ 2280 2281 /* 2282 * Hey, Emacs, please set the following: 2283 * 2284 * Local Variables: 2285 * indent-tabs-mode: nil 2286 * End: 2287 * 2288 */ 2289