1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1997-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10 * 11 * Date Name Description 12 * 04/14/97 aliu Creation. 13 * 04/24/97 aliu Added getDefaultDataDirectory() and 14 * getDefaultLocaleID(). 15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods 16 * for assumed case. Non-UNIX platforms must be 17 * special-cased. Rewrote numeric methods dealing 18 * with NaN and Infinity to be platform independent 19 * over all IEEE 754 platforms. 20 * 05/13/97 aliu Restored sign of timezone 21 * (semantics are hours West of GMT) 22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23 * nextDouble.. 24 * 07/22/98 stephen Added remainder, max, min, trunc 25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26 * 08/24/98 stephen Added longBitsFromDouble 27 * 09/08/98 stephen Minor changes for Mac Port 28 * 03/02/99 stephen Removed openFile(). Added AS400 support. 29 * Fixed EBCDIC tables 30 * 04/15/99 stephen Converted to C. 31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32 * 08/04/99 jeffrey R. Added OS/2 changes 33 * 11/15/99 helena Integrated S/390 IEEE support. 34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36 * 01/03/08 Steven L. Fake Time Support 37 ****************************************************************************** 38 */ 39 40 /* Define _XOPEN_SOURCE for Solaris and friends. */ 41 /* NetBSD needs it to be >= 4 */ 42 #if !defined(_XOPEN_SOURCE) 43 #if __STDC_VERSION__ >= 199901L 44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */ 45 #define _XOPEN_SOURCE 600 46 #else 47 #define _XOPEN_SOURCE 4 48 #endif 49 #endif 50 51 /* Make sure things like readlink and such functions work. 52 Poorly upgraded Solaris machines can't have this defined. 53 Cleanly installed Solaris can use this #define. 54 */ 55 #if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__)) 56 #define _XOPEN_SOURCE_EXTENDED 1 57 #endif 58 59 /* include ICU headers */ 60 #include "unicode/utypes.h" 61 #include "unicode/putil.h" 62 #include "unicode/ustring.h" 63 #include "putilimp.h" 64 #include "uassert.h" 65 #include "umutex.h" 66 #include "cmemory.h" 67 #include "cstring.h" 68 #include "locmap.h" 69 #include "ucln_cmn.h" 70 71 /* Include standard headers. */ 72 #include <stdio.h> 73 #include <stdlib.h> 74 #include <string.h> 75 #include <math.h> 76 #include <locale.h> 77 #include <float.h> 78 #include <time.h> 79 80 /* include system headers */ 81 #ifdef U_WINDOWS 82 # define WIN32_LEAN_AND_MEAN 83 # define VC_EXTRALEAN 84 # define NOUSER 85 # define NOSERVICE 86 # define NOIME 87 # define NOMCX 88 # include <windows.h> 89 # include "wintz.h" 90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__) 91 /* tzset isn't defined in strict ANSI on Cygwin. */ 92 # undef __STRICT_ANSI__ 93 #elif defined(OS400) 94 # include <float.h> 95 # include <qusec.h> /* error code structure */ 96 # include <qusrjobi.h> 97 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 98 # include <mih/testptr.h> /* For uprv_maximumPtr */ 99 #elif defined(XP_MAC) 100 # include <Files.h> 101 # include <IntlResources.h> 102 # include <Script.h> 103 # include <Folders.h> 104 # include <MacTypes.h> 105 # include <TextUtils.h> 106 # define ICU_NO_USER_DATA_OVERRIDE 1 107 #elif defined(OS390) 108 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD) 110 #include <limits.h> 111 #include <unistd.h> 112 #elif defined(U_QNX) 113 #include <sys/neutrino.h> 114 #elif defined(U_SOLARIS) 115 # ifndef _XPG4_2 116 # define _XPG4_2 117 # endif 118 #endif 119 120 121 #if defined(U_DARWIN) 122 #include <TargetConditionals.h> 123 #endif 124 125 #ifndef U_WINDOWS 126 #include <sys/time.h> 127 #endif 128 129 /* 130 * Only include langinfo.h if we have a way to get the codeset. If we later 131 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 132 * 133 */ 134 135 #if U_HAVE_NL_LANGINFO_CODESET 136 #include <langinfo.h> 137 #endif 138 139 /** 140 * Simple things (presence of functions, etc) should just go in configure.in and be added to 141 * icucfg.h via autoheader. 142 */ 143 #if defined(HAVE_CONFIG_H) 144 #include "icucfg.h" 145 #endif 146 147 /* Define the extension for data files, again... */ 148 #define DATA_TYPE "dat" 149 150 /* Leave this copyright notice here! */ 151 static const char copyright[] = U_COPYRIGHT_STRING; 152 153 /* floating point implementations ------------------------------------------- */ 154 155 /* We return QNAN rather than SNAN*/ 156 #define SIGN 0x80000000U 157 158 /* Make it easy to define certain types of constants */ 159 typedef union { 160 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ 161 double d64; 162 } BitPatternConversion; 163 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 164 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 165 166 /*--------------------------------------------------------------------------- 167 Platform utilities 168 Our general strategy is to assume we're on a POSIX platform. Platforms which 169 are non-POSIX must declare themselves so. The default POSIX implementation 170 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 171 functions). 172 ---------------------------------------------------------------------------*/ 173 174 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400) 175 # undef U_POSIX_LOCALE 176 #else 177 # define U_POSIX_LOCALE 1 178 #endif 179 180 /* 181 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 182 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 183 */ 184 #if !IEEE_754 185 static char* 186 u_topNBytesOfDouble(double* d, int n) 187 { 188 #if U_IS_BIG_ENDIAN 189 return (char*)d; 190 #else 191 return (char*)(d + 1) - n; 192 #endif 193 } 194 195 static char* 196 u_bottomNBytesOfDouble(double* d, int n) 197 { 198 #if U_IS_BIG_ENDIAN 199 return (char*)(d + 1) - n; 200 #else 201 return (char*)d; 202 #endif 203 } 204 #endif /* !IEEE_754 */ 205 206 #if IEEE_754 207 static UBool 208 u_signBit(double d) { 209 uint8_t hiByte; 210 #if U_IS_BIG_ENDIAN 211 hiByte = *(uint8_t *)&d; 212 #else 213 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 214 #endif 215 return (hiByte & 0x80) != 0; 216 } 217 #endif 218 219 220 221 #if defined (U_DEBUG_FAKETIME) 222 /* Override the clock to test things without having to move the system clock. 223 * Assumes POSIX gettimeofday() will function 224 */ 225 UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 226 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 227 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 228 static UMTX fakeClockMutex = NULL; 229 230 static UDate getUTCtime_real() { 231 struct timeval posixTime; 232 gettimeofday(&posixTime, NULL); 233 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 234 } 235 236 static UDate getUTCtime_fake() { 237 umtx_lock(&fakeClockMutex); 238 if(!fakeClock_set) { 239 UDate real = getUTCtime_real(); 240 const char *fake_start = getenv("U_FAKETIME_START"); 241 if((fake_start!=NULL) && (fake_start[0]!=0)) { 242 sscanf(fake_start,"%lf",&fakeClock_t0); 243 fakeClock_dt = fakeClock_t0 - real; 244 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 245 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 246 fakeClock_t0, fake_start, fakeClock_dt, real); 247 } else { 248 fakeClock_dt = 0; 249 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 250 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 251 } 252 fakeClock_set = TRUE; 253 } 254 umtx_unlock(&fakeClockMutex); 255 256 return getUTCtime_real() + fakeClock_dt; 257 } 258 #endif 259 260 #if defined(U_WINDOWS) 261 typedef union { 262 int64_t int64; 263 FILETIME fileTime; 264 } FileTimeConversion; /* This is like a ULARGE_INTEGER */ 265 266 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 267 #define EPOCH_BIAS INT64_C(116444736000000000) 268 #define HECTONANOSECOND_PER_MILLISECOND 10000 269 270 #endif 271 272 /*--------------------------------------------------------------------------- 273 Universal Implementations 274 These are designed to work on all platforms. Try these, and if they 275 don't work on your platform, then special case your platform with new 276 implementations. 277 ---------------------------------------------------------------------------*/ 278 279 U_CAPI UDate U_EXPORT2 280 uprv_getUTCtime() 281 { 282 #if defined(U_DEBUG_FAKETIME) 283 return getUTCtime_fake(); /* Hook for overriding the clock */ 284 #else 285 return uprv_getRawUTCtime(); 286 #endif 287 } 288 289 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 290 U_CAPI UDate U_EXPORT2 291 uprv_getRawUTCtime() 292 { 293 #if defined(XP_MAC) 294 time_t t, t1, t2; 295 struct tm tmrec; 296 297 uprv_memset( &tmrec, 0, sizeof(tmrec) ); 298 tmrec.tm_year = 70; 299 tmrec.tm_mon = 0; 300 tmrec.tm_mday = 1; 301 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ 302 303 time(&t); 304 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 305 t2 = mktime(&tmrec); /* seconds of current GMT*/ 306 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ 307 #elif defined(U_WINDOWS) 308 309 FileTimeConversion winTime; 310 GetSystemTimeAsFileTime(&winTime.fileTime); 311 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 312 #else 313 314 #if defined(HAVE_GETTIMEOFDAY) 315 struct timeval posixTime; 316 gettimeofday(&posixTime, NULL); 317 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 318 #else 319 time_t epochtime; 320 time(&epochtime); 321 return (UDate)epochtime * U_MILLIS_PER_SECOND; 322 #endif 323 324 #endif 325 } 326 327 /*----------------------------------------------------------------------------- 328 IEEE 754 329 These methods detect and return NaN and infinity values for doubles 330 conforming to IEEE 754. Platforms which support this standard include X86, 331 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 332 If this doesn't work on your platform, you have non-IEEE floating-point, and 333 will need to code your own versions. A naive implementation is to return 0.0 334 for getNaN and getInfinity, and false for isNaN and isInfinite. 335 ---------------------------------------------------------------------------*/ 336 337 U_CAPI UBool U_EXPORT2 338 uprv_isNaN(double number) 339 { 340 #if IEEE_754 341 BitPatternConversion convertedNumber; 342 convertedNumber.d64 = number; 343 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 344 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 345 346 #elif defined(OS390) 347 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 348 sizeof(uint32_t)); 349 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 350 sizeof(uint32_t)); 351 352 return ((highBits & 0x7F080000L) == 0x7F080000L) && 353 (lowBits == 0x00000000L); 354 355 #else 356 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 357 /* you'll need to replace this default implementation with what's correct*/ 358 /* for your platform.*/ 359 return number != number; 360 #endif 361 } 362 363 U_CAPI UBool U_EXPORT2 364 uprv_isInfinite(double number) 365 { 366 #if IEEE_754 367 BitPatternConversion convertedNumber; 368 convertedNumber.d64 = number; 369 /* Infinity is exactly 0x7FF0000000000000U. */ 370 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 371 #elif defined(OS390) 372 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 373 sizeof(uint32_t)); 374 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 375 sizeof(uint32_t)); 376 377 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 378 379 #else 380 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 381 /* value, you'll need to replace this default implementation with what's*/ 382 /* correct for your platform.*/ 383 return number == (2.0 * number); 384 #endif 385 } 386 387 U_CAPI UBool U_EXPORT2 388 uprv_isPositiveInfinity(double number) 389 { 390 #if IEEE_754 || defined(OS390) 391 return (UBool)(number > 0 && uprv_isInfinite(number)); 392 #else 393 return uprv_isInfinite(number); 394 #endif 395 } 396 397 U_CAPI UBool U_EXPORT2 398 uprv_isNegativeInfinity(double number) 399 { 400 #if IEEE_754 || defined(OS390) 401 return (UBool)(number < 0 && uprv_isInfinite(number)); 402 403 #else 404 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 405 sizeof(uint32_t)); 406 return((highBits & SIGN) && uprv_isInfinite(number)); 407 408 #endif 409 } 410 411 U_CAPI double U_EXPORT2 412 uprv_getNaN() 413 { 414 #if IEEE_754 || defined(OS390) 415 return gNan.d64; 416 #else 417 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 418 /* you'll need to replace this default implementation with what's correct*/ 419 /* for your platform.*/ 420 return 0.0; 421 #endif 422 } 423 424 U_CAPI double U_EXPORT2 425 uprv_getInfinity() 426 { 427 #if IEEE_754 || defined(OS390) 428 return gInf.d64; 429 #else 430 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 431 /* value, you'll need to replace this default implementation with what's*/ 432 /* correct for your platform.*/ 433 return 0.0; 434 #endif 435 } 436 437 U_CAPI double U_EXPORT2 438 uprv_floor(double x) 439 { 440 return floor(x); 441 } 442 443 U_CAPI double U_EXPORT2 444 uprv_ceil(double x) 445 { 446 return ceil(x); 447 } 448 449 U_CAPI double U_EXPORT2 450 uprv_round(double x) 451 { 452 return uprv_floor(x + 0.5); 453 } 454 455 U_CAPI double U_EXPORT2 456 uprv_fabs(double x) 457 { 458 return fabs(x); 459 } 460 461 U_CAPI double U_EXPORT2 462 uprv_modf(double x, double* y) 463 { 464 return modf(x, y); 465 } 466 467 U_CAPI double U_EXPORT2 468 uprv_fmod(double x, double y) 469 { 470 return fmod(x, y); 471 } 472 473 U_CAPI double U_EXPORT2 474 uprv_pow(double x, double y) 475 { 476 /* This is declared as "double pow(double x, double y)" */ 477 return pow(x, y); 478 } 479 480 U_CAPI double U_EXPORT2 481 uprv_pow10(int32_t x) 482 { 483 return pow(10.0, (double)x); 484 } 485 486 U_CAPI double U_EXPORT2 487 uprv_fmax(double x, double y) 488 { 489 #if IEEE_754 490 /* first handle NaN*/ 491 if(uprv_isNaN(x) || uprv_isNaN(y)) 492 return uprv_getNaN(); 493 494 /* check for -0 and 0*/ 495 if(x == 0.0 && y == 0.0 && u_signBit(x)) 496 return y; 497 498 #endif 499 500 /* this should work for all flt point w/o NaN and Inf special cases */ 501 return (x > y ? x : y); 502 } 503 504 U_CAPI double U_EXPORT2 505 uprv_fmin(double x, double y) 506 { 507 #if IEEE_754 508 /* first handle NaN*/ 509 if(uprv_isNaN(x) || uprv_isNaN(y)) 510 return uprv_getNaN(); 511 512 /* check for -0 and 0*/ 513 if(x == 0.0 && y == 0.0 && u_signBit(y)) 514 return y; 515 516 #endif 517 518 /* this should work for all flt point w/o NaN and Inf special cases */ 519 return (x > y ? y : x); 520 } 521 522 /** 523 * Truncates the given double. 524 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 525 * This is different than calling floor() or ceil(): 526 * floor(3.3) = 3, floor(-3.3) = -4 527 * ceil(3.3) = 4, ceil(-3.3) = -3 528 */ 529 U_CAPI double U_EXPORT2 530 uprv_trunc(double d) 531 { 532 #if IEEE_754 533 /* handle error cases*/ 534 if(uprv_isNaN(d)) 535 return uprv_getNaN(); 536 if(uprv_isInfinite(d)) 537 return uprv_getInfinity(); 538 539 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ 540 return ceil(d); 541 else 542 return floor(d); 543 544 #else 545 return d >= 0 ? floor(d) : ceil(d); 546 547 #endif 548 } 549 550 /** 551 * Return the largest positive number that can be represented by an integer 552 * type of arbitrary bit length. 553 */ 554 U_CAPI double U_EXPORT2 555 uprv_maxMantissa(void) 556 { 557 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 558 } 559 560 U_CAPI double U_EXPORT2 561 uprv_log(double d) 562 { 563 return log(d); 564 } 565 566 U_CAPI void * U_EXPORT2 567 uprv_maximumPtr(void * base) 568 { 569 #if defined(OS400) 570 /* 571 * With the provided function we should never be out of range of a given segment 572 * (a traditional/typical segment that is). Our segments have 5 bytes for the 573 * id and 3 bytes for the offset. The key is that the casting takes care of 574 * only retrieving the offset portion minus x1000. Hence, the smallest offset 575 * seen in a program is x001000 and when casted to an int would be 0. 576 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 577 * 578 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 579 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 580 * This function determines the activation based on the pointer that is passed in and 581 * calculates the appropriate maximum available size for 582 * each pointer type (TERASPACE and non-TERASPACE) 583 * 584 * Unlike other operating systems, the pointer model isn't determined at 585 * compile time on i5/OS. 586 */ 587 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 588 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 589 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 590 } 591 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 592 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 593 594 #else 595 return U_MAX_PTR(base); 596 #endif 597 } 598 599 /*--------------------------------------------------------------------------- 600 Platform-specific Implementations 601 Try these, and if they don't work on your platform, then special case your 602 platform with new implementations. 603 ---------------------------------------------------------------------------*/ 604 605 /* Generic time zone layer -------------------------------------------------- */ 606 607 /* Time zone utilities */ 608 U_CAPI void U_EXPORT2 609 uprv_tzset() 610 { 611 #ifdef U_TZSET 612 U_TZSET(); 613 #else 614 /* no initialization*/ 615 #endif 616 } 617 618 U_CAPI int32_t U_EXPORT2 619 uprv_timezone() 620 { 621 #ifdef U_TIMEZONE 622 return U_TIMEZONE; 623 #else 624 time_t t, t1, t2; 625 struct tm tmrec; 626 #ifndef U_IOS 627 UBool dst_checked; 628 #endif 629 int32_t tdiff = 0; 630 631 time(&t); 632 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 633 #ifndef U_IOS 634 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 635 #endif 636 t1 = mktime(&tmrec); /* local time in seconds*/ 637 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 638 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 639 tdiff = t2 - t1; 640 #ifndef U_IOS 641 /* On iOS the calculated tdiff is correct so and doesn't need this dst 642 shift applied. */ 643 /* imitate NT behaviour, which returns same timezone offset to GMT for 644 winter and summer*/ 645 if (dst_checked) 646 tdiff += 3600; 647 #endif 648 return tdiff; 649 #endif 650 } 651 652 /* Note that U_TZNAME does *not* have to be tzname, but if it is, 653 some platforms need to have it declared here. */ 654 655 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN)) 656 /* RS6000 and others reject char **tzname. */ 657 extern U_IMPORT char *U_TZNAME[]; 658 #endif 659 660 #if !UCONFIG_NO_FILE_IO && ((defined(U_DARWIN) && !defined(U_IOS)) || defined(U_LINUX) || defined(U_BSD)) 661 /* These platforms are likely to use Olson timezone IDs. */ 662 #define CHECK_LOCALTIME_LINK 1 663 #if defined(U_DARWIN) 664 #include <tzfile.h> 665 #define TZZONEINFO (TZDIR "/") 666 #else 667 #define TZDEFAULT "/etc/localtime" 668 #define TZZONEINFO "/usr/share/zoneinfo/" 669 #endif 670 #if U_HAVE_DIRENT_H 671 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 672 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 673 symlinked to /etc/localtime, which makes searchForTZFile return 674 'localtime' when it's the first match. */ 675 #define TZFILE_SKIP2 "localtime" 676 #define SEARCH_TZFILE 677 #include <dirent.h> /* Needed to search through system timezone files */ 678 #endif 679 static char gTimeZoneBuffer[PATH_MAX]; 680 static char *gTimeZoneBufferPtr = NULL; 681 #endif 682 683 #ifndef U_WINDOWS 684 #define isNonDigit(ch) (ch < '0' || '9' < ch) 685 static UBool isValidOlsonID(const char *id) { 686 int32_t idx = 0; 687 688 /* Determine if this is something like Iceland (Olson ID) 689 or AST4ADT (non-Olson ID) */ 690 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 691 idx++; 692 } 693 694 /* If we went through the whole string, then it might be okay. 695 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 696 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 697 The rest of the time it could be an Olson ID. George */ 698 return (UBool)(id[idx] == 0 699 || uprv_strcmp(id, "PST8PDT") == 0 700 || uprv_strcmp(id, "MST7MDT") == 0 701 || uprv_strcmp(id, "CST6CDT") == 0 702 || uprv_strcmp(id, "EST5EDT") == 0); 703 } 704 705 /* On some Unix-like OS, 'posix' subdirectory in 706 /usr/share/zoneinfo replicates the top-level contents. 'right' 707 subdirectory has the same set of files, but individual files 708 are different from those in the top-level directory or 'posix' 709 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 710 has files for UTC. 711 When the first match for /etc/localtime is in either of them 712 (usually in posix because 'right' has different file contents), 713 or TZ environment variable points to one of them, createTimeZone 714 fails because, say, 'posix/America/New_York' is not an Olson 715 timezone id ('America/New_York' is). So, we have to skip 716 'posix/' and 'right/' at the beginning. */ 717 static void skipZoneIDPrefix(const char** id) { 718 if (uprv_strncmp(*id, "posix/", 6) == 0 719 || uprv_strncmp(*id, "right/", 6) == 0) 720 { 721 *id += 6; 722 } 723 } 724 #endif 725 726 #if defined(U_TZNAME) && !defined(U_WINDOWS) 727 728 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 729 typedef struct OffsetZoneMapping { 730 int32_t offsetSeconds; 731 int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/ 732 const char *stdID; 733 const char *dstID; 734 const char *olsonID; 735 } OffsetZoneMapping; 736 737 /* 738 This list tries to disambiguate a set of abbreviated timezone IDs and offsets 739 and maps it to an Olson ID. 740 Before adding anything to this list, take a look at 741 icu/source/tools/tzcode/tz.alias 742 Sometimes no daylight savings (0) is important to define due to aliases. 743 This list can be tested with icu/source/test/compat/tzone.pl 744 More values could be added to daylightType to increase precision. 745 */ 746 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 747 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 748 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 749 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 750 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 751 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 752 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 753 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 754 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 755 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 756 {-34200, 2, "CST", "CST", "Australia/South"}, 757 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 758 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 759 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 760 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 761 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 762 {-28800, 2, "WST", "WST", "Australia/West"}, 763 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 764 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 765 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 766 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 767 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 768 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 769 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 770 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 771 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 772 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 773 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 774 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 775 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 776 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 777 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 778 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 779 {0, 1, "GMT", "IST", "Europe/Dublin"}, 780 {0, 1, "GMT", "BST", "Europe/London"}, 781 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 782 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 783 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 784 {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, 785 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 786 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 787 {10800, 1, "WGT", "WGST", "America/Godthab"}, 788 {10800, 2, "BRT", "BRST", "Brazil/East"}, 789 {12600, 1, "NST", "NDT", "America/St_Johns"}, 790 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 791 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 792 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 793 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 794 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 795 {18000, 1, "CST", "CDT", "America/Havana"}, 796 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 797 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 798 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 799 {21600, 0, "CST", "CDT", "America/Guatemala"}, 800 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 801 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 802 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 803 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 804 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 805 {36000, 1, "HAST", "HADT", "US/Aleutian"} 806 }; 807 808 /*#define DEBUG_TZNAME*/ 809 810 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 811 { 812 int32_t idx; 813 #ifdef DEBUG_TZNAME 814 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 815 #endif 816 for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++) 817 { 818 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 819 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 820 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 821 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 822 { 823 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 824 } 825 } 826 return NULL; 827 } 828 #endif 829 830 #ifdef SEARCH_TZFILE 831 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 832 #define MAX_READ_SIZE 512 833 834 typedef struct DefaultTZInfo { 835 char* defaultTZBuffer; 836 int64_t defaultTZFileSize; 837 FILE* defaultTZFilePtr; 838 UBool defaultTZstatus; 839 int32_t defaultTZPosition; 840 } DefaultTZInfo; 841 842 /* 843 * This method compares the two files given to see if they are a match. 844 * It is currently use to compare two TZ files. 845 */ 846 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 847 FILE* file; 848 int64_t sizeFile; 849 int64_t sizeFileLeft; 850 int32_t sizeFileRead; 851 int32_t sizeFileToRead; 852 char bufferFile[MAX_READ_SIZE]; 853 UBool result = TRUE; 854 855 if (tzInfo->defaultTZFilePtr == NULL) { 856 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 857 } 858 file = fopen(TZFileName, "r"); 859 860 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 861 862 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 863 /* First check that the file size are equal. */ 864 if (tzInfo->defaultTZFileSize == 0) { 865 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 866 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 867 } 868 fseek(file, 0, SEEK_END); 869 sizeFile = ftell(file); 870 sizeFileLeft = sizeFile; 871 872 if (sizeFile != tzInfo->defaultTZFileSize) { 873 result = FALSE; 874 } else { 875 /* Store the data from the files in seperate buffers and 876 * compare each byte to determine equality. 877 */ 878 if (tzInfo->defaultTZBuffer == NULL) { 879 rewind(tzInfo->defaultTZFilePtr); 880 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 881 fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 882 } 883 rewind(file); 884 while(sizeFileLeft > 0) { 885 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 886 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; 887 888 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 889 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 890 result = FALSE; 891 break; 892 } 893 sizeFileLeft -= sizeFileRead; 894 tzInfo->defaultTZPosition += sizeFileRead; 895 } 896 } 897 } else { 898 result = FALSE; 899 } 900 901 if (file != NULL) { 902 fclose(file); 903 } 904 905 return result; 906 } 907 /* 908 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 909 */ 910 /* dirent also lists two entries: "." and ".." that we can safely ignore. */ 911 #define SKIP1 "." 912 #define SKIP2 ".." 913 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 914 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 915 char curpath[MAX_PATH_SIZE]; 916 DIR* dirp = opendir(path); 917 DIR* subDirp = NULL; 918 struct dirent* dirEntry = NULL; 919 920 char* result = NULL; 921 if (dirp == NULL) { 922 return result; 923 } 924 925 /* Save the current path */ 926 uprv_memset(curpath, 0, MAX_PATH_SIZE); 927 uprv_strcpy(curpath, path); 928 929 /* Check each entry in the directory. */ 930 while((dirEntry = readdir(dirp)) != NULL) { 931 const char* dirName = dirEntry->d_name; 932 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 933 /* Create a newpath with the new entry to test each entry in the directory. */ 934 char newpath[MAX_PATH_SIZE]; 935 uprv_strcpy(newpath, curpath); 936 uprv_strcat(newpath, dirName); 937 938 if ((subDirp = opendir(newpath)) != NULL) { 939 /* If this new path is a directory, make a recursive call with the newpath. */ 940 closedir(subDirp); 941 uprv_strcat(newpath, "/"); 942 result = searchForTZFile(newpath, tzInfo); 943 /* 944 Have to get out here. Otherwise, we'd keep looking 945 and return the first match in the top-level directory 946 if there's a match in the top-level. If not, this function 947 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 948 It worked without this in most cases because we have a fallback of calling 949 localtime_r to figure out the default timezone. 950 */ 951 if (result != NULL) 952 break; 953 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 954 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 955 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1; 956 skipZoneIDPrefix(&zoneid); 957 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid); 958 result = SEARCH_TZFILE_RESULT; 959 /* Get out after the first one found. */ 960 break; 961 } 962 } 963 } 964 } 965 closedir(dirp); 966 return result; 967 } 968 #endif 969 U_CAPI const char* U_EXPORT2 970 uprv_tzname(int n) 971 { 972 const char *tzid = NULL; 973 #ifdef U_WINDOWS 974 tzid = uprv_detectWindowsTimeZone(); 975 976 if (tzid != NULL) { 977 return tzid; 978 } 979 #else 980 981 /*#if defined(U_DARWIN) 982 int ret; 983 984 tzid = getenv("TZFILE"); 985 if (tzid != NULL) { 986 return tzid; 987 } 988 #endif*/ 989 990 /* This code can be temporarily disabled to test tzname resolution later on. */ 991 #ifndef DEBUG_TZNAME 992 tzid = getenv("TZ"); 993 if (tzid != NULL && isValidOlsonID(tzid)) 994 { 995 /* This might be a good Olson ID. */ 996 skipZoneIDPrefix(&tzid); 997 return tzid; 998 } 999 /* else U_TZNAME will give a better result. */ 1000 #endif 1001 1002 #if defined(CHECK_LOCALTIME_LINK) 1003 /* Caller must handle threading issues */ 1004 if (gTimeZoneBufferPtr == NULL) { 1005 /* 1006 This is a trick to look at the name of the link to get the Olson ID 1007 because the tzfile contents is underspecified. 1008 This isn't guaranteed to work because it may not be a symlink. 1009 */ 1010 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 1011 if (0 < ret) { 1012 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1013 gTimeZoneBuffer[ret] = 0; 1014 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1015 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1016 { 1017 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1018 } 1019 } else { 1020 #if defined(SEARCH_TZFILE) 1021 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1022 if (tzInfo != NULL) { 1023 tzInfo->defaultTZBuffer = NULL; 1024 tzInfo->defaultTZFileSize = 0; 1025 tzInfo->defaultTZFilePtr = NULL; 1026 tzInfo->defaultTZstatus = FALSE; 1027 tzInfo->defaultTZPosition = 0; 1028 1029 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1030 1031 /* Free previously allocated memory */ 1032 if (tzInfo->defaultTZBuffer != NULL) { 1033 uprv_free(tzInfo->defaultTZBuffer); 1034 } 1035 if (tzInfo->defaultTZFilePtr != NULL) { 1036 fclose(tzInfo->defaultTZFilePtr); 1037 } 1038 uprv_free(tzInfo); 1039 } 1040 1041 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1042 return gTimeZoneBufferPtr; 1043 } 1044 #endif 1045 } 1046 } 1047 else { 1048 return gTimeZoneBufferPtr; 1049 } 1050 #endif 1051 #endif 1052 1053 #ifdef U_TZNAME 1054 #ifdef U_WINDOWS 1055 /* The return value is free'd in timezone.cpp on Windows because 1056 * the other code path returns a pointer to a heap location. */ 1057 return uprv_strdup(U_TZNAME[n]); 1058 #else 1059 /* 1060 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1061 So we remap the abbreviation to an olson ID. 1062 1063 Since Windows exposes a little more timezone information, 1064 we normally don't use this code on Windows because 1065 uprv_detectWindowsTimeZone should have already given the correct answer. 1066 */ 1067 { 1068 struct tm juneSol, decemberSol; 1069 int daylightType; 1070 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1071 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1072 1073 /* This probing will tell us when daylight savings occurs. */ 1074 localtime_r(&juneSolstice, &juneSol); 1075 localtime_r(&decemberSolstice, &decemberSol); 1076 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0); 1077 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1078 if (tzid != NULL) { 1079 return tzid; 1080 } 1081 } 1082 return U_TZNAME[n]; 1083 #endif 1084 #else 1085 return ""; 1086 #endif 1087 } 1088 1089 /* Get and set the ICU data directory --------------------------------------- */ 1090 1091 static char *gDataDirectory = NULL; 1092 #if U_POSIX_LOCALE 1093 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1094 #endif 1095 1096 static UBool U_CALLCONV putil_cleanup(void) 1097 { 1098 if (gDataDirectory && *gDataDirectory) { 1099 uprv_free(gDataDirectory); 1100 } 1101 gDataDirectory = NULL; 1102 #if U_POSIX_LOCALE 1103 if (gCorrectedPOSIXLocale) { 1104 uprv_free(gCorrectedPOSIXLocale); 1105 gCorrectedPOSIXLocale = NULL; 1106 } 1107 #endif 1108 return TRUE; 1109 } 1110 1111 /* 1112 * Set the data directory. 1113 * Make a copy of the passed string, and set the global data dir to point to it. 1114 * TODO: see bug #2849, regarding thread safety. 1115 */ 1116 U_CAPI void U_EXPORT2 1117 u_setDataDirectory(const char *directory) { 1118 char *newDataDir; 1119 int32_t length; 1120 1121 if(directory==NULL || *directory==0) { 1122 /* A small optimization to prevent the malloc and copy when the 1123 shared library is used, and this is a way to make sure that NULL 1124 is never returned. 1125 */ 1126 newDataDir = (char *)""; 1127 } 1128 else { 1129 length=(int32_t)uprv_strlen(directory); 1130 newDataDir = (char *)uprv_malloc(length + 2); 1131 /* Exit out if newDataDir could not be created. */ 1132 if (newDataDir == NULL) { 1133 return; 1134 } 1135 uprv_strcpy(newDataDir, directory); 1136 1137 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1138 { 1139 char *p; 1140 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1141 *p = U_FILE_SEP_CHAR; 1142 } 1143 } 1144 #endif 1145 } 1146 1147 umtx_lock(NULL); 1148 if (gDataDirectory && *gDataDirectory) { 1149 uprv_free(gDataDirectory); 1150 } 1151 gDataDirectory = newDataDir; 1152 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1153 umtx_unlock(NULL); 1154 } 1155 1156 U_CAPI UBool U_EXPORT2 1157 uprv_pathIsAbsolute(const char *path) 1158 { 1159 if(!path || !*path) { 1160 return FALSE; 1161 } 1162 1163 if(*path == U_FILE_SEP_CHAR) { 1164 return TRUE; 1165 } 1166 1167 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1168 if(*path == U_FILE_ALT_SEP_CHAR) { 1169 return TRUE; 1170 } 1171 #endif 1172 1173 #if defined(U_WINDOWS) 1174 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1175 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1176 path[1] == ':' ) { 1177 return TRUE; 1178 } 1179 #endif 1180 1181 return FALSE; 1182 } 1183 1184 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1185 until some client wrapper makefiles are updated */ 1186 #if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR 1187 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1188 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1189 # endif 1190 #endif 1191 1192 U_CAPI const char * U_EXPORT2 1193 u_getDataDirectory(void) { 1194 const char *path = NULL; 1195 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1196 char datadir_path_buffer[PATH_MAX]; 1197 #endif 1198 1199 /* if we have the directory, then return it immediately */ 1200 UMTX_CHECK(NULL, gDataDirectory, path); 1201 1202 if(path) { 1203 return path; 1204 } 1205 1206 /* 1207 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1208 override ICU's data with the ICU_DATA environment variable. This prevents 1209 problems where multiple custom copies of ICU's specific version of data 1210 are installed on a system. Either the application must define the data 1211 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1212 ICU, set the data with udata_setCommonData or trust that all of the 1213 required data is contained in ICU's data library that contains 1214 the entry point defined by U_ICUDATA_ENTRY_POINT. 1215 1216 There may also be some platforms where environment variables 1217 are not allowed. 1218 */ 1219 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1220 /* First try to get the environment variable */ 1221 path=getenv("ICU_DATA"); 1222 # endif 1223 1224 /* ICU_DATA_DIR may be set as a compile option. 1225 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1226 * and is used only when data is built in archive mode eliminating the need 1227 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1228 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1229 * set their own path. 1230 */ 1231 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1232 if(path==NULL || *path==0) { 1233 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1234 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1235 # endif 1236 # ifdef ICU_DATA_DIR 1237 path=ICU_DATA_DIR; 1238 # else 1239 path=U_ICU_DATA_DEFAULT_DIR; 1240 # endif 1241 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1242 if (prefix != NULL) { 1243 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1244 path=datadir_path_buffer; 1245 } 1246 # endif 1247 } 1248 #endif 1249 1250 if(path==NULL) { 1251 /* It looks really bad, set it to something. */ 1252 path = ""; 1253 } 1254 1255 u_setDataDirectory(path); 1256 return gDataDirectory; 1257 } 1258 1259 1260 1261 1262 1263 /* Macintosh-specific locale information ------------------------------------ */ 1264 #ifdef XP_MAC 1265 1266 typedef struct { 1267 int32_t script; 1268 int32_t region; 1269 int32_t lang; 1270 int32_t date_region; 1271 const char* posixID; 1272 } mac_lc_rec; 1273 1274 /* Todo: This will be updated with a newer version from www.unicode.org web 1275 page when it's available.*/ 1276 #define MAC_LC_MAGIC_NUMBER -5 1277 #define MAC_LC_INIT_NUMBER -9 1278 1279 static const mac_lc_rec mac_lc_recs[] = { 1280 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", 1281 /* United States*/ 1282 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", 1283 /* France*/ 1284 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", 1285 /* Great Britain*/ 1286 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", 1287 /* Germany*/ 1288 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", 1289 /* Italy*/ 1290 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", 1291 /* Metherlands*/ 1292 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", 1293 /* French for Belgium or Lxembourg*/ 1294 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", 1295 /* Sweden*/ 1296 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", 1297 /* Denmark*/ 1298 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", 1299 /* Portugal*/ 1300 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", 1301 /* French Canada*/ 1302 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", 1303 /* Israel*/ 1304 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP", 1305 /* Japan*/ 1306 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU", 1307 /* Australia*/ 1308 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE", 1309 /* the Arabic world (?)*/ 1310 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI", 1311 /* Finland*/ 1312 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH", 1313 /* French for Switzerland*/ 1314 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH", 1315 /* German for Switzerland*/ 1316 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR", 1317 /* Greece*/ 1318 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS", 1319 /* Iceland ===*/ 1320 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/ 1321 /* Malta ===*/ 1322 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/ 1323 /* Cyprus ===*/ 1324 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR", 1325 /* Turkey ===*/ 1326 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU", 1327 /* Croatian system for Yugoslavia*/ 1328 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/ 1329 /* Hindi system for India*/ 1330 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/ 1331 /* Pakistan*/ 1332 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT", 1333 /* Lithuania*/ 1334 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL", 1335 /* Poland*/ 1336 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU", 1337 /* Hungary*/ 1338 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE", 1339 /* Estonia*/ 1340 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV", 1341 /* Latvia*/ 1342 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/ 1343 /* Lapland [Ask Rich for the data. HS]*/ 1344 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ 1345 /* Faeroe Islands*/ 1346 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", 1347 /* Iran*/ 1348 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", 1349 /* Russia*/ 1350 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", 1351 /* Ireland*/ 1352 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", 1353 /* Korea*/ 1354 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", 1355 /* People's Republic of China*/ 1356 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", 1357 /* Taiwan*/ 1358 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", 1359 /* Thailand*/ 1360 1361 /* fallback is en_US*/ 1362 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1363 MAC_LC_MAGIC_NUMBER, "en_US" 1364 }; 1365 1366 #endif 1367 1368 #if U_POSIX_LOCALE 1369 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1370 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1371 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1372 */ 1373 static const char *uprv_getPOSIXIDForCategory(int category) 1374 { 1375 const char* posixID = NULL; 1376 if (category == LC_MESSAGES || category == LC_CTYPE) { 1377 /* 1378 * On Solaris two different calls to setlocale can result in 1379 * different values. Only get this value once. 1380 * 1381 * We must check this first because an application can set this. 1382 * 1383 * LC_ALL can't be used because it's platform dependent. The LANG 1384 * environment variable seems to affect LC_CTYPE variable by default. 1385 * Here is what setlocale(LC_ALL, NULL) can return. 1386 * HPUX can return 'C C C C C C C' 1387 * Solaris can return /en_US/C/C/C/C/C on the second try. 1388 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 1389 * 1390 * The default codepage detection also needs to use LC_CTYPE. 1391 * 1392 * Do not call setlocale(LC_*, "")! Using an empty string instead 1393 * of NULL, will modify the libc behavior. 1394 */ 1395 posixID = setlocale(category, NULL); 1396 if ((posixID == 0) 1397 || (uprv_strcmp("C", posixID) == 0) 1398 || (uprv_strcmp("POSIX", posixID) == 0)) 1399 { 1400 /* Maybe we got some garbage. Try something more reasonable */ 1401 posixID = getenv("LC_ALL"); 1402 if (posixID == 0) { 1403 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1404 if (posixID == 0) { 1405 posixID = getenv("LANG"); 1406 } 1407 } 1408 } 1409 } 1410 if ((posixID==0) 1411 || (uprv_strcmp("C", posixID) == 0) 1412 || (uprv_strcmp("POSIX", posixID) == 0)) 1413 { 1414 /* Nothing worked. Give it a nice POSIX default value. */ 1415 posixID = "en_US_POSIX"; 1416 } 1417 return posixID; 1418 } 1419 1420 /* Return just the POSIX id for the default locale, whatever happens to be in 1421 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1422 */ 1423 static const char *uprv_getPOSIXIDForDefaultLocale(void) 1424 { 1425 static const char* posixID = NULL; 1426 if (posixID == 0) { 1427 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1428 } 1429 return posixID; 1430 } 1431 1432 /* Return just the POSIX id for the default codepage, whatever happens to be in 1433 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1434 */ 1435 static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1436 { 1437 static const char* posixID = NULL; 1438 if (posixID == 0) { 1439 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1440 } 1441 return posixID; 1442 } 1443 #endif 1444 1445 /* NOTE: The caller should handle thread safety */ 1446 U_CAPI const char* U_EXPORT2 1447 uprv_getDefaultLocaleID() 1448 { 1449 #if U_POSIX_LOCALE 1450 /* 1451 Note that: (a '!' means the ID is improper somehow) 1452 LC_ALL ----> default_loc codepage 1453 -------------------------------------------------------- 1454 ab.CD ab CD 1455 ab@CD ab__CD - 1456 ab (at) CD.EF ab__CD EF 1457 1458 ab_CD.EF@GH ab_CD_GH EF 1459 1460 Some 'improper' ways to do the same as above: 1461 ! ab_CD (at) GH.EF ab_CD_GH EF 1462 ! ab_CD.EF (at) GH.IJ ab_CD_GH EF 1463 ! ab_CD (at) ZZ.EF@GH.IJ ab_CD_GH EF 1464 1465 _CD@GH _CD_GH - 1466 _CD.EF@GH _CD_GH EF 1467 1468 The variant cannot have dots in it. 1469 The 'rightmost' variant (@xxx) wins. 1470 The leftmost codepage (.xxx) wins. 1471 */ 1472 char *correctedPOSIXLocale = 0; 1473 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1474 const char *p; 1475 const char *q; 1476 int32_t len; 1477 1478 /* Format: (no spaces) 1479 ll [ _CC ] [ . MM ] [ @ VV] 1480 1481 l = lang, C = ctry, M = charmap, V = variant 1482 */ 1483 1484 if (gCorrectedPOSIXLocale != NULL) { 1485 return gCorrectedPOSIXLocale; 1486 } 1487 1488 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1489 /* assume new locale can't be larger than old one? */ 1490 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1491 /* Exit on memory allocation error. */ 1492 if (correctedPOSIXLocale == NULL) { 1493 return NULL; 1494 } 1495 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1496 correctedPOSIXLocale[p-posixID] = 0; 1497 1498 /* do not copy after the @ */ 1499 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1500 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1501 } 1502 } 1503 1504 /* Note that we scan the *uncorrected* ID. */ 1505 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1506 if (correctedPOSIXLocale == NULL) { 1507 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1508 /* Exit on memory allocation error. */ 1509 if (correctedPOSIXLocale == NULL) { 1510 return NULL; 1511 } 1512 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1513 correctedPOSIXLocale[p-posixID] = 0; 1514 } 1515 p++; 1516 1517 /* Take care of any special cases here.. */ 1518 if (!uprv_strcmp(p, "nynorsk")) { 1519 p = "NY"; 1520 /* Don't worry about no__NY. In practice, it won't appear. */ 1521 } 1522 1523 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1524 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1525 } 1526 else { 1527 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1528 } 1529 1530 if ((q = uprv_strchr(p, '.')) != NULL) { 1531 /* How big will the resulting string be? */ 1532 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1533 uprv_strncat(correctedPOSIXLocale, p, q-p); 1534 correctedPOSIXLocale[len] = 0; 1535 } 1536 else { 1537 /* Anything following the @ sign */ 1538 uprv_strcat(correctedPOSIXLocale, p); 1539 } 1540 1541 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1542 * How about 'russian' -> 'ru'? 1543 * Many of the other locales using ISO codes will be handled by the 1544 * canonicalization functions in uloc_getDefault. 1545 */ 1546 } 1547 1548 /* Was a correction made? */ 1549 if (correctedPOSIXLocale != NULL) { 1550 posixID = correctedPOSIXLocale; 1551 } 1552 else { 1553 /* copy it, just in case the original pointer goes away. See j2395 */ 1554 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1555 /* Exit on memory allocation error. */ 1556 if (correctedPOSIXLocale == NULL) { 1557 return NULL; 1558 } 1559 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1560 } 1561 1562 if (gCorrectedPOSIXLocale == NULL) { 1563 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1564 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1565 correctedPOSIXLocale = NULL; 1566 } 1567 1568 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */ 1569 uprv_free(correctedPOSIXLocale); 1570 } 1571 1572 return posixID; 1573 1574 #elif defined(U_WINDOWS) 1575 UErrorCode status = U_ZERO_ERROR; 1576 LCID id = GetThreadLocale(); 1577 const char* locID = uprv_convertToPosix(id, &status); 1578 1579 if (U_FAILURE(status)) { 1580 locID = "en_US"; 1581 } 1582 return locID; 1583 1584 #elif defined(XP_MAC) 1585 int32_t script = MAC_LC_INIT_NUMBER; 1586 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ 1587 int32_t region = MAC_LC_INIT_NUMBER; 1588 /* = GetScriptManagerVariable(smRegionCode);*/ 1589 int32_t lang = MAC_LC_INIT_NUMBER; 1590 /* = GetScriptManagerVariable(smScriptLang);*/ 1591 int32_t date_region = MAC_LC_INIT_NUMBER; 1592 const char* posixID = 0; 1593 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); 1594 int32_t i; 1595 Intl1Hndl ih; 1596 1597 ih = (Intl1Hndl) GetIntlResource(1); 1598 if (ih) 1599 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; 1600 1601 for (i = 0; i < count; i++) { 1602 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) 1603 || (mac_lc_recs[i].script == script)) 1604 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) 1605 || (mac_lc_recs[i].region == region)) 1606 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) 1607 || (mac_lc_recs[i].lang == lang)) 1608 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) 1609 || (mac_lc_recs[i].date_region == date_region)) 1610 ) 1611 { 1612 posixID = mac_lc_recs[i].posixID; 1613 break; 1614 } 1615 } 1616 1617 return posixID; 1618 1619 #elif defined(OS400) 1620 /* locales are process scoped and are by definition thread safe */ 1621 static char correctedLocale[64]; 1622 const char *localeID = getenv("LC_ALL"); 1623 char *p; 1624 1625 if (localeID == NULL) 1626 localeID = getenv("LANG"); 1627 if (localeID == NULL) 1628 localeID = setlocale(LC_ALL, NULL); 1629 /* Make sure we have something... */ 1630 if (localeID == NULL) 1631 return "en_US_POSIX"; 1632 1633 /* Extract the locale name from the path. */ 1634 if((p = uprv_strrchr(localeID, '/')) != NULL) 1635 { 1636 /* Increment p to start of locale name. */ 1637 p++; 1638 localeID = p; 1639 } 1640 1641 /* Copy to work location. */ 1642 uprv_strcpy(correctedLocale, localeID); 1643 1644 /* Strip off the '.locale' extension. */ 1645 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1646 *p = 0; 1647 } 1648 1649 /* Upper case the locale name. */ 1650 T_CString_toUpperCase(correctedLocale); 1651 1652 /* See if we are using the POSIX locale. Any of the 1653 * following are equivalent and use the same QLGPGCMA 1654 * (POSIX) locale. 1655 * QLGPGCMA2 means UCS2 1656 * QLGPGCMA_4 means UTF-32 1657 * QLGPGCMA_8 means UTF-8 1658 */ 1659 if ((uprv_strcmp("C", correctedLocale) == 0) || 1660 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1661 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1662 { 1663 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1664 } 1665 else 1666 { 1667 int16_t LocaleLen; 1668 1669 /* Lower case the lang portion. */ 1670 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1671 { 1672 *p = uprv_tolower(*p); 1673 } 1674 1675 /* Adjust for Euro. After '_E' add 'URO'. */ 1676 LocaleLen = uprv_strlen(correctedLocale); 1677 if (correctedLocale[LocaleLen - 2] == '_' && 1678 correctedLocale[LocaleLen - 1] == 'E') 1679 { 1680 uprv_strcat(correctedLocale, "URO"); 1681 } 1682 1683 /* If using Lotus-based locale then convert to 1684 * equivalent non Lotus. 1685 */ 1686 else if (correctedLocale[LocaleLen - 2] == '_' && 1687 correctedLocale[LocaleLen - 1] == 'L') 1688 { 1689 correctedLocale[LocaleLen - 2] = 0; 1690 } 1691 1692 /* There are separate simplified and traditional 1693 * locales called zh_HK_S and zh_HK_T. 1694 */ 1695 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1696 { 1697 uprv_strcpy(correctedLocale, "zh_HK"); 1698 } 1699 1700 /* A special zh_CN_GBK locale... 1701 */ 1702 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1703 { 1704 uprv_strcpy(correctedLocale, "zh_CN"); 1705 } 1706 1707 } 1708 1709 return correctedLocale; 1710 #endif 1711 1712 } 1713 1714 #if !U_CHARSET_IS_UTF8 1715 #if U_POSIX_LOCALE 1716 /* 1717 Due to various platform differences, one platform may specify a charset, 1718 when they really mean a different charset. Remap the names so that they are 1719 compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1720 here. Before adding anything to this function, please consider adding unique 1721 names to the ICU alias table in the data directory. 1722 */ 1723 static const char* 1724 remapPlatformDependentCodepage(const char *locale, const char *name) { 1725 if (locale != NULL && *locale == 0) { 1726 /* Make sure that an empty locale is handled the same way. */ 1727 locale = NULL; 1728 } 1729 if (name == NULL) { 1730 return NULL; 1731 } 1732 #if defined(U_AIX) 1733 if (uprv_strcmp(name, "IBM-943") == 0) { 1734 /* Use the ASCII compatible ibm-943 */ 1735 name = "Shift-JIS"; 1736 } 1737 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1738 /* Use the windows-1252 that contains the Euro */ 1739 name = "IBM-5348"; 1740 } 1741 #elif defined(U_SOLARIS) 1742 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1743 /* Solaris underspecifies the "EUC" name. */ 1744 if (uprv_strcmp(locale, "zh_CN") == 0) { 1745 name = "EUC-CN"; 1746 } 1747 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1748 name = "EUC-TW"; 1749 } 1750 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1751 name = "EUC-KR"; 1752 } 1753 } 1754 else if (uprv_strcmp(name, "eucJP") == 0) { 1755 /* 1756 ibm-954 is the best match. 1757 ibm-33722 is the default for eucJP (similar to Windows). 1758 */ 1759 name = "eucjis"; 1760 } 1761 else if (uprv_strcmp(name, "646") == 0) { 1762 /* 1763 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1764 * ISO-8859-1 instead of US-ASCII(646). 1765 */ 1766 name = "ISO-8859-1"; 1767 } 1768 #elif defined(U_DARWIN) 1769 if (locale == NULL && *name == 0) { 1770 /* 1771 No locale was specified, and an empty name was passed in. 1772 This usually indicates that nl_langinfo didn't return valid information. 1773 Mac OS X uses UTF-8 by default (especially the locale data and console). 1774 */ 1775 name = "UTF-8"; 1776 } 1777 else if (uprv_strcmp(name, "CP949") == 0) { 1778 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1779 name = "EUC-KR"; 1780 } 1781 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1782 /* 1783 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1784 */ 1785 name = "UTF-8"; 1786 } 1787 #elif defined(U_BSD) 1788 if (uprv_strcmp(name, "CP949") == 0) { 1789 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1790 name = "EUC-KR"; 1791 } 1792 #elif defined(U_HPUX) 1793 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1794 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1795 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1796 name = "hkbig5"; 1797 } 1798 else if (uprv_strcmp(name, "eucJP") == 0) { 1799 /* 1800 ibm-1350 is the best match, but unavailable. 1801 ibm-954 is mostly a superset of ibm-1350. 1802 ibm-33722 is the default for eucJP (similar to Windows). 1803 */ 1804 name = "eucjis"; 1805 } 1806 #elif defined(U_LINUX) 1807 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1808 /* Linux underspecifies the "EUC" name. */ 1809 if (uprv_strcmp(locale, "korean") == 0) { 1810 name = "EUC-KR"; 1811 } 1812 else if (uprv_strcmp(locale, "japanese") == 0) { 1813 /* See comment below about eucJP */ 1814 name = "eucjis"; 1815 } 1816 } 1817 else if (uprv_strcmp(name, "eucjp") == 0) { 1818 /* 1819 ibm-1350 is the best match, but unavailable. 1820 ibm-954 is mostly a superset of ibm-1350. 1821 ibm-33722 is the default for eucJP (similar to Windows). 1822 */ 1823 name = "eucjis"; 1824 } 1825 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && 1826 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { 1827 /* 1828 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1829 */ 1830 name = "UTF-8"; 1831 } 1832 /* 1833 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1834 * it by falling back to 'US-ASCII' when NULL is returned from this 1835 * function. So, we don't have to worry about it here. 1836 */ 1837 #endif 1838 /* return NULL when "" is passed in */ 1839 if (*name == 0) { 1840 name = NULL; 1841 } 1842 return name; 1843 } 1844 1845 static const char* 1846 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1847 { 1848 char localeBuf[100]; 1849 const char *name = NULL; 1850 char *variant = NULL; 1851 1852 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1853 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1854 uprv_strncpy(localeBuf, localeName, localeCapacity); 1855 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1856 name = uprv_strncpy(buffer, name+1, buffCapacity); 1857 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1858 if ((variant = (uprv_strchr(name, '@'))) != NULL) { 1859 *variant = 0; 1860 } 1861 name = remapPlatformDependentCodepage(localeBuf, name); 1862 } 1863 return name; 1864 } 1865 #endif 1866 1867 static const char* 1868 int_getDefaultCodepage() 1869 { 1870 #if defined(OS400) 1871 uint32_t ccsid = 37; /* Default to ibm-37 */ 1872 static char codepage[64]; 1873 Qwc_JOBI0400_t jobinfo; 1874 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1875 1876 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1877 "* ", " ", &error); 1878 1879 if (error.Bytes_Available == 0) { 1880 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1881 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1882 } 1883 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1884 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1885 } 1886 /* else use the default */ 1887 } 1888 sprintf(codepage,"ibm-%d", ccsid); 1889 return codepage; 1890 1891 #elif defined(OS390) 1892 static char codepage[64]; 1893 1894 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1895 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1896 codepage[63] = 0; /* NULL terminate */ 1897 1898 return codepage; 1899 1900 #elif defined(XP_MAC) 1901 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ 1902 1903 #elif defined(U_WINDOWS) 1904 static char codepage[64]; 1905 sprintf(codepage, "windows-%d", GetACP()); 1906 return codepage; 1907 1908 #elif U_POSIX_LOCALE 1909 static char codesetName[100]; 1910 const char *localeName = NULL; 1911 const char *name = NULL; 1912 1913 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1914 uprv_memset(codesetName, 0, sizeof(codesetName)); 1915 #if U_HAVE_NL_LANGINFO_CODESET 1916 /* When available, check nl_langinfo first because it usually gives more 1917 useful names. It depends on LC_CTYPE. 1918 nl_langinfo may use the same buffer as setlocale. */ 1919 { 1920 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1921 #if defined(U_DARWIN) || defined(U_LINUX) 1922 /* 1923 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1924 * instead of ASCII. 1925 */ 1926 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1927 codeset = remapPlatformDependentCodepage(localeName, codeset); 1928 } else 1929 #endif 1930 { 1931 codeset = remapPlatformDependentCodepage(NULL, codeset); 1932 } 1933 1934 if (codeset != NULL) { 1935 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1936 codesetName[sizeof(codesetName)-1] = 0; 1937 return codesetName; 1938 } 1939 } 1940 #endif 1941 1942 /* Use setlocale in a nice way, and then check some environment variables. 1943 Maybe the application used setlocale already. 1944 */ 1945 uprv_memset(codesetName, 0, sizeof(codesetName)); 1946 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1947 if (name) { 1948 /* if we can find the codeset name from setlocale, return that. */ 1949 return name; 1950 } 1951 1952 if (*codesetName == 0) 1953 { 1954 /* Everything failed. Return US ASCII (ISO 646). */ 1955 (void)uprv_strcpy(codesetName, "US-ASCII"); 1956 } 1957 return codesetName; 1958 #else 1959 return "US-ASCII"; 1960 #endif 1961 } 1962 1963 1964 U_CAPI const char* U_EXPORT2 1965 uprv_getDefaultCodepage() 1966 { 1967 static char const *name = NULL; 1968 umtx_lock(NULL); 1969 if (name == NULL) { 1970 name = int_getDefaultCodepage(); 1971 } 1972 umtx_unlock(NULL); 1973 return name; 1974 } 1975 #endif /* !U_CHARSET_IS_UTF8 */ 1976 1977 1978 /* end of platform-specific implementation -------------- */ 1979 1980 /* version handling --------------------------------------------------------- */ 1981 1982 U_CAPI void U_EXPORT2 1983 u_versionFromString(UVersionInfo versionArray, const char *versionString) { 1984 char *end; 1985 uint16_t part=0; 1986 1987 if(versionArray==NULL) { 1988 return; 1989 } 1990 1991 if(versionString!=NULL) { 1992 for(;;) { 1993 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 1994 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 1995 break; 1996 } 1997 versionString=end+1; 1998 } 1999 } 2000 2001 while(part<U_MAX_VERSION_LENGTH) { 2002 versionArray[part++]=0; 2003 } 2004 } 2005 2006 U_CAPI void U_EXPORT2 2007 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 2008 if(versionArray!=NULL && versionString!=NULL) { 2009 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 2010 int32_t len = u_strlen(versionString); 2011 if(len>U_MAX_VERSION_STRING_LENGTH) { 2012 len = U_MAX_VERSION_STRING_LENGTH; 2013 } 2014 u_UCharsToChars(versionString, versionChars, len); 2015 versionChars[len]=0; 2016 u_versionFromString(versionArray, versionChars); 2017 } 2018 } 2019 2020 U_CAPI void U_EXPORT2 2021 u_versionToString(UVersionInfo versionArray, char *versionString) { 2022 uint16_t count, part; 2023 uint8_t field; 2024 2025 if(versionString==NULL) { 2026 return; 2027 } 2028 2029 if(versionArray==NULL) { 2030 versionString[0]=0; 2031 return; 2032 } 2033 2034 /* count how many fields need to be written */ 2035 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2036 } 2037 2038 if(count <= 1) { 2039 count = 2; 2040 } 2041 2042 /* write the first part */ 2043 /* write the decimal field value */ 2044 field=versionArray[0]; 2045 if(field>=100) { 2046 *versionString++=(char)('0'+field/100); 2047 field%=100; 2048 } 2049 if(field>=10) { 2050 *versionString++=(char)('0'+field/10); 2051 field%=10; 2052 } 2053 *versionString++=(char)('0'+field); 2054 2055 /* write the following parts */ 2056 for(part=1; part<count; ++part) { 2057 /* write a dot first */ 2058 *versionString++=U_VERSION_DELIMITER; 2059 2060 /* write the decimal field value */ 2061 field=versionArray[part]; 2062 if(field>=100) { 2063 *versionString++=(char)('0'+field/100); 2064 field%=100; 2065 } 2066 if(field>=10) { 2067 *versionString++=(char)('0'+field/10); 2068 field%=10; 2069 } 2070 *versionString++=(char)('0'+field); 2071 } 2072 2073 /* NUL-terminate */ 2074 *versionString=0; 2075 } 2076 2077 U_CAPI void U_EXPORT2 2078 u_getVersion(UVersionInfo versionArray) { 2079 u_versionFromString(versionArray, U_ICU_VERSION); 2080 } 2081 2082 /** 2083 * icucfg.h dependent code 2084 */ 2085 2086 #if U_ENABLE_DYLOAD 2087 2088 #if defined(U_CHECK_DYLOAD) 2089 2090 #if defined(HAVE_DLOPEN) 2091 2092 #ifdef HAVE_DLFCN_H 2093 #ifdef __MVS__ 2094 #ifndef __SUSV3 2095 #define __SUSV3 1 2096 #endif 2097 #endif 2098 #include <dlfcn.h> 2099 #endif 2100 2101 U_INTERNAL void * U_EXPORT2 2102 uprv_dl_open(const char *libName, UErrorCode *status) { 2103 void *ret = NULL; 2104 if(U_FAILURE(*status)) return ret; 2105 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2106 if(ret==NULL) { 2107 #ifndef U_TRACE_DYLOAD 2108 perror("dlopen"); 2109 #endif 2110 *status = U_MISSING_RESOURCE_ERROR; 2111 } 2112 return ret; 2113 } 2114 2115 U_INTERNAL void U_EXPORT2 2116 uprv_dl_close(void *lib, UErrorCode *status) { 2117 if(U_FAILURE(*status)) return; 2118 dlclose(lib); 2119 } 2120 2121 U_INTERNAL void* U_EXPORT2 2122 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2123 void *ret = NULL; 2124 if(U_FAILURE(*status)) return ret; 2125 ret = dlsym(lib, sym); 2126 if(ret == NULL) { 2127 *status = U_MISSING_RESOURCE_ERROR; 2128 } 2129 return ret; 2130 } 2131 2132 #else 2133 2134 /* null (nonexistent) implementation. */ 2135 2136 U_INTERNAL void * U_EXPORT2 2137 uprv_dl_open(const char *libName, UErrorCode *status) { 2138 if(U_FAILURE(*status)) return NULL; 2139 *status = U_UNSUPPORTED_ERROR; 2140 return NULL; 2141 } 2142 2143 U_INTERNAL void U_EXPORT2 2144 uprv_dl_close(void *lib, UErrorCode *status) { 2145 if(U_FAILURE(*status)) return; 2146 *status = U_UNSUPPORTED_ERROR; 2147 return; 2148 } 2149 2150 2151 U_INTERNAL void* U_EXPORT2 2152 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2153 if(U_FAILURE(*status)) return NULL; 2154 *status = U_UNSUPPORTED_ERROR; 2155 return NULL; 2156 } 2157 2158 2159 2160 #endif 2161 2162 #elif defined U_WINDOWS 2163 2164 U_INTERNAL void * U_EXPORT2 2165 uprv_dl_open(const char *libName, UErrorCode *status) { 2166 HMODULE lib = NULL; 2167 2168 if(U_FAILURE(*status)) return NULL; 2169 2170 lib = LoadLibraryA(libName); 2171 2172 if(lib==NULL) { 2173 *status = U_MISSING_RESOURCE_ERROR; 2174 } 2175 2176 return (void*)lib; 2177 } 2178 2179 U_INTERNAL void U_EXPORT2 2180 uprv_dl_close(void *lib, UErrorCode *status) { 2181 HMODULE handle = (HMODULE)lib; 2182 if(U_FAILURE(*status)) return; 2183 2184 FreeLibrary(handle); 2185 2186 return; 2187 } 2188 2189 2190 U_INTERNAL void* U_EXPORT2 2191 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2192 HMODULE handle = (HMODULE)lib; 2193 void * addr = NULL; 2194 2195 if(U_FAILURE(*status) || lib==NULL) return NULL; 2196 2197 addr = GetProcAddress(handle, sym); 2198 2199 if(addr==NULL) { 2200 DWORD lastError = GetLastError(); 2201 if(lastError == ERROR_PROC_NOT_FOUND) { 2202 *status = U_MISSING_RESOURCE_ERROR; 2203 } else { 2204 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2205 } 2206 } 2207 2208 return addr; 2209 } 2210 2211 2212 #else 2213 2214 /* No dynamic loading set. */ 2215 2216 U_INTERNAL void * U_EXPORT2 2217 uprv_dl_open(const char *libName, UErrorCode *status) { 2218 if(U_FAILURE(*status)) return NULL; 2219 *status = U_UNSUPPORTED_ERROR; 2220 return NULL; 2221 } 2222 2223 U_INTERNAL void U_EXPORT2 2224 uprv_dl_close(void *lib, UErrorCode *status) { 2225 if(U_FAILURE(*status)) return; 2226 *status = U_UNSUPPORTED_ERROR; 2227 return; 2228 } 2229 2230 2231 U_INTERNAL void* U_EXPORT2 2232 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2233 if(U_FAILURE(*status)) return NULL; 2234 *status = U_UNSUPPORTED_ERROR; 2235 return NULL; 2236 } 2237 2238 2239 #endif 2240 2241 #endif /* U_ENABLE_DYLOAD */ 2242 2243 /* 2244 * Hey, Emacs, please set the following: 2245 * 2246 * Local Variables: 2247 * indent-tabs-mode: nil 2248 * End: 2249 * 2250 */ 2251