1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1997-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10 * 11 * Date Name Description 12 * 04/14/97 aliu Creation. 13 * 04/24/97 aliu Added getDefaultDataDirectory() and 14 * getDefaultLocaleID(). 15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods 16 * for assumed case. Non-UNIX platforms must be 17 * special-cased. Rewrote numeric methods dealing 18 * with NaN and Infinity to be platform independent 19 * over all IEEE 754 platforms. 20 * 05/13/97 aliu Restored sign of timezone 21 * (semantics are hours West of GMT) 22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23 * nextDouble.. 24 * 07/22/98 stephen Added remainder, max, min, trunc 25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26 * 08/24/98 stephen Added longBitsFromDouble 27 * 09/08/98 stephen Minor changes for Mac Port 28 * 03/02/99 stephen Removed openFile(). Added AS400 support. 29 * Fixed EBCDIC tables 30 * 04/15/99 stephen Converted to C. 31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32 * 08/04/99 jeffrey R. Added OS/2 changes 33 * 11/15/99 helena Integrated S/390 IEEE support. 34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36 * 01/03/08 Steven L. Fake Time Support 37 ****************************************************************************** 38 */ 39 40 /* Define _XOPEN_SOURCE for Solaris and friends. */ 41 /* NetBSD needs it to be >= 4 */ 42 #if !defined(_XOPEN_SOURCE) 43 #if __STDC_VERSION__ >= 199901L 44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */ 45 #define _XOPEN_SOURCE 600 46 #else 47 #define _XOPEN_SOURCE 4 48 #endif 49 #endif 50 51 /* Make sure things like readlink and such functions work. 52 Poorly upgraded Solaris machines can't have this defined. 53 Cleanly installed Solaris can use this #define. 54 */ 55 #if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) 56 #define _XOPEN_SOURCE_EXTENDED 1 57 #endif 58 59 /* include ICU headers */ 60 #include "unicode/utypes.h" 61 #include "unicode/putil.h" 62 #include "unicode/ustring.h" 63 #include "putilimp.h" 64 #include "uassert.h" 65 #include "umutex.h" 66 #include "cmemory.h" 67 #include "cstring.h" 68 #include "locmap.h" 69 #include "ucln_cmn.h" 70 71 /* Include standard headers. */ 72 #include <stdio.h> 73 #include <stdlib.h> 74 #include <string.h> 75 #include <math.h> 76 #include <locale.h> 77 #include <float.h> 78 #include <time.h> 79 80 /* include system headers */ 81 #ifdef U_WINDOWS 82 # define WIN32_LEAN_AND_MEAN 83 # define VC_EXTRALEAN 84 # define NOUSER 85 # define NOSERVICE 86 # define NOIME 87 # define NOMCX 88 # include <windows.h> 89 # include "wintz.h" 90 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__) 91 /* tzset isn't defined in strict ANSI on Cygwin. */ 92 # undef __STRICT_ANSI__ 93 #elif defined(OS400) 94 # include <float.h> 95 # include <qusec.h> /* error code structure */ 96 # include <qusrjobi.h> 97 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 98 # include <mih/testptr.h> /* For uprv_maximumPtr */ 99 #elif defined(XP_MAC) 100 # include <Files.h> 101 # include <IntlResources.h> 102 # include <Script.h> 103 # include <Folders.h> 104 # include <MacTypes.h> 105 # include <TextUtils.h> 106 # define ICU_NO_USER_DATA_OVERRIDE 1 107 #elif defined(OS390) 108 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 109 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD) 110 #include <limits.h> 111 #include <unistd.h> 112 #elif defined(U_QNX) 113 #include <sys/neutrino.h> 114 #endif 115 116 #ifndef U_WINDOWS 117 #include <sys/time.h> 118 #endif 119 120 /* 121 * Only include langinfo.h if we have a way to get the codeset. If we later 122 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 123 * 124 */ 125 126 #if U_HAVE_NL_LANGINFO_CODESET 127 #include <langinfo.h> 128 #endif 129 130 /* Define the extension for data files, again... */ 131 #define DATA_TYPE "dat" 132 133 /* Leave this copyright notice here! */ 134 static const char copyright[] = U_COPYRIGHT_STRING; 135 136 /* floating point implementations ------------------------------------------- */ 137 138 /* We return QNAN rather than SNAN*/ 139 #define SIGN 0x80000000U 140 141 /* Make it easy to define certain types of constants */ 142 typedef union { 143 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ 144 double d64; 145 } BitPatternConversion; 146 static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 147 static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 148 149 /*--------------------------------------------------------------------------- 150 Platform utilities 151 Our general strategy is to assume we're on a POSIX platform. Platforms which 152 are non-POSIX must declare themselves so. The default POSIX implementation 153 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 154 functions). 155 ---------------------------------------------------------------------------*/ 156 157 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400) 158 # undef U_POSIX_LOCALE 159 #else 160 # define U_POSIX_LOCALE 1 161 #endif 162 163 /* 164 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 165 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 166 */ 167 #if !IEEE_754 168 static char* 169 u_topNBytesOfDouble(double* d, int n) 170 { 171 #if U_IS_BIG_ENDIAN 172 return (char*)d; 173 #else 174 return (char*)(d + 1) - n; 175 #endif 176 } 177 #endif 178 179 static char* 180 u_bottomNBytesOfDouble(double* d, int n) 181 { 182 #if U_IS_BIG_ENDIAN 183 return (char*)(d + 1) - n; 184 #else 185 return (char*)d; 186 #endif 187 } 188 189 #if defined (U_DEBUG_FAKETIME) 190 /* Override the clock to test things without having to move the system clock. 191 * Assumes POSIX gettimeofday() will function 192 */ 193 UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 194 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 195 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 196 static UMTX fakeClockMutex = NULL; 197 198 static UDate getUTCtime_real() { 199 struct timeval posixTime; 200 gettimeofday(&posixTime, NULL); 201 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 202 } 203 204 static UDate getUTCtime_fake() { 205 umtx_lock(&fakeClockMutex); 206 if(!fakeClock_set) { 207 UDate real = getUTCtime_real(); 208 const char *fake_start = getenv("U_FAKETIME_START"); 209 if(fake_start!=NULL) { 210 sscanf(fake_start,"%lf",&fakeClock_t0); 211 } 212 fakeClock_dt = fakeClock_t0 - real; 213 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 214 "U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 215 fakeClock_t0, fake_start, fakeClock_dt, real); 216 fakeClock_set = TRUE; 217 } 218 umtx_unlock(&fakeClockMutex); 219 220 return getUTCtime_real() + fakeClock_dt; 221 } 222 #endif 223 224 #if defined(U_WINDOWS) 225 typedef union { 226 int64_t int64; 227 FILETIME fileTime; 228 } FileTimeConversion; /* This is like a ULARGE_INTEGER */ 229 230 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 231 #define EPOCH_BIAS INT64_C(116444736000000000) 232 #define HECTONANOSECOND_PER_MILLISECOND 10000 233 234 #endif 235 236 /*--------------------------------------------------------------------------- 237 Universal Implementations 238 These are designed to work on all platforms. Try these, and if they 239 don't work on your platform, then special case your platform with new 240 implementations. 241 ---------------------------------------------------------------------------*/ 242 243 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 244 U_CAPI UDate U_EXPORT2 245 uprv_getUTCtime() 246 { 247 #if defined(U_DEBUG_FAKETIME) 248 return getUTCtime_fake(); /* Hook for overriding the clock */ 249 #elif defined(XP_MAC) 250 time_t t, t1, t2; 251 struct tm tmrec; 252 253 uprv_memset( &tmrec, 0, sizeof(tmrec) ); 254 tmrec.tm_year = 70; 255 tmrec.tm_mon = 0; 256 tmrec.tm_mday = 1; 257 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ 258 259 time(&t); 260 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 261 t2 = mktime(&tmrec); /* seconds of current GMT*/ 262 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ 263 #elif defined(U_WINDOWS) 264 265 FileTimeConversion winTime; 266 GetSystemTimeAsFileTime(&winTime.fileTime); 267 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 268 #else 269 /* 270 struct timeval posixTime; 271 gettimeofday(&posixTime, NULL); 272 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 273 */ 274 time_t epochtime; 275 time(&epochtime); 276 return (UDate)epochtime * U_MILLIS_PER_SECOND; 277 #endif 278 } 279 280 /*----------------------------------------------------------------------------- 281 IEEE 754 282 These methods detect and return NaN and infinity values for doubles 283 conforming to IEEE 754. Platforms which support this standard include X86, 284 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 285 If this doesn't work on your platform, you have non-IEEE floating-point, and 286 will need to code your own versions. A naive implementation is to return 0.0 287 for getNaN and getInfinity, and false for isNaN and isInfinite. 288 ---------------------------------------------------------------------------*/ 289 290 U_CAPI UBool U_EXPORT2 291 uprv_isNaN(double number) 292 { 293 #if IEEE_754 294 BitPatternConversion convertedNumber; 295 convertedNumber.d64 = number; 296 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 297 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 298 299 #elif defined(OS390) 300 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 301 sizeof(uint32_t)); 302 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 303 sizeof(uint32_t)); 304 305 return ((highBits & 0x7F080000L) == 0x7F080000L) && 306 (lowBits == 0x00000000L); 307 308 #else 309 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 310 /* you'll need to replace this default implementation with what's correct*/ 311 /* for your platform.*/ 312 return number != number; 313 #endif 314 } 315 316 U_CAPI UBool U_EXPORT2 317 uprv_isInfinite(double number) 318 { 319 #if IEEE_754 320 BitPatternConversion convertedNumber; 321 convertedNumber.d64 = number; 322 /* Infinity is exactly 0x7FF0000000000000U. */ 323 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 324 #elif defined(OS390) 325 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 326 sizeof(uint32_t)); 327 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 328 sizeof(uint32_t)); 329 330 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 331 332 #else 333 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 334 /* value, you'll need to replace this default implementation with what's*/ 335 /* correct for your platform.*/ 336 return number == (2.0 * number); 337 #endif 338 } 339 340 U_CAPI UBool U_EXPORT2 341 uprv_isPositiveInfinity(double number) 342 { 343 #if IEEE_754 || defined(OS390) 344 return (UBool)(number > 0 && uprv_isInfinite(number)); 345 #else 346 return uprv_isInfinite(number); 347 #endif 348 } 349 350 U_CAPI UBool U_EXPORT2 351 uprv_isNegativeInfinity(double number) 352 { 353 #if IEEE_754 || defined(OS390) 354 return (UBool)(number < 0 && uprv_isInfinite(number)); 355 356 #else 357 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 358 sizeof(uint32_t)); 359 return((highBits & SIGN) && uprv_isInfinite(number)); 360 361 #endif 362 } 363 364 U_CAPI double U_EXPORT2 365 uprv_getNaN() 366 { 367 #if IEEE_754 || defined(OS390) 368 return gNan.d64; 369 #else 370 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 371 /* you'll need to replace this default implementation with what's correct*/ 372 /* for your platform.*/ 373 return 0.0; 374 #endif 375 } 376 377 U_CAPI double U_EXPORT2 378 uprv_getInfinity() 379 { 380 #if IEEE_754 || defined(OS390) 381 return gInf.d64; 382 #else 383 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 384 /* value, you'll need to replace this default implementation with what's*/ 385 /* correct for your platform.*/ 386 return 0.0; 387 #endif 388 } 389 390 U_CAPI double U_EXPORT2 391 uprv_floor(double x) 392 { 393 return floor(x); 394 } 395 396 U_CAPI double U_EXPORT2 397 uprv_ceil(double x) 398 { 399 return ceil(x); 400 } 401 402 U_CAPI double U_EXPORT2 403 uprv_round(double x) 404 { 405 return uprv_floor(x + 0.5); 406 } 407 408 U_CAPI double U_EXPORT2 409 uprv_fabs(double x) 410 { 411 return fabs(x); 412 } 413 414 U_CAPI double U_EXPORT2 415 uprv_modf(double x, double* y) 416 { 417 return modf(x, y); 418 } 419 420 U_CAPI double U_EXPORT2 421 uprv_fmod(double x, double y) 422 { 423 return fmod(x, y); 424 } 425 426 U_CAPI double U_EXPORT2 427 uprv_pow(double x, double y) 428 { 429 /* This is declared as "double pow(double x, double y)" */ 430 return pow(x, y); 431 } 432 433 U_CAPI double U_EXPORT2 434 uprv_pow10(int32_t x) 435 { 436 return pow(10.0, (double)x); 437 } 438 439 U_CAPI double U_EXPORT2 440 uprv_fmax(double x, double y) 441 { 442 #if IEEE_754 443 int32_t lowBits; 444 445 /* first handle NaN*/ 446 if(uprv_isNaN(x) || uprv_isNaN(y)) 447 return uprv_getNaN(); 448 449 /* check for -0 and 0*/ 450 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t)); 451 if(x == 0.0 && y == 0.0 && (lowBits & SIGN)) 452 return y; 453 454 #endif 455 456 /* this should work for all flt point w/o NaN and Infpecial cases */ 457 return (x > y ? x : y); 458 } 459 460 U_CAPI double U_EXPORT2 461 uprv_fmin(double x, double y) 462 { 463 #if IEEE_754 464 int32_t lowBits; 465 466 /* first handle NaN*/ 467 if(uprv_isNaN(x) || uprv_isNaN(y)) 468 return uprv_getNaN(); 469 470 /* check for -0 and 0*/ 471 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t)); 472 if(x == 0.0 && y == 0.0 && (lowBits & SIGN)) 473 return y; 474 475 #endif 476 477 /* this should work for all flt point w/o NaN and Inf special cases */ 478 return (x > y ? y : x); 479 } 480 481 /** 482 * Truncates the given double. 483 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 484 * This is different than calling floor() or ceil(): 485 * floor(3.3) = 3, floor(-3.3) = -4 486 * ceil(3.3) = 4, ceil(-3.3) = -3 487 */ 488 U_CAPI double U_EXPORT2 489 uprv_trunc(double d) 490 { 491 #if IEEE_754 492 int32_t lowBits; 493 494 /* handle error cases*/ 495 if(uprv_isNaN(d)) 496 return uprv_getNaN(); 497 if(uprv_isInfinite(d)) 498 return uprv_getInfinity(); 499 500 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t)); 501 if( (d == 0.0 && (lowBits & SIGN)) || d < 0) 502 return ceil(d); 503 else 504 return floor(d); 505 506 #else 507 return d >= 0 ? floor(d) : ceil(d); 508 509 #endif 510 } 511 512 /** 513 * Return the largest positive number that can be represented by an integer 514 * type of arbitrary bit length. 515 */ 516 U_CAPI double U_EXPORT2 517 uprv_maxMantissa(void) 518 { 519 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 520 } 521 522 U_CAPI double U_EXPORT2 523 uprv_log(double d) 524 { 525 return log(d); 526 } 527 528 U_CAPI void * U_EXPORT2 529 uprv_maximumPtr(void * base) 530 { 531 #if defined(OS400) 532 /* 533 * With the provided function we should never be out of range of a given segment 534 * (a traditional/typical segment that is). Our segments have 5 bytes for the 535 * id and 3 bytes for the offset. The key is that the casting takes care of 536 * only retrieving the offset portion minus x1000. Hence, the smallest offset 537 * seen in a program is x001000 and when casted to an int would be 0. 538 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 539 * 540 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 541 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 542 * This function determines the activation based on the pointer that is passed in and 543 * calculates the appropriate maximum available size for 544 * each pointer type (TERASPACE and non-TERASPACE) 545 * 546 * Unlike other operating systems, the pointer model isn't determined at 547 * compile time on i5/OS. 548 */ 549 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 550 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 551 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 552 } 553 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 554 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 555 556 #else 557 return U_MAX_PTR(base); 558 #endif 559 } 560 561 /*--------------------------------------------------------------------------- 562 Platform-specific Implementations 563 Try these, and if they don't work on your platform, then special case your 564 platform with new implementations. 565 ---------------------------------------------------------------------------*/ 566 567 /* Generic time zone layer -------------------------------------------------- */ 568 569 /* Time zone utilities */ 570 U_CAPI void U_EXPORT2 571 uprv_tzset() 572 { 573 #ifdef U_TZSET 574 U_TZSET(); 575 #else 576 /* no initialization*/ 577 #endif 578 } 579 580 U_CAPI int32_t U_EXPORT2 581 uprv_timezone() 582 { 583 #ifdef U_TIMEZONE 584 return U_TIMEZONE; 585 #else 586 time_t t, t1, t2; 587 struct tm tmrec; 588 UBool dst_checked; 589 int32_t tdiff = 0; 590 591 time(&t); 592 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 593 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 594 t1 = mktime(&tmrec); /* local time in seconds*/ 595 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 596 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 597 tdiff = t2 - t1; 598 /* imitate NT behaviour, which returns same timezone offset to GMT for 599 winter and summer*/ 600 if (dst_checked) 601 tdiff += 3600; 602 return tdiff; 603 #endif 604 } 605 606 /* Note that U_TZNAME does *not* have to be tzname, but if it is, 607 some platforms need to have it declared here. */ 608 609 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN)) 610 /* RS6000 and others reject char **tzname. */ 611 extern U_IMPORT char *U_TZNAME[]; 612 #endif 613 614 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)) 615 /* These platforms are likely to use Olson timezone IDs. */ 616 #define CHECK_LOCALTIME_LINK 1 617 #if defined(U_DARWIN) 618 #include <tzfile.h> 619 #define TZZONEINFO (TZDIR "/") 620 #else 621 #define TZDEFAULT "/etc/localtime" 622 #define TZZONEINFO "/usr/share/zoneinfo/" 623 #endif 624 #if U_HAVE_DIRENT_H 625 #define SEARCH_TZFILE 626 #include <dirent.h> /* Needed to search through system timezone files */ 627 #endif 628 static char gTimeZoneBuffer[PATH_MAX]; 629 static char *gTimeZoneBufferPtr = NULL; 630 #endif 631 632 #ifndef U_WINDOWS 633 #define isNonDigit(ch) (ch < '0' || '9' < ch) 634 static UBool isValidOlsonID(const char *id) { 635 int32_t idx = 0; 636 637 /* Determine if this is something like Iceland (Olson ID) 638 or AST4ADT (non-Olson ID) */ 639 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 640 idx++; 641 } 642 643 /* If we went through the whole string, then it might be okay. 644 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 645 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 646 The rest of the time it could be an Olson ID. George */ 647 return (UBool)(id[idx] == 0 648 || uprv_strcmp(id, "PST8PDT") == 0 649 || uprv_strcmp(id, "MST7MDT") == 0 650 || uprv_strcmp(id, "CST6CDT") == 0 651 || uprv_strcmp(id, "EST5EDT") == 0); 652 } 653 #endif 654 655 #if defined(U_TZNAME) && !defined(U_WINDOWS) 656 657 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 658 typedef struct OffsetZoneMapping { 659 int32_t offsetSeconds; 660 int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/ 661 const char *stdID; 662 const char *dstID; 663 const char *olsonID; 664 } OffsetZoneMapping; 665 666 /* 667 This list tries to disambiguate a set of abbreviated timezone IDs and offsets 668 and maps it to an Olson ID. 669 Before adding anything to this list, take a look at 670 icu/source/tools/tzcode/tz.alias 671 Sometimes no daylight savings (0) is important to define due to aliases. 672 This list can be tested with icu/source/test/compat/tzone.pl 673 More values could be added to daylightType to increase precision. 674 */ 675 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 676 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 677 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 678 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 679 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 680 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 681 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 682 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 683 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 684 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 685 {-34200, 2, "CST", "CST", "Australia/South"}, 686 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 687 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 688 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 689 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 690 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 691 {-28800, 2, "WST", "WST", "Australia/West"}, 692 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 693 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 694 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 695 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 696 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 697 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 698 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 699 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 700 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 701 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 702 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 703 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 704 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 705 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 706 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 707 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 708 {0, 1, "GMT", "IST", "Europe/Dublin"}, 709 {0, 1, "GMT", "BST", "Europe/London"}, 710 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 711 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 712 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 713 {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, 714 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 715 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 716 {10800, 1, "WGT", "WGST", "America/Godthab"}, 717 {10800, 2, "BRT", "BRST", "Brazil/East"}, 718 {12600, 1, "NST", "NDT", "America/St_Johns"}, 719 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 720 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 721 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 722 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 723 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 724 {18000, 1, "CST", "CDT", "America/Havana"}, 725 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 726 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 727 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 728 {21600, 0, "CST", "CDT", "America/Guatemala"}, 729 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 730 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 731 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 732 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 733 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 734 {36000, 1, "HAST", "HADT", "US/Aleutian"} 735 }; 736 737 /*#define DEBUG_TZNAME*/ 738 739 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 740 { 741 int32_t idx; 742 #ifdef DEBUG_TZNAME 743 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 744 #endif 745 for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++) 746 { 747 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 748 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 749 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 750 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 751 { 752 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 753 } 754 } 755 return NULL; 756 } 757 #endif 758 759 #ifdef SEARCH_TZFILE 760 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 761 #define MAX_READ_SIZE 512 762 763 typedef struct DefaultTZInfo { 764 char* defaultTZBuffer; 765 int64_t defaultTZFileSize; 766 FILE* defaultTZFilePtr; 767 UBool defaultTZstatus; 768 int32_t defaultTZPosition; 769 } DefaultTZInfo; 770 771 /* 772 * This method compares the two files given to see if they are a match. 773 * It is currently use to compare two TZ files. 774 */ 775 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 776 FILE* file; 777 int64_t sizeFile; 778 int64_t sizeFileLeft; 779 int32_t sizeFileRead; 780 int32_t sizeFileToRead; 781 char bufferFile[MAX_READ_SIZE]; 782 UBool result = TRUE; 783 784 if (tzInfo->defaultTZFilePtr == NULL) { 785 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 786 } 787 file = fopen(TZFileName, "r"); 788 789 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 790 791 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 792 /* First check that the file size are equal. */ 793 if (tzInfo->defaultTZFileSize == 0) { 794 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 795 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 796 } 797 fseek(file, 0, SEEK_END); 798 sizeFile = ftell(file); 799 sizeFileLeft = sizeFile; 800 801 if (sizeFile != tzInfo->defaultTZFileSize) { 802 result = FALSE; 803 } else { 804 /* Store the data from the files in seperate buffers and 805 * compare each byte to determine equality. 806 */ 807 if (tzInfo->defaultTZBuffer == NULL) { 808 rewind(tzInfo->defaultTZFilePtr); 809 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 810 fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 811 } 812 rewind(file); 813 while(sizeFileLeft > 0) { 814 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 815 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; 816 817 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 818 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 819 result = FALSE; 820 break; 821 } 822 sizeFileLeft -= sizeFileRead; 823 tzInfo->defaultTZPosition += sizeFileRead; 824 } 825 } 826 } else { 827 result = FALSE; 828 } 829 830 if (file != NULL) { 831 fclose(file); 832 } 833 834 return result; 835 } 836 /* 837 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 838 */ 839 /* dirent also lists two entries: "." and ".." that we can safely ignore. */ 840 #define SKIP1 "." 841 #define SKIP2 ".." 842 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 843 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 844 DIR* dirp = opendir(path); 845 DIR* subDirp = NULL; 846 struct dirent* dirEntry = NULL; 847 848 char* result = NULL; 849 850 /* Save the current path */ 851 char curpath[MAX_PATH_SIZE]; 852 853 if (dirp == NULL) 854 return result; 855 856 uprv_memset(curpath, 0, MAX_PATH_SIZE); 857 uprv_strcpy(curpath, path); 858 859 /* Check each entry in the directory. */ 860 while((dirEntry = readdir(dirp)) != NULL) { 861 if (uprv_strcmp(dirEntry->d_name, SKIP1) != 0 && uprv_strcmp(dirEntry->d_name, SKIP2) != 0) { 862 /* Create a newpath with the new entry to test each entry in the directory. */ 863 char newpath[MAX_PATH_SIZE]; 864 uprv_strcpy(newpath, curpath); 865 uprv_strcat(newpath, dirEntry->d_name); 866 867 if ((subDirp = opendir(newpath)) != NULL) { 868 /* If this new path is a directory, make a recursive call with the newpath. */ 869 closedir(subDirp); 870 uprv_strcat(newpath, "/"); 871 result = searchForTZFile(newpath, tzInfo); 872 } else { 873 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 874 uprv_strcpy(SEARCH_TZFILE_RESULT, newpath + (sizeof(TZZONEINFO) - 1)); 875 result = SEARCH_TZFILE_RESULT; 876 /* Get out after the first one found. */ 877 break; 878 } 879 } 880 } 881 } 882 closedir(dirp); 883 return result; 884 } 885 #endif 886 U_CAPI const char* U_EXPORT2 887 uprv_tzname(int n) 888 { 889 const char *tzid = NULL; 890 #ifdef U_WINDOWS 891 tzid = uprv_detectWindowsTimeZone(); 892 893 if (tzid != NULL) { 894 return tzid; 895 } 896 #else 897 898 /*#if defined(U_DARWIN) 899 int ret; 900 901 tzid = getenv("TZFILE"); 902 if (tzid != NULL) { 903 return tzid; 904 } 905 #endif*/ 906 907 /* This code can be temporarily disabled to test tzname resolution later on. */ 908 #ifndef DEBUG_TZNAME 909 tzid = getenv("TZ"); 910 if (tzid != NULL && isValidOlsonID(tzid)) 911 { 912 /* This might be a good Olson ID. */ 913 if (uprv_strncmp(tzid, "posix/", 6) == 0 914 || uprv_strncmp(tzid, "right/", 6) == 0) 915 { 916 /* Remove the posix/ or right/ prefix. */ 917 tzid += 6; 918 } 919 return tzid; 920 } 921 /* else U_TZNAME will give a better result. */ 922 #endif 923 924 #if defined(CHECK_LOCALTIME_LINK) 925 /* Caller must handle threading issues */ 926 if (gTimeZoneBufferPtr == NULL) { 927 /* 928 This is a trick to look at the name of the link to get the Olson ID 929 because the tzfile contents is underspecified. 930 This isn't guaranteed to work because it may not be a symlink. 931 */ 932 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 933 if (0 < ret) { 934 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 935 gTimeZoneBuffer[ret] = 0; 936 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 937 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 938 { 939 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 940 } 941 } else { 942 #if defined(SEARCH_TZFILE) 943 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 944 if (tzInfo != NULL) { 945 tzInfo->defaultTZBuffer = NULL; 946 tzInfo->defaultTZFileSize = 0; 947 tzInfo->defaultTZFilePtr = NULL; 948 tzInfo->defaultTZstatus = FALSE; 949 tzInfo->defaultTZPosition = 0; 950 951 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 952 953 /* Free previously allocated memory */ 954 if (tzInfo->defaultTZBuffer != NULL) { 955 uprv_free(tzInfo->defaultTZBuffer); 956 } 957 if (tzInfo->defaultTZFilePtr != NULL) { 958 fclose(tzInfo->defaultTZFilePtr); 959 } 960 uprv_free(tzInfo); 961 } 962 963 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 964 return gTimeZoneBufferPtr; 965 } 966 #endif 967 } 968 } 969 else { 970 return gTimeZoneBufferPtr; 971 } 972 #endif 973 #endif 974 975 #ifdef U_TZNAME 976 #ifdef U_WINDOWS 977 return uprv_strdup(U_TZNAME[n]); 978 #else 979 /* 980 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 981 So we remap the abbreviation to an olson ID. 982 983 Since Windows exposes a little more timezone information, 984 we normally don't use this code on Windows because 985 uprv_detectWindowsTimeZone should have already given the correct answer. 986 */ 987 { 988 struct tm juneSol, decemberSol; 989 int daylightType; 990 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 991 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 992 993 /* This probing will tell us when daylight savings occurs. */ 994 localtime_r(&juneSolstice, &juneSol); 995 localtime_r(&decemberSolstice, &decemberSol); 996 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0); 997 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 998 if (tzid != NULL) { 999 return tzid; 1000 } 1001 } 1002 return U_TZNAME[n]; 1003 #endif 1004 #else 1005 return ""; 1006 #endif 1007 } 1008 1009 /* Get and set the ICU data directory --------------------------------------- */ 1010 1011 static char *gDataDirectory = NULL; 1012 #if U_POSIX_LOCALE 1013 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1014 #endif 1015 1016 static UBool U_CALLCONV putil_cleanup(void) 1017 { 1018 if (gDataDirectory && *gDataDirectory) { 1019 uprv_free(gDataDirectory); 1020 } 1021 gDataDirectory = NULL; 1022 #if U_POSIX_LOCALE 1023 if (gCorrectedPOSIXLocale) { 1024 uprv_free(gCorrectedPOSIXLocale); 1025 gCorrectedPOSIXLocale = NULL; 1026 } 1027 #endif 1028 return TRUE; 1029 } 1030 1031 /* 1032 * Set the data directory. 1033 * Make a copy of the passed string, and set the global data dir to point to it. 1034 * TODO: see bug #2849, regarding thread safety. 1035 */ 1036 U_CAPI void U_EXPORT2 1037 u_setDataDirectory(const char *directory) { 1038 char *newDataDir; 1039 int32_t length; 1040 1041 if(directory==NULL || *directory==0) { 1042 /* A small optimization to prevent the malloc and copy when the 1043 shared library is used, and this is a way to make sure that NULL 1044 is never returned. 1045 */ 1046 newDataDir = (char *)""; 1047 } 1048 else { 1049 length=(int32_t)uprv_strlen(directory); 1050 newDataDir = (char *)uprv_malloc(length + 2); 1051 /* Exit out if newDataDir could not be created. */ 1052 if (newDataDir == NULL) { 1053 return; 1054 } 1055 uprv_strcpy(newDataDir, directory); 1056 1057 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1058 { 1059 char *p; 1060 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1061 *p = U_FILE_SEP_CHAR; 1062 } 1063 } 1064 #endif 1065 } 1066 1067 umtx_lock(NULL); 1068 if (gDataDirectory && *gDataDirectory) { 1069 uprv_free(gDataDirectory); 1070 } 1071 gDataDirectory = newDataDir; 1072 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1073 umtx_unlock(NULL); 1074 } 1075 1076 U_CAPI UBool U_EXPORT2 1077 uprv_pathIsAbsolute(const char *path) 1078 { 1079 if(!path || !*path) { 1080 return FALSE; 1081 } 1082 1083 if(*path == U_FILE_SEP_CHAR) { 1084 return TRUE; 1085 } 1086 1087 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1088 if(*path == U_FILE_ALT_SEP_CHAR) { 1089 return TRUE; 1090 } 1091 #endif 1092 1093 #if defined(U_WINDOWS) 1094 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1095 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1096 path[1] == ':' ) { 1097 return TRUE; 1098 } 1099 #endif 1100 1101 return FALSE; 1102 } 1103 1104 U_CAPI const char * U_EXPORT2 1105 u_getDataDirectory(void) { 1106 const char *path = NULL; 1107 1108 /* if we have the directory, then return it immediately */ 1109 UMTX_CHECK(NULL, gDataDirectory, path); 1110 1111 if(path) { 1112 return path; 1113 } 1114 1115 /* 1116 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1117 override ICU's data with the ICU_DATA environment variable. This prevents 1118 problems where multiple custom copies of ICU's specific version of data 1119 are installed on a system. Either the application must define the data 1120 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1121 ICU, set the data with udata_setCommonData or trust that all of the 1122 required data is contained in ICU's data library that contains 1123 the entry point defined by U_ICUDATA_ENTRY_POINT. 1124 1125 There may also be some platforms where environment variables 1126 are not allowed. 1127 */ 1128 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1129 /* First try to get the environment variable */ 1130 path=getenv("ICU_DATA"); 1131 # endif 1132 1133 /* ICU_DATA_DIR may be set as a compile option */ 1134 # ifdef ICU_DATA_DIR 1135 if(path==NULL || *path==0) { 1136 path=ICU_DATA_DIR; 1137 } 1138 # endif 1139 1140 if(path==NULL) { 1141 /* It looks really bad, set it to something. */ 1142 path = ""; 1143 } 1144 1145 u_setDataDirectory(path); 1146 return gDataDirectory; 1147 } 1148 1149 1150 1151 1152 1153 /* Macintosh-specific locale information ------------------------------------ */ 1154 #ifdef XP_MAC 1155 1156 typedef struct { 1157 int32_t script; 1158 int32_t region; 1159 int32_t lang; 1160 int32_t date_region; 1161 const char* posixID; 1162 } mac_lc_rec; 1163 1164 /* Todo: This will be updated with a newer version from www.unicode.org web 1165 page when it's available.*/ 1166 #define MAC_LC_MAGIC_NUMBER -5 1167 #define MAC_LC_INIT_NUMBER -9 1168 1169 static const mac_lc_rec mac_lc_recs[] = { 1170 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", 1171 /* United States*/ 1172 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", 1173 /* France*/ 1174 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", 1175 /* Great Britain*/ 1176 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", 1177 /* Germany*/ 1178 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", 1179 /* Italy*/ 1180 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", 1181 /* Metherlands*/ 1182 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", 1183 /* French for Belgium or Lxembourg*/ 1184 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", 1185 /* Sweden*/ 1186 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", 1187 /* Denmark*/ 1188 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", 1189 /* Portugal*/ 1190 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", 1191 /* French Canada*/ 1192 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", 1193 /* Israel*/ 1194 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP", 1195 /* Japan*/ 1196 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU", 1197 /* Australia*/ 1198 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE", 1199 /* the Arabic world (?)*/ 1200 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI", 1201 /* Finland*/ 1202 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH", 1203 /* French for Switzerland*/ 1204 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH", 1205 /* German for Switzerland*/ 1206 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR", 1207 /* Greece*/ 1208 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS", 1209 /* Iceland ===*/ 1210 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/ 1211 /* Malta ===*/ 1212 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/ 1213 /* Cyprus ===*/ 1214 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR", 1215 /* Turkey ===*/ 1216 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU", 1217 /* Croatian system for Yugoslavia*/ 1218 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/ 1219 /* Hindi system for India*/ 1220 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/ 1221 /* Pakistan*/ 1222 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT", 1223 /* Lithuania*/ 1224 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL", 1225 /* Poland*/ 1226 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU", 1227 /* Hungary*/ 1228 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE", 1229 /* Estonia*/ 1230 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV", 1231 /* Latvia*/ 1232 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/ 1233 /* Lapland [Ask Rich for the data. HS]*/ 1234 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ 1235 /* Faeroe Islands*/ 1236 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", 1237 /* Iran*/ 1238 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", 1239 /* Russia*/ 1240 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", 1241 /* Ireland*/ 1242 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", 1243 /* Korea*/ 1244 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", 1245 /* People's Republic of China*/ 1246 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", 1247 /* Taiwan*/ 1248 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", 1249 /* Thailand*/ 1250 1251 /* fallback is en_US*/ 1252 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1253 MAC_LC_MAGIC_NUMBER, "en_US" 1254 }; 1255 1256 #endif 1257 1258 #if U_POSIX_LOCALE 1259 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1260 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1261 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1262 */ 1263 static const char *uprv_getPOSIXIDForCategory(int category) 1264 { 1265 const char* posixID = NULL; 1266 if (category == LC_MESSAGES || category == LC_CTYPE) { 1267 /* 1268 * On Solaris two different calls to setlocale can result in 1269 * different values. Only get this value once. 1270 * 1271 * We must check this first because an application can set this. 1272 * 1273 * LC_ALL can't be used because it's platform dependent. The LANG 1274 * environment variable seems to affect LC_CTYPE variable by default. 1275 * Here is what setlocale(LC_ALL, NULL) can return. 1276 * HPUX can return 'C C C C C C C' 1277 * Solaris can return /en_US/C/C/C/C/C on the second try. 1278 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 1279 * 1280 * The default codepage detection also needs to use LC_CTYPE. 1281 * 1282 * Do not call setlocale(LC_*, "")! Using an empty string instead 1283 * of NULL, will modify the libc behavior. 1284 */ 1285 posixID = setlocale(category, NULL); 1286 if ((posixID == 0) 1287 || (uprv_strcmp("C", posixID) == 0) 1288 || (uprv_strcmp("POSIX", posixID) == 0)) 1289 { 1290 /* Maybe we got some garbage. Try something more reasonable */ 1291 posixID = getenv("LC_ALL"); 1292 if (posixID == 0) { 1293 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1294 if (posixID == 0) { 1295 posixID = getenv("LANG"); 1296 } 1297 } 1298 } 1299 } 1300 if ((posixID==0) 1301 || (uprv_strcmp("C", posixID) == 0) 1302 || (uprv_strcmp("POSIX", posixID) == 0)) 1303 { 1304 /* Nothing worked. Give it a nice POSIX default value. */ 1305 posixID = "en_US_POSIX"; 1306 } 1307 return posixID; 1308 } 1309 1310 /* Return just the POSIX id for the default locale, whatever happens to be in 1311 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1312 */ 1313 static const char *uprv_getPOSIXIDForDefaultLocale(void) 1314 { 1315 static const char* posixID = NULL; 1316 if (posixID == 0) { 1317 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1318 } 1319 return posixID; 1320 } 1321 1322 /* Return just the POSIX id for the default codepage, whatever happens to be in 1323 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1324 */ 1325 static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1326 { 1327 static const char* posixID = NULL; 1328 if (posixID == 0) { 1329 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1330 } 1331 return posixID; 1332 } 1333 #endif 1334 1335 /* NOTE: The caller should handle thread safety */ 1336 U_CAPI const char* U_EXPORT2 1337 uprv_getDefaultLocaleID() 1338 { 1339 #if U_POSIX_LOCALE 1340 /* 1341 Note that: (a '!' means the ID is improper somehow) 1342 LC_ALL ----> default_loc codepage 1343 -------------------------------------------------------- 1344 ab.CD ab CD 1345 ab@CD ab__CD - 1346 ab (at) CD.EF ab__CD EF 1347 1348 ab_CD.EF@GH ab_CD_GH EF 1349 1350 Some 'improper' ways to do the same as above: 1351 ! ab_CD (at) GH.EF ab_CD_GH EF 1352 ! ab_CD.EF (at) GH.IJ ab_CD_GH EF 1353 ! ab_CD (at) ZZ.EF@GH.IJ ab_CD_GH EF 1354 1355 _CD@GH _CD_GH - 1356 _CD.EF@GH _CD_GH EF 1357 1358 The variant cannot have dots in it. 1359 The 'rightmost' variant (@xxx) wins. 1360 The leftmost codepage (.xxx) wins. 1361 */ 1362 char *correctedPOSIXLocale = 0; 1363 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1364 const char *p; 1365 const char *q; 1366 int32_t len; 1367 1368 /* Format: (no spaces) 1369 ll [ _CC ] [ . MM ] [ @ VV] 1370 1371 l = lang, C = ctry, M = charmap, V = variant 1372 */ 1373 1374 if (gCorrectedPOSIXLocale != NULL) { 1375 return gCorrectedPOSIXLocale; 1376 } 1377 1378 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1379 /* assume new locale can't be larger than old one? */ 1380 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1381 /* Exit on memory allocation error. */ 1382 if (correctedPOSIXLocale == NULL) { 1383 return NULL; 1384 } 1385 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1386 correctedPOSIXLocale[p-posixID] = 0; 1387 1388 /* do not copy after the @ */ 1389 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1390 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1391 } 1392 } 1393 1394 /* Note that we scan the *uncorrected* ID. */ 1395 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1396 if (correctedPOSIXLocale == NULL) { 1397 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1398 /* Exit on memory allocation error. */ 1399 if (correctedPOSIXLocale == NULL) { 1400 return NULL; 1401 } 1402 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1403 correctedPOSIXLocale[p-posixID] = 0; 1404 } 1405 p++; 1406 1407 /* Take care of any special cases here.. */ 1408 if (!uprv_strcmp(p, "nynorsk")) { 1409 p = "NY"; 1410 /* Don't worry about no__NY. In practice, it won't appear. */ 1411 } 1412 1413 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1414 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1415 } 1416 else { 1417 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1418 } 1419 1420 if ((q = uprv_strchr(p, '.')) != NULL) { 1421 /* How big will the resulting string be? */ 1422 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1423 uprv_strncat(correctedPOSIXLocale, p, q-p); 1424 correctedPOSIXLocale[len] = 0; 1425 } 1426 else { 1427 /* Anything following the @ sign */ 1428 uprv_strcat(correctedPOSIXLocale, p); 1429 } 1430 1431 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1432 * How about 'russian' -> 'ru'? 1433 * Many of the other locales using ISO codes will be handled by the 1434 * canonicalization functions in uloc_getDefault. 1435 */ 1436 } 1437 1438 /* Was a correction made? */ 1439 if (correctedPOSIXLocale != NULL) { 1440 posixID = correctedPOSIXLocale; 1441 } 1442 else { 1443 /* copy it, just in case the original pointer goes away. See j2395 */ 1444 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1445 /* Exit on memory allocation error. */ 1446 if (correctedPOSIXLocale == NULL) { 1447 return NULL; 1448 } 1449 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1450 } 1451 1452 if (gCorrectedPOSIXLocale == NULL) { 1453 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1454 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1455 correctedPOSIXLocale = NULL; 1456 } 1457 1458 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */ 1459 uprv_free(correctedPOSIXLocale); 1460 } 1461 1462 return posixID; 1463 1464 #elif defined(U_WINDOWS) 1465 UErrorCode status = U_ZERO_ERROR; 1466 LCID id = GetThreadLocale(); 1467 const char* locID = uprv_convertToPosix(id, &status); 1468 1469 if (U_FAILURE(status)) { 1470 locID = "en_US"; 1471 } 1472 return locID; 1473 1474 #elif defined(XP_MAC) 1475 int32_t script = MAC_LC_INIT_NUMBER; 1476 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ 1477 int32_t region = MAC_LC_INIT_NUMBER; 1478 /* = GetScriptManagerVariable(smRegionCode);*/ 1479 int32_t lang = MAC_LC_INIT_NUMBER; 1480 /* = GetScriptManagerVariable(smScriptLang);*/ 1481 int32_t date_region = MAC_LC_INIT_NUMBER; 1482 const char* posixID = 0; 1483 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); 1484 int32_t i; 1485 Intl1Hndl ih; 1486 1487 ih = (Intl1Hndl) GetIntlResource(1); 1488 if (ih) 1489 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; 1490 1491 for (i = 0; i < count; i++) { 1492 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) 1493 || (mac_lc_recs[i].script == script)) 1494 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) 1495 || (mac_lc_recs[i].region == region)) 1496 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) 1497 || (mac_lc_recs[i].lang == lang)) 1498 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) 1499 || (mac_lc_recs[i].date_region == date_region)) 1500 ) 1501 { 1502 posixID = mac_lc_recs[i].posixID; 1503 break; 1504 } 1505 } 1506 1507 return posixID; 1508 1509 #elif defined(OS400) 1510 /* locales are process scoped and are by definition thread safe */ 1511 static char correctedLocale[64]; 1512 const char *localeID = getenv("LC_ALL"); 1513 char *p; 1514 1515 if (localeID == NULL) 1516 localeID = getenv("LANG"); 1517 if (localeID == NULL) 1518 localeID = setlocale(LC_ALL, NULL); 1519 /* Make sure we have something... */ 1520 if (localeID == NULL) 1521 return "en_US_POSIX"; 1522 1523 /* Extract the locale name from the path. */ 1524 if((p = uprv_strrchr(localeID, '/')) != NULL) 1525 { 1526 /* Increment p to start of locale name. */ 1527 p++; 1528 localeID = p; 1529 } 1530 1531 /* Copy to work location. */ 1532 uprv_strcpy(correctedLocale, localeID); 1533 1534 /* Strip off the '.locale' extension. */ 1535 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1536 *p = 0; 1537 } 1538 1539 /* Upper case the locale name. */ 1540 T_CString_toUpperCase(correctedLocale); 1541 1542 /* See if we are using the POSIX locale. Any of the 1543 * following are equivalent and use the same QLGPGCMA 1544 * (POSIX) locale. 1545 * QLGPGCMA2 means UCS2 1546 * QLGPGCMA_4 means UTF-32 1547 * QLGPGCMA_8 means UTF-8 1548 */ 1549 if ((uprv_strcmp("C", correctedLocale) == 0) || 1550 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1551 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1552 { 1553 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1554 } 1555 else 1556 { 1557 int16_t LocaleLen; 1558 1559 /* Lower case the lang portion. */ 1560 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1561 { 1562 *p = uprv_tolower(*p); 1563 } 1564 1565 /* Adjust for Euro. After '_E' add 'URO'. */ 1566 LocaleLen = uprv_strlen(correctedLocale); 1567 if (correctedLocale[LocaleLen - 2] == '_' && 1568 correctedLocale[LocaleLen - 1] == 'E') 1569 { 1570 uprv_strcat(correctedLocale, "URO"); 1571 } 1572 1573 /* If using Lotus-based locale then convert to 1574 * equivalent non Lotus. 1575 */ 1576 else if (correctedLocale[LocaleLen - 2] == '_' && 1577 correctedLocale[LocaleLen - 1] == 'L') 1578 { 1579 correctedLocale[LocaleLen - 2] = 0; 1580 } 1581 1582 /* There are separate simplified and traditional 1583 * locales called zh_HK_S and zh_HK_T. 1584 */ 1585 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1586 { 1587 uprv_strcpy(correctedLocale, "zh_HK"); 1588 } 1589 1590 /* A special zh_CN_GBK locale... 1591 */ 1592 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1593 { 1594 uprv_strcpy(correctedLocale, "zh_CN"); 1595 } 1596 1597 } 1598 1599 return correctedLocale; 1600 #endif 1601 1602 } 1603 1604 #if !U_CHARSET_IS_UTF8 1605 #if U_POSIX_LOCALE 1606 /* 1607 Due to various platform differences, one platform may specify a charset, 1608 when they really mean a different charset. Remap the names so that they are 1609 compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1610 here. Before adding anything to this function, please consider adding unique 1611 names to the ICU alias table in the data directory. 1612 */ 1613 static const char* 1614 remapPlatformDependentCodepage(const char *locale, const char *name) { 1615 if (locale != NULL && *locale == 0) { 1616 /* Make sure that an empty locale is handled the same way. */ 1617 locale = NULL; 1618 } 1619 if (name == NULL) { 1620 return NULL; 1621 } 1622 #if defined(U_AIX) 1623 if (uprv_strcmp(name, "IBM-943") == 0) { 1624 /* Use the ASCII compatible ibm-943 */ 1625 name = "Shift-JIS"; 1626 } 1627 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1628 /* Use the windows-1252 that contains the Euro */ 1629 name = "IBM-5348"; 1630 } 1631 #elif defined(U_SOLARIS) 1632 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1633 /* Solaris underspecifies the "EUC" name. */ 1634 if (uprv_strcmp(locale, "zh_CN") == 0) { 1635 name = "EUC-CN"; 1636 } 1637 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1638 name = "EUC-TW"; 1639 } 1640 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1641 name = "EUC-KR"; 1642 } 1643 } 1644 else if (uprv_strcmp(name, "eucJP") == 0) { 1645 /* 1646 ibm-954 is the best match. 1647 ibm-33722 is the default for eucJP (similar to Windows). 1648 */ 1649 name = "eucjis"; 1650 } 1651 else if (uprv_strcmp(name, "646") == 0) { 1652 /* 1653 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1654 * ISO-8859-1 instead of US-ASCII(646). 1655 */ 1656 name = "ISO-8859-1"; 1657 } 1658 #elif defined(U_DARWIN) 1659 if (locale == NULL && *name == 0) { 1660 /* 1661 No locale was specified, and an empty name was passed in. 1662 This usually indicates that nl_langinfo didn't return valid information. 1663 Mac OS X uses UTF-8 by default (especially the locale data and console). 1664 */ 1665 name = "UTF-8"; 1666 } 1667 else if (uprv_strcmp(name, "CP949") == 0) { 1668 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1669 name = "EUC-KR"; 1670 } 1671 #elif defined(U_BSD) 1672 if (uprv_strcmp(name, "CP949") == 0) { 1673 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1674 name = "EUC-KR"; 1675 } 1676 #elif defined(U_HPUX) 1677 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1678 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1679 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1680 name = "hkbig5"; 1681 } 1682 else if (uprv_strcmp(name, "eucJP") == 0) { 1683 /* 1684 ibm-1350 is the best match, but unavailable. 1685 ibm-954 is mostly a superset of ibm-1350. 1686 ibm-33722 is the default for eucJP (similar to Windows). 1687 */ 1688 name = "eucjis"; 1689 } 1690 #elif defined(U_LINUX) 1691 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1692 /* Linux underspecifies the "EUC" name. */ 1693 if (uprv_strcmp(locale, "korean") == 0) { 1694 name = "EUC-KR"; 1695 } 1696 else if (uprv_strcmp(locale, "japanese") == 0) { 1697 /* See comment below about eucJP */ 1698 name = "eucjis"; 1699 } 1700 } 1701 else if (uprv_strcmp(name, "eucjp") == 0) { 1702 /* 1703 ibm-1350 is the best match, but unavailable. 1704 ibm-954 is mostly a superset of ibm-1350. 1705 ibm-33722 is the default for eucJP (similar to Windows). 1706 */ 1707 name = "eucjis"; 1708 } 1709 /* 1710 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1711 * it by falling back to 'US-ASCII' when NULL is returned from this 1712 * function. So, we don't have to worry about it here. 1713 */ 1714 #endif 1715 /* return NULL when "" is passed in */ 1716 if (*name == 0) { 1717 name = NULL; 1718 } 1719 return name; 1720 } 1721 1722 static const char* 1723 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1724 { 1725 char localeBuf[100]; 1726 const char *name = NULL; 1727 char *variant = NULL; 1728 1729 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1730 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1731 uprv_strncpy(localeBuf, localeName, localeCapacity); 1732 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1733 name = uprv_strncpy(buffer, name+1, buffCapacity); 1734 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1735 if ((variant = (uprv_strchr(name, '@'))) != NULL) { 1736 *variant = 0; 1737 } 1738 name = remapPlatformDependentCodepage(localeBuf, name); 1739 } 1740 return name; 1741 } 1742 #endif 1743 1744 static const char* 1745 int_getDefaultCodepage() 1746 { 1747 #if defined(OS400) 1748 uint32_t ccsid = 37; /* Default to ibm-37 */ 1749 static char codepage[64]; 1750 Qwc_JOBI0400_t jobinfo; 1751 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1752 1753 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1754 "* ", " ", &error); 1755 1756 if (error.Bytes_Available == 0) { 1757 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1758 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1759 } 1760 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1761 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1762 } 1763 /* else use the default */ 1764 } 1765 sprintf(codepage,"ibm-%d", ccsid); 1766 return codepage; 1767 1768 #elif defined(OS390) 1769 static char codepage[64]; 1770 1771 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1772 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1773 codepage[63] = 0; /* NULL terminate */ 1774 1775 return codepage; 1776 1777 #elif defined(XP_MAC) 1778 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ 1779 1780 #elif defined(U_WINDOWS) 1781 static char codepage[64]; 1782 sprintf(codepage, "windows-%d", GetACP()); 1783 return codepage; 1784 1785 #elif U_POSIX_LOCALE 1786 static char codesetName[100]; 1787 const char *localeName = NULL; 1788 const char *name = NULL; 1789 1790 uprv_memset(codesetName, 0, sizeof(codesetName)); 1791 #if U_HAVE_NL_LANGINFO_CODESET 1792 /* When available, check nl_langinfo first because it usually gives more 1793 useful names. It depends on LC_CTYPE. 1794 nl_langinfo may use the same buffer as setlocale. */ 1795 { 1796 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1797 codeset = remapPlatformDependentCodepage(NULL, codeset); 1798 if (codeset != NULL) { 1799 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1800 codesetName[sizeof(codesetName)-1] = 0; 1801 return codesetName; 1802 } 1803 } 1804 #endif 1805 1806 /* Use setlocale in a nice way, and then check some environment variables. 1807 Maybe the application used setlocale already. 1808 */ 1809 uprv_memset(codesetName, 0, sizeof(codesetName)); 1810 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1811 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1812 if (name) { 1813 /* if we can find the codeset name from setlocale, return that. */ 1814 return name; 1815 } 1816 1817 if (*codesetName == 0) 1818 { 1819 /* Everything failed. Return US ASCII (ISO 646). */ 1820 (void)uprv_strcpy(codesetName, "US-ASCII"); 1821 } 1822 return codesetName; 1823 #else 1824 return "US-ASCII"; 1825 #endif 1826 } 1827 1828 1829 U_CAPI const char* U_EXPORT2 1830 uprv_getDefaultCodepage() 1831 { 1832 static char const *name = NULL; 1833 umtx_lock(NULL); 1834 if (name == NULL) { 1835 name = int_getDefaultCodepage(); 1836 } 1837 umtx_unlock(NULL); 1838 return name; 1839 } 1840 #endif /* !U_CHARSET_IS_UTF8 */ 1841 1842 1843 /* end of platform-specific implementation -------------- */ 1844 1845 /* version handling --------------------------------------------------------- */ 1846 1847 U_CAPI void U_EXPORT2 1848 u_versionFromString(UVersionInfo versionArray, const char *versionString) { 1849 char *end; 1850 uint16_t part=0; 1851 1852 if(versionArray==NULL) { 1853 return; 1854 } 1855 1856 if(versionString!=NULL) { 1857 for(;;) { 1858 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 1859 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 1860 break; 1861 } 1862 versionString=end+1; 1863 } 1864 } 1865 1866 while(part<U_MAX_VERSION_LENGTH) { 1867 versionArray[part++]=0; 1868 } 1869 } 1870 1871 U_CAPI void U_EXPORT2 1872 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 1873 if(versionArray!=NULL && versionString!=NULL) { 1874 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 1875 int32_t len = u_strlen(versionString); 1876 if(len>U_MAX_VERSION_STRING_LENGTH) { 1877 len = U_MAX_VERSION_STRING_LENGTH; 1878 } 1879 u_UCharsToChars(versionString, versionChars, len); 1880 versionChars[U_MAX_VERSION_STRING_LENGTH]=0; 1881 u_versionFromString(versionArray, versionChars); 1882 } 1883 } 1884 1885 U_CAPI int32_t U_EXPORT2 1886 u_compareVersions(UVersionInfo v1, UVersionInfo v2) { 1887 int n; 1888 if(v1==NULL||v2==NULL) return 0; 1889 for(n=0;n<U_MAX_VERSION_LENGTH;n++) { 1890 if(v1[n]<v2[n]) { 1891 return -1; 1892 } else if(v1[n]>v2[n]) { 1893 return 1; 1894 } 1895 } 1896 return 0; /* no difference */ 1897 } 1898 1899 1900 U_CAPI void U_EXPORT2 1901 u_versionToString(UVersionInfo versionArray, char *versionString) { 1902 uint16_t count, part; 1903 uint8_t field; 1904 1905 if(versionString==NULL) { 1906 return; 1907 } 1908 1909 if(versionArray==NULL) { 1910 versionString[0]=0; 1911 return; 1912 } 1913 1914 /* count how many fields need to be written */ 1915 for(count=4; count>0 && versionArray[count-1]==0; --count) { 1916 } 1917 1918 if(count <= 1) { 1919 count = 2; 1920 } 1921 1922 /* write the first part */ 1923 /* write the decimal field value */ 1924 field=versionArray[0]; 1925 if(field>=100) { 1926 *versionString++=(char)('0'+field/100); 1927 field%=100; 1928 } 1929 if(field>=10) { 1930 *versionString++=(char)('0'+field/10); 1931 field%=10; 1932 } 1933 *versionString++=(char)('0'+field); 1934 1935 /* write the following parts */ 1936 for(part=1; part<count; ++part) { 1937 /* write a dot first */ 1938 *versionString++=U_VERSION_DELIMITER; 1939 1940 /* write the decimal field value */ 1941 field=versionArray[part]; 1942 if(field>=100) { 1943 *versionString++=(char)('0'+field/100); 1944 field%=100; 1945 } 1946 if(field>=10) { 1947 *versionString++=(char)('0'+field/10); 1948 field%=10; 1949 } 1950 *versionString++=(char)('0'+field); 1951 } 1952 1953 /* NUL-terminate */ 1954 *versionString=0; 1955 } 1956 1957 U_CAPI void U_EXPORT2 1958 u_getVersion(UVersionInfo versionArray) { 1959 u_versionFromString(versionArray, U_ICU_VERSION); 1960 } 1961 1962 /* 1963 * Hey, Emacs, please set the following: 1964 * 1965 * Local Variables: 1966 * indent-tabs-mode: nil 1967 * End: 1968 * 1969 */ 1970