1 /* 2 ********************************************************************** 3 * Copyright (C) 2009-2010, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8 #include "unicode/utypes.h" 9 #include "unicode/ures.h" 10 #include "unicode/putil.h" 11 #include "unicode/uloc.h" 12 #include "ustr_imp.h" 13 #include "cmemory.h" 14 #include "cstring.h" 15 #include "putilimp.h" 16 #include "uinvchar.h" 17 18 /* struct holding a single variant */ 19 typedef struct VariantListEntry { 20 const char *variant; 21 struct VariantListEntry *next; 22 } VariantListEntry; 23 24 /* struct holding a single extension */ 25 typedef struct ExtensionListEntry { 26 const char *key; 27 const char *value; 28 struct ExtensionListEntry *next; 29 } ExtensionListEntry; 30 31 #define MAXEXTLANG 3 32 typedef struct ULanguageTag { 33 char *buf; /* holding parsed subtags */ 34 const char *language; 35 const char *extlang[MAXEXTLANG]; 36 const char *script; 37 const char *region; 38 VariantListEntry *variants; 39 ExtensionListEntry *extensions; 40 const char *privateuse; 41 const char *grandfathered; 42 } ULanguageTag; 43 44 #define MINLEN 2 45 #define SEP '-' 46 #define PRIVATEUSE 'x' 47 #define LDMLEXT 'u' 48 49 #define LOCALE_SEP '_' 50 #define LOCALE_EXT_SEP '@' 51 #define LOCALE_KEYWORD_SEP ';' 52 #define LOCALE_KEY_TYPE_SEP '=' 53 54 #define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z')) 55 #define ISNUMERIC(c) ((c)>='0' && (c)<='9') 56 57 static const char* EMPTY = ""; 58 static const char* LANG_UND = "und"; 59 static const char* PRIVATEUSE_KEY = "x"; 60 static const char* _POSIX = "_POSIX"; 61 static const char* POSIX_KEY = "va"; 62 static const char* POSIX_VALUE = "posix"; 63 64 #define LANG_UND_LEN 3 65 66 static const char* GRANDFATHERED[] = { 67 /* grandfathered preferred */ 68 "art-lojban", "jbo", 69 "cel-gaulish", "", 70 "en-GB-oed", "", 71 "i-ami", "ami", 72 "i-bnn", "bnn", 73 "i-default", "", 74 "i-enochian", "", 75 "i-hak", "hak", 76 "i-klingon", "tlh", 77 "i-lux", "lb", 78 "i-mingo", "", 79 "i-navajo", "nv", 80 "i-pwn", "pwn", 81 "i-tao", "tao", 82 "i-tay", "tay", 83 "i-tsu", "tsu", 84 "no-bok", "nb", 85 "no-nyn", "nn", 86 "sgn-be-fr", "sfb", 87 "sgn-be-nl", "vgt", 88 "sgn-ch-de", "sgg", 89 "zh-guoyu", "cmn", 90 "zh-hakka", "hak", 91 "zh-min", "", 92 "zh-min-nan", "nan", 93 "zh-xiang", "hsn", 94 NULL, NULL 95 }; 96 97 static const char* DEPRECATEDLANGS[] = { 98 /* deprecated new */ 99 "iw", "he", 100 "ji", "yi", 101 "in", "id", 102 NULL, NULL 103 }; 104 105 /* 106 * ------------------------------------------------- 107 * 108 * These ultag_ functions may be exposed as APIs later 109 * 110 * ------------------------------------------------- 111 */ 112 113 static ULanguageTag* 114 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); 115 116 static void 117 ultag_close(ULanguageTag* langtag); 118 119 static const char* 120 ultag_getLanguage(const ULanguageTag* langtag); 121 122 #if 0 123 static const char* 124 ultag_getJDKLanguage(const ULanguageTag* langtag); 125 #endif 126 127 static const char* 128 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); 129 130 static int32_t 131 ultag_getExtlangSize(const ULanguageTag* langtag); 132 133 static const char* 134 ultag_getScript(const ULanguageTag* langtag); 135 136 static const char* 137 ultag_getRegion(const ULanguageTag* langtag); 138 139 static const char* 140 ultag_getVariant(const ULanguageTag* langtag, int32_t idx); 141 142 static int32_t 143 ultag_getVariantsSize(const ULanguageTag* langtag); 144 145 static const char* 146 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); 147 148 static const char* 149 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); 150 151 static int32_t 152 ultag_getExtensionsSize(const ULanguageTag* langtag); 153 154 static const char* 155 ultag_getPrivateUse(const ULanguageTag* langtag); 156 157 #if 0 158 static const char* 159 ultag_getGrandfathered(const ULanguageTag* langtag); 160 #endif 161 162 /* 163 * ------------------------------------------------- 164 * 165 * Language subtag syntax validation functions 166 * 167 * ------------------------------------------------- 168 */ 169 170 static UBool 171 _isAlphaString(const char* s, int32_t len) { 172 int32_t i; 173 for (i = 0; i < len; i++) { 174 if (!ISALPHA(*(s + i))) { 175 return FALSE; 176 } 177 } 178 return TRUE; 179 } 180 181 static UBool 182 _isNumericString(const char* s, int32_t len) { 183 int32_t i; 184 for (i = 0; i < len; i++) { 185 if (!ISNUMERIC(*(s + i))) { 186 return FALSE; 187 } 188 } 189 return TRUE; 190 } 191 192 static UBool 193 _isAlphaNumericString(const char* s, int32_t len) { 194 int32_t i; 195 for (i = 0; i < len; i++) { 196 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { 197 return FALSE; 198 } 199 } 200 return TRUE; 201 } 202 203 static UBool 204 _isLanguageSubtag(const char* s, int32_t len) { 205 /* 206 * language = 2*3ALPHA ; shortest ISO 639 code 207 * ["-" extlang] ; sometimes followed by 208 * ; extended language subtags 209 * / 4ALPHA ; or reserved for future use 210 * / 5*8ALPHA ; or registered language subtag 211 */ 212 if (len < 0) { 213 len = (int32_t)uprv_strlen(s); 214 } 215 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { 216 return TRUE; 217 } 218 return FALSE; 219 } 220 221 static UBool 222 _isExtlangSubtag(const char* s, int32_t len) { 223 /* 224 * extlang = 3ALPHA ; selected ISO 639 codes 225 * *2("-" 3ALPHA) ; permanently reserved 226 */ 227 if (len < 0) { 228 len = (int32_t)uprv_strlen(s); 229 } 230 if (len == 3 && _isAlphaString(s, len)) { 231 return TRUE; 232 } 233 return FALSE; 234 } 235 236 static UBool 237 _isScriptSubtag(const char* s, int32_t len) { 238 /* 239 * script = 4ALPHA ; ISO 15924 code 240 */ 241 if (len < 0) { 242 len = (int32_t)uprv_strlen(s); 243 } 244 if (len == 4 && _isAlphaString(s, len)) { 245 return TRUE; 246 } 247 return FALSE; 248 } 249 250 static UBool 251 _isRegionSubtag(const char* s, int32_t len) { 252 /* 253 * region = 2ALPHA ; ISO 3166-1 code 254 * / 3DIGIT ; UN M.49 code 255 */ 256 if (len < 0) { 257 len = (int32_t)uprv_strlen(s); 258 } 259 if (len == 2 && _isAlphaString(s, len)) { 260 return TRUE; 261 } 262 if (len == 3 && _isNumericString(s, len)) { 263 return TRUE; 264 } 265 return FALSE; 266 } 267 268 static UBool 269 _isVariantSubtag(const char* s, int32_t len) { 270 /* 271 * variant = 5*8alphanum ; registered variants 272 * / (DIGIT 3alphanum) 273 */ 274 if (len < 0) { 275 len = (int32_t)uprv_strlen(s); 276 } 277 if (len >= 5 && len <= 8 && _isAlphaString(s, len)) { 278 return TRUE; 279 } 280 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { 281 return TRUE; 282 } 283 return FALSE; 284 } 285 286 static UBool 287 _isExtensionSingleton(const char* s, int32_t len) { 288 /* 289 * extension = singleton 1*("-" (2*8alphanum)) 290 */ 291 if (len < 0) { 292 len = (int32_t)uprv_strlen(s); 293 } 294 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { 295 return TRUE; 296 } 297 return FALSE; 298 } 299 300 static UBool 301 _isExtensionSubtag(const char* s, int32_t len) { 302 /* 303 * extension = singleton 1*("-" (2*8alphanum)) 304 */ 305 if (len < 0) { 306 len = (int32_t)uprv_strlen(s); 307 } 308 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { 309 return TRUE; 310 } 311 return FALSE; 312 } 313 314 static UBool 315 _isExtensionSubtags(const char* s, int32_t len) { 316 const char *p = s; 317 const char *pSubtag = NULL; 318 319 if (len < 0) { 320 len = (int32_t)uprv_strlen(s); 321 } 322 323 while ((p - s) < len) { 324 if (*p == SEP) { 325 if (pSubtag == NULL) { 326 return FALSE; 327 } 328 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { 329 return FALSE; 330 } 331 pSubtag = NULL; 332 } else if (pSubtag == NULL) { 333 pSubtag = p; 334 } 335 p++; 336 } 337 if (pSubtag == NULL) { 338 return FALSE; 339 } 340 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); 341 } 342 343 static UBool 344 _isPrivateuseValueSubtag(const char* s, int32_t len) { 345 /* 346 * privateuse = "x" 1*("-" (1*8alphanum)) 347 */ 348 if (len < 0) { 349 len = (int32_t)uprv_strlen(s); 350 } 351 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 352 return TRUE; 353 } 354 return FALSE; 355 } 356 357 static UBool 358 _isPrivateuseValueSubtags(const char* s, int32_t len) { 359 const char *p = s; 360 const char *pSubtag = NULL; 361 362 if (len < 0) { 363 len = (int32_t)uprv_strlen(s); 364 } 365 366 while ((p - s) < len) { 367 if (*p == SEP) { 368 if (pSubtag == NULL) { 369 return FALSE; 370 } 371 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { 372 return FALSE; 373 } 374 pSubtag = NULL; 375 } else if (pSubtag == NULL) { 376 pSubtag = p; 377 } 378 p++; 379 } 380 if (pSubtag == NULL) { 381 return FALSE; 382 } 383 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); 384 } 385 386 static UBool 387 _isLDMLKey(const char* s, int32_t len) { 388 if (len < 0) { 389 len = (int32_t)uprv_strlen(s); 390 } 391 if (len == 2 && _isAlphaNumericString(s, len)) { 392 return TRUE; 393 } 394 return FALSE; 395 } 396 397 static UBool 398 _isLDMLType(const char* s, int32_t len) { 399 if (len < 0) { 400 len = (int32_t)uprv_strlen(s); 401 } 402 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) { 403 return TRUE; 404 } 405 return FALSE; 406 } 407 408 /* 409 * ------------------------------------------------- 410 * 411 * Helper functions 412 * 413 * ------------------------------------------------- 414 */ 415 416 static UBool 417 _addVariantToList(VariantListEntry **first, VariantListEntry *var) { 418 UBool bAdded = TRUE; 419 420 if (*first == NULL) { 421 var->next = NULL; 422 *first = var; 423 } else { 424 VariantListEntry *prev, *cur; 425 int32_t cmp; 426 427 /* reorder variants in alphabetical order */ 428 prev = NULL; 429 cur = *first; 430 while (TRUE) { 431 if (cur == NULL) { 432 prev->next = var; 433 var->next = NULL; 434 break; 435 } 436 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); 437 if (cmp < 0) { 438 if (prev == NULL) { 439 *first = var; 440 } else { 441 prev->next = var; 442 } 443 var->next = cur; 444 break; 445 } 446 if (cmp == 0) { 447 /* duplicated variant */ 448 bAdded = FALSE; 449 break; 450 } 451 prev = cur; 452 cur = cur->next; 453 } 454 } 455 456 return bAdded; 457 } 458 459 460 static UBool 461 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { 462 UBool bAdded = TRUE; 463 464 if (*first == NULL) { 465 ext->next = NULL; 466 *first = ext; 467 } else { 468 ExtensionListEntry *prev, *cur; 469 int32_t cmp; 470 471 /* reorder variants in alphabetical order */ 472 prev = NULL; 473 cur = *first; 474 while (TRUE) { 475 if (cur == NULL) { 476 prev->next = ext; 477 ext->next = NULL; 478 break; 479 } 480 if (localeToBCP) { 481 /* special handling for locale to bcp conversion */ 482 int32_t len, curlen; 483 484 len = (int32_t)uprv_strlen(ext->key); 485 curlen = (int32_t)uprv_strlen(cur->key); 486 487 if (len == 1 && curlen == 1) { 488 if (*(ext->key) == *(cur->key)) { 489 cmp = 0; 490 } else if (*(ext->key) == PRIVATEUSE) { 491 cmp = 1; 492 } else if (*(cur->key) == PRIVATEUSE) { 493 cmp = -1; 494 } else { 495 cmp = *(ext->key) - *(cur->key); 496 } 497 } else if (len == 1) { 498 cmp = *(ext->key) - LDMLEXT; 499 } else if (curlen == 1) { 500 cmp = LDMLEXT - *(cur->key); 501 } else { 502 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 503 } 504 } else { 505 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 506 } 507 if (cmp < 0) { 508 if (prev == NULL) { 509 *first = ext; 510 } else { 511 prev->next = ext; 512 } 513 ext->next = cur; 514 break; 515 } 516 if (cmp == 0) { 517 /* duplicated extension key */ 518 bAdded = FALSE; 519 break; 520 } 521 prev = cur; 522 cur = cur->next; 523 } 524 } 525 526 return bAdded; 527 } 528 529 static void 530 _initializeULanguageTag(ULanguageTag* langtag) { 531 int32_t i; 532 533 langtag->buf = NULL; 534 535 langtag->language = EMPTY; 536 for (i = 0; i < MAXEXTLANG; i++) { 537 langtag->extlang[i] = NULL; 538 } 539 540 langtag->script = EMPTY; 541 langtag->region = EMPTY; 542 543 langtag->variants = NULL; 544 langtag->extensions = NULL; 545 546 langtag->grandfathered = EMPTY; 547 langtag->privateuse = EMPTY; 548 } 549 550 #define KEYTYPEDATA "keyTypeData" 551 #define KEYMAP "keyMap" 552 #define TYPEMAP "typeMap" 553 #define TYPEALIAS "typeAlias" 554 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */ 555 #define MAX_LDML_KEY_LEN 22 556 #define MAX_LDML_TYPE_LEN 32 557 558 static int32_t 559 _ldmlKeyToBCP47(const char* key, int32_t keyLen, 560 char* bcpKey, int32_t bcpKeyCapacity, 561 UErrorCode *status) { 562 UResourceBundle *rb; 563 char keyBuf[MAX_LDML_KEY_LEN]; 564 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 565 int32_t resultLen = 0; 566 int32_t i; 567 UErrorCode tmpStatus = U_ZERO_ERROR; 568 const UChar *uBcpKey; 569 int32_t bcpKeyLen; 570 571 if (keyLen < 0) { 572 keyLen = (int32_t)uprv_strlen(key); 573 } 574 575 if (keyLen >= sizeof(keyBuf)) { 576 /* no known valid LDML key exceeding 21 */ 577 *status = U_ILLEGAL_ARGUMENT_ERROR; 578 return 0; 579 } 580 581 uprv_memcpy(keyBuf, key, keyLen); 582 keyBuf[keyLen] = 0; 583 584 /* to lower case */ 585 for (i = 0; i < keyLen; i++) { 586 keyBuf[i] = uprv_tolower(keyBuf[i]); 587 } 588 589 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 590 ures_getByKey(rb, KEYMAP, rb, status); 591 592 if (U_FAILURE(*status)) { 593 ures_close(rb); 594 return 0; 595 } 596 597 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus); 598 if (U_SUCCESS(tmpStatus)) { 599 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen); 600 bcpKeyBuf[bcpKeyLen] = 0; 601 resultLen = bcpKeyLen; 602 } else { 603 if (_isLDMLKey(key, keyLen)) { 604 uprv_memcpy(bcpKeyBuf, key, keyLen); 605 bcpKeyBuf[keyLen] = 0; 606 resultLen = keyLen; 607 } else { 608 /* mapping not availabe */ 609 *status = U_ILLEGAL_ARGUMENT_ERROR; 610 } 611 } 612 ures_close(rb); 613 614 if (U_FAILURE(*status)) { 615 return 0; 616 } 617 618 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity)); 619 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status); 620 } 621 622 static int32_t 623 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen, 624 char* key, int32_t keyCapacity, 625 UErrorCode *status) { 626 UResourceBundle *rb; 627 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 628 int32_t resultLen = 0; 629 int32_t i; 630 const char *resKey = NULL; 631 UResourceBundle *mapData; 632 633 if (bcpKeyLen < 0) { 634 bcpKeyLen = (int32_t)uprv_strlen(bcpKey); 635 } 636 637 if (bcpKeyLen >= sizeof(bcpKeyBuf)) { 638 *status = U_ILLEGAL_ARGUMENT_ERROR; 639 return 0; 640 } 641 642 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen); 643 bcpKeyBuf[bcpKeyLen] = 0; 644 645 /* to lower case */ 646 for (i = 0; i < bcpKeyLen; i++) { 647 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]); 648 } 649 650 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 651 ures_getByKey(rb, KEYMAP, rb, status); 652 if (U_FAILURE(*status)) { 653 ures_close(rb); 654 return 0; 655 } 656 657 mapData = ures_getNextResource(rb, NULL, status); 658 while (U_SUCCESS(*status)) { 659 const UChar *uBcpKey; 660 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 661 int32_t tmpBcpKeyLen; 662 663 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status); 664 if (U_FAILURE(*status)) { 665 break; 666 } 667 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen); 668 tmpBcpKeyBuf[tmpBcpKeyLen] = 0; 669 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) { 670 /* found a matching BCP47 key */ 671 resKey = ures_getKey(mapData); 672 resultLen = (int32_t)uprv_strlen(resKey); 673 break; 674 } 675 if (!ures_hasNext(rb)) { 676 break; 677 } 678 ures_getNextResource(rb, mapData, status); 679 } 680 ures_close(mapData); 681 ures_close(rb); 682 683 if (U_FAILURE(*status)) { 684 return 0; 685 } 686 687 if (resKey == NULL) { 688 resKey = bcpKeyBuf; 689 resultLen = bcpKeyLen; 690 } 691 692 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity)); 693 return u_terminateChars(key, keyCapacity, resultLen, status); 694 } 695 696 static int32_t 697 _ldmlTypeToBCP47(const char* key, int32_t keyLen, 698 const char* type, int32_t typeLen, 699 char* bcpType, int32_t bcpTypeCapacity, 700 UErrorCode *status) { 701 UResourceBundle *rb, *keyTypeData, *typeMapForKey; 702 char keyBuf[MAX_LDML_KEY_LEN]; 703 char typeBuf[MAX_LDML_TYPE_LEN]; 704 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 705 int32_t resultLen = 0; 706 int32_t i; 707 UErrorCode tmpStatus = U_ZERO_ERROR; 708 const UChar *uBcpType, *uCanonicalType; 709 int32_t bcpTypeLen, canonicalTypeLen; 710 UBool isTimezone = FALSE; 711 712 if (keyLen < 0) { 713 keyLen = (int32_t)uprv_strlen(key); 714 } 715 if (keyLen >= sizeof(keyBuf)) { 716 /* no known valid LDML key exceeding 21 */ 717 *status = U_ILLEGAL_ARGUMENT_ERROR; 718 return 0; 719 } 720 uprv_memcpy(keyBuf, key, keyLen); 721 keyBuf[keyLen] = 0; 722 723 /* to lower case */ 724 for (i = 0; i < keyLen; i++) { 725 keyBuf[i] = uprv_tolower(keyBuf[i]); 726 } 727 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 728 isTimezone = TRUE; 729 } 730 731 if (typeLen < 0) { 732 typeLen = (int32_t)uprv_strlen(type); 733 } 734 if (typeLen >= sizeof(typeBuf)) { 735 *status = U_ILLEGAL_ARGUMENT_ERROR; 736 return 0; 737 } 738 739 if (isTimezone) { 740 /* replace '/' with ':' */ 741 for (i = 0; i < typeLen; i++) { 742 if (*(type + i) == '/') { 743 typeBuf[i] = ':'; 744 } else { 745 typeBuf[i] = *(type + i); 746 } 747 } 748 typeBuf[typeLen] = 0; 749 type = &typeBuf[0]; 750 } 751 752 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status); 753 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status); 754 if (U_FAILURE(*status)) { 755 ures_close(rb); 756 ures_close(keyTypeData); 757 return 0; 758 } 759 760 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus); 761 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus); 762 if (U_SUCCESS(tmpStatus)) { 763 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 764 resultLen = bcpTypeLen; 765 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 766 /* is this type alias? */ 767 tmpStatus = U_ZERO_ERROR; 768 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus); 769 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 770 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus); 771 if (U_SUCCESS(tmpStatus)) { 772 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen); 773 if (isTimezone) { 774 /* replace '/' with ':' */ 775 for (i = 0; i < canonicalTypeLen; i++) { 776 if (typeBuf[i] == '/') { 777 typeBuf[i] = ':'; 778 } 779 } 780 } 781 typeBuf[canonicalTypeLen] = 0; 782 783 /* look up the canonical type */ 784 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus); 785 if (U_SUCCESS(tmpStatus)) { 786 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 787 resultLen = bcpTypeLen; 788 } 789 } 790 if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 791 if (_isLDMLType(type, typeLen)) { 792 uprv_memcpy(bcpTypeBuf, type, typeLen); 793 resultLen = typeLen; 794 } else { 795 /* mapping not availabe */ 796 *status = U_ILLEGAL_ARGUMENT_ERROR; 797 } 798 } 799 } else { 800 *status = tmpStatus; 801 } 802 ures_close(rb); 803 ures_close(typeMapForKey); 804 ures_close(keyTypeData); 805 806 if (U_FAILURE(*status)) { 807 return 0; 808 } 809 810 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity)); 811 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status); 812 } 813 814 static int32_t 815 _bcp47ToLDMLType(const char* key, int32_t keyLen, 816 const char* bcpType, int32_t bcpTypeLen, 817 char* type, int32_t typeCapacity, 818 UErrorCode *status) { 819 UResourceBundle *rb; 820 char keyBuf[MAX_LDML_KEY_LEN]; 821 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 822 int32_t resultLen = 0; 823 int32_t i; 824 const char *resType = NULL; 825 UResourceBundle *mapData; 826 UErrorCode tmpStatus = U_ZERO_ERROR; 827 int32_t copyLen; 828 829 if (keyLen < 0) { 830 keyLen = (int32_t)uprv_strlen(key); 831 } 832 833 if (keyLen >= sizeof(keyBuf)) { 834 /* no known valid LDML key exceeding 21 */ 835 *status = U_ILLEGAL_ARGUMENT_ERROR; 836 return 0; 837 } 838 uprv_memcpy(keyBuf, key, keyLen); 839 keyBuf[keyLen] = 0; 840 841 /* to lower case */ 842 for (i = 0; i < keyLen; i++) { 843 keyBuf[i] = uprv_tolower(keyBuf[i]); 844 } 845 846 847 if (bcpTypeLen < 0) { 848 bcpTypeLen = (int32_t)uprv_strlen(bcpType); 849 } 850 851 if (bcpTypeLen >= sizeof(bcpTypeBuf)) { 852 *status = U_ILLEGAL_ARGUMENT_ERROR; 853 return 0; 854 } 855 856 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen); 857 bcpTypeBuf[bcpTypeLen] = 0; 858 859 /* to lower case */ 860 for (i = 0; i < bcpTypeLen; i++) { 861 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]); 862 } 863 864 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 865 ures_getByKey(rb, TYPEMAP, rb, status); 866 if (U_FAILURE(*status)) { 867 ures_close(rb); 868 return 0; 869 } 870 871 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 872 mapData = ures_getNextResource(rb, NULL, &tmpStatus); 873 while (U_SUCCESS(tmpStatus)) { 874 const UChar *uBcpType; 875 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 876 int32_t tmpBcpTypeLen; 877 878 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus); 879 if (U_FAILURE(tmpStatus)) { 880 break; 881 } 882 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen); 883 tmpBcpTypeBuf[tmpBcpTypeLen] = 0; 884 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) { 885 /* found a matching BCP47 type */ 886 resType = ures_getKey(mapData); 887 resultLen = (int32_t)uprv_strlen(resType); 888 break; 889 } 890 if (!ures_hasNext(rb)) { 891 break; 892 } 893 ures_getNextResource(rb, mapData, &tmpStatus); 894 } 895 ures_close(mapData); 896 ures_close(rb); 897 898 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) { 899 *status = tmpStatus; 900 return 0; 901 } 902 903 if (resType == NULL) { 904 resType = bcpTypeBuf; 905 resultLen = bcpTypeLen; 906 } 907 908 copyLen = uprv_min(resultLen, typeCapacity); 909 uprv_memcpy(type, resType, copyLen); 910 911 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 912 for (i = 0; i < copyLen; i++) { 913 if (*(type + i) == ':') { 914 *(type + i) = '/'; 915 } 916 } 917 } 918 919 return u_terminateChars(type, typeCapacity, resultLen, status); 920 } 921 922 static int32_t 923 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 924 char buf[ULOC_LANG_CAPACITY]; 925 UErrorCode tmpStatus = U_ZERO_ERROR; 926 int32_t len, i; 927 int32_t reslen = 0; 928 929 if (U_FAILURE(*status)) { 930 return 0; 931 } 932 933 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); 934 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 935 if (strict) { 936 *status = U_ILLEGAL_ARGUMENT_ERROR; 937 return 0; 938 } 939 len = 0; 940 } 941 942 /* Note: returned language code is in lower case letters */ 943 944 if (len == 0) { 945 if (reslen < capacity) { 946 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 947 } 948 reslen += LANG_UND_LEN; 949 } else if (!_isLanguageSubtag(buf, len)) { 950 /* invalid language code */ 951 if (strict) { 952 *status = U_ILLEGAL_ARGUMENT_ERROR; 953 return 0; 954 } 955 if (reslen < capacity) { 956 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 957 } 958 reslen += LANG_UND_LEN; 959 } else { 960 /* resolve deprecated */ 961 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 962 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { 963 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); 964 len = (int32_t)uprv_strlen(buf); 965 break; 966 } 967 } 968 if (reslen < capacity) { 969 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 970 } 971 reslen += len; 972 } 973 u_terminateChars(appendAt, capacity, reslen, status); 974 return reslen; 975 } 976 977 static int32_t 978 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 979 char buf[ULOC_SCRIPT_CAPACITY]; 980 UErrorCode tmpStatus = U_ZERO_ERROR; 981 int32_t len; 982 int32_t reslen = 0; 983 984 if (U_FAILURE(*status)) { 985 return 0; 986 } 987 988 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); 989 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 990 if (strict) { 991 *status = U_ILLEGAL_ARGUMENT_ERROR; 992 } 993 return 0; 994 } 995 996 if (len > 0) { 997 if (!_isScriptSubtag(buf, len)) { 998 /* invalid script code */ 999 if (strict) { 1000 *status = U_ILLEGAL_ARGUMENT_ERROR; 1001 } 1002 return 0; 1003 } else { 1004 if (reslen < capacity) { 1005 *(appendAt + reslen) = SEP; 1006 } 1007 reslen++; 1008 1009 if (reslen < capacity) { 1010 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1011 } 1012 reslen += len; 1013 } 1014 } 1015 u_terminateChars(appendAt, capacity, reslen, status); 1016 return reslen; 1017 } 1018 1019 static int32_t 1020 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1021 char buf[ULOC_COUNTRY_CAPACITY]; 1022 UErrorCode tmpStatus = U_ZERO_ERROR; 1023 int32_t len; 1024 int32_t reslen = 0; 1025 1026 if (U_FAILURE(*status)) { 1027 return 0; 1028 } 1029 1030 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); 1031 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1032 if (strict) { 1033 *status = U_ILLEGAL_ARGUMENT_ERROR; 1034 } 1035 return 0; 1036 } 1037 1038 if (len > 0) { 1039 if (!_isRegionSubtag(buf, len)) { 1040 /* invalid region code */ 1041 if (strict) { 1042 *status = U_ILLEGAL_ARGUMENT_ERROR; 1043 } 1044 return 0; 1045 } else { 1046 if (reslen < capacity) { 1047 *(appendAt + reslen) = SEP; 1048 } 1049 reslen++; 1050 1051 if (reslen < capacity) { 1052 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1053 } 1054 reslen += len; 1055 } 1056 } 1057 u_terminateChars(appendAt, capacity, reslen, status); 1058 return reslen; 1059 } 1060 1061 static int32_t 1062 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { 1063 char buf[ULOC_FULLNAME_CAPACITY]; 1064 UErrorCode tmpStatus = U_ZERO_ERROR; 1065 int32_t len, i; 1066 int32_t reslen = 0; 1067 1068 if (U_FAILURE(*status)) { 1069 return 0; 1070 } 1071 1072 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 1073 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1074 if (strict) { 1075 *status = U_ILLEGAL_ARGUMENT_ERROR; 1076 } 1077 return 0; 1078 } 1079 1080 if (len > 0) { 1081 char *p, *pVar; 1082 UBool bNext = TRUE; 1083 VariantListEntry *var; 1084 VariantListEntry *varFirst = NULL; 1085 1086 pVar = NULL; 1087 p = buf; 1088 while (bNext) { 1089 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 1090 if (*p == 0) { 1091 bNext = FALSE; 1092 } else { 1093 *p = 0; /* terminate */ 1094 } 1095 if (pVar == NULL) { 1096 if (strict) { 1097 *status = U_ILLEGAL_ARGUMENT_ERROR; 1098 break; 1099 } 1100 /* ignore empty variant */ 1101 } else { 1102 /* ICU uses upper case letters for variants, but 1103 the canonical format is lowercase in BCP47 */ 1104 for (i = 0; *(pVar + i) != 0; i++) { 1105 *(pVar + i) = uprv_tolower(*(pVar + i)); 1106 } 1107 1108 /* validate */ 1109 if (_isVariantSubtag(pVar, -1)) { 1110 if (uprv_strcmp(pVar,POSIX_VALUE)) { 1111 /* emit the variant to the list */ 1112 var = uprv_malloc(sizeof(VariantListEntry)); 1113 if (var == NULL) { 1114 *status = U_MEMORY_ALLOCATION_ERROR; 1115 break; 1116 } 1117 var->variant = pVar; 1118 if (!_addVariantToList(&varFirst, var)) { 1119 /* duplicated variant */ 1120 uprv_free(var); 1121 if (strict) { 1122 *status = U_ILLEGAL_ARGUMENT_ERROR; 1123 break; 1124 } 1125 } 1126 } else { 1127 /* Special handling for POSIX variant, need to remember that we had it and then */ 1128 /* treat it like an extension later. */ 1129 *hadPosix = TRUE; 1130 } 1131 } else if (strict) { 1132 *status = U_ILLEGAL_ARGUMENT_ERROR; 1133 break; 1134 } 1135 } 1136 /* reset variant starting position */ 1137 pVar = NULL; 1138 } else if (pVar == NULL) { 1139 pVar = p; 1140 } 1141 p++; 1142 } 1143 1144 if (U_SUCCESS(*status)) { 1145 if (varFirst != NULL) { 1146 int32_t varLen; 1147 1148 /* write out sorted/validated/normalized variants to the target */ 1149 var = varFirst; 1150 while (var != NULL) { 1151 if (reslen < capacity) { 1152 *(appendAt + reslen) = SEP; 1153 } 1154 reslen++; 1155 varLen = (int32_t)uprv_strlen(var->variant); 1156 if (reslen < capacity) { 1157 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); 1158 } 1159 reslen += varLen; 1160 var = var->next; 1161 } 1162 } 1163 } 1164 1165 /* clean up */ 1166 var = varFirst; 1167 while (var != NULL) { 1168 VariantListEntry *tmpVar = var->next; 1169 uprv_free(var); 1170 var = tmpVar; 1171 } 1172 1173 if (U_FAILURE(*status)) { 1174 return 0; 1175 } 1176 } 1177 1178 u_terminateChars(appendAt, capacity, reslen, status); 1179 return reslen; 1180 } 1181 1182 static int32_t 1183 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 1184 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1185 UEnumeration *keywordEnum = NULL; 1186 int32_t reslen = 0; 1187 1188 keywordEnum = uloc_openKeywords(localeID, status); 1189 if (U_FAILURE(*status) && !hadPosix) { 1190 uenum_close(keywordEnum); 1191 return 0; 1192 } 1193 if (keywordEnum != NULL || hadPosix) { 1194 /* reorder extensions */ 1195 int32_t len; 1196 const char *key; 1197 ExtensionListEntry *firstExt = NULL; 1198 ExtensionListEntry *ext; 1199 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1200 char *pExtBuf = extBuf; 1201 int32_t extBufCapacity = sizeof(extBuf); 1202 const char *bcpKey, *bcpValue; 1203 UErrorCode tmpStatus = U_ZERO_ERROR; 1204 int32_t keylen; 1205 UBool isLDMLKeyword; 1206 1207 while (TRUE) { 1208 key = uenum_next(keywordEnum, NULL, status); 1209 if (key == NULL) { 1210 break; 1211 } 1212 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); 1213 if (U_FAILURE(tmpStatus)) { 1214 if (strict) { 1215 *status = U_ILLEGAL_ARGUMENT_ERROR; 1216 break; 1217 } 1218 /* ignore this keyword */ 1219 tmpStatus = U_ZERO_ERROR; 1220 continue; 1221 } 1222 1223 keylen = (int32_t)uprv_strlen(key); 1224 isLDMLKeyword = (keylen > 1); 1225 1226 if (isLDMLKeyword) { 1227 int32_t modKeyLen; 1228 1229 /* transform key and value to bcp47 style */ 1230 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus); 1231 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1232 if (strict) { 1233 *status = U_ILLEGAL_ARGUMENT_ERROR; 1234 break; 1235 } 1236 tmpStatus = U_ZERO_ERROR; 1237 continue; 1238 } 1239 1240 bcpKey = pExtBuf; 1241 pExtBuf += (modKeyLen + 1); 1242 extBufCapacity -= (modKeyLen + 1); 1243 1244 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus); 1245 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1246 if (strict) { 1247 *status = U_ILLEGAL_ARGUMENT_ERROR; 1248 break; 1249 } 1250 tmpStatus = U_ZERO_ERROR; 1251 continue; 1252 } 1253 bcpValue = pExtBuf; 1254 pExtBuf += (len + 1); 1255 extBufCapacity -= (len + 1); 1256 } else { 1257 if (*key == PRIVATEUSE) { 1258 if (!_isPrivateuseValueSubtags(buf, len)) { 1259 if (strict) { 1260 *status = U_ILLEGAL_ARGUMENT_ERROR; 1261 break; 1262 } 1263 continue; 1264 } 1265 } else { 1266 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { 1267 if (strict) { 1268 *status = U_ILLEGAL_ARGUMENT_ERROR; 1269 break; 1270 } 1271 continue; 1272 } 1273 } 1274 bcpKey = key; 1275 if ((len + 1) < extBufCapacity) { 1276 uprv_memcpy(pExtBuf, buf, len); 1277 bcpValue = pExtBuf; 1278 1279 pExtBuf += len; 1280 1281 *pExtBuf = 0; 1282 pExtBuf++; 1283 1284 extBufCapacity -= (len + 1); 1285 } else { 1286 *status = U_ILLEGAL_ARGUMENT_ERROR; 1287 break; 1288 } 1289 } 1290 1291 /* create ExtensionListEntry */ 1292 ext = uprv_malloc(sizeof(ExtensionListEntry)); 1293 if (ext == NULL) { 1294 *status = U_MEMORY_ALLOCATION_ERROR; 1295 break; 1296 } 1297 ext->key = bcpKey; 1298 ext->value = bcpValue; 1299 1300 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1301 uprv_free(ext); 1302 if (strict) { 1303 *status = U_ILLEGAL_ARGUMENT_ERROR; 1304 break; 1305 } 1306 } 1307 } 1308 1309 /* Special handling for POSIX variant - add the keywords for POSIX */ 1310 if (hadPosix) { 1311 /* create ExtensionListEntry for POSIX */ 1312 ext = uprv_malloc(sizeof(ExtensionListEntry)); 1313 if (ext == NULL) { 1314 *status = U_MEMORY_ALLOCATION_ERROR; 1315 } 1316 ext->key = POSIX_KEY; 1317 ext->value = POSIX_VALUE; 1318 1319 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1320 uprv_free(ext); 1321 } 1322 } 1323 1324 if (U_SUCCESS(*status) && (firstExt != NULL)) { 1325 UBool startLDMLExtension = FALSE; 1326 1327 /* write out the sorted BCP47 extensions and private use */ 1328 ext = firstExt; 1329 while (ext != NULL) { 1330 if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) { 1331 /* write LDML singleton extension */ 1332 if (reslen < capacity) { 1333 *(appendAt + reslen) = SEP; 1334 } 1335 reslen++; 1336 if (reslen < capacity) { 1337 *(appendAt + reslen) = LDMLEXT; 1338 } 1339 reslen++; 1340 startLDMLExtension = TRUE; 1341 } 1342 1343 if (reslen < capacity) { 1344 *(appendAt + reslen) = SEP; 1345 } 1346 reslen++; 1347 len = (int32_t)uprv_strlen(ext->key); 1348 if (reslen < capacity) { 1349 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); 1350 } 1351 reslen += len; 1352 if (reslen < capacity) { 1353 *(appendAt + reslen) = SEP; 1354 } 1355 reslen++; 1356 len = (int32_t)uprv_strlen(ext->value); 1357 if (reslen < capacity) { 1358 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); 1359 } 1360 reslen += len; 1361 1362 ext = ext->next; 1363 } 1364 } 1365 /* clean up */ 1366 ext = firstExt; 1367 while (ext != NULL) { 1368 ExtensionListEntry *tmpExt = ext->next; 1369 uprv_free(ext); 1370 ext = tmpExt; 1371 } 1372 1373 uenum_close(keywordEnum); 1374 1375 if (U_FAILURE(*status)) { 1376 return 0; 1377 } 1378 } 1379 1380 return u_terminateChars(appendAt, capacity, reslen, status); 1381 } 1382 1383 /** 1384 * Append keywords parsed from LDML extension value 1385 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} 1386 * Note: char* buf is used for storing keywords 1387 */ 1388 static void 1389 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { 1390 const char *p, *pNext, *pSep; 1391 const char *pBcpKey, *pBcpType; 1392 const char *pKey, *pType; 1393 int32_t bcpKeyLen = 0, bcpTypeLen; 1394 ExtensionListEntry *kwd, *nextKwd; 1395 ExtensionListEntry *kwdFirst = NULL; 1396 int32_t bufIdx = 0; 1397 int32_t len; 1398 1399 pNext = ldmlext; 1400 pBcpKey = pBcpType = NULL; 1401 while (pNext) { 1402 p = pSep = pNext; 1403 1404 /* locate next separator char */ 1405 while (*pSep) { 1406 if (*pSep == SEP) { 1407 break; 1408 } 1409 pSep++; 1410 } 1411 if (*pSep == 0) { 1412 /* last subtag */ 1413 pNext = NULL; 1414 } else { 1415 pNext = pSep + 1; 1416 } 1417 1418 if (pBcpKey == NULL) { 1419 pBcpKey = p; 1420 bcpKeyLen = (int32_t)(pSep - p); 1421 } else { 1422 pBcpType = p; 1423 bcpTypeLen = (int32_t)(pSep - p); 1424 1425 /* BCP key to locale key */ 1426 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1427 if (U_FAILURE(*status)) { 1428 goto cleanup; 1429 } 1430 pKey = buf + bufIdx; 1431 bufIdx += len; 1432 *(buf + bufIdx) = 0; 1433 bufIdx++; 1434 1435 /* BCP type to locale type */ 1436 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1437 if (U_FAILURE(*status)) { 1438 goto cleanup; 1439 } 1440 pType = buf + bufIdx; 1441 bufIdx += len; 1442 *(buf + bufIdx) = 0; 1443 bufIdx++; 1444 1445 /* Special handling for u-va-posix, since we want to treat this as a variant, not */ 1446 /* as a keyword. */ 1447 1448 if ( !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) { 1449 *posixVariant = TRUE; 1450 } else { 1451 /* create an ExtensionListEntry for this keyword */ 1452 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1453 if (kwd == NULL) { 1454 *status = U_MEMORY_ALLOCATION_ERROR; 1455 goto cleanup; 1456 } 1457 1458 kwd->key = pKey; 1459 kwd->value = pType; 1460 1461 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1462 *status = U_ILLEGAL_ARGUMENT_ERROR; 1463 uprv_free(kwd); 1464 goto cleanup; 1465 } 1466 } 1467 1468 /* for next pair */ 1469 pBcpKey = NULL; 1470 pBcpType = NULL; 1471 } 1472 } 1473 1474 if (pBcpKey != NULL) { 1475 *status = U_ILLEGAL_ARGUMENT_ERROR; 1476 goto cleanup; 1477 } 1478 1479 kwd = kwdFirst; 1480 while (kwd != NULL) { 1481 nextKwd = kwd->next; 1482 _addExtensionToList(appendTo, kwd, FALSE); 1483 kwd = nextKwd; 1484 } 1485 1486 return; 1487 1488 cleanup: 1489 kwd = kwdFirst; 1490 while (kwd != NULL) { 1491 nextKwd = kwd->next; 1492 uprv_free(kwd); 1493 kwd = nextKwd; 1494 } 1495 } 1496 1497 1498 static int32_t 1499 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { 1500 int32_t reslen = 0; 1501 int32_t i, n; 1502 int32_t len; 1503 ExtensionListEntry *kwdFirst = NULL; 1504 ExtensionListEntry *kwd; 1505 const char *key, *type; 1506 char kwdBuf[ULOC_KEYWORDS_CAPACITY]; 1507 UBool posixVariant = FALSE; 1508 1509 if (U_FAILURE(*status)) { 1510 return 0; 1511 } 1512 1513 n = ultag_getExtensionsSize(langtag); 1514 1515 /* resolve locale keywords and reordering keys */ 1516 for (i = 0; i < n; i++) { 1517 key = ultag_getExtensionKey(langtag, i); 1518 type = ultag_getExtensionValue(langtag, i); 1519 if (*key == LDMLEXT) { 1520 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBuf), &posixVariant, status); 1521 if (U_FAILURE(*status)) { 1522 break; 1523 } 1524 } else { 1525 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1526 if (kwd == NULL) { 1527 *status = U_MEMORY_ALLOCATION_ERROR; 1528 break; 1529 } 1530 kwd->key = key; 1531 kwd->value = type; 1532 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1533 uprv_free(kwd); 1534 *status = U_ILLEGAL_ARGUMENT_ERROR; 1535 break; 1536 } 1537 } 1538 } 1539 1540 if (U_SUCCESS(*status)) { 1541 type = ultag_getPrivateUse(langtag); 1542 if ((int32_t)uprv_strlen(type) > 0) { 1543 /* add private use as a keyword */ 1544 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1545 if (kwd == NULL) { 1546 *status = U_MEMORY_ALLOCATION_ERROR; 1547 } else { 1548 kwd->key = PRIVATEUSE_KEY; 1549 kwd->value = type; 1550 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1551 uprv_free(kwd); 1552 *status = U_ILLEGAL_ARGUMENT_ERROR; 1553 } 1554 } 1555 } 1556 } 1557 1558 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ 1559 1560 if (U_SUCCESS(*status) && posixVariant) { 1561 len = (int32_t) uprv_strlen(_POSIX); 1562 if (reslen < capacity) { 1563 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); 1564 } 1565 reslen += len; 1566 } 1567 1568 if (U_SUCCESS(*status) && kwdFirst != NULL) { 1569 /* write out the sorted keywords */ 1570 kwd = kwdFirst; 1571 while (kwd != NULL) { 1572 if (reslen < capacity) { 1573 if (kwd == kwdFirst) { 1574 /* '@' */ 1575 *(appendAt + reslen) = LOCALE_EXT_SEP; 1576 } else { 1577 /* ';' */ 1578 *(appendAt + reslen) = LOCALE_KEYWORD_SEP; 1579 } 1580 } 1581 reslen++; 1582 1583 /* key */ 1584 len = (int32_t)uprv_strlen(kwd->key); 1585 if (reslen < capacity) { 1586 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); 1587 } 1588 reslen += len; 1589 1590 /* '=' */ 1591 if (reslen < capacity) { 1592 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; 1593 } 1594 reslen++; 1595 1596 /* type */ 1597 len = (int32_t)uprv_strlen(kwd->value); 1598 if (reslen < capacity) { 1599 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); 1600 } 1601 reslen += len; 1602 1603 kwd = kwd->next; 1604 } 1605 } 1606 1607 /* clean up */ 1608 kwd = kwdFirst; 1609 while (kwd != NULL) { 1610 ExtensionListEntry *tmpKwd = kwd->next; 1611 uprv_free(kwd); 1612 kwd = tmpKwd; 1613 } 1614 1615 if (U_FAILURE(*status)) { 1616 return 0; 1617 } 1618 1619 return u_terminateChars(appendAt, capacity, reslen, status); 1620 } 1621 1622 /* 1623 * ------------------------------------------------- 1624 * 1625 * ultag_ functions 1626 * 1627 * ------------------------------------------------- 1628 */ 1629 1630 /* Bit flags used by the parser */ 1631 #define LANG 0x0001 1632 #define EXTL 0x0002 1633 #define SCRT 0x0004 1634 #define REGN 0x0008 1635 #define VART 0x0010 1636 #define EXTS 0x0020 1637 #define EXTV 0x0040 1638 #define PRIV 0x0080 1639 1640 static ULanguageTag* 1641 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { 1642 ULanguageTag *t; 1643 char *tagBuf; 1644 int16_t next; 1645 char *pSubtag, *pNext, *pLastGoodPosition; 1646 int32_t subtagLen; 1647 int32_t extlangIdx; 1648 ExtensionListEntry *pExtension; 1649 char *pExtValueSubtag, *pExtValueSubtagEnd; 1650 int32_t i; 1651 UBool isLDMLExtension, reqLDMLType; 1652 1653 if (parsedLen != NULL) { 1654 *parsedLen = 0; 1655 } 1656 1657 if (U_FAILURE(*status)) { 1658 return NULL; 1659 } 1660 1661 if (tagLen < 0) { 1662 tagLen = (int32_t)uprv_strlen(tag); 1663 } 1664 1665 /* copy the entire string */ 1666 tagBuf = (char*)uprv_malloc(tagLen + 1); 1667 if (tagBuf == NULL) { 1668 *status = U_MEMORY_ALLOCATION_ERROR; 1669 return NULL; 1670 } 1671 uprv_memcpy(tagBuf, tag, tagLen); 1672 *(tagBuf + tagLen) = 0; 1673 1674 /* create a ULanguageTag */ 1675 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); 1676 _initializeULanguageTag(t); 1677 t->buf = tagBuf; 1678 if (t == NULL) { 1679 uprv_free(tagBuf); 1680 *status = U_MEMORY_ALLOCATION_ERROR; 1681 return NULL; 1682 } 1683 1684 if (tagLen < MINLEN) { 1685 /* the input tag is too short - return empty ULanguageTag */ 1686 return t; 1687 } 1688 1689 /* check if the tag is grandfathered */ 1690 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { 1691 if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) { 1692 /* a grandfathered tag is always longer than its preferred mapping */ 1693 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); 1694 t->language = t->buf; 1695 if (parsedLen != NULL) { 1696 *parsedLen = tagLen; 1697 } 1698 return t; 1699 } 1700 } 1701 1702 /* 1703 * langtag = language 1704 * ["-" script] 1705 * ["-" region] 1706 * *("-" variant) 1707 * *("-" extension) 1708 * ["-" privateuse] 1709 */ 1710 1711 next = LANG | PRIV; 1712 pNext = pLastGoodPosition = tagBuf; 1713 extlangIdx = 0; 1714 pExtension = NULL; 1715 pExtValueSubtag = NULL; 1716 pExtValueSubtagEnd = NULL; 1717 isLDMLExtension = FALSE; 1718 reqLDMLType = FALSE; 1719 1720 while (pNext) { 1721 char *pSep; 1722 1723 pSubtag = pNext; 1724 1725 /* locate next separator char */ 1726 pSep = pSubtag; 1727 while (*pSep) { 1728 if (*pSep == SEP) { 1729 break; 1730 } 1731 pSep++; 1732 } 1733 if (*pSep == 0) { 1734 /* last subtag */ 1735 pNext = NULL; 1736 } else { 1737 pNext = pSep + 1; 1738 } 1739 subtagLen = (int32_t)(pSep - pSubtag); 1740 1741 if (next & LANG) { 1742 if (_isLanguageSubtag(pSubtag, subtagLen)) { 1743 *pSep = 0; /* terminate */ 1744 t->language = T_CString_toLowerCase(pSubtag); 1745 1746 pLastGoodPosition = pSep; 1747 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1748 continue; 1749 } 1750 } 1751 if (next & EXTL) { 1752 if (_isExtlangSubtag(pSubtag, subtagLen)) { 1753 *pSep = 0; 1754 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); 1755 1756 pLastGoodPosition = pSep; 1757 if (extlangIdx < 3) { 1758 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1759 } else { 1760 next = SCRT | REGN | VART | EXTS | PRIV; 1761 } 1762 continue; 1763 } 1764 } 1765 if (next & SCRT) { 1766 if (_isScriptSubtag(pSubtag, subtagLen)) { 1767 char *p = pSubtag; 1768 1769 *pSep = 0; 1770 1771 /* to title case */ 1772 *p = uprv_toupper(*p); 1773 p++; 1774 for (; *p; p++) { 1775 *p = uprv_tolower(*p); 1776 } 1777 1778 t->script = pSubtag; 1779 1780 pLastGoodPosition = pSep; 1781 next = REGN | VART | EXTS | PRIV; 1782 continue; 1783 } 1784 } 1785 if (next & REGN) { 1786 if (_isRegionSubtag(pSubtag, subtagLen)) { 1787 *pSep = 0; 1788 t->region = T_CString_toUpperCase(pSubtag); 1789 1790 pLastGoodPosition = pSep; 1791 next = VART | EXTS | PRIV; 1792 continue; 1793 } 1794 } 1795 if (next & VART) { 1796 if (_isVariantSubtag(pSubtag, subtagLen)) { 1797 VariantListEntry *var; 1798 UBool isAdded; 1799 1800 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 1801 if (var == NULL) { 1802 *status = U_MEMORY_ALLOCATION_ERROR; 1803 goto error; 1804 } 1805 *pSep = 0; 1806 var->variant = T_CString_toUpperCase(pSubtag); 1807 isAdded = _addVariantToList(&(t->variants), var); 1808 if (!isAdded) { 1809 /* duplicated variant entry */ 1810 uprv_free(var); 1811 break; 1812 } 1813 pLastGoodPosition = pSep; 1814 next = VART | EXTS | PRIV; 1815 continue; 1816 } 1817 } 1818 if (next & EXTS) { 1819 if (_isExtensionSingleton(pSubtag, subtagLen)) { 1820 if (pExtension != NULL) { 1821 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1822 /* the previous extension is incomplete */ 1823 uprv_free(pExtension); 1824 pExtension = NULL; 1825 break; 1826 } 1827 1828 /* terminate the previous extension value */ 1829 *pExtValueSubtagEnd = 0; 1830 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1831 1832 /* insert the extension to the list */ 1833 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1834 pLastGoodPosition = pExtValueSubtagEnd; 1835 } else { 1836 /* stop parsing here */ 1837 uprv_free(pExtension); 1838 pExtension = NULL; 1839 break; 1840 } 1841 1842 if (isLDMLExtension && reqLDMLType) { 1843 /* incomplete LDML extension key and type pair */ 1844 pExtension = NULL; 1845 break; 1846 } 1847 } 1848 1849 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT); 1850 1851 /* create a new extension */ 1852 pExtension = uprv_malloc(sizeof(ExtensionListEntry)); 1853 if (pExtension == NULL) { 1854 *status = U_MEMORY_ALLOCATION_ERROR; 1855 goto error; 1856 } 1857 *pSep = 0; 1858 pExtension->key = T_CString_toLowerCase(pSubtag); 1859 pExtension->value = NULL; /* will be set later */ 1860 1861 /* 1862 * reset the start and the end location of extension value 1863 * subtags for this extension 1864 */ 1865 pExtValueSubtag = NULL; 1866 pExtValueSubtagEnd = NULL; 1867 1868 next = EXTV; 1869 continue; 1870 } 1871 } 1872 if (next & EXTV) { 1873 if (_isExtensionSubtag(pSubtag, subtagLen)) { 1874 if (isLDMLExtension) { 1875 if (reqLDMLType) { 1876 /* already saw an LDML key */ 1877 if (!_isLDMLType(pSubtag, subtagLen)) { 1878 /* stop parsing here and let the valid LDML extension key/type 1879 pairs processed by the code out of this while loop */ 1880 break; 1881 } 1882 pExtValueSubtagEnd = pSep; 1883 reqLDMLType = FALSE; 1884 next = EXTS | EXTV | PRIV; 1885 } else { 1886 /* LDML key */ 1887 if (!_isLDMLKey(pSubtag, subtagLen)) { 1888 /* stop parsing here and let the valid LDML extension key/type 1889 pairs processed by the code out of this while loop */ 1890 break; 1891 } 1892 reqLDMLType = TRUE; 1893 next = EXTV; 1894 } 1895 } else { 1896 /* Mark the end of this subtag */ 1897 pExtValueSubtagEnd = pSep; 1898 next = EXTS | EXTV | PRIV; 1899 } 1900 1901 if (pExtValueSubtag == NULL) { 1902 /* if the start postion of this extension's value is not yet, 1903 this one is the first value subtag */ 1904 pExtValueSubtag = pSubtag; 1905 } 1906 continue; 1907 } 1908 } 1909 if (next & PRIV) { 1910 if (uprv_tolower(*pSubtag) == PRIVATEUSE) { 1911 char *pPrivuseVal; 1912 1913 if (pExtension != NULL) { 1914 /* Process the last extension */ 1915 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1916 /* the previous extension is incomplete */ 1917 uprv_free(pExtension); 1918 pExtension = NULL; 1919 break; 1920 } else { 1921 /* terminate the previous extension value */ 1922 *pExtValueSubtagEnd = 0; 1923 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1924 1925 /* insert the extension to the list */ 1926 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1927 pLastGoodPosition = pExtValueSubtagEnd; 1928 pExtension = NULL; 1929 } else { 1930 /* stop parsing here */ 1931 uprv_free(pExtension); 1932 pExtension = NULL; 1933 break; 1934 } 1935 } 1936 } 1937 1938 /* The rest of part will be private use value subtags */ 1939 if (pNext == NULL) { 1940 /* empty private use subtag */ 1941 break; 1942 } 1943 /* back up the private use value start position */ 1944 pPrivuseVal = pNext; 1945 1946 /* validate private use value subtags */ 1947 while (pNext) { 1948 pSubtag = pNext; 1949 pSep = pSubtag; 1950 while (*pSep) { 1951 if (*pSep == SEP) { 1952 break; 1953 } 1954 pSep++; 1955 } 1956 if (*pSep == 0) { 1957 /* last subtag */ 1958 pNext = NULL; 1959 } else { 1960 pNext = pSep + 1; 1961 } 1962 subtagLen = (int32_t)(pSep - pSubtag); 1963 1964 if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { 1965 pLastGoodPosition = pSep; 1966 } else { 1967 break; 1968 } 1969 } 1970 if (pLastGoodPosition - pPrivuseVal > 0) { 1971 *pLastGoodPosition = 0; 1972 t->privateuse = T_CString_toLowerCase(pPrivuseVal); 1973 } 1974 /* No more subtags, exiting the parse loop */ 1975 break; 1976 } 1977 break; 1978 } 1979 /* If we fell through here, it means this subtag is illegal - quit parsing */ 1980 break; 1981 } 1982 1983 if (pExtension != NULL) { 1984 /* Process the last extension */ 1985 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1986 /* the previous extension is incomplete */ 1987 uprv_free(pExtension); 1988 } else { 1989 /* terminate the previous extension value */ 1990 *pExtValueSubtagEnd = 0; 1991 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1992 /* insert the extension to the list */ 1993 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1994 pLastGoodPosition = pExtValueSubtagEnd; 1995 } else { 1996 uprv_free(pExtension); 1997 } 1998 } 1999 } 2000 2001 if (parsedLen != NULL) { 2002 *parsedLen = (int32_t)(pLastGoodPosition - t->buf); 2003 } 2004 2005 return t; 2006 2007 error: 2008 uprv_free(t); 2009 return NULL; 2010 } 2011 2012 static void 2013 ultag_close(ULanguageTag* langtag) { 2014 2015 if (langtag == NULL) { 2016 return; 2017 } 2018 2019 uprv_free(langtag->buf); 2020 2021 if (langtag->variants) { 2022 VariantListEntry *curVar = langtag->variants; 2023 while (curVar) { 2024 VariantListEntry *nextVar = curVar->next; 2025 uprv_free(curVar); 2026 curVar = nextVar; 2027 } 2028 } 2029 2030 if (langtag->extensions) { 2031 ExtensionListEntry *curExt = langtag->extensions; 2032 while (curExt) { 2033 ExtensionListEntry *nextExt = curExt->next; 2034 uprv_free(curExt); 2035 curExt = nextExt; 2036 } 2037 } 2038 2039 uprv_free(langtag); 2040 } 2041 2042 static const char* 2043 ultag_getLanguage(const ULanguageTag* langtag) { 2044 return langtag->language; 2045 } 2046 2047 #if 0 2048 static const char* 2049 ultag_getJDKLanguage(const ULanguageTag* langtag) { 2050 int32_t i; 2051 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 2052 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { 2053 return DEPRECATEDLANGS[i + 1]; 2054 } 2055 } 2056 return langtag->language; 2057 } 2058 #endif 2059 2060 static const char* 2061 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { 2062 if (idx >= 0 && idx < MAXEXTLANG) { 2063 return langtag->extlang[idx]; 2064 } 2065 return NULL; 2066 } 2067 2068 static int32_t 2069 ultag_getExtlangSize(const ULanguageTag* langtag) { 2070 int32_t size = 0; 2071 int32_t i; 2072 for (i = 0; i < MAXEXTLANG; i++) { 2073 if (langtag->extlang[i]) { 2074 size++; 2075 } 2076 } 2077 return size; 2078 } 2079 2080 static const char* 2081 ultag_getScript(const ULanguageTag* langtag) { 2082 return langtag->script; 2083 } 2084 2085 static const char* 2086 ultag_getRegion(const ULanguageTag* langtag) { 2087 return langtag->region; 2088 } 2089 2090 static const char* 2091 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { 2092 const char *var = NULL; 2093 VariantListEntry *cur = langtag->variants; 2094 int32_t i = 0; 2095 while (cur) { 2096 if (i == idx) { 2097 var = cur->variant; 2098 break; 2099 } 2100 cur = cur->next; 2101 i++; 2102 } 2103 return var; 2104 } 2105 2106 static int32_t 2107 ultag_getVariantsSize(const ULanguageTag* langtag) { 2108 int32_t size = 0; 2109 VariantListEntry *cur = langtag->variants; 2110 while (TRUE) { 2111 if (cur == NULL) { 2112 break; 2113 } 2114 size++; 2115 cur = cur->next; 2116 } 2117 return size; 2118 } 2119 2120 static const char* 2121 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { 2122 const char *key = NULL; 2123 ExtensionListEntry *cur = langtag->extensions; 2124 int32_t i = 0; 2125 while (cur) { 2126 if (i == idx) { 2127 key = cur->key; 2128 break; 2129 } 2130 cur = cur->next; 2131 i++; 2132 } 2133 return key; 2134 } 2135 2136 static const char* 2137 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { 2138 const char *val = NULL; 2139 ExtensionListEntry *cur = langtag->extensions; 2140 int32_t i = 0; 2141 while (cur) { 2142 if (i == idx) { 2143 val = cur->value; 2144 break; 2145 } 2146 cur = cur->next; 2147 i++; 2148 } 2149 return val; 2150 } 2151 2152 static int32_t 2153 ultag_getExtensionsSize(const ULanguageTag* langtag) { 2154 int32_t size = 0; 2155 ExtensionListEntry *cur = langtag->extensions; 2156 while (TRUE) { 2157 if (cur == NULL) { 2158 break; 2159 } 2160 size++; 2161 cur = cur->next; 2162 } 2163 return size; 2164 } 2165 2166 static const char* 2167 ultag_getPrivateUse(const ULanguageTag* langtag) { 2168 return langtag->privateuse; 2169 } 2170 2171 #if 0 2172 static const char* 2173 ultag_getGrandfathered(const ULanguageTag* langtag) { 2174 return langtag->grandfathered; 2175 } 2176 #endif 2177 2178 2179 /* 2180 * ------------------------------------------------- 2181 * 2182 * Locale/BCP47 conversion APIs, exposed as uloc_* 2183 * 2184 * ------------------------------------------------- 2185 */ 2186 U_DRAFT int32_t U_EXPORT2 2187 uloc_toLanguageTag(const char* localeID, 2188 char* langtag, 2189 int32_t langtagCapacity, 2190 UBool strict, 2191 UErrorCode* status) { 2192 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ 2193 char canonical[256]; 2194 int32_t reslen = 0; 2195 UErrorCode tmpStatus = U_ZERO_ERROR; 2196 UBool hadPosix = FALSE; 2197 2198 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ 2199 canonical[0] = 0; 2200 if (uprv_strlen(localeID) > 0) { 2201 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); 2202 if (tmpStatus != U_ZERO_ERROR) { 2203 *status = U_ILLEGAL_ARGUMENT_ERROR; 2204 return 0; 2205 } 2206 } 2207 2208 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); 2209 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2210 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2211 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); 2212 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2213 2214 return reslen; 2215 } 2216 2217 2218 U_DRAFT int32_t U_EXPORT2 2219 uloc_forLanguageTag(const char* langtag, 2220 char* localeID, 2221 int32_t localeIDCapacity, 2222 int32_t* parsedLength, 2223 UErrorCode* status) { 2224 ULanguageTag *lt; 2225 int32_t reslen = 0; 2226 const char *subtag, *p; 2227 int32_t len; 2228 int32_t i, n; 2229 UBool noRegion = TRUE; 2230 2231 lt = ultag_parse(langtag, -1, parsedLength, status); 2232 if (U_FAILURE(*status)) { 2233 return 0; 2234 } 2235 2236 /* language */ 2237 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); 2238 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { 2239 len = (int32_t)uprv_strlen(subtag); 2240 if (len > 0) { 2241 if (reslen < localeIDCapacity) { 2242 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); 2243 } 2244 reslen += len; 2245 } 2246 } 2247 2248 /* script */ 2249 subtag = ultag_getScript(lt); 2250 len = (int32_t)uprv_strlen(subtag); 2251 if (len > 0) { 2252 if (reslen < localeIDCapacity) { 2253 *(localeID + reslen) = LOCALE_SEP; 2254 } 2255 reslen++; 2256 2257 /* write out the script in title case */ 2258 p = subtag; 2259 while (*p) { 2260 if (reslen < localeIDCapacity) { 2261 if (p == subtag) { 2262 *(localeID + reslen) = uprv_toupper(*p); 2263 } else { 2264 *(localeID + reslen) = *p; 2265 } 2266 } 2267 reslen++; 2268 p++; 2269 } 2270 } 2271 2272 /* region */ 2273 subtag = ultag_getRegion(lt); 2274 len = (int32_t)uprv_strlen(subtag); 2275 if (len > 0) { 2276 if (reslen < localeIDCapacity) { 2277 *(localeID + reslen) = LOCALE_SEP; 2278 } 2279 reslen++; 2280 /* write out the retion in upper case */ 2281 p = subtag; 2282 while (*p) { 2283 if (reslen < localeIDCapacity) { 2284 *(localeID + reslen) = uprv_toupper(*p); 2285 } 2286 reslen++; 2287 p++; 2288 } 2289 noRegion = FALSE; 2290 } 2291 2292 /* variants */ 2293 n = ultag_getVariantsSize(lt); 2294 if (n > 0) { 2295 if (noRegion) { 2296 if (reslen < localeIDCapacity) { 2297 *(localeID + reslen) = LOCALE_SEP; 2298 } 2299 reslen++; 2300 } 2301 2302 for (i = 0; i < n; i++) { 2303 subtag = ultag_getVariant(lt, i); 2304 if (reslen < localeIDCapacity) { 2305 *(localeID + reslen) = LOCALE_SEP; 2306 } 2307 reslen++; 2308 /* write out the variant in upper case */ 2309 p = subtag; 2310 while (*p) { 2311 if (reslen < localeIDCapacity) { 2312 *(localeID + reslen) = uprv_toupper(*p); 2313 } 2314 reslen++; 2315 p++; 2316 } 2317 } 2318 } 2319 2320 /* keywords */ 2321 n = ultag_getExtensionsSize(lt); 2322 subtag = ultag_getPrivateUse(lt); 2323 if (n > 0 || uprv_strlen(subtag) > 0) { 2324 if (reslen == 0) { 2325 /* need a language */ 2326 if (reslen < localeIDCapacity) { 2327 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); 2328 } 2329 reslen += LANG_UND_LEN; 2330 } 2331 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); 2332 reslen += len; 2333 } 2334 2335 ultag_close(lt); 2336 return u_terminateChars(localeID, localeIDCapacity, reslen, status); 2337 } 2338 2339 2340