1 /* 2 ********************************************************************** 3 * Copyright (C) 2009-2010, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8 #include "unicode/utypes.h" 9 #include "unicode/ures.h" 10 #include "unicode/putil.h" 11 #include "unicode/uloc.h" 12 #include "ustr_imp.h" 13 #include "cmemory.h" 14 #include "cstring.h" 15 #include "putilimp.h" 16 #include "uinvchar.h" 17 18 /* struct holding a single variant */ 19 typedef struct VariantListEntry { 20 const char *variant; 21 struct VariantListEntry *next; 22 } VariantListEntry; 23 24 /* struct holding a single extension */ 25 typedef struct ExtensionListEntry { 26 const char *key; 27 const char *value; 28 struct ExtensionListEntry *next; 29 } ExtensionListEntry; 30 31 #define MAXEXTLANG 3 32 typedef struct ULanguageTag { 33 char *buf; /* holding parsed subtags */ 34 const char *language; 35 const char *extlang[MAXEXTLANG]; 36 const char *script; 37 const char *region; 38 VariantListEntry *variants; 39 ExtensionListEntry *extensions; 40 const char *privateuse; 41 const char *grandfathered; 42 } ULanguageTag; 43 44 #define MINLEN 2 45 #define SEP '-' 46 #define PRIVATEUSE 'x' 47 #define LDMLEXT 'u' 48 49 #define LOCALE_SEP '_' 50 #define LOCALE_EXT_SEP '@' 51 #define LOCALE_KEYWORD_SEP ';' 52 #define LOCALE_KEY_TYPE_SEP '=' 53 54 #define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z')) 55 #define ISNUMERIC(c) ((c)>='0' && (c)<='9') 56 57 static const char* EMPTY = ""; 58 static const char* LANG_UND = "und"; 59 static const char* PRIVATEUSE_KEY = "x"; 60 61 #define LANG_UND_LEN 3 62 63 static const char* GRANDFATHERED[] = { 64 /* grandfathered preferred */ 65 "art-lojban", "jbo", 66 "cel-gaulish", "", 67 "en-GB-oed", "", 68 "i-ami", "ami", 69 "i-bnn", "bnn", 70 "i-default", "", 71 "i-enochian", "", 72 "i-hak", "hak", 73 "i-klingon", "tlh", 74 "i-lux", "lb", 75 "i-mingo", "", 76 "i-navajo", "nv", 77 "i-pwn", "pwn", 78 "i-tao", "tao", 79 "i-tay", "tay", 80 "i-tsu", "tsu", 81 "no-bok", "nb", 82 "no-nyn", "nn", 83 "sgn-be-fr", "sfb", 84 "sgn-be-nl", "vgt", 85 "sgn-ch-de", "sgg", 86 "zh-guoyu", "cmn", 87 "zh-hakka", "hak", 88 "zh-min", "", 89 "zh-min-nan", "nan", 90 "zh-xiang", "hsn", 91 NULL, NULL 92 }; 93 94 static const char* DEPRECATEDLANGS[] = { 95 /* deprecated new */ 96 "iw", "he", 97 "ji", "yi", 98 "in", "id", 99 NULL, NULL 100 }; 101 102 /* 103 * ------------------------------------------------- 104 * 105 * These ultag_ functions may be exposed as APIs later 106 * 107 * ------------------------------------------------- 108 */ 109 110 static ULanguageTag* 111 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); 112 113 static void 114 ultag_close(ULanguageTag* langtag); 115 116 static const char* 117 ultag_getLanguage(const ULanguageTag* langtag); 118 119 #if 0 120 static const char* 121 ultag_getJDKLanguage(const ULanguageTag* langtag); 122 #endif 123 124 static const char* 125 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); 126 127 static int32_t 128 ultag_getExtlangSize(const ULanguageTag* langtag); 129 130 static const char* 131 ultag_getScript(const ULanguageTag* langtag); 132 133 static const char* 134 ultag_getRegion(const ULanguageTag* langtag); 135 136 static const char* 137 ultag_getVariant(const ULanguageTag* langtag, int32_t idx); 138 139 static int32_t 140 ultag_getVariantsSize(const ULanguageTag* langtag); 141 142 static const char* 143 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); 144 145 static const char* 146 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); 147 148 static int32_t 149 ultag_getExtensionsSize(const ULanguageTag* langtag); 150 151 static const char* 152 ultag_getPrivateUse(const ULanguageTag* langtag); 153 154 #if 0 155 static const char* 156 ultag_getGrandfathered(const ULanguageTag* langtag); 157 #endif 158 159 /* 160 * ------------------------------------------------- 161 * 162 * Language subtag syntax validation functions 163 * 164 * ------------------------------------------------- 165 */ 166 167 static UBool 168 _isAlphaString(const char* s, int32_t len) { 169 int32_t i; 170 for (i = 0; i < len; i++) { 171 if (!ISALPHA(*(s + i))) { 172 return FALSE; 173 } 174 } 175 return TRUE; 176 } 177 178 static UBool 179 _isNumericString(const char* s, int32_t len) { 180 int32_t i; 181 for (i = 0; i < len; i++) { 182 if (!ISNUMERIC(*(s + i))) { 183 return FALSE; 184 } 185 } 186 return TRUE; 187 } 188 189 static UBool 190 _isAlphaNumericString(const char* s, int32_t len) { 191 int32_t i; 192 for (i = 0; i < len; i++) { 193 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { 194 return FALSE; 195 } 196 } 197 return TRUE; 198 } 199 200 static UBool 201 _isLanguageSubtag(const char* s, int32_t len) { 202 /* 203 * language = 2*3ALPHA ; shortest ISO 639 code 204 * ["-" extlang] ; sometimes followed by 205 * ; extended language subtags 206 * / 4ALPHA ; or reserved for future use 207 * / 5*8ALPHA ; or registered language subtag 208 */ 209 if (len < 0) { 210 len = (int32_t)uprv_strlen(s); 211 } 212 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { 213 return TRUE; 214 } 215 return FALSE; 216 } 217 218 static UBool 219 _isExtlangSubtag(const char* s, int32_t len) { 220 /* 221 * extlang = 3ALPHA ; selected ISO 639 codes 222 * *2("-" 3ALPHA) ; permanently reserved 223 */ 224 if (len < 0) { 225 len = (int32_t)uprv_strlen(s); 226 } 227 if (len == 3 && _isAlphaString(s, len)) { 228 return TRUE; 229 } 230 return FALSE; 231 } 232 233 static UBool 234 _isScriptSubtag(const char* s, int32_t len) { 235 /* 236 * script = 4ALPHA ; ISO 15924 code 237 */ 238 if (len < 0) { 239 len = (int32_t)uprv_strlen(s); 240 } 241 if (len == 4 && _isAlphaString(s, len)) { 242 return TRUE; 243 } 244 return FALSE; 245 } 246 247 static UBool 248 _isRegionSubtag(const char* s, int32_t len) { 249 /* 250 * region = 2ALPHA ; ISO 3166-1 code 251 * / 3DIGIT ; UN M.49 code 252 */ 253 if (len < 0) { 254 len = (int32_t)uprv_strlen(s); 255 } 256 if (len == 2 && _isAlphaString(s, len)) { 257 return TRUE; 258 } 259 if (len == 3 && _isNumericString(s, len)) { 260 return TRUE; 261 } 262 return FALSE; 263 } 264 265 static UBool 266 _isVariantSubtag(const char* s, int32_t len) { 267 /* 268 * variant = 5*8alphanum ; registered variants 269 * / (DIGIT 3alphanum) 270 */ 271 if (len < 0) { 272 len = (int32_t)uprv_strlen(s); 273 } 274 if (len >= 5 && len <= 8 && _isAlphaString(s, len)) { 275 return TRUE; 276 } 277 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { 278 return TRUE; 279 } 280 return FALSE; 281 } 282 283 static UBool 284 _isExtensionSingleton(const char* s, int32_t len) { 285 /* 286 * extension = singleton 1*("-" (2*8alphanum)) 287 */ 288 if (len < 0) { 289 len = (int32_t)uprv_strlen(s); 290 } 291 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { 292 return TRUE; 293 } 294 return FALSE; 295 } 296 297 static UBool 298 _isExtensionSubtag(const char* s, int32_t len) { 299 /* 300 * extension = singleton 1*("-" (2*8alphanum)) 301 */ 302 if (len < 0) { 303 len = (int32_t)uprv_strlen(s); 304 } 305 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { 306 return TRUE; 307 } 308 return FALSE; 309 } 310 311 static UBool 312 _isExtensionSubtags(const char* s, int32_t len) { 313 const char *p = s; 314 const char *pSubtag = NULL; 315 316 if (len < 0) { 317 len = (int32_t)uprv_strlen(s); 318 } 319 320 while ((p - s) < len) { 321 if (*p == SEP) { 322 if (pSubtag == NULL) { 323 return FALSE; 324 } 325 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { 326 return FALSE; 327 } 328 pSubtag = NULL; 329 } else if (pSubtag == NULL) { 330 pSubtag = p; 331 } 332 p++; 333 } 334 if (pSubtag == NULL) { 335 return FALSE; 336 } 337 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); 338 } 339 340 static UBool 341 _isPrivateuseValueSubtag(const char* s, int32_t len) { 342 /* 343 * privateuse = "x" 1*("-" (1*8alphanum)) 344 */ 345 if (len < 0) { 346 len = (int32_t)uprv_strlen(s); 347 } 348 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 349 return TRUE; 350 } 351 return FALSE; 352 } 353 354 static UBool 355 _isPrivateuseValueSubtags(const char* s, int32_t len) { 356 const char *p = s; 357 const char *pSubtag = NULL; 358 359 if (len < 0) { 360 len = (int32_t)uprv_strlen(s); 361 } 362 363 while ((p - s) < len) { 364 if (*p == SEP) { 365 if (pSubtag == NULL) { 366 return FALSE; 367 } 368 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { 369 return FALSE; 370 } 371 pSubtag = NULL; 372 } else if (pSubtag == NULL) { 373 pSubtag = p; 374 } 375 p++; 376 } 377 if (pSubtag == NULL) { 378 return FALSE; 379 } 380 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); 381 } 382 383 static UBool 384 _isLDMLKey(const char* s, int32_t len) { 385 if (len < 0) { 386 len = (int32_t)uprv_strlen(s); 387 } 388 if (len == 2 && _isAlphaNumericString(s, len)) { 389 return TRUE; 390 } 391 return FALSE; 392 } 393 394 static UBool 395 _isLDMLType(const char* s, int32_t len) { 396 if (len < 0) { 397 len = (int32_t)uprv_strlen(s); 398 } 399 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) { 400 return TRUE; 401 } 402 return FALSE; 403 } 404 405 /* 406 * ------------------------------------------------- 407 * 408 * Helper functions 409 * 410 * ------------------------------------------------- 411 */ 412 413 static UBool 414 _addVariantToList(VariantListEntry **first, VariantListEntry *var) { 415 UBool bAdded = TRUE; 416 417 if (*first == NULL) { 418 var->next = NULL; 419 *first = var; 420 } else { 421 VariantListEntry *prev, *cur; 422 int32_t cmp; 423 424 /* reorder variants in alphabetical order */ 425 prev = NULL; 426 cur = *first; 427 while (TRUE) { 428 if (cur == NULL) { 429 prev->next = var; 430 var->next = NULL; 431 break; 432 } 433 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); 434 if (cmp < 0) { 435 if (prev == NULL) { 436 *first = var; 437 } else { 438 prev->next = var; 439 } 440 var->next = cur; 441 break; 442 } 443 if (cmp == 0) { 444 /* duplicated variant */ 445 bAdded = FALSE; 446 break; 447 } 448 prev = cur; 449 cur = cur->next; 450 } 451 } 452 453 return bAdded; 454 } 455 456 457 static UBool 458 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { 459 UBool bAdded = TRUE; 460 461 if (*first == NULL) { 462 ext->next = NULL; 463 *first = ext; 464 } else { 465 ExtensionListEntry *prev, *cur; 466 int32_t cmp; 467 468 /* reorder variants in alphabetical order */ 469 prev = NULL; 470 cur = *first; 471 while (TRUE) { 472 if (cur == NULL) { 473 prev->next = ext; 474 ext->next = NULL; 475 break; 476 } 477 if (localeToBCP) { 478 /* special handling for locale to bcp conversion */ 479 int32_t len, curlen; 480 481 len = (int32_t)uprv_strlen(ext->key); 482 curlen = (int32_t)uprv_strlen(cur->key); 483 484 if (len == 1 && curlen == 1) { 485 if (*(ext->key) == *(cur->key)) { 486 cmp = 0; 487 } else if (*(ext->key) == PRIVATEUSE) { 488 cmp = 1; 489 } else if (*(cur->key) == PRIVATEUSE) { 490 cmp = -1; 491 } else { 492 cmp = *(ext->key) - *(cur->key); 493 } 494 } else if (len == 1) { 495 cmp = *(ext->key) - LDMLEXT; 496 } else if (curlen == 1) { 497 cmp = LDMLEXT - *(cur->key); 498 } else { 499 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 500 } 501 } else { 502 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 503 } 504 if (cmp < 0) { 505 if (prev == NULL) { 506 *first = ext; 507 } else { 508 prev->next = ext; 509 } 510 ext->next = cur; 511 break; 512 } 513 if (cmp == 0) { 514 /* duplicated extension key */ 515 bAdded = FALSE; 516 break; 517 } 518 prev = cur; 519 cur = cur->next; 520 } 521 } 522 523 return bAdded; 524 } 525 526 static void 527 _initializeULanguageTag(ULanguageTag* langtag) { 528 int32_t i; 529 530 langtag->buf = NULL; 531 532 langtag->language = EMPTY; 533 for (i = 0; i < MAXEXTLANG; i++) { 534 langtag->extlang[i] = NULL; 535 } 536 537 langtag->script = EMPTY; 538 langtag->region = EMPTY; 539 540 langtag->variants = NULL; 541 langtag->extensions = NULL; 542 543 langtag->grandfathered = EMPTY; 544 langtag->privateuse = EMPTY; 545 } 546 547 #define KEYTYPEDATA "keyTypeData" 548 #define KEYMAP "keyMap" 549 #define TYPEMAP "typeMap" 550 #define TYPEALIAS "typeAlias" 551 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */ 552 #define MAX_LDML_KEY_LEN 22 553 #define MAX_LDML_TYPE_LEN 32 554 555 static int32_t 556 _ldmlKeyToBCP47(const char* key, int32_t keyLen, 557 char* bcpKey, int32_t bcpKeyCapacity, 558 UErrorCode *status) { 559 UResourceBundle *rb; 560 char keyBuf[MAX_LDML_KEY_LEN]; 561 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 562 int32_t resultLen = 0; 563 int32_t i; 564 UErrorCode tmpStatus = U_ZERO_ERROR; 565 const UChar *uBcpKey; 566 int32_t bcpKeyLen; 567 568 if (keyLen < 0) { 569 keyLen = (int32_t)uprv_strlen(key); 570 } 571 572 if (keyLen >= sizeof(keyBuf)) { 573 /* no known valid LDML key exceeding 21 */ 574 *status = U_ILLEGAL_ARGUMENT_ERROR; 575 return 0; 576 } 577 578 uprv_memcpy(keyBuf, key, keyLen); 579 keyBuf[keyLen] = 0; 580 581 /* to lower case */ 582 for (i = 0; i < keyLen; i++) { 583 keyBuf[i] = uprv_tolower(keyBuf[i]); 584 } 585 586 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 587 ures_getByKey(rb, KEYMAP, rb, status); 588 589 if (U_FAILURE(*status)) { 590 ures_close(rb); 591 return 0; 592 } 593 594 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus); 595 if (U_SUCCESS(tmpStatus)) { 596 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen); 597 bcpKeyBuf[bcpKeyLen] = 0; 598 resultLen = bcpKeyLen; 599 } else { 600 if (_isLDMLKey(key, keyLen)) { 601 uprv_memcpy(bcpKeyBuf, key, keyLen); 602 bcpKeyBuf[keyLen] = 0; 603 resultLen = keyLen; 604 } else { 605 /* mapping not availabe */ 606 *status = U_ILLEGAL_ARGUMENT_ERROR; 607 } 608 } 609 ures_close(rb); 610 611 if (U_FAILURE(*status)) { 612 return 0; 613 } 614 615 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity)); 616 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status); 617 } 618 619 static int32_t 620 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen, 621 char* key, int32_t keyCapacity, 622 UErrorCode *status) { 623 UResourceBundle *rb; 624 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 625 int32_t resultLen = 0; 626 int32_t i; 627 const char *resKey = NULL; 628 UResourceBundle *mapData; 629 630 if (bcpKeyLen < 0) { 631 bcpKeyLen = (int32_t)uprv_strlen(bcpKey); 632 } 633 634 if (bcpKeyLen >= sizeof(bcpKeyBuf)) { 635 *status = U_ILLEGAL_ARGUMENT_ERROR; 636 return 0; 637 } 638 639 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen); 640 bcpKeyBuf[bcpKeyLen] = 0; 641 642 /* to lower case */ 643 for (i = 0; i < bcpKeyLen; i++) { 644 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]); 645 } 646 647 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 648 ures_getByKey(rb, KEYMAP, rb, status); 649 if (U_FAILURE(*status)) { 650 ures_close(rb); 651 return 0; 652 } 653 654 mapData = ures_getNextResource(rb, NULL, status); 655 while (U_SUCCESS(*status)) { 656 const UChar *uBcpKey; 657 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 658 int32_t tmpBcpKeyLen; 659 660 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status); 661 if (U_FAILURE(*status)) { 662 break; 663 } 664 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen); 665 tmpBcpKeyBuf[tmpBcpKeyLen] = 0; 666 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) { 667 /* found a matching BCP47 key */ 668 resKey = ures_getKey(mapData); 669 resultLen = (int32_t)uprv_strlen(resKey); 670 break; 671 } 672 if (!ures_hasNext(rb)) { 673 break; 674 } 675 ures_getNextResource(rb, mapData, status); 676 } 677 ures_close(mapData); 678 ures_close(rb); 679 680 if (U_FAILURE(*status)) { 681 return 0; 682 } 683 684 if (resKey == NULL) { 685 resKey = bcpKeyBuf; 686 resultLen = bcpKeyLen; 687 } 688 689 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity)); 690 return u_terminateChars(key, keyCapacity, resultLen, status); 691 } 692 693 static int32_t 694 _ldmlTypeToBCP47(const char* key, int32_t keyLen, 695 const char* type, int32_t typeLen, 696 char* bcpType, int32_t bcpTypeCapacity, 697 UErrorCode *status) { 698 UResourceBundle *rb, *keyTypeData, *typeMapForKey; 699 char keyBuf[MAX_LDML_KEY_LEN]; 700 char typeBuf[MAX_LDML_TYPE_LEN]; 701 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 702 int32_t resultLen = 0; 703 int32_t i; 704 UErrorCode tmpStatus = U_ZERO_ERROR; 705 const UChar *uBcpType, *uCanonicalType; 706 int32_t bcpTypeLen, canonicalTypeLen; 707 UBool isTimezone = FALSE; 708 709 if (keyLen < 0) { 710 keyLen = (int32_t)uprv_strlen(key); 711 } 712 if (keyLen >= sizeof(keyBuf)) { 713 /* no known valid LDML key exceeding 21 */ 714 *status = U_ILLEGAL_ARGUMENT_ERROR; 715 return 0; 716 } 717 uprv_memcpy(keyBuf, key, keyLen); 718 keyBuf[keyLen] = 0; 719 720 /* to lower case */ 721 for (i = 0; i < keyLen; i++) { 722 keyBuf[i] = uprv_tolower(keyBuf[i]); 723 } 724 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 725 isTimezone = TRUE; 726 } 727 728 if (typeLen < 0) { 729 typeLen = (int32_t)uprv_strlen(type); 730 } 731 if (typeLen >= sizeof(typeBuf)) { 732 *status = U_ILLEGAL_ARGUMENT_ERROR; 733 return 0; 734 } 735 736 if (isTimezone) { 737 /* replace '/' with ':' */ 738 for (i = 0; i < typeLen; i++) { 739 if (*(type + i) == '/') { 740 typeBuf[i] = ':'; 741 } else { 742 typeBuf[i] = *(type + i); 743 } 744 } 745 typeBuf[typeLen] = 0; 746 type = &typeBuf[0]; 747 } 748 749 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status); 750 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status); 751 if (U_FAILURE(*status)) { 752 ures_close(rb); 753 ures_close(keyTypeData); 754 return 0; 755 } 756 757 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus); 758 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus); 759 if (U_SUCCESS(tmpStatus)) { 760 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 761 resultLen = bcpTypeLen; 762 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 763 /* is this type alias? */ 764 tmpStatus = U_ZERO_ERROR; 765 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus); 766 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 767 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus); 768 if (U_SUCCESS(tmpStatus)) { 769 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen); 770 if (isTimezone) { 771 /* replace '/' with ':' */ 772 for (i = 0; i < canonicalTypeLen; i++) { 773 if (typeBuf[i] == '/') { 774 typeBuf[i] = ':'; 775 } 776 } 777 } 778 typeBuf[canonicalTypeLen] = 0; 779 780 /* look up the canonical type */ 781 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus); 782 if (U_SUCCESS(tmpStatus)) { 783 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 784 resultLen = bcpTypeLen; 785 } 786 } 787 if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 788 if (_isLDMLType(type, typeLen)) { 789 uprv_memcpy(bcpTypeBuf, type, typeLen); 790 resultLen = typeLen; 791 } else { 792 /* mapping not availabe */ 793 *status = U_ILLEGAL_ARGUMENT_ERROR; 794 } 795 } 796 } else { 797 *status = tmpStatus; 798 } 799 ures_close(rb); 800 ures_close(typeMapForKey); 801 ures_close(keyTypeData); 802 803 if (U_FAILURE(*status)) { 804 return 0; 805 } 806 807 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity)); 808 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status); 809 } 810 811 static int32_t 812 _bcp47ToLDMLType(const char* key, int32_t keyLen, 813 const char* bcpType, int32_t bcpTypeLen, 814 char* type, int32_t typeCapacity, 815 UErrorCode *status) { 816 UResourceBundle *rb; 817 char keyBuf[MAX_LDML_KEY_LEN]; 818 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 819 int32_t resultLen = 0; 820 int32_t i; 821 const char *resType = NULL; 822 UResourceBundle *mapData; 823 UErrorCode tmpStatus = U_ZERO_ERROR; 824 int32_t copyLen; 825 826 if (keyLen < 0) { 827 keyLen = (int32_t)uprv_strlen(key); 828 } 829 830 if (keyLen >= sizeof(keyBuf)) { 831 /* no known valid LDML key exceeding 21 */ 832 *status = U_ILLEGAL_ARGUMENT_ERROR; 833 return 0; 834 } 835 uprv_memcpy(keyBuf, key, keyLen); 836 keyBuf[keyLen] = 0; 837 838 /* to lower case */ 839 for (i = 0; i < keyLen; i++) { 840 keyBuf[i] = uprv_tolower(keyBuf[i]); 841 } 842 843 844 if (bcpTypeLen < 0) { 845 bcpTypeLen = (int32_t)uprv_strlen(bcpType); 846 } 847 848 if (bcpTypeLen >= sizeof(bcpTypeBuf)) { 849 *status = U_ILLEGAL_ARGUMENT_ERROR; 850 return 0; 851 } 852 853 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen); 854 bcpTypeBuf[bcpTypeLen] = 0; 855 856 /* to lower case */ 857 for (i = 0; i < bcpTypeLen; i++) { 858 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]); 859 } 860 861 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 862 ures_getByKey(rb, TYPEMAP, rb, status); 863 if (U_FAILURE(*status)) { 864 ures_close(rb); 865 return 0; 866 } 867 868 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 869 mapData = ures_getNextResource(rb, NULL, &tmpStatus); 870 while (U_SUCCESS(tmpStatus)) { 871 const UChar *uBcpType; 872 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 873 int32_t tmpBcpTypeLen; 874 875 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus); 876 if (U_FAILURE(tmpStatus)) { 877 break; 878 } 879 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen); 880 tmpBcpTypeBuf[tmpBcpTypeLen] = 0; 881 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) { 882 /* found a matching BCP47 type */ 883 resType = ures_getKey(mapData); 884 resultLen = (int32_t)uprv_strlen(resType); 885 break; 886 } 887 if (!ures_hasNext(rb)) { 888 break; 889 } 890 ures_getNextResource(rb, mapData, &tmpStatus); 891 } 892 ures_close(mapData); 893 ures_close(rb); 894 895 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) { 896 *status = tmpStatus; 897 return 0; 898 } 899 900 if (resType == NULL) { 901 resType = bcpTypeBuf; 902 resultLen = bcpTypeLen; 903 } 904 905 copyLen = uprv_min(resultLen, typeCapacity); 906 uprv_memcpy(type, resType, copyLen); 907 908 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 909 for (i = 0; i < copyLen; i++) { 910 if (*(type + i) == ':') { 911 *(type + i) = '/'; 912 } 913 } 914 } 915 916 return u_terminateChars(type, typeCapacity, resultLen, status); 917 } 918 919 static int32_t 920 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 921 char buf[ULOC_LANG_CAPACITY]; 922 UErrorCode tmpStatus = U_ZERO_ERROR; 923 int32_t len, i; 924 int32_t reslen = 0; 925 926 if (U_FAILURE(*status)) { 927 return 0; 928 } 929 930 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); 931 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 932 if (strict) { 933 *status = U_ILLEGAL_ARGUMENT_ERROR; 934 return 0; 935 } 936 len = 0; 937 } 938 939 /* Note: returned language code is in lower case letters */ 940 941 if (len == 0) { 942 if (reslen < capacity) { 943 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 944 } 945 reslen += LANG_UND_LEN; 946 } else if (!_isLanguageSubtag(buf, len)) { 947 /* invalid language code */ 948 if (strict) { 949 *status = U_ILLEGAL_ARGUMENT_ERROR; 950 return 0; 951 } 952 if (reslen < capacity) { 953 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 954 } 955 reslen += LANG_UND_LEN; 956 } else { 957 /* resolve deprecated */ 958 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 959 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { 960 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); 961 len = (int32_t)uprv_strlen(buf); 962 break; 963 } 964 } 965 if (reslen < capacity) { 966 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 967 } 968 reslen += len; 969 } 970 u_terminateChars(appendAt, capacity, reslen, status); 971 return reslen; 972 } 973 974 static int32_t 975 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 976 char buf[ULOC_SCRIPT_CAPACITY]; 977 UErrorCode tmpStatus = U_ZERO_ERROR; 978 int32_t len; 979 int32_t reslen = 0; 980 981 if (U_FAILURE(*status)) { 982 return 0; 983 } 984 985 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); 986 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 987 if (strict) { 988 *status = U_ILLEGAL_ARGUMENT_ERROR; 989 } 990 return 0; 991 } 992 993 if (len > 0) { 994 if (!_isScriptSubtag(buf, len)) { 995 /* invalid script code */ 996 if (strict) { 997 *status = U_ILLEGAL_ARGUMENT_ERROR; 998 } 999 return 0; 1000 } else { 1001 if (reslen < capacity) { 1002 *(appendAt + reslen) = SEP; 1003 } 1004 reslen++; 1005 1006 if (reslen < capacity) { 1007 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1008 } 1009 reslen += len; 1010 } 1011 } 1012 u_terminateChars(appendAt, capacity, reslen, status); 1013 return reslen; 1014 } 1015 1016 static int32_t 1017 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1018 char buf[ULOC_COUNTRY_CAPACITY]; 1019 UErrorCode tmpStatus = U_ZERO_ERROR; 1020 int32_t len; 1021 int32_t reslen = 0; 1022 1023 if (U_FAILURE(*status)) { 1024 return 0; 1025 } 1026 1027 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); 1028 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1029 if (strict) { 1030 *status = U_ILLEGAL_ARGUMENT_ERROR; 1031 } 1032 return 0; 1033 } 1034 1035 if (len > 0) { 1036 if (!_isRegionSubtag(buf, len)) { 1037 /* invalid region code */ 1038 if (strict) { 1039 *status = U_ILLEGAL_ARGUMENT_ERROR; 1040 } 1041 return 0; 1042 } else { 1043 if (reslen < capacity) { 1044 *(appendAt + reslen) = SEP; 1045 } 1046 reslen++; 1047 1048 if (reslen < capacity) { 1049 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1050 } 1051 reslen += len; 1052 } 1053 } 1054 u_terminateChars(appendAt, capacity, reslen, status); 1055 return reslen; 1056 } 1057 1058 static int32_t 1059 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1060 char buf[ULOC_FULLNAME_CAPACITY]; 1061 UErrorCode tmpStatus = U_ZERO_ERROR; 1062 int32_t len, i; 1063 int32_t reslen = 0; 1064 1065 if (U_FAILURE(*status)) { 1066 return 0; 1067 } 1068 1069 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 1070 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1071 if (strict) { 1072 *status = U_ILLEGAL_ARGUMENT_ERROR; 1073 } 1074 return 0; 1075 } 1076 1077 if (len > 0) { 1078 char *p, *pVar; 1079 UBool bNext = TRUE; 1080 VariantListEntry *var; 1081 VariantListEntry *varFirst = NULL; 1082 1083 pVar = NULL; 1084 p = buf; 1085 while (bNext) { 1086 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 1087 if (*p == 0) { 1088 bNext = FALSE; 1089 } else { 1090 *p = 0; /* terminate */ 1091 } 1092 if (pVar == NULL) { 1093 if (strict) { 1094 *status = U_ILLEGAL_ARGUMENT_ERROR; 1095 break; 1096 } 1097 /* ignore empty variant */ 1098 } else { 1099 /* ICU uses upper case letters for variants, but 1100 the canonical format is lowercase in BCP47 */ 1101 for (i = 0; *(pVar + i) != 0; i++) { 1102 *(pVar + i) = uprv_tolower(*(pVar + i)); 1103 } 1104 1105 /* validate */ 1106 if (_isVariantSubtag(pVar, -1)) { 1107 /* emit the variant to the list */ 1108 var = uprv_malloc(sizeof(VariantListEntry)); 1109 if (var == NULL) { 1110 *status = U_MEMORY_ALLOCATION_ERROR; 1111 break; 1112 } 1113 var->variant = pVar; 1114 if (!_addVariantToList(&varFirst, var)) { 1115 /* duplicated variant */ 1116 uprv_free(var); 1117 if (strict) { 1118 *status = U_ILLEGAL_ARGUMENT_ERROR; 1119 break; 1120 } 1121 } 1122 } else if (strict) { 1123 *status = U_ILLEGAL_ARGUMENT_ERROR; 1124 break; 1125 } 1126 } 1127 /* reset variant starting position */ 1128 pVar = NULL; 1129 } else if (pVar == NULL) { 1130 pVar = p; 1131 } 1132 p++; 1133 } 1134 1135 if (U_SUCCESS(*status)) { 1136 if (varFirst != NULL) { 1137 int32_t varLen; 1138 1139 /* write out sorted/validated/normalized variants to the target */ 1140 var = varFirst; 1141 while (var != NULL) { 1142 if (reslen < capacity) { 1143 *(appendAt + reslen) = SEP; 1144 } 1145 reslen++; 1146 varLen = (int32_t)uprv_strlen(var->variant); 1147 if (reslen < capacity) { 1148 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); 1149 } 1150 reslen += varLen; 1151 var = var->next; 1152 } 1153 } 1154 } 1155 1156 /* clean up */ 1157 var = varFirst; 1158 while (var != NULL) { 1159 VariantListEntry *tmpVar = var->next; 1160 uprv_free(var); 1161 var = tmpVar; 1162 } 1163 1164 if (U_FAILURE(*status)) { 1165 return 0; 1166 } 1167 } 1168 1169 u_terminateChars(appendAt, capacity, reslen, status); 1170 return reslen; 1171 } 1172 1173 static int32_t 1174 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1175 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1176 UEnumeration *keywordEnum = NULL; 1177 int32_t reslen = 0; 1178 1179 keywordEnum = uloc_openKeywords(localeID, status); 1180 if (U_FAILURE(*status)) { 1181 uenum_close(keywordEnum); 1182 return 0; 1183 } 1184 if (keywordEnum != NULL) { 1185 /* reorder extensions */ 1186 int32_t len; 1187 const char *key; 1188 ExtensionListEntry *firstExt = NULL; 1189 ExtensionListEntry *ext; 1190 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1191 char *pExtBuf = extBuf; 1192 int32_t extBufCapacity = sizeof(extBuf); 1193 const char *bcpKey, *bcpValue; 1194 UErrorCode tmpStatus = U_ZERO_ERROR; 1195 int32_t keylen; 1196 UBool isLDMLKeyword; 1197 1198 while (TRUE) { 1199 key = uenum_next(keywordEnum, NULL, status); 1200 if (key == NULL) { 1201 break; 1202 } 1203 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); 1204 if (U_FAILURE(tmpStatus)) { 1205 if (strict) { 1206 *status = U_ILLEGAL_ARGUMENT_ERROR; 1207 break; 1208 } 1209 /* ignore this keyword */ 1210 tmpStatus = U_ZERO_ERROR; 1211 continue; 1212 } 1213 1214 keylen = (int32_t)uprv_strlen(key); 1215 isLDMLKeyword = (keylen > 1); 1216 1217 if (isLDMLKeyword) { 1218 int32_t modKeyLen; 1219 1220 /* transform key and value to bcp47 style */ 1221 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus); 1222 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1223 if (strict) { 1224 *status = U_ILLEGAL_ARGUMENT_ERROR; 1225 break; 1226 } 1227 tmpStatus = U_ZERO_ERROR; 1228 continue; 1229 } 1230 1231 bcpKey = pExtBuf; 1232 pExtBuf += (modKeyLen + 1); 1233 extBufCapacity -= (modKeyLen + 1); 1234 1235 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus); 1236 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1237 if (strict) { 1238 *status = U_ILLEGAL_ARGUMENT_ERROR; 1239 break; 1240 } 1241 tmpStatus = U_ZERO_ERROR; 1242 continue; 1243 } 1244 bcpValue = pExtBuf; 1245 pExtBuf += (len + 1); 1246 extBufCapacity -= (len + 1); 1247 } else { 1248 if (*key == PRIVATEUSE) { 1249 if (!_isPrivateuseValueSubtags(buf, len)) { 1250 if (strict) { 1251 *status = U_ILLEGAL_ARGUMENT_ERROR; 1252 break; 1253 } 1254 continue; 1255 } 1256 } else { 1257 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { 1258 if (strict) { 1259 *status = U_ILLEGAL_ARGUMENT_ERROR; 1260 break; 1261 } 1262 continue; 1263 } 1264 } 1265 bcpKey = key; 1266 if ((len + 1) < extBufCapacity) { 1267 uprv_memcpy(pExtBuf, buf, len); 1268 bcpValue = pExtBuf; 1269 1270 pExtBuf += len; 1271 1272 *pExtBuf = 0; 1273 pExtBuf++; 1274 1275 extBufCapacity -= (len + 1); 1276 } else { 1277 *status = U_ILLEGAL_ARGUMENT_ERROR; 1278 break; 1279 } 1280 } 1281 1282 /* create ExtensionListEntry */ 1283 ext = uprv_malloc(sizeof(ExtensionListEntry)); 1284 if (ext == NULL) { 1285 *status = U_MEMORY_ALLOCATION_ERROR; 1286 break; 1287 } 1288 ext->key = bcpKey; 1289 ext->value = bcpValue; 1290 1291 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1292 uprv_free(ext); 1293 if (strict) { 1294 *status = U_ILLEGAL_ARGUMENT_ERROR; 1295 break; 1296 } 1297 } 1298 } 1299 if (U_SUCCESS(*status) && (firstExt != NULL)) { 1300 UBool startLDMLExtension = FALSE; 1301 1302 /* write out the sorted BCP47 extensions and private use */ 1303 ext = firstExt; 1304 while (ext != NULL) { 1305 if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) { 1306 /* write LDML singleton extension */ 1307 if (reslen < capacity) { 1308 *(appendAt + reslen) = SEP; 1309 } 1310 reslen++; 1311 if (reslen < capacity) { 1312 *(appendAt + reslen) = LDMLEXT; 1313 } 1314 reslen++; 1315 startLDMLExtension = TRUE; 1316 } 1317 1318 if (reslen < capacity) { 1319 *(appendAt + reslen) = SEP; 1320 } 1321 reslen++; 1322 len = (int32_t)uprv_strlen(ext->key); 1323 if (reslen < capacity) { 1324 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); 1325 } 1326 reslen += len; 1327 if (reslen < capacity) { 1328 *(appendAt + reslen) = SEP; 1329 } 1330 reslen++; 1331 len = (int32_t)uprv_strlen(ext->value); 1332 if (reslen < capacity) { 1333 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); 1334 } 1335 reslen += len; 1336 1337 ext = ext->next; 1338 } 1339 } 1340 /* clean up */ 1341 ext = firstExt; 1342 while (ext != NULL) { 1343 ExtensionListEntry *tmpExt = ext->next; 1344 uprv_free(ext); 1345 ext = tmpExt; 1346 } 1347 1348 uenum_close(keywordEnum); 1349 1350 if (U_FAILURE(*status)) { 1351 return 0; 1352 } 1353 } 1354 1355 return u_terminateChars(appendAt, capacity, reslen, status); 1356 } 1357 1358 /** 1359 * Append keywords parsed from LDML extension value 1360 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} 1361 * Note: char* buf is used for storing keywords 1362 */ 1363 static void 1364 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UErrorCode *status) { 1365 const char *p, *pNext, *pSep; 1366 const char *pBcpKey, *pBcpType; 1367 const char *pKey, *pType; 1368 int32_t bcpKeyLen = 0, bcpTypeLen; 1369 ExtensionListEntry *kwd, *nextKwd; 1370 ExtensionListEntry *kwdFirst = NULL; 1371 int32_t bufIdx = 0; 1372 int32_t len; 1373 1374 pNext = ldmlext; 1375 pBcpKey = pBcpType = NULL; 1376 while (pNext) { 1377 p = pSep = pNext; 1378 1379 /* locate next separator char */ 1380 while (*pSep) { 1381 if (*pSep == SEP) { 1382 break; 1383 } 1384 pSep++; 1385 } 1386 if (*pSep == 0) { 1387 /* last subtag */ 1388 pNext = NULL; 1389 } else { 1390 pNext = pSep + 1; 1391 } 1392 1393 if (pBcpKey == NULL) { 1394 pBcpKey = p; 1395 bcpKeyLen = (int32_t)(pSep - p); 1396 } else { 1397 pBcpType = p; 1398 bcpTypeLen = (int32_t)(pSep - p); 1399 1400 /* BCP key to locale key */ 1401 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1402 if (U_FAILURE(*status)) { 1403 goto cleanup; 1404 } 1405 pKey = buf + bufIdx; 1406 bufIdx += len; 1407 *(buf + bufIdx) = 0; 1408 bufIdx++; 1409 1410 /* BCP type to locale type */ 1411 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1412 if (U_FAILURE(*status)) { 1413 goto cleanup; 1414 } 1415 pType = buf + bufIdx; 1416 bufIdx += len; 1417 *(buf + bufIdx) = 0; 1418 bufIdx++; 1419 1420 /* create an ExtensionListEntry for this keyword */ 1421 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1422 if (kwd == NULL) { 1423 *status = U_MEMORY_ALLOCATION_ERROR; 1424 goto cleanup; 1425 } 1426 1427 kwd->key = pKey; 1428 kwd->value = pType; 1429 1430 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1431 *status = U_ILLEGAL_ARGUMENT_ERROR; 1432 uprv_free(kwd); 1433 goto cleanup; 1434 } 1435 1436 /* for next pair */ 1437 pBcpKey = NULL; 1438 pBcpType = NULL; 1439 } 1440 } 1441 1442 if (pBcpKey != NULL) { 1443 *status = U_ILLEGAL_ARGUMENT_ERROR; 1444 goto cleanup; 1445 } 1446 1447 kwd = kwdFirst; 1448 while (kwd != NULL) { 1449 nextKwd = kwd->next; 1450 _addExtensionToList(appendTo, kwd, FALSE); 1451 kwd = nextKwd; 1452 } 1453 1454 return; 1455 1456 cleanup: 1457 kwd = kwdFirst; 1458 while (kwd != NULL) { 1459 nextKwd = kwd->next; 1460 uprv_free(kwd); 1461 kwd = nextKwd; 1462 } 1463 } 1464 1465 1466 static int32_t 1467 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { 1468 int32_t reslen = 0; 1469 int32_t i, n; 1470 int32_t len; 1471 ExtensionListEntry *kwdFirst = NULL; 1472 ExtensionListEntry *kwd; 1473 const char *key, *type; 1474 char kwdBuf[ULOC_KEYWORDS_CAPACITY]; 1475 1476 if (U_FAILURE(*status)) { 1477 return 0; 1478 } 1479 1480 n = ultag_getExtensionsSize(langtag); 1481 1482 /* resolve locale keywords and reordering keys */ 1483 for (i = 0; i < n; i++) { 1484 key = ultag_getExtensionKey(langtag, i); 1485 type = ultag_getExtensionValue(langtag, i); 1486 if (*key == LDMLEXT) { 1487 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBuf), status); 1488 if (U_FAILURE(*status)) { 1489 break; 1490 } 1491 } else { 1492 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1493 if (kwd == NULL) { 1494 *status = U_MEMORY_ALLOCATION_ERROR; 1495 break; 1496 } 1497 kwd->key = key; 1498 kwd->value = type; 1499 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1500 uprv_free(kwd); 1501 *status = U_ILLEGAL_ARGUMENT_ERROR; 1502 break; 1503 } 1504 } 1505 } 1506 1507 if (U_SUCCESS(*status)) { 1508 type = ultag_getPrivateUse(langtag); 1509 if ((int32_t)uprv_strlen(type) > 0) { 1510 /* add private use as a keyword */ 1511 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1512 if (kwd == NULL) { 1513 *status = U_MEMORY_ALLOCATION_ERROR; 1514 } else { 1515 kwd->key = PRIVATEUSE_KEY; 1516 kwd->value = type; 1517 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1518 uprv_free(kwd); 1519 *status = U_ILLEGAL_ARGUMENT_ERROR; 1520 } 1521 } 1522 } 1523 } 1524 1525 if (U_SUCCESS(*status) && kwdFirst != NULL) { 1526 /* write out the sorted keywords */ 1527 kwd = kwdFirst; 1528 while (kwd != NULL) { 1529 if (reslen < capacity) { 1530 if (kwd == kwdFirst) { 1531 /* '@' */ 1532 *(appendAt + reslen) = LOCALE_EXT_SEP; 1533 } else { 1534 /* ';' */ 1535 *(appendAt + reslen) = LOCALE_KEYWORD_SEP; 1536 } 1537 } 1538 reslen++; 1539 1540 /* key */ 1541 len = (int32_t)uprv_strlen(kwd->key); 1542 if (reslen < capacity) { 1543 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); 1544 } 1545 reslen += len; 1546 1547 /* '=' */ 1548 if (reslen < capacity) { 1549 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; 1550 } 1551 reslen++; 1552 1553 /* type */ 1554 len = (int32_t)uprv_strlen(kwd->value); 1555 if (reslen < capacity) { 1556 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); 1557 } 1558 reslen += len; 1559 1560 kwd = kwd->next; 1561 } 1562 } 1563 1564 /* clean up */ 1565 kwd = kwdFirst; 1566 while (kwd != NULL) { 1567 ExtensionListEntry *tmpKwd = kwd->next; 1568 uprv_free(kwd); 1569 kwd = tmpKwd; 1570 } 1571 1572 if (U_FAILURE(*status)) { 1573 return 0; 1574 } 1575 1576 return u_terminateChars(appendAt, capacity, reslen, status); 1577 } 1578 1579 /* 1580 * ------------------------------------------------- 1581 * 1582 * ultag_ functions 1583 * 1584 * ------------------------------------------------- 1585 */ 1586 1587 /* Bit flags used by the parser */ 1588 #define LANG 0x0001 1589 #define EXTL 0x0002 1590 #define SCRT 0x0004 1591 #define REGN 0x0008 1592 #define VART 0x0010 1593 #define EXTS 0x0020 1594 #define EXTV 0x0040 1595 #define PRIV 0x0080 1596 1597 static ULanguageTag* 1598 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { 1599 ULanguageTag *t; 1600 char *tagBuf; 1601 int16_t next; 1602 char *pSubtag, *pNext, *pLastGoodPosition; 1603 int32_t subtagLen; 1604 int32_t extlangIdx; 1605 ExtensionListEntry *pExtension; 1606 char *pExtValueSubtag, *pExtValueSubtagEnd; 1607 int32_t i; 1608 UBool isLDMLExtension, reqLDMLType; 1609 1610 if (parsedLen != NULL) { 1611 *parsedLen = 0; 1612 } 1613 1614 if (U_FAILURE(*status)) { 1615 return NULL; 1616 } 1617 1618 if (tagLen < 0) { 1619 tagLen = (int32_t)uprv_strlen(tag); 1620 } 1621 1622 /* copy the entire string */ 1623 tagBuf = (char*)uprv_malloc(tagLen + 1); 1624 if (tagBuf == NULL) { 1625 *status = U_MEMORY_ALLOCATION_ERROR; 1626 return NULL; 1627 } 1628 uprv_memcpy(tagBuf, tag, tagLen); 1629 *(tagBuf + tagLen) = 0; 1630 1631 /* create a ULanguageTag */ 1632 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); 1633 _initializeULanguageTag(t); 1634 t->buf = tagBuf; 1635 if (t == NULL) { 1636 uprv_free(tagBuf); 1637 *status = U_MEMORY_ALLOCATION_ERROR; 1638 return NULL; 1639 } 1640 1641 if (tagLen < MINLEN) { 1642 /* the input tag is too short - return empty ULanguageTag */ 1643 return t; 1644 } 1645 1646 /* check if the tag is grandfathered */ 1647 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { 1648 if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) { 1649 /* a grandfathered tag is always longer than its preferred mapping */ 1650 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); 1651 t->language = t->buf; 1652 if (parsedLen != NULL) { 1653 *parsedLen = tagLen; 1654 } 1655 return t; 1656 } 1657 } 1658 1659 /* 1660 * langtag = language 1661 * ["-" script] 1662 * ["-" region] 1663 * *("-" variant) 1664 * *("-" extension) 1665 * ["-" privateuse] 1666 */ 1667 1668 next = LANG | PRIV; 1669 pNext = pLastGoodPosition = tagBuf; 1670 extlangIdx = 0; 1671 pExtension = NULL; 1672 pExtValueSubtag = NULL; 1673 pExtValueSubtagEnd = NULL; 1674 isLDMLExtension = FALSE; 1675 reqLDMLType = FALSE; 1676 1677 while (pNext) { 1678 char *pSep; 1679 1680 pSubtag = pNext; 1681 1682 /* locate next separator char */ 1683 pSep = pSubtag; 1684 while (*pSep) { 1685 if (*pSep == SEP) { 1686 break; 1687 } 1688 pSep++; 1689 } 1690 if (*pSep == 0) { 1691 /* last subtag */ 1692 pNext = NULL; 1693 } else { 1694 pNext = pSep + 1; 1695 } 1696 subtagLen = (int32_t)(pSep - pSubtag); 1697 1698 if (next & LANG) { 1699 if (_isLanguageSubtag(pSubtag, subtagLen)) { 1700 *pSep = 0; /* terminate */ 1701 t->language = T_CString_toLowerCase(pSubtag); 1702 1703 pLastGoodPosition = pSep; 1704 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1705 continue; 1706 } 1707 } 1708 if (next & EXTL) { 1709 if (_isExtlangSubtag(pSubtag, subtagLen)) { 1710 *pSep = 0; 1711 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); 1712 1713 pLastGoodPosition = pSep; 1714 if (extlangIdx < 3) { 1715 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1716 } else { 1717 next = SCRT | REGN | VART | EXTS | PRIV; 1718 } 1719 continue; 1720 } 1721 } 1722 if (next & SCRT) { 1723 if (_isScriptSubtag(pSubtag, subtagLen)) { 1724 char *p = pSubtag; 1725 1726 *pSep = 0; 1727 1728 /* to title case */ 1729 *p = uprv_toupper(*p); 1730 p++; 1731 for (; *p; p++) { 1732 *p = uprv_tolower(*p); 1733 } 1734 1735 t->script = pSubtag; 1736 1737 pLastGoodPosition = pSep; 1738 next = REGN | VART | EXTS | PRIV; 1739 continue; 1740 } 1741 } 1742 if (next & REGN) { 1743 if (_isRegionSubtag(pSubtag, subtagLen)) { 1744 *pSep = 0; 1745 t->region = T_CString_toUpperCase(pSubtag); 1746 1747 pLastGoodPosition = pSep; 1748 next = VART | EXTS | PRIV; 1749 continue; 1750 } 1751 } 1752 if (next & VART) { 1753 if (_isVariantSubtag(pSubtag, subtagLen)) { 1754 VariantListEntry *var; 1755 UBool isAdded; 1756 1757 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 1758 if (var == NULL) { 1759 *status = U_MEMORY_ALLOCATION_ERROR; 1760 goto error; 1761 } 1762 *pSep = 0; 1763 var->variant = T_CString_toUpperCase(pSubtag); 1764 isAdded = _addVariantToList(&(t->variants), var); 1765 if (!isAdded) { 1766 /* duplicated variant entry */ 1767 uprv_free(var); 1768 break; 1769 } 1770 pLastGoodPosition = pSep; 1771 next = VART | EXTS | PRIV; 1772 continue; 1773 } 1774 } 1775 if (next & EXTS) { 1776 if (_isExtensionSingleton(pSubtag, subtagLen)) { 1777 if (pExtension != NULL) { 1778 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1779 /* the previous extension is incomplete */ 1780 uprv_free(pExtension); 1781 pExtension = NULL; 1782 break; 1783 } 1784 1785 /* terminate the previous extension value */ 1786 *pExtValueSubtagEnd = 0; 1787 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1788 1789 /* insert the extension to the list */ 1790 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1791 pLastGoodPosition = pExtValueSubtagEnd; 1792 } else { 1793 /* stop parsing here */ 1794 uprv_free(pExtension); 1795 pExtension = NULL; 1796 break; 1797 } 1798 1799 if (isLDMLExtension && reqLDMLType) { 1800 /* incomplete LDML extension key and type pair */ 1801 pExtension = NULL; 1802 break; 1803 } 1804 } 1805 1806 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT); 1807 1808 /* create a new extension */ 1809 pExtension = uprv_malloc(sizeof(ExtensionListEntry)); 1810 if (pExtension == NULL) { 1811 *status = U_MEMORY_ALLOCATION_ERROR; 1812 goto error; 1813 } 1814 *pSep = 0; 1815 pExtension->key = T_CString_toLowerCase(pSubtag); 1816 pExtension->value = NULL; /* will be set later */ 1817 1818 /* 1819 * reset the start and the end location of extension value 1820 * subtags for this extension 1821 */ 1822 pExtValueSubtag = NULL; 1823 pExtValueSubtagEnd = NULL; 1824 1825 next = EXTV; 1826 continue; 1827 } 1828 } 1829 if (next & EXTV) { 1830 if (_isExtensionSubtag(pSubtag, subtagLen)) { 1831 if (isLDMLExtension) { 1832 if (reqLDMLType) { 1833 /* already saw an LDML key */ 1834 if (!_isLDMLType(pSubtag, subtagLen)) { 1835 /* stop parsing here and let the valid LDML extension key/type 1836 pairs processed by the code out of this while loop */ 1837 break; 1838 } 1839 pExtValueSubtagEnd = pSep; 1840 reqLDMLType = FALSE; 1841 next = EXTS | EXTV | PRIV; 1842 } else { 1843 /* LDML key */ 1844 if (!_isLDMLKey(pSubtag, subtagLen)) { 1845 /* stop parsing here and let the valid LDML extension key/type 1846 pairs processed by the code out of this while loop */ 1847 break; 1848 } 1849 reqLDMLType = TRUE; 1850 next = EXTV; 1851 } 1852 } else { 1853 /* Mark the end of this subtag */ 1854 pExtValueSubtagEnd = pSep; 1855 next = EXTS | EXTV | PRIV; 1856 } 1857 1858 if (pExtValueSubtag == NULL) { 1859 /* if the start postion of this extension's value is not yet, 1860 this one is the first value subtag */ 1861 pExtValueSubtag = pSubtag; 1862 } 1863 continue; 1864 } 1865 } 1866 if (next & PRIV) { 1867 if (uprv_tolower(*pSubtag) == PRIVATEUSE) { 1868 char *pPrivuseVal; 1869 1870 if (pExtension != NULL) { 1871 /* Process the last extension */ 1872 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1873 /* the previous extension is incomplete */ 1874 uprv_free(pExtension); 1875 pExtension = NULL; 1876 break; 1877 } else { 1878 /* terminate the previous extension value */ 1879 *pExtValueSubtagEnd = 0; 1880 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1881 1882 /* insert the extension to the list */ 1883 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1884 pLastGoodPosition = pExtValueSubtagEnd; 1885 pExtension = NULL; 1886 } else { 1887 /* stop parsing here */ 1888 uprv_free(pExtension); 1889 pExtension = NULL; 1890 break; 1891 } 1892 } 1893 } 1894 1895 /* The rest of part will be private use value subtags */ 1896 if (pNext == NULL) { 1897 /* empty private use subtag */ 1898 break; 1899 } 1900 /* back up the private use value start position */ 1901 pPrivuseVal = pNext; 1902 1903 /* validate private use value subtags */ 1904 while (pNext) { 1905 pSubtag = pNext; 1906 pSep = pSubtag; 1907 while (*pSep) { 1908 if (*pSep == SEP) { 1909 break; 1910 } 1911 pSep++; 1912 } 1913 if (*pSep == 0) { 1914 /* last subtag */ 1915 pNext = NULL; 1916 } else { 1917 pNext = pSep + 1; 1918 } 1919 subtagLen = (int32_t)(pSep - pSubtag); 1920 1921 if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { 1922 pLastGoodPosition = pSep; 1923 } else { 1924 break; 1925 } 1926 } 1927 if (pLastGoodPosition - pPrivuseVal > 0) { 1928 *pLastGoodPosition = 0; 1929 t->privateuse = T_CString_toLowerCase(pPrivuseVal); 1930 } 1931 /* No more subtags, exiting the parse loop */ 1932 break; 1933 } 1934 break; 1935 } 1936 /* If we fell through here, it means this subtag is illegal - quit parsing */ 1937 break; 1938 } 1939 1940 if (pExtension != NULL) { 1941 /* Process the last extension */ 1942 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1943 /* the previous extension is incomplete */ 1944 uprv_free(pExtension); 1945 } else { 1946 /* terminate the previous extension value */ 1947 *pExtValueSubtagEnd = 0; 1948 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1949 /* insert the extension to the list */ 1950 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1951 pLastGoodPosition = pExtValueSubtagEnd; 1952 } else { 1953 uprv_free(pExtension); 1954 } 1955 } 1956 } 1957 1958 if (parsedLen != NULL) { 1959 *parsedLen = (int32_t)(pLastGoodPosition - t->buf); 1960 } 1961 1962 return t; 1963 1964 error: 1965 uprv_free(t); 1966 return NULL; 1967 } 1968 1969 static void 1970 ultag_close(ULanguageTag* langtag) { 1971 1972 if (langtag == NULL) { 1973 return; 1974 } 1975 1976 uprv_free(langtag->buf); 1977 1978 if (langtag->variants) { 1979 VariantListEntry *curVar = langtag->variants; 1980 while (curVar) { 1981 VariantListEntry *nextVar = curVar->next; 1982 uprv_free(curVar); 1983 curVar = nextVar; 1984 } 1985 } 1986 1987 if (langtag->extensions) { 1988 ExtensionListEntry *curExt = langtag->extensions; 1989 while (curExt) { 1990 ExtensionListEntry *nextExt = curExt->next; 1991 uprv_free(curExt); 1992 curExt = nextExt; 1993 } 1994 } 1995 1996 uprv_free(langtag); 1997 } 1998 1999 static const char* 2000 ultag_getLanguage(const ULanguageTag* langtag) { 2001 return langtag->language; 2002 } 2003 2004 #if 0 2005 static const char* 2006 ultag_getJDKLanguage(const ULanguageTag* langtag) { 2007 int32_t i; 2008 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 2009 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { 2010 return DEPRECATEDLANGS[i + 1]; 2011 } 2012 } 2013 return langtag->language; 2014 } 2015 #endif 2016 2017 static const char* 2018 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { 2019 if (idx >= 0 && idx < MAXEXTLANG) { 2020 return langtag->extlang[idx]; 2021 } 2022 return NULL; 2023 } 2024 2025 static int32_t 2026 ultag_getExtlangSize(const ULanguageTag* langtag) { 2027 int32_t size = 0; 2028 int32_t i; 2029 for (i = 0; i < MAXEXTLANG; i++) { 2030 if (langtag->extlang[i]) { 2031 size++; 2032 } 2033 } 2034 return size; 2035 } 2036 2037 static const char* 2038 ultag_getScript(const ULanguageTag* langtag) { 2039 return langtag->script; 2040 } 2041 2042 static const char* 2043 ultag_getRegion(const ULanguageTag* langtag) { 2044 return langtag->region; 2045 } 2046 2047 static const char* 2048 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { 2049 const char *var = NULL; 2050 VariantListEntry *cur = langtag->variants; 2051 int32_t i = 0; 2052 while (cur) { 2053 if (i == idx) { 2054 var = cur->variant; 2055 break; 2056 } 2057 cur = cur->next; 2058 i++; 2059 } 2060 return var; 2061 } 2062 2063 static int32_t 2064 ultag_getVariantsSize(const ULanguageTag* langtag) { 2065 int32_t size = 0; 2066 VariantListEntry *cur = langtag->variants; 2067 while (TRUE) { 2068 if (cur == NULL) { 2069 break; 2070 } 2071 size++; 2072 cur = cur->next; 2073 } 2074 return size; 2075 } 2076 2077 static const char* 2078 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { 2079 const char *key = NULL; 2080 ExtensionListEntry *cur = langtag->extensions; 2081 int32_t i = 0; 2082 while (cur) { 2083 if (i == idx) { 2084 key = cur->key; 2085 break; 2086 } 2087 cur = cur->next; 2088 i++; 2089 } 2090 return key; 2091 } 2092 2093 static const char* 2094 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { 2095 const char *val = NULL; 2096 ExtensionListEntry *cur = langtag->extensions; 2097 int32_t i = 0; 2098 while (cur) { 2099 if (i == idx) { 2100 val = cur->value; 2101 break; 2102 } 2103 cur = cur->next; 2104 i++; 2105 } 2106 return val; 2107 } 2108 2109 static int32_t 2110 ultag_getExtensionsSize(const ULanguageTag* langtag) { 2111 int32_t size = 0; 2112 ExtensionListEntry *cur = langtag->extensions; 2113 while (TRUE) { 2114 if (cur == NULL) { 2115 break; 2116 } 2117 size++; 2118 cur = cur->next; 2119 } 2120 return size; 2121 } 2122 2123 static const char* 2124 ultag_getPrivateUse(const ULanguageTag* langtag) { 2125 return langtag->privateuse; 2126 } 2127 2128 #if 0 2129 static const char* 2130 ultag_getGrandfathered(const ULanguageTag* langtag) { 2131 return langtag->grandfathered; 2132 } 2133 #endif 2134 2135 2136 /* 2137 * ------------------------------------------------- 2138 * 2139 * Locale/BCP47 conversion APIs, exposed as uloc_* 2140 * 2141 * ------------------------------------------------- 2142 */ 2143 U_DRAFT int32_t U_EXPORT2 2144 uloc_toLanguageTag(const char* localeID, 2145 char* langtag, 2146 int32_t langtagCapacity, 2147 UBool strict, 2148 UErrorCode* status) { 2149 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ 2150 char canonical[256]; 2151 int32_t reslen = 0; 2152 UErrorCode tmpStatus = U_ZERO_ERROR; 2153 2154 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ 2155 canonical[0] = 0; 2156 if (uprv_strlen(localeID) > 0) { 2157 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); 2158 if (tmpStatus != U_ZERO_ERROR) { 2159 *status = U_ILLEGAL_ARGUMENT_ERROR; 2160 return 0; 2161 } 2162 } 2163 2164 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); 2165 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2166 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2167 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2168 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2169 2170 return reslen; 2171 } 2172 2173 2174 U_DRAFT int32_t U_EXPORT2 2175 uloc_forLanguageTag(const char* langtag, 2176 char* localeID, 2177 int32_t localeIDCapacity, 2178 int32_t* parsedLength, 2179 UErrorCode* status) { 2180 ULanguageTag *lt; 2181 int32_t reslen = 0; 2182 const char *subtag, *p; 2183 int32_t len; 2184 int32_t i, n; 2185 UBool noRegion = TRUE; 2186 2187 lt = ultag_parse(langtag, -1, parsedLength, status); 2188 if (U_FAILURE(*status)) { 2189 return 0; 2190 } 2191 2192 /* language */ 2193 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); 2194 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { 2195 len = (int32_t)uprv_strlen(subtag); 2196 if (len > 0) { 2197 if (reslen < localeIDCapacity) { 2198 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); 2199 } 2200 reslen += len; 2201 } 2202 } 2203 2204 /* script */ 2205 subtag = ultag_getScript(lt); 2206 len = (int32_t)uprv_strlen(subtag); 2207 if (len > 0) { 2208 if (reslen < localeIDCapacity) { 2209 *(localeID + reslen) = LOCALE_SEP; 2210 } 2211 reslen++; 2212 2213 /* write out the script in title case */ 2214 p = subtag; 2215 while (*p) { 2216 if (reslen < localeIDCapacity) { 2217 if (p == subtag) { 2218 *(localeID + reslen) = uprv_toupper(*p); 2219 } else { 2220 *(localeID + reslen) = *p; 2221 } 2222 } 2223 reslen++; 2224 p++; 2225 } 2226 } 2227 2228 /* region */ 2229 subtag = ultag_getRegion(lt); 2230 len = (int32_t)uprv_strlen(subtag); 2231 if (len > 0) { 2232 if (reslen < localeIDCapacity) { 2233 *(localeID + reslen) = LOCALE_SEP; 2234 } 2235 reslen++; 2236 /* write out the retion in upper case */ 2237 p = subtag; 2238 while (*p) { 2239 if (reslen < localeIDCapacity) { 2240 *(localeID + reslen) = uprv_toupper(*p); 2241 } 2242 reslen++; 2243 p++; 2244 } 2245 noRegion = FALSE; 2246 } 2247 2248 /* variants */ 2249 n = ultag_getVariantsSize(lt); 2250 if (n > 0) { 2251 if (noRegion) { 2252 if (reslen < localeIDCapacity) { 2253 *(localeID + reslen) = LOCALE_SEP; 2254 } 2255 reslen++; 2256 } 2257 2258 for (i = 0; i < n; i++) { 2259 subtag = ultag_getVariant(lt, i); 2260 if (reslen < localeIDCapacity) { 2261 *(localeID + reslen) = LOCALE_SEP; 2262 } 2263 reslen++; 2264 /* write out the variant in upper case */ 2265 p = subtag; 2266 while (*p) { 2267 if (reslen < localeIDCapacity) { 2268 *(localeID + reslen) = uprv_toupper(*p); 2269 } 2270 reslen++; 2271 p++; 2272 } 2273 } 2274 } 2275 2276 /* keywords */ 2277 n = ultag_getExtensionsSize(lt); 2278 subtag = ultag_getPrivateUse(lt); 2279 if (n > 0 || uprv_strlen(subtag) > 0) { 2280 if (reslen == 0) { 2281 /* need a language */ 2282 if (reslen < localeIDCapacity) { 2283 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); 2284 } 2285 reslen += LANG_UND_LEN; 2286 } 2287 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); 2288 reslen += len; 2289 } 2290 2291 ultag_close(lt); 2292 return u_terminateChars(localeID, localeIDCapacity, reslen, status); 2293 } 2294 2295 2296