// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2009-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uassert.h"


/* Node of a singly linked list holding a single variant subtag. */
typedef struct VariantListEntry {
    const char              *variant;   /* variant text; not owned by the node */
    struct VariantListEntry *next;      /* next node, or NULL at the tail */
} VariantListEntry;

/* Node of a singly linked list holding a single attribute value. */
typedef struct AttributeListEntry {
    const char                  *attribute; /* attribute text */
    struct AttributeListEntry   *next;      /* next node, or NULL at the tail */
} AttributeListEntry;

/* Node of a singly linked list holding a single extension (key/value pair). */
typedef struct ExtensionListEntry {
    const char                  *key;   /* extension key */
    const char                  *value; /* extension value; NULL for the "attribute" place holder */
    struct ExtensionListEntry   *next;  /* next node, or NULL at the tail */
} ExtensionListEntry;

/* Number of extlang slots reserved in ULanguageTag. */
#define MAXEXTLANG 3

/* Parsed representation of a language tag; initialized by _initializeULanguageTag(). */
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;
    const char          *extlang[MAXEXTLANG];
    const char          *script;
    const char          *region;
    VariantListEntry    *variants;
    ExtensionListEntry  *extensions;
    const char          *privateuse;
    const char          *grandfathered;
} ULanguageTag;

#define MINLEN 2
#define SEP '-'             /* separator written between language tag subtags */
#define PRIVATEUSE 'x'      /* private use singleton */
#define LDMLEXT 'u'         /* singleton introducing the LDML (Unicode locale) extension */

#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* NOTE: ISNUMERIC evaluates its argument twice; do not pass expressions with side effects. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND, excluding the terminating NUL. */
#define LANG_UND_LEN 3

/*
 * Flat table of (grandfathered tag, preferred replacement) pairs.
 * The table is terminated by a (NULL, NULL) pair.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};

/*
 * Flat table of (deprecated language code, replacement) pairs.
 * There is no terminator entry; the table is walked two entries at a time
 * up to UPRV_LENGTHOF(DEPRECATEDLANGS).
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",   "he",
    "ji",   "yi",
    "in",   "id"
};

/*
* -------------------------------------------------
*
* These ultag_ functions may be exposed as APIs later
*
* -------------------------------------------------
*/

static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

static void
ultag_close(ULanguageTag* langtag);

static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char* 150 ultag_getRegion(const ULanguageTag* langtag); 151 152 static const char* 153 ultag_getVariant(const ULanguageTag* langtag, int32_t idx); 154 155 static int32_t 156 ultag_getVariantsSize(const ULanguageTag* langtag); 157 158 static const char* 159 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); 160 161 static const char* 162 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); 163 164 static int32_t 165 ultag_getExtensionsSize(const ULanguageTag* langtag); 166 167 static const char* 168 ultag_getPrivateUse(const ULanguageTag* langtag); 169 170 #if 0 171 static const char* 172 ultag_getGrandfathered(const ULanguageTag* langtag); 173 #endif 174 175 /* 176 * ------------------------------------------------- 177 * 178 * Language subtag syntax validation functions 179 * 180 * ------------------------------------------------- 181 */ 182 183 static UBool 184 _isAlphaString(const char* s, int32_t len) { 185 int32_t i; 186 for (i = 0; i < len; i++) { 187 if (!ISALPHA(*(s + i))) { 188 return FALSE; 189 } 190 } 191 return TRUE; 192 } 193 194 static UBool 195 _isNumericString(const char* s, int32_t len) { 196 int32_t i; 197 for (i = 0; i < len; i++) { 198 if (!ISNUMERIC(*(s + i))) { 199 return FALSE; 200 } 201 } 202 return TRUE; 203 } 204 205 static UBool 206 _isAlphaNumericString(const char* s, int32_t len) { 207 int32_t i; 208 for (i = 0; i < len; i++) { 209 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { 210 return FALSE; 211 } 212 } 213 return TRUE; 214 } 215 216 static UBool 217 _isLanguageSubtag(const char* s, int32_t len) { 218 /* 219 * language = 2*3ALPHA ; shortest ISO 639 code 220 * ["-" extlang] ; sometimes followed by 221 * ; extended language subtags 222 * / 4ALPHA ; or reserved for future use 223 * / 5*8ALPHA ; or registered language subtag 224 */ 225 if (len < 0) { 226 len = (int32_t)uprv_strlen(s); 227 } 228 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { 229 return TRUE; 230 } 231 return FALSE; 232 } 
233 234 static UBool 235 _isExtlangSubtag(const char* s, int32_t len) { 236 /* 237 * extlang = 3ALPHA ; selected ISO 639 codes 238 * *2("-" 3ALPHA) ; permanently reserved 239 */ 240 if (len < 0) { 241 len = (int32_t)uprv_strlen(s); 242 } 243 if (len == 3 && _isAlphaString(s, len)) { 244 return TRUE; 245 } 246 return FALSE; 247 } 248 249 static UBool 250 _isScriptSubtag(const char* s, int32_t len) { 251 /* 252 * script = 4ALPHA ; ISO 15924 code 253 */ 254 if (len < 0) { 255 len = (int32_t)uprv_strlen(s); 256 } 257 if (len == 4 && _isAlphaString(s, len)) { 258 return TRUE; 259 } 260 return FALSE; 261 } 262 263 static UBool 264 _isRegionSubtag(const char* s, int32_t len) { 265 /* 266 * region = 2ALPHA ; ISO 3166-1 code 267 * / 3DIGIT ; UN M.49 code 268 */ 269 if (len < 0) { 270 len = (int32_t)uprv_strlen(s); 271 } 272 if (len == 2 && _isAlphaString(s, len)) { 273 return TRUE; 274 } 275 if (len == 3 && _isNumericString(s, len)) { 276 return TRUE; 277 } 278 return FALSE; 279 } 280 281 static UBool 282 _isVariantSubtag(const char* s, int32_t len) { 283 /* 284 * variant = 5*8alphanum ; registered variants 285 * / (DIGIT 3alphanum) 286 */ 287 if (len < 0) { 288 len = (int32_t)uprv_strlen(s); 289 } 290 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { 291 return TRUE; 292 } 293 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { 294 return TRUE; 295 } 296 return FALSE; 297 } 298 299 static UBool 300 _isPrivateuseVariantSubtag(const char* s, int32_t len) { 301 /* 302 * variant = 1*8alphanum ; registered variants 303 * / (DIGIT 3alphanum) 304 */ 305 if (len < 0) { 306 len = (int32_t)uprv_strlen(s); 307 } 308 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 309 return TRUE; 310 } 311 return FALSE; 312 } 313 314 static UBool 315 _isExtensionSingleton(const char* s, int32_t len) { 316 /* 317 * extension = singleton 1*("-" (2*8alphanum)) 318 */ 319 if (len < 0) { 320 len = (int32_t)uprv_strlen(s); 321 } 322 if (len == 1 && ISALPHA(*s) && 
(uprv_tolower(*s) != PRIVATEUSE)) { 323 return TRUE; 324 } 325 return FALSE; 326 } 327 328 static UBool 329 _isExtensionSubtag(const char* s, int32_t len) { 330 /* 331 * extension = singleton 1*("-" (2*8alphanum)) 332 */ 333 if (len < 0) { 334 len = (int32_t)uprv_strlen(s); 335 } 336 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { 337 return TRUE; 338 } 339 return FALSE; 340 } 341 342 static UBool 343 _isExtensionSubtags(const char* s, int32_t len) { 344 const char *p = s; 345 const char *pSubtag = NULL; 346 347 if (len < 0) { 348 len = (int32_t)uprv_strlen(s); 349 } 350 351 while ((p - s) < len) { 352 if (*p == SEP) { 353 if (pSubtag == NULL) { 354 return FALSE; 355 } 356 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { 357 return FALSE; 358 } 359 pSubtag = NULL; 360 } else if (pSubtag == NULL) { 361 pSubtag = p; 362 } 363 p++; 364 } 365 if (pSubtag == NULL) { 366 return FALSE; 367 } 368 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); 369 } 370 371 static UBool 372 _isPrivateuseValueSubtag(const char* s, int32_t len) { 373 /* 374 * privateuse = "x" 1*("-" (1*8alphanum)) 375 */ 376 if (len < 0) { 377 len = (int32_t)uprv_strlen(s); 378 } 379 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 380 return TRUE; 381 } 382 return FALSE; 383 } 384 385 static UBool 386 _isPrivateuseValueSubtags(const char* s, int32_t len) { 387 const char *p = s; 388 const char *pSubtag = NULL; 389 390 if (len < 0) { 391 len = (int32_t)uprv_strlen(s); 392 } 393 394 while ((p - s) < len) { 395 if (*p == SEP) { 396 if (pSubtag == NULL) { 397 return FALSE; 398 } 399 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { 400 return FALSE; 401 } 402 pSubtag = NULL; 403 } else if (pSubtag == NULL) { 404 pSubtag = p; 405 } 406 p++; 407 } 408 if (pSubtag == NULL) { 409 return FALSE; 410 } 411 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); 412 } 413 414 U_CFUNC UBool 415 ultag_isUnicodeLocaleKey(const char* s, int32_t len) 
{ 416 if (len < 0) { 417 len = (int32_t)uprv_strlen(s); 418 } 419 if (len == 2 && _isAlphaNumericString(s, len)) { 420 return TRUE; 421 } 422 return FALSE; 423 } 424 425 U_CFUNC UBool 426 ultag_isUnicodeLocaleType(const char*s, int32_t len) { 427 const char* p; 428 int32_t subtagLen = 0; 429 430 if (len < 0) { 431 len = (int32_t)uprv_strlen(s); 432 } 433 434 for (p = s; len > 0; p++, len--) { 435 if (*p == SEP) { 436 if (subtagLen < 3) { 437 return FALSE; 438 } 439 subtagLen = 0; 440 } else if (ISALPHA(*p) || ISNUMERIC(*p)) { 441 subtagLen++; 442 if (subtagLen > 8) { 443 return FALSE; 444 } 445 } else { 446 return FALSE; 447 } 448 } 449 450 return (subtagLen >= 3); 451 } 452 /* 453 * ------------------------------------------------- 454 * 455 * Helper functions 456 * 457 * ------------------------------------------------- 458 */ 459 460 static UBool 461 _addVariantToList(VariantListEntry **first, VariantListEntry *var) { 462 UBool bAdded = TRUE; 463 464 if (*first == NULL) { 465 var->next = NULL; 466 *first = var; 467 } else { 468 VariantListEntry *prev, *cur; 469 int32_t cmp; 470 471 /* variants order should be preserved */ 472 prev = NULL; 473 cur = *first; 474 while (TRUE) { 475 if (cur == NULL) { 476 prev->next = var; 477 var->next = NULL; 478 break; 479 } 480 481 /* Checking for duplicate variant */ 482 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); 483 if (cmp == 0) { 484 /* duplicated variant */ 485 bAdded = FALSE; 486 break; 487 } 488 prev = cur; 489 cur = cur->next; 490 } 491 } 492 493 return bAdded; 494 } 495 496 static UBool 497 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { 498 UBool bAdded = TRUE; 499 500 if (*first == NULL) { 501 attr->next = NULL; 502 *first = attr; 503 } else { 504 AttributeListEntry *prev, *cur; 505 int32_t cmp; 506 507 /* reorder variants in alphabetical order */ 508 prev = NULL; 509 cur = *first; 510 while (TRUE) { 511 if (cur == NULL) { 512 prev->next = attr; 513 attr->next = NULL; 
514 break; 515 } 516 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); 517 if (cmp < 0) { 518 if (prev == NULL) { 519 *first = attr; 520 } else { 521 prev->next = attr; 522 } 523 attr->next = cur; 524 break; 525 } 526 if (cmp == 0) { 527 /* duplicated variant */ 528 bAdded = FALSE; 529 break; 530 } 531 prev = cur; 532 cur = cur->next; 533 } 534 } 535 536 return bAdded; 537 } 538 539 540 static UBool 541 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { 542 UBool bAdded = TRUE; 543 544 if (*first == NULL) { 545 ext->next = NULL; 546 *first = ext; 547 } else { 548 ExtensionListEntry *prev, *cur; 549 int32_t cmp; 550 551 /* reorder variants in alphabetical order */ 552 prev = NULL; 553 cur = *first; 554 while (TRUE) { 555 if (cur == NULL) { 556 prev->next = ext; 557 ext->next = NULL; 558 break; 559 } 560 if (localeToBCP) { 561 /* special handling for locale to bcp conversion */ 562 int32_t len, curlen; 563 564 len = (int32_t)uprv_strlen(ext->key); 565 curlen = (int32_t)uprv_strlen(cur->key); 566 567 if (len == 1 && curlen == 1) { 568 if (*(ext->key) == *(cur->key)) { 569 cmp = 0; 570 } else if (*(ext->key) == PRIVATEUSE) { 571 cmp = 1; 572 } else if (*(cur->key) == PRIVATEUSE) { 573 cmp = -1; 574 } else { 575 cmp = *(ext->key) - *(cur->key); 576 } 577 } else if (len == 1) { 578 cmp = *(ext->key) - LDMLEXT; 579 } else if (curlen == 1) { 580 cmp = LDMLEXT - *(cur->key); 581 } else { 582 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 583 /* Both are u extension keys - we need special handling for 'attribute' */ 584 if (cmp != 0) { 585 if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { 586 cmp = 1; 587 } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { 588 cmp = -1; 589 } 590 } 591 } 592 } else { 593 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 594 } 595 if (cmp < 0) { 596 if (prev == NULL) { 597 *first = ext; 598 } else { 599 prev->next = ext; 600 } 601 ext->next = cur; 602 
break; 603 } 604 if (cmp == 0) { 605 /* duplicated extension key */ 606 bAdded = FALSE; 607 break; 608 } 609 prev = cur; 610 cur = cur->next; 611 } 612 } 613 614 return bAdded; 615 } 616 617 static void 618 _initializeULanguageTag(ULanguageTag* langtag) { 619 int32_t i; 620 621 langtag->buf = NULL; 622 623 langtag->language = EMPTY; 624 for (i = 0; i < MAXEXTLANG; i++) { 625 langtag->extlang[i] = NULL; 626 } 627 628 langtag->script = EMPTY; 629 langtag->region = EMPTY; 630 631 langtag->variants = NULL; 632 langtag->extensions = NULL; 633 634 langtag->grandfathered = EMPTY; 635 langtag->privateuse = EMPTY; 636 } 637 638 static int32_t 639 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 640 char buf[ULOC_LANG_CAPACITY]; 641 UErrorCode tmpStatus = U_ZERO_ERROR; 642 int32_t len, i; 643 int32_t reslen = 0; 644 645 if (U_FAILURE(*status)) { 646 return 0; 647 } 648 649 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); 650 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 651 if (strict) { 652 *status = U_ILLEGAL_ARGUMENT_ERROR; 653 return 0; 654 } 655 len = 0; 656 } 657 658 /* Note: returned language code is in lower case letters */ 659 660 if (len == 0) { 661 if (reslen < capacity) { 662 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 663 } 664 reslen += LANG_UND_LEN; 665 } else if (!_isLanguageSubtag(buf, len)) { 666 /* invalid language code */ 667 if (strict) { 668 *status = U_ILLEGAL_ARGUMENT_ERROR; 669 return 0; 670 } 671 if (reslen < capacity) { 672 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 673 } 674 reslen += LANG_UND_LEN; 675 } else { 676 /* resolve deprecated */ 677 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { 678 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { 679 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); 680 len = (int32_t)uprv_strlen(buf); 
681 break; 682 } 683 } 684 if (reslen < capacity) { 685 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 686 } 687 reslen += len; 688 } 689 u_terminateChars(appendAt, capacity, reslen, status); 690 return reslen; 691 } 692 693 static int32_t 694 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 695 char buf[ULOC_SCRIPT_CAPACITY]; 696 UErrorCode tmpStatus = U_ZERO_ERROR; 697 int32_t len; 698 int32_t reslen = 0; 699 700 if (U_FAILURE(*status)) { 701 return 0; 702 } 703 704 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); 705 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 706 if (strict) { 707 *status = U_ILLEGAL_ARGUMENT_ERROR; 708 } 709 return 0; 710 } 711 712 if (len > 0) { 713 if (!_isScriptSubtag(buf, len)) { 714 /* invalid script code */ 715 if (strict) { 716 *status = U_ILLEGAL_ARGUMENT_ERROR; 717 } 718 return 0; 719 } else { 720 if (reslen < capacity) { 721 *(appendAt + reslen) = SEP; 722 } 723 reslen++; 724 725 if (reslen < capacity) { 726 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 727 } 728 reslen += len; 729 } 730 } 731 u_terminateChars(appendAt, capacity, reslen, status); 732 return reslen; 733 } 734 735 static int32_t 736 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 737 char buf[ULOC_COUNTRY_CAPACITY]; 738 UErrorCode tmpStatus = U_ZERO_ERROR; 739 int32_t len; 740 int32_t reslen = 0; 741 742 if (U_FAILURE(*status)) { 743 return 0; 744 } 745 746 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); 747 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 748 if (strict) { 749 *status = U_ILLEGAL_ARGUMENT_ERROR; 750 } 751 return 0; 752 } 753 754 if (len > 0) { 755 if (!_isRegionSubtag(buf, len)) { 756 /* invalid region code */ 757 if (strict) { 758 *status = U_ILLEGAL_ARGUMENT_ERROR; 759 } 760 
return 0; 761 } else { 762 if (reslen < capacity) { 763 *(appendAt + reslen) = SEP; 764 } 765 reslen++; 766 767 if (reslen < capacity) { 768 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 769 } 770 reslen += len; 771 } 772 } 773 u_terminateChars(appendAt, capacity, reslen, status); 774 return reslen; 775 } 776 777 static int32_t 778 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { 779 char buf[ULOC_FULLNAME_CAPACITY]; 780 UErrorCode tmpStatus = U_ZERO_ERROR; 781 int32_t len, i; 782 int32_t reslen = 0; 783 784 if (U_FAILURE(*status)) { 785 return 0; 786 } 787 788 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 789 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 790 if (strict) { 791 *status = U_ILLEGAL_ARGUMENT_ERROR; 792 } 793 return 0; 794 } 795 796 if (len > 0) { 797 char *p, *pVar; 798 UBool bNext = TRUE; 799 VariantListEntry *var; 800 VariantListEntry *varFirst = NULL; 801 802 pVar = NULL; 803 p = buf; 804 while (bNext) { 805 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 806 if (*p == 0) { 807 bNext = FALSE; 808 } else { 809 *p = 0; /* terminate */ 810 } 811 if (pVar == NULL) { 812 if (strict) { 813 *status = U_ILLEGAL_ARGUMENT_ERROR; 814 break; 815 } 816 /* ignore empty variant */ 817 } else { 818 /* ICU uses upper case letters for variants, but 819 the canonical format is lowercase in BCP47 */ 820 for (i = 0; *(pVar + i) != 0; i++) { 821 *(pVar + i) = uprv_tolower(*(pVar + i)); 822 } 823 824 /* validate */ 825 if (_isVariantSubtag(pVar, -1)) { 826 if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) { 827 /* emit the variant to the list */ 828 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 829 if (var == NULL) { 830 *status = U_MEMORY_ALLOCATION_ERROR; 831 break; 832 } 833 var->variant = pVar; 834 if (!_addVariantToList(&varFirst, var)) { 835 /* duplicated 
variant */ 836 uprv_free(var); 837 if (strict) { 838 *status = U_ILLEGAL_ARGUMENT_ERROR; 839 break; 840 } 841 } 842 } else { 843 /* Special handling for POSIX variant, need to remember that we had it and then */ 844 /* treat it like an extension later. */ 845 *hadPosix = TRUE; 846 } 847 } else if (strict) { 848 *status = U_ILLEGAL_ARGUMENT_ERROR; 849 break; 850 } else if (_isPrivateuseValueSubtag(pVar, -1)) { 851 /* Handle private use subtags separately */ 852 break; 853 } 854 } 855 /* reset variant starting position */ 856 pVar = NULL; 857 } else if (pVar == NULL) { 858 pVar = p; 859 } 860 p++; 861 } 862 863 if (U_SUCCESS(*status)) { 864 if (varFirst != NULL) { 865 int32_t varLen; 866 867 /* write out validated/normalized variants to the target */ 868 var = varFirst; 869 while (var != NULL) { 870 if (reslen < capacity) { 871 *(appendAt + reslen) = SEP; 872 } 873 reslen++; 874 varLen = (int32_t)uprv_strlen(var->variant); 875 if (reslen < capacity) { 876 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); 877 } 878 reslen += varLen; 879 var = var->next; 880 } 881 } 882 } 883 884 /* clean up */ 885 var = varFirst; 886 while (var != NULL) { 887 VariantListEntry *tmpVar = var->next; 888 uprv_free(var); 889 var = tmpVar; 890 } 891 892 if (U_FAILURE(*status)) { 893 return 0; 894 } 895 } 896 897 u_terminateChars(appendAt, capacity, reslen, status); 898 return reslen; 899 } 900 901 static int32_t 902 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 903 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 904 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; 905 int32_t attrBufLength = 0; 906 UEnumeration *keywordEnum = NULL; 907 int32_t reslen = 0; 908 909 keywordEnum = uloc_openKeywords(localeID, status); 910 if (U_FAILURE(*status) && !hadPosix) { 911 uenum_close(keywordEnum); 912 return 0; 913 } 914 if (keywordEnum != NULL || hadPosix) { 915 /* reorder 
extensions */ 916 int32_t len; 917 const char *key; 918 ExtensionListEntry *firstExt = NULL; 919 ExtensionListEntry *ext; 920 AttributeListEntry *firstAttr = NULL; 921 AttributeListEntry *attr; 922 char *attrValue; 923 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 924 char *pExtBuf = extBuf; 925 int32_t extBufCapacity = sizeof(extBuf); 926 const char *bcpKey=nullptr, *bcpValue=nullptr; 927 UErrorCode tmpStatus = U_ZERO_ERROR; 928 int32_t keylen; 929 UBool isBcpUExt; 930 931 while (TRUE) { 932 key = uenum_next(keywordEnum, NULL, status); 933 if (key == NULL) { 934 break; 935 } 936 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); 937 /* buf must be null-terminated */ 938 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 939 if (strict) { 940 *status = U_ILLEGAL_ARGUMENT_ERROR; 941 break; 942 } 943 /* ignore this keyword */ 944 tmpStatus = U_ZERO_ERROR; 945 continue; 946 } 947 948 keylen = (int32_t)uprv_strlen(key); 949 isBcpUExt = (keylen > 1); 950 951 /* special keyword used for representing Unicode locale attributes */ 952 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { 953 if (len > 0) { 954 int32_t i = 0; 955 while (TRUE) { 956 attrBufLength = 0; 957 for (; i < len; i++) { 958 if (buf[i] != '-') { 959 attrBuf[attrBufLength++] = buf[i]; 960 } else { 961 i++; 962 break; 963 } 964 } 965 if (attrBufLength > 0) { 966 attrBuf[attrBufLength] = 0; 967 968 } else if (i >= len){ 969 break; 970 } 971 972 /* create AttributeListEntry */ 973 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); 974 if (attr == NULL) { 975 *status = U_MEMORY_ALLOCATION_ERROR; 976 break; 977 } 978 attrValue = (char*)uprv_malloc(attrBufLength + 1); 979 if (attrValue == NULL) { 980 *status = U_MEMORY_ALLOCATION_ERROR; 981 break; 982 } 983 uprv_strcpy(attrValue, attrBuf); 984 attr->attribute = attrValue; 985 986 if (!_addAttributeToList(&firstAttr, attr)) { 987 uprv_free(attr); 988 uprv_free(attrValue); 989 if (strict) { 990 
*status = U_ILLEGAL_ARGUMENT_ERROR; 991 break; 992 } 993 } 994 } 995 /* for a place holder ExtensionListEntry */ 996 bcpKey = LOCALE_ATTRIBUTE_KEY; 997 bcpValue = NULL; 998 } 999 } else if (isBcpUExt) { 1000 bcpKey = uloc_toUnicodeLocaleKey(key); 1001 if (bcpKey == NULL) { 1002 if (strict) { 1003 *status = U_ILLEGAL_ARGUMENT_ERROR; 1004 break; 1005 } 1006 continue; 1007 } 1008 1009 /* we've checked buf is null-terminated above */ 1010 bcpValue = uloc_toUnicodeLocaleType(key, buf); 1011 if (bcpValue == NULL) { 1012 if (strict) { 1013 *status = U_ILLEGAL_ARGUMENT_ERROR; 1014 break; 1015 } 1016 continue; 1017 } 1018 if (bcpValue == buf) { 1019 /* 1020 When uloc_toUnicodeLocaleType(key, buf) returns the 1021 input value as is, the value is well-formed, but has 1022 no known mapping. This implementation normalizes the 1023 the value to lower case 1024 */ 1025 int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue)); 1026 if (bcpValueLen < extBufCapacity) { 1027 uprv_strcpy(pExtBuf, bcpValue); 1028 T_CString_toLowerCase(pExtBuf); 1029 1030 bcpValue = pExtBuf; 1031 1032 pExtBuf += (bcpValueLen + 1); 1033 extBufCapacity -= (bcpValueLen + 1); 1034 } else { 1035 if (strict) { 1036 *status = U_ILLEGAL_ARGUMENT_ERROR; 1037 break; 1038 } 1039 continue; 1040 } 1041 } 1042 } else { 1043 if (*key == PRIVATEUSE) { 1044 if (!_isPrivateuseValueSubtags(buf, len)) { 1045 if (strict) { 1046 *status = U_ILLEGAL_ARGUMENT_ERROR; 1047 break; 1048 } 1049 continue; 1050 } 1051 } else { 1052 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { 1053 if (strict) { 1054 *status = U_ILLEGAL_ARGUMENT_ERROR; 1055 break; 1056 } 1057 continue; 1058 } 1059 } 1060 bcpKey = key; 1061 if ((len + 1) < extBufCapacity) { 1062 uprv_memcpy(pExtBuf, buf, len); 1063 bcpValue = pExtBuf; 1064 1065 pExtBuf += len; 1066 1067 *pExtBuf = 0; 1068 pExtBuf++; 1069 1070 extBufCapacity -= (len + 1); 1071 } else { 1072 *status = U_ILLEGAL_ARGUMENT_ERROR; 1073 break; 1074 } 1075 } 1076 1077 
/* create ExtensionListEntry */ 1078 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1079 if (ext == NULL) { 1080 *status = U_MEMORY_ALLOCATION_ERROR; 1081 break; 1082 } 1083 ext->key = bcpKey; 1084 ext->value = bcpValue; 1085 1086 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1087 uprv_free(ext); 1088 if (strict) { 1089 *status = U_ILLEGAL_ARGUMENT_ERROR; 1090 break; 1091 } 1092 } 1093 } 1094 1095 /* Special handling for POSIX variant - add the keywords for POSIX */ 1096 if (hadPosix) { 1097 /* create ExtensionListEntry for POSIX */ 1098 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1099 if (ext == NULL) { 1100 *status = U_MEMORY_ALLOCATION_ERROR; 1101 goto cleanup; 1102 } 1103 ext->key = POSIX_KEY; 1104 ext->value = POSIX_VALUE; 1105 1106 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1107 uprv_free(ext); 1108 } 1109 } 1110 1111 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { 1112 UBool startLDMLExtension = FALSE; 1113 for (ext = firstExt; ext; ext = ext->next) { 1114 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { 1115 /* first LDML u singlton extension */ 1116 if (reslen < capacity) { 1117 *(appendAt + reslen) = SEP; 1118 } 1119 reslen++; 1120 if (reslen < capacity) { 1121 *(appendAt + reslen) = LDMLEXT; 1122 } 1123 reslen++; 1124 1125 startLDMLExtension = TRUE; 1126 } 1127 1128 /* write out the sorted BCP47 attributes, extensions and private use */ 1129 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { 1130 /* write the value for the attributes */ 1131 for (attr = firstAttr; attr; attr = attr->next) { 1132 if (reslen < capacity) { 1133 *(appendAt + reslen) = SEP; 1134 } 1135 reslen++; 1136 len = (int32_t)uprv_strlen(attr->attribute); 1137 if (reslen < capacity) { 1138 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); 1139 } 1140 reslen += len; 1141 } 1142 } else { 1143 if (reslen < capacity) { 1144 *(appendAt + reslen) = SEP; 1145 } 1146 
reslen++; 1147 len = (int32_t)uprv_strlen(ext->key); 1148 if (reslen < capacity) { 1149 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); 1150 } 1151 reslen += len; 1152 if (reslen < capacity) { 1153 *(appendAt + reslen) = SEP; 1154 } 1155 reslen++; 1156 len = (int32_t)uprv_strlen(ext->value); 1157 if (reslen < capacity) { 1158 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); 1159 } 1160 reslen += len; 1161 } 1162 } 1163 } 1164 cleanup: 1165 /* clean up */ 1166 ext = firstExt; 1167 while (ext != NULL) { 1168 ExtensionListEntry *tmpExt = ext->next; 1169 uprv_free(ext); 1170 ext = tmpExt; 1171 } 1172 1173 attr = firstAttr; 1174 while (attr != NULL) { 1175 AttributeListEntry *tmpAttr = attr->next; 1176 char *pValue = (char *)attr->attribute; 1177 uprv_free(pValue); 1178 uprv_free(attr); 1179 attr = tmpAttr; 1180 } 1181 1182 uenum_close(keywordEnum); 1183 1184 if (U_FAILURE(*status)) { 1185 return 0; 1186 } 1187 } 1188 1189 return u_terminateChars(appendAt, capacity, reslen, status); 1190 } 1191 1192 /** 1193 * Append keywords parsed from LDML extension value 1194 * e.g. 
"u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} 1195 * Note: char* buf is used for storing keywords 1196 */ 1197 static void 1198 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { 1199 const char *pTag; /* beginning of current subtag */ 1200 const char *pKwds; /* beginning of key-type pairs */ 1201 UBool variantExists = *posixVariant; 1202 1203 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ 1204 ExtensionListEntry *kwd, *nextKwd; 1205 1206 AttributeListEntry *attrFirst = NULL; /* first attribute */ 1207 AttributeListEntry *attr, *nextAttr; 1208 1209 int32_t len; 1210 int32_t bufIdx = 0; 1211 1212 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1213 int32_t attrBufIdx = 0; 1214 1215 /* Reset the posixVariant value */ 1216 *posixVariant = FALSE; 1217 1218 pTag = ldmlext; 1219 pKwds = NULL; 1220 1221 /* Iterate through u extension attributes */ 1222 while (*pTag) { 1223 /* locate next separator char */ 1224 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); 1225 1226 if (ultag_isUnicodeLocaleKey(pTag, len)) { 1227 pKwds = pTag; 1228 break; 1229 } 1230 1231 /* add this attribute to the list */ 1232 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); 1233 if (attr == NULL) { 1234 *status = U_MEMORY_ALLOCATION_ERROR; 1235 goto cleanup; 1236 } 1237 1238 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { 1239 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); 1240 attrBuf[attrBufIdx + len] = 0; 1241 attr->attribute = &attrBuf[attrBufIdx]; 1242 attrBufIdx += (len + 1); 1243 } else { 1244 *status = U_ILLEGAL_ARGUMENT_ERROR; 1245 goto cleanup; 1246 } 1247 1248 if (!_addAttributeToList(&attrFirst, attr)) { 1249 *status = U_ILLEGAL_ARGUMENT_ERROR; 1250 uprv_free(attr); 1251 goto cleanup; 1252 } 1253 1254 /* next tag */ 1255 pTag += len; 1256 if (*pTag) { 1257 /* next to the separator */ 1258 pTag++; 1259 } 1260 } 
1261 1262 if (attrFirst) { 1263 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ 1264 1265 if (attrBufIdx > bufSize) { 1266 /* attrBufIdx == <total length of attribute subtag> + 1 */ 1267 *status = U_ILLEGAL_ARGUMENT_ERROR; 1268 goto cleanup; 1269 } 1270 1271 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1272 if (kwd == NULL) { 1273 *status = U_MEMORY_ALLOCATION_ERROR; 1274 goto cleanup; 1275 } 1276 1277 kwd->key = LOCALE_ATTRIBUTE_KEY; 1278 kwd->value = buf; 1279 1280 /* attribute subtags sorted in alphabetical order as type */ 1281 attr = attrFirst; 1282 while (attr != NULL) { 1283 nextAttr = attr->next; 1284 1285 /* buffer size check is done above */ 1286 if (attr != attrFirst) { 1287 *(buf + bufIdx) = SEP; 1288 bufIdx++; 1289 } 1290 1291 len = static_cast<int32_t>(uprv_strlen(attr->attribute)); 1292 uprv_memcpy(buf + bufIdx, attr->attribute, len); 1293 bufIdx += len; 1294 1295 attr = nextAttr; 1296 } 1297 *(buf + bufIdx) = 0; 1298 bufIdx++; 1299 1300 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1301 *status = U_ILLEGAL_ARGUMENT_ERROR; 1302 uprv_free(kwd); 1303 goto cleanup; 1304 } 1305 1306 /* once keyword entry is created, delete the attribute list */ 1307 attr = attrFirst; 1308 while (attr != NULL) { 1309 nextAttr = attr->next; 1310 uprv_free(attr); 1311 attr = nextAttr; 1312 } 1313 attrFirst = NULL; 1314 } 1315 1316 if (pKwds) { 1317 const char *pBcpKey = NULL; /* u extenstion key subtag */ 1318 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ 1319 int32_t bcpKeyLen = 0; 1320 int32_t bcpTypeLen = 0; 1321 UBool isDone = FALSE; 1322 1323 pTag = pKwds; 1324 /* BCP47 representation of LDML key/type pairs */ 1325 while (!isDone) { 1326 const char *pNextBcpKey = NULL; 1327 int32_t nextBcpKeyLen = 0; 1328 UBool emitKeyword = FALSE; 1329 1330 if (*pTag) { 1331 /* locate next separator char */ 1332 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); 1333 1334 if 
(ultag_isUnicodeLocaleKey(pTag, len)) { 1335 if (pBcpKey) { 1336 emitKeyword = TRUE; 1337 pNextBcpKey = pTag; 1338 nextBcpKeyLen = len; 1339 } else { 1340 pBcpKey = pTag; 1341 bcpKeyLen = len; 1342 } 1343 } else { 1344 U_ASSERT(pBcpKey != NULL); 1345 /* within LDML type subtags */ 1346 if (pBcpType) { 1347 bcpTypeLen += (len + 1); 1348 } else { 1349 pBcpType = pTag; 1350 bcpTypeLen = len; 1351 } 1352 } 1353 1354 /* next tag */ 1355 pTag += len; 1356 if (*pTag) { 1357 /* next to the separator */ 1358 pTag++; 1359 } 1360 } else { 1361 /* processing last one */ 1362 emitKeyword = TRUE; 1363 isDone = TRUE; 1364 } 1365 1366 if (emitKeyword) { 1367 const char *pKey = NULL; /* LDML key */ 1368 const char *pType = NULL; /* LDML type */ 1369 1370 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ 1371 1372 U_ASSERT(pBcpKey != NULL); 1373 1374 if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) { 1375 /* the BCP key is invalid */ 1376 *status = U_ILLEGAL_ARGUMENT_ERROR; 1377 goto cleanup; 1378 } 1379 1380 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); 1381 bcpKeyBuf[bcpKeyLen] = 0; 1382 1383 /* u extension key to LDML key */ 1384 pKey = uloc_toLegacyKey(bcpKeyBuf); 1385 if (pKey == NULL) { 1386 *status = U_ILLEGAL_ARGUMENT_ERROR; 1387 goto cleanup; 1388 } 1389 if (pKey == bcpKeyBuf) { 1390 /* 1391 The key returned by toLegacyKey points to the input buffer. 1392 We normalize the result key to lower case. 
1393 */ 1394 T_CString_toLowerCase(bcpKeyBuf); 1395 if (bufSize - bufIdx - 1 >= bcpKeyLen) { 1396 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); 1397 pKey = buf + bufIdx; 1398 bufIdx += bcpKeyLen; 1399 *(buf + bufIdx) = 0; 1400 bufIdx++; 1401 } else { 1402 *status = U_BUFFER_OVERFLOW_ERROR; 1403 goto cleanup; 1404 } 1405 } 1406 1407 if (pBcpType) { 1408 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ 1409 if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) { 1410 /* the BCP type is too long */ 1411 *status = U_ILLEGAL_ARGUMENT_ERROR; 1412 goto cleanup; 1413 } 1414 1415 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); 1416 bcpTypeBuf[bcpTypeLen] = 0; 1417 1418 /* BCP type to locale type */ 1419 pType = uloc_toLegacyType(pKey, bcpTypeBuf); 1420 if (pType == NULL) { 1421 *status = U_ILLEGAL_ARGUMENT_ERROR; 1422 goto cleanup; 1423 } 1424 if (pType == bcpTypeBuf) { 1425 /* 1426 The type returned by toLegacyType points to the input buffer. 1427 We normalize the result type to lower case. 
1428 */ 1429 /* normalize to lower case */ 1430 T_CString_toLowerCase(bcpTypeBuf); 1431 if (bufSize - bufIdx - 1 >= bcpTypeLen) { 1432 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); 1433 pType = buf + bufIdx; 1434 bufIdx += bcpTypeLen; 1435 *(buf + bufIdx) = 0; 1436 bufIdx++; 1437 } else { 1438 *status = U_BUFFER_OVERFLOW_ERROR; 1439 goto cleanup; 1440 } 1441 } 1442 } else { 1443 /* typeless - default type value is "yes" */ 1444 pType = LOCALE_TYPE_YES; 1445 } 1446 1447 /* Special handling for u-va-posix, since we want to treat this as a variant, 1448 not as a keyword */ 1449 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { 1450 *posixVariant = TRUE; 1451 } else { 1452 /* create an ExtensionListEntry for this keyword */ 1453 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1454 if (kwd == NULL) { 1455 *status = U_MEMORY_ALLOCATION_ERROR; 1456 goto cleanup; 1457 } 1458 1459 kwd->key = pKey; 1460 kwd->value = pType; 1461 1462 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1463 *status = U_ILLEGAL_ARGUMENT_ERROR; 1464 uprv_free(kwd); 1465 goto cleanup; 1466 } 1467 } 1468 1469 pBcpKey = pNextBcpKey; 1470 bcpKeyLen = pNextBcpKey != NULL ? 
nextBcpKeyLen : 0; 1471 pBcpType = NULL; 1472 bcpTypeLen = 0; 1473 } 1474 } 1475 } 1476 1477 kwd = kwdFirst; 1478 while (kwd != NULL) { 1479 nextKwd = kwd->next; 1480 _addExtensionToList(appendTo, kwd, FALSE); 1481 kwd = nextKwd; 1482 } 1483 1484 return; 1485 1486 cleanup: 1487 attr = attrFirst; 1488 while (attr != NULL) { 1489 nextAttr = attr->next; 1490 uprv_free(attr); 1491 attr = nextAttr; 1492 } 1493 1494 kwd = kwdFirst; 1495 while (kwd != NULL) { 1496 nextKwd = kwd->next; 1497 uprv_free(kwd); 1498 kwd = nextKwd; 1499 } 1500 } 1501 1502 1503 static int32_t 1504 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { 1505 int32_t reslen = 0; 1506 int32_t i, n; 1507 int32_t len; 1508 ExtensionListEntry *kwdFirst = NULL; 1509 ExtensionListEntry *kwd; 1510 const char *key, *type; 1511 char *kwdBuf = NULL; 1512 int32_t kwdBufLength = capacity; 1513 UBool posixVariant = FALSE; 1514 1515 if (U_FAILURE(*status)) { 1516 return 0; 1517 } 1518 1519 kwdBuf = (char*)uprv_malloc(kwdBufLength); 1520 if (kwdBuf == NULL) { 1521 *status = U_MEMORY_ALLOCATION_ERROR; 1522 return 0; 1523 } 1524 1525 /* Determine if variants already exists */ 1526 if (ultag_getVariantsSize(langtag)) { 1527 posixVariant = TRUE; 1528 } 1529 1530 n = ultag_getExtensionsSize(langtag); 1531 1532 /* resolve locale keywords and reordering keys */ 1533 for (i = 0; i < n; i++) { 1534 key = ultag_getExtensionKey(langtag, i); 1535 type = ultag_getExtensionValue(langtag, i); 1536 if (*key == LDMLEXT) { 1537 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); 1538 if (U_FAILURE(*status)) { 1539 break; 1540 } 1541 } else { 1542 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1543 if (kwd == NULL) { 1544 *status = U_MEMORY_ALLOCATION_ERROR; 1545 break; 1546 } 1547 kwd->key = key; 1548 kwd->value = type; 1549 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1550 uprv_free(kwd); 1551 *status = 
U_ILLEGAL_ARGUMENT_ERROR; 1552 break; 1553 } 1554 } 1555 } 1556 1557 if (U_SUCCESS(*status)) { 1558 type = ultag_getPrivateUse(langtag); 1559 if ((int32_t)uprv_strlen(type) > 0) { 1560 /* add private use as a keyword */ 1561 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1562 if (kwd == NULL) { 1563 *status = U_MEMORY_ALLOCATION_ERROR; 1564 } else { 1565 kwd->key = PRIVATEUSE_KEY; 1566 kwd->value = type; 1567 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1568 uprv_free(kwd); 1569 *status = U_ILLEGAL_ARGUMENT_ERROR; 1570 } 1571 } 1572 } 1573 } 1574 1575 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ 1576 1577 if (U_SUCCESS(*status) && posixVariant) { 1578 len = (int32_t) uprv_strlen(_POSIX); 1579 if (reslen < capacity) { 1580 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); 1581 } 1582 reslen += len; 1583 } 1584 1585 if (U_SUCCESS(*status) && kwdFirst != NULL) { 1586 /* write out the sorted keywords */ 1587 UBool firstValue = TRUE; 1588 kwd = kwdFirst; 1589 do { 1590 if (reslen < capacity) { 1591 if (firstValue) { 1592 /* '@' */ 1593 *(appendAt + reslen) = LOCALE_EXT_SEP; 1594 firstValue = FALSE; 1595 } else { 1596 /* ';' */ 1597 *(appendAt + reslen) = LOCALE_KEYWORD_SEP; 1598 } 1599 } 1600 reslen++; 1601 1602 /* key */ 1603 len = (int32_t)uprv_strlen(kwd->key); 1604 if (reslen < capacity) { 1605 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); 1606 } 1607 reslen += len; 1608 1609 /* '=' */ 1610 if (reslen < capacity) { 1611 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; 1612 } 1613 reslen++; 1614 1615 /* type */ 1616 len = (int32_t)uprv_strlen(kwd->value); 1617 if (reslen < capacity) { 1618 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); 1619 } 1620 reslen += len; 1621 1622 kwd = kwd->next; 1623 } while (kwd); 1624 } 1625 1626 /* clean up */ 1627 kwd = kwdFirst; 1628 while (kwd != NULL) { 1629 ExtensionListEntry *tmpKwd 
= kwd->next; 1630 uprv_free(kwd); 1631 kwd = tmpKwd; 1632 } 1633 1634 uprv_free(kwdBuf); 1635 1636 if (U_FAILURE(*status)) { 1637 return 0; 1638 } 1639 1640 return u_terminateChars(appendAt, capacity, reslen, status); 1641 } 1642 1643 static int32_t 1644 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 1645 (void)hadPosix; 1646 char buf[ULOC_FULLNAME_CAPACITY]; 1647 char tmpAppend[ULOC_FULLNAME_CAPACITY]; 1648 UErrorCode tmpStatus = U_ZERO_ERROR; 1649 int32_t len, i; 1650 int32_t reslen = 0; 1651 1652 if (U_FAILURE(*status)) { 1653 return 0; 1654 } 1655 1656 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 1657 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1658 if (strict) { 1659 *status = U_ILLEGAL_ARGUMENT_ERROR; 1660 } 1661 return 0; 1662 } 1663 1664 if (len > 0) { 1665 char *p, *pPriv; 1666 UBool bNext = TRUE; 1667 UBool firstValue = TRUE; 1668 UBool writeValue; 1669 1670 pPriv = NULL; 1671 p = buf; 1672 while (bNext) { 1673 writeValue = FALSE; 1674 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 1675 if (*p == 0) { 1676 bNext = FALSE; 1677 } else { 1678 *p = 0; /* terminate */ 1679 } 1680 if (pPriv != NULL) { 1681 /* Private use in the canonical format is lowercase in BCP47 */ 1682 for (i = 0; *(pPriv + i) != 0; i++) { 1683 *(pPriv + i) = uprv_tolower(*(pPriv + i)); 1684 } 1685 1686 /* validate */ 1687 if (_isPrivateuseValueSubtag(pPriv, -1)) { 1688 if (firstValue) { 1689 if (!_isVariantSubtag(pPriv, -1)) { 1690 writeValue = TRUE; 1691 } 1692 } else { 1693 writeValue = TRUE; 1694 } 1695 } else if (strict) { 1696 *status = U_ILLEGAL_ARGUMENT_ERROR; 1697 break; 1698 } else { 1699 break; 1700 } 1701 1702 if (writeValue) { 1703 if (reslen < capacity) { 1704 tmpAppend[reslen++] = SEP; 1705 } 1706 1707 if (firstValue) { 1708 if (reslen < capacity) { 1709 tmpAppend[reslen++] = *PRIVATEUSE_KEY; 1710 } 1711 1712 if (reslen < capacity) { 
1713 tmpAppend[reslen++] = SEP; 1714 } 1715 1716 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); 1717 if (reslen < capacity) { 1718 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); 1719 } 1720 reslen += len; 1721 1722 if (reslen < capacity) { 1723 tmpAppend[reslen++] = SEP; 1724 } 1725 1726 firstValue = FALSE; 1727 } 1728 1729 len = (int32_t)uprv_strlen(pPriv); 1730 if (reslen < capacity) { 1731 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); 1732 } 1733 reslen += len; 1734 } 1735 } 1736 /* reset private use starting position */ 1737 pPriv = NULL; 1738 } else if (pPriv == NULL) { 1739 pPriv = p; 1740 } 1741 p++; 1742 } 1743 1744 if (U_FAILURE(*status)) { 1745 return 0; 1746 } 1747 } 1748 1749 if (U_SUCCESS(*status)) { 1750 len = reslen; 1751 if (reslen < capacity) { 1752 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); 1753 } 1754 } 1755 1756 u_terminateChars(appendAt, capacity, reslen, status); 1757 1758 return reslen; 1759 } 1760 1761 /* 1762 * ------------------------------------------------- 1763 * 1764 * ultag_ functions 1765 * 1766 * ------------------------------------------------- 1767 */ 1768 1769 /* Bit flags used by the parser */ 1770 #define LANG 0x0001 1771 #define EXTL 0x0002 1772 #define SCRT 0x0004 1773 #define REGN 0x0008 1774 #define VART 0x0010 1775 #define EXTS 0x0020 1776 #define EXTV 0x0040 1777 #define PRIV 0x0080 1778 1779 /** 1780 * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing 1781 * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ ) 1782 * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above. 
1783 */ 1784 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) 1785 #pragma optimize( "", off ) 1786 #endif 1787 1788 static ULanguageTag* 1789 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { 1790 ULanguageTag *t; 1791 char *tagBuf; 1792 int16_t next; 1793 char *pSubtag, *pNext, *pLastGoodPosition; 1794 int32_t subtagLen; 1795 int32_t extlangIdx; 1796 ExtensionListEntry *pExtension; 1797 char *pExtValueSubtag, *pExtValueSubtagEnd; 1798 int32_t i; 1799 UBool privateuseVar = FALSE; 1800 int32_t grandfatheredLen = 0; 1801 1802 if (parsedLen != NULL) { 1803 *parsedLen = 0; 1804 } 1805 1806 if (U_FAILURE(*status)) { 1807 return NULL; 1808 } 1809 1810 if (tagLen < 0) { 1811 tagLen = (int32_t)uprv_strlen(tag); 1812 } 1813 1814 /* copy the entire string */ 1815 tagBuf = (char*)uprv_malloc(tagLen + 1); 1816 if (tagBuf == NULL) { 1817 *status = U_MEMORY_ALLOCATION_ERROR; 1818 return NULL; 1819 } 1820 uprv_memcpy(tagBuf, tag, tagLen); 1821 *(tagBuf + tagLen) = 0; 1822 1823 /* create a ULanguageTag */ 1824 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); 1825 if (t == NULL) { 1826 uprv_free(tagBuf); 1827 *status = U_MEMORY_ALLOCATION_ERROR; 1828 return NULL; 1829 } 1830 _initializeULanguageTag(t); 1831 t->buf = tagBuf; 1832 1833 if (tagLen < MINLEN) { 1834 /* the input tag is too short - return empty ULanguageTag */ 1835 return t; 1836 } 1837 1838 /* check if the tag is grandfathered */ 1839 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { 1840 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { 1841 int32_t newTagLength; 1842 1843 grandfatheredLen = tagLen; /* back up for output parsedLen */ 1844 newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1])); 1845 if (tagLen < newTagLength) { 1846 uprv_free(tagBuf); 1847 tagBuf = (char*)uprv_malloc(newTagLength + 1); 1848 if (tagBuf == NULL) { 1849 *status = U_MEMORY_ALLOCATION_ERROR; 1850 ultag_close(t); 1851 return NULL; 
1852 } 1853 t->buf = tagBuf; 1854 tagLen = newTagLength; 1855 } 1856 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); 1857 break; 1858 } 1859 } 1860 1861 /* 1862 * langtag = language 1863 * ["-" script] 1864 * ["-" region] 1865 * *("-" variant) 1866 * *("-" extension) 1867 * ["-" privateuse] 1868 */ 1869 1870 next = LANG | PRIV; 1871 pNext = pLastGoodPosition = tagBuf; 1872 extlangIdx = 0; 1873 pExtension = NULL; 1874 pExtValueSubtag = NULL; 1875 pExtValueSubtagEnd = NULL; 1876 1877 while (pNext) { 1878 char *pSep; 1879 1880 pSubtag = pNext; 1881 1882 /* locate next separator char */ 1883 pSep = pSubtag; 1884 while (*pSep) { 1885 if (*pSep == SEP) { 1886 break; 1887 } 1888 pSep++; 1889 } 1890 if (*pSep == 0) { 1891 /* last subtag */ 1892 pNext = NULL; 1893 } else { 1894 pNext = pSep + 1; 1895 } 1896 subtagLen = (int32_t)(pSep - pSubtag); 1897 1898 if (next & LANG) { 1899 if (_isLanguageSubtag(pSubtag, subtagLen)) { 1900 *pSep = 0; /* terminate */ 1901 t->language = T_CString_toLowerCase(pSubtag); 1902 1903 pLastGoodPosition = pSep; 1904 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1905 continue; 1906 } 1907 } 1908 if (next & EXTL) { 1909 if (_isExtlangSubtag(pSubtag, subtagLen)) { 1910 *pSep = 0; 1911 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); 1912 1913 pLastGoodPosition = pSep; 1914 if (extlangIdx < 3) { 1915 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 1916 } else { 1917 next = SCRT | REGN | VART | EXTS | PRIV; 1918 } 1919 continue; 1920 } 1921 } 1922 if (next & SCRT) { 1923 if (_isScriptSubtag(pSubtag, subtagLen)) { 1924 char *p = pSubtag; 1925 1926 *pSep = 0; 1927 1928 /* to title case */ 1929 *p = uprv_toupper(*p); 1930 p++; 1931 for (; *p; p++) { 1932 *p = uprv_tolower(*p); 1933 } 1934 1935 t->script = pSubtag; 1936 1937 pLastGoodPosition = pSep; 1938 next = REGN | VART | EXTS | PRIV; 1939 continue; 1940 } 1941 } 1942 if (next & REGN) { 1943 if (_isRegionSubtag(pSubtag, subtagLen)) { 1944 *pSep = 0; 1945 t->region = 
T_CString_toUpperCase(pSubtag); 1946 1947 pLastGoodPosition = pSep; 1948 next = VART | EXTS | PRIV; 1949 continue; 1950 } 1951 } 1952 if (next & VART) { 1953 if (_isVariantSubtag(pSubtag, subtagLen) || 1954 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { 1955 VariantListEntry *var; 1956 UBool isAdded; 1957 1958 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 1959 if (var == NULL) { 1960 *status = U_MEMORY_ALLOCATION_ERROR; 1961 goto error; 1962 } 1963 *pSep = 0; 1964 var->variant = T_CString_toUpperCase(pSubtag); 1965 isAdded = _addVariantToList(&(t->variants), var); 1966 if (!isAdded) { 1967 /* duplicated variant entry */ 1968 uprv_free(var); 1969 break; 1970 } 1971 pLastGoodPosition = pSep; 1972 next = VART | EXTS | PRIV; 1973 continue; 1974 } 1975 } 1976 if (next & EXTS) { 1977 if (_isExtensionSingleton(pSubtag, subtagLen)) { 1978 if (pExtension != NULL) { 1979 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 1980 /* the previous extension is incomplete */ 1981 uprv_free(pExtension); 1982 pExtension = NULL; 1983 break; 1984 } 1985 1986 /* terminate the previous extension value */ 1987 *pExtValueSubtagEnd = 0; 1988 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 1989 1990 /* insert the extension to the list */ 1991 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 1992 pLastGoodPosition = pExtValueSubtagEnd; 1993 } else { 1994 /* stop parsing here */ 1995 uprv_free(pExtension); 1996 pExtension = NULL; 1997 break; 1998 } 1999 } 2000 2001 /* create a new extension */ 2002 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 2003 if (pExtension == NULL) { 2004 *status = U_MEMORY_ALLOCATION_ERROR; 2005 goto error; 2006 } 2007 *pSep = 0; 2008 pExtension->key = T_CString_toLowerCase(pSubtag); 2009 pExtension->value = NULL; /* will be set later */ 2010 2011 /* 2012 * reset the start and the end location of extension value 2013 * subtags for this extension 2014 */ 2015 
pExtValueSubtag = NULL; 2016 pExtValueSubtagEnd = NULL; 2017 2018 next = EXTV; 2019 continue; 2020 } 2021 } 2022 if (next & EXTV) { 2023 if (_isExtensionSubtag(pSubtag, subtagLen)) { 2024 if (pExtValueSubtag == NULL) { 2025 /* if the start postion of this extension's value is not yet, 2026 this one is the first value subtag */ 2027 pExtValueSubtag = pSubtag; 2028 } 2029 2030 /* Mark the end of this subtag */ 2031 pExtValueSubtagEnd = pSep; 2032 next = EXTS | EXTV | PRIV; 2033 2034 continue; 2035 } 2036 } 2037 if (next & PRIV) { 2038 if (uprv_tolower(*pSubtag) == PRIVATEUSE) { 2039 char *pPrivuseVal; 2040 2041 if (pExtension != NULL) { 2042 /* Process the last extension */ 2043 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2044 /* the previous extension is incomplete */ 2045 uprv_free(pExtension); 2046 pExtension = NULL; 2047 break; 2048 } else { 2049 /* terminate the previous extension value */ 2050 *pExtValueSubtagEnd = 0; 2051 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2052 2053 /* insert the extension to the list */ 2054 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2055 pLastGoodPosition = pExtValueSubtagEnd; 2056 pExtension = NULL; 2057 } else { 2058 /* stop parsing here */ 2059 uprv_free(pExtension); 2060 pExtension = NULL; 2061 break; 2062 } 2063 } 2064 } 2065 2066 /* The rest of part will be private use value subtags */ 2067 if (pNext == NULL) { 2068 /* empty private use subtag */ 2069 break; 2070 } 2071 /* back up the private use value start position */ 2072 pPrivuseVal = pNext; 2073 2074 /* validate private use value subtags */ 2075 while (pNext) { 2076 pSubtag = pNext; 2077 pSep = pSubtag; 2078 while (*pSep) { 2079 if (*pSep == SEP) { 2080 break; 2081 } 2082 pSep++; 2083 } 2084 if (*pSep == 0) { 2085 /* last subtag */ 2086 pNext = NULL; 2087 } else { 2088 pNext = pSep + 1; 2089 } 2090 subtagLen = (int32_t)(pSep - pSubtag); 2091 2092 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, 
uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { 2093 *pSep = 0; 2094 next = VART; 2095 privateuseVar = TRUE; 2096 break; 2097 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { 2098 pLastGoodPosition = pSep; 2099 } else { 2100 break; 2101 } 2102 } 2103 2104 if (next == VART) { 2105 continue; 2106 } 2107 2108 if (pLastGoodPosition - pPrivuseVal > 0) { 2109 *pLastGoodPosition = 0; 2110 t->privateuse = T_CString_toLowerCase(pPrivuseVal); 2111 } 2112 /* No more subtags, exiting the parse loop */ 2113 break; 2114 } 2115 break; 2116 } 2117 2118 /* If we fell through here, it means this subtag is illegal - quit parsing */ 2119 break; 2120 } 2121 2122 if (pExtension != NULL) { 2123 /* Process the last extension */ 2124 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2125 /* the previous extension is incomplete */ 2126 uprv_free(pExtension); 2127 } else { 2128 /* terminate the previous extension value */ 2129 *pExtValueSubtagEnd = 0; 2130 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2131 /* insert the extension to the list */ 2132 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2133 pLastGoodPosition = pExtValueSubtagEnd; 2134 } else { 2135 uprv_free(pExtension); 2136 } 2137 } 2138 } 2139 2140 if (parsedLen != NULL) { 2141 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); 2142 } 2143 2144 return t; 2145 2146 error: 2147 ultag_close(t); 2148 return NULL; 2149 } 2150 2151 /** 2152 * Ticket #12705 - Turn optimization back on. 
2153 */ 2154 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) 2155 #pragma optimize( "", on ) 2156 #endif 2157 2158 static void 2159 ultag_close(ULanguageTag* langtag) { 2160 2161 if (langtag == NULL) { 2162 return; 2163 } 2164 2165 uprv_free(langtag->buf); 2166 2167 if (langtag->variants) { 2168 VariantListEntry *curVar = langtag->variants; 2169 while (curVar) { 2170 VariantListEntry *nextVar = curVar->next; 2171 uprv_free(curVar); 2172 curVar = nextVar; 2173 } 2174 } 2175 2176 if (langtag->extensions) { 2177 ExtensionListEntry *curExt = langtag->extensions; 2178 while (curExt) { 2179 ExtensionListEntry *nextExt = curExt->next; 2180 uprv_free(curExt); 2181 curExt = nextExt; 2182 } 2183 } 2184 2185 uprv_free(langtag); 2186 } 2187 2188 static const char* 2189 ultag_getLanguage(const ULanguageTag* langtag) { 2190 return langtag->language; 2191 } 2192 2193 #if 0 2194 static const char* 2195 ultag_getJDKLanguage(const ULanguageTag* langtag) { 2196 int32_t i; 2197 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 2198 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { 2199 return DEPRECATEDLANGS[i + 1]; 2200 } 2201 } 2202 return langtag->language; 2203 } 2204 #endif 2205 2206 static const char* 2207 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { 2208 if (idx >= 0 && idx < MAXEXTLANG) { 2209 return langtag->extlang[idx]; 2210 } 2211 return NULL; 2212 } 2213 2214 static int32_t 2215 ultag_getExtlangSize(const ULanguageTag* langtag) { 2216 int32_t size = 0; 2217 int32_t i; 2218 for (i = 0; i < MAXEXTLANG; i++) { 2219 if (langtag->extlang[i]) { 2220 size++; 2221 } 2222 } 2223 return size; 2224 } 2225 2226 static const char* 2227 ultag_getScript(const ULanguageTag* langtag) { 2228 return langtag->script; 2229 } 2230 2231 static const char* 2232 ultag_getRegion(const ULanguageTag* langtag) { 2233 return langtag->region; 2234 } 2235 2236 static const char* 2237 
ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { 2238 const char *var = NULL; 2239 VariantListEntry *cur = langtag->variants; 2240 int32_t i = 0; 2241 while (cur) { 2242 if (i == idx) { 2243 var = cur->variant; 2244 break; 2245 } 2246 cur = cur->next; 2247 i++; 2248 } 2249 return var; 2250 } 2251 2252 static int32_t 2253 ultag_getVariantsSize(const ULanguageTag* langtag) { 2254 int32_t size = 0; 2255 VariantListEntry *cur = langtag->variants; 2256 while (TRUE) { 2257 if (cur == NULL) { 2258 break; 2259 } 2260 size++; 2261 cur = cur->next; 2262 } 2263 return size; 2264 } 2265 2266 static const char* 2267 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { 2268 const char *key = NULL; 2269 ExtensionListEntry *cur = langtag->extensions; 2270 int32_t i = 0; 2271 while (cur) { 2272 if (i == idx) { 2273 key = cur->key; 2274 break; 2275 } 2276 cur = cur->next; 2277 i++; 2278 } 2279 return key; 2280 } 2281 2282 static const char* 2283 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { 2284 const char *val = NULL; 2285 ExtensionListEntry *cur = langtag->extensions; 2286 int32_t i = 0; 2287 while (cur) { 2288 if (i == idx) { 2289 val = cur->value; 2290 break; 2291 } 2292 cur = cur->next; 2293 i++; 2294 } 2295 return val; 2296 } 2297 2298 static int32_t 2299 ultag_getExtensionsSize(const ULanguageTag* langtag) { 2300 int32_t size = 0; 2301 ExtensionListEntry *cur = langtag->extensions; 2302 while (TRUE) { 2303 if (cur == NULL) { 2304 break; 2305 } 2306 size++; 2307 cur = cur->next; 2308 } 2309 return size; 2310 } 2311 2312 static const char* 2313 ultag_getPrivateUse(const ULanguageTag* langtag) { 2314 return langtag->privateuse; 2315 } 2316 2317 #if 0 2318 static const char* 2319 ultag_getGrandfathered(const ULanguageTag* langtag) { 2320 return langtag->grandfathered; 2321 } 2322 #endif 2323 2324 2325 /* 2326 * ------------------------------------------------- 2327 * 2328 * Locale/BCP47 conversion APIs, exposed as uloc_* 2329 * 2330 
* ------------------------------------------------- 2331 */ 2332 U_CAPI int32_t U_EXPORT2 2333 uloc_toLanguageTag(const char* localeID, 2334 char* langtag, 2335 int32_t langtagCapacity, 2336 UBool strict, 2337 UErrorCode* status) { 2338 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ 2339 char canonical[256]; 2340 int32_t reslen = 0; 2341 UErrorCode tmpStatus = U_ZERO_ERROR; 2342 UBool hadPosix = FALSE; 2343 const char* pKeywordStart; 2344 2345 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ 2346 canonical[0] = 0; 2347 if (uprv_strlen(localeID) > 0) { 2348 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); 2349 if (tmpStatus != U_ZERO_ERROR) { 2350 *status = U_ILLEGAL_ARGUMENT_ERROR; 2351 return 0; 2352 } 2353 } 2354 2355 /* For handling special case - private use only tag */ 2356 pKeywordStart = locale_getKeywordsStart(canonical); 2357 if (pKeywordStart == canonical) { 2358 UEnumeration *kwdEnum; 2359 int kwdCnt = 0; 2360 UBool done = FALSE; 2361 2362 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); 2363 if (kwdEnum != NULL) { 2364 kwdCnt = uenum_count(kwdEnum, &tmpStatus); 2365 if (kwdCnt == 1) { 2366 const char *key; 2367 int32_t len = 0; 2368 2369 key = uenum_next(kwdEnum, &len, &tmpStatus); 2370 if (len == 1 && *key == PRIVATEUSE) { 2371 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2372 buf[0] = PRIVATEUSE; 2373 buf[1] = SEP; 2374 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); 2375 if (U_SUCCESS(tmpStatus)) { 2376 if (_isPrivateuseValueSubtags(&buf[2], len)) { 2377 /* return private use only tag */ 2378 reslen = len + 2; 2379 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); 2380 u_terminateChars(langtag, langtagCapacity, reslen, status); 2381 done = TRUE; 2382 } else if (strict) { 2383 *status = U_ILLEGAL_ARGUMENT_ERROR; 2384 done = TRUE; 2385 } 2386 /* if not strict mode, then "und" will be returned */ 2387 } else { 2388 
*status = U_ILLEGAL_ARGUMENT_ERROR; 2389 done = TRUE; 2390 } 2391 } 2392 } 2393 uenum_close(kwdEnum); 2394 if (done) { 2395 return reslen; 2396 } 2397 } 2398 } 2399 2400 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); 2401 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2402 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2403 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); 2404 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2405 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2406 2407 return reslen; 2408 } 2409 2410 2411 U_CAPI int32_t U_EXPORT2 2412 uloc_forLanguageTag(const char* langtag, 2413 char* localeID, 2414 int32_t localeIDCapacity, 2415 int32_t* parsedLength, 2416 UErrorCode* status) { 2417 ULanguageTag *lt; 2418 int32_t reslen = 0; 2419 const char *subtag, *p; 2420 int32_t len; 2421 int32_t i, n; 2422 UBool noRegion = TRUE; 2423 2424 lt = ultag_parse(langtag, -1, parsedLength, status); 2425 if (U_FAILURE(*status)) { 2426 return 0; 2427 } 2428 2429 /* language */ 2430 subtag = ultag_getExtlangSize(lt) > 0 ? 
ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); 2431 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { 2432 len = (int32_t)uprv_strlen(subtag); 2433 if (len > 0) { 2434 if (reslen < localeIDCapacity) { 2435 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); 2436 } 2437 reslen += len; 2438 } 2439 } 2440 2441 /* script */ 2442 subtag = ultag_getScript(lt); 2443 len = (int32_t)uprv_strlen(subtag); 2444 if (len > 0) { 2445 if (reslen < localeIDCapacity) { 2446 *(localeID + reslen) = LOCALE_SEP; 2447 } 2448 reslen++; 2449 2450 /* write out the script in title case */ 2451 p = subtag; 2452 while (*p) { 2453 if (reslen < localeIDCapacity) { 2454 if (p == subtag) { 2455 *(localeID + reslen) = uprv_toupper(*p); 2456 } else { 2457 *(localeID + reslen) = *p; 2458 } 2459 } 2460 reslen++; 2461 p++; 2462 } 2463 } 2464 2465 /* region */ 2466 subtag = ultag_getRegion(lt); 2467 len = (int32_t)uprv_strlen(subtag); 2468 if (len > 0) { 2469 if (reslen < localeIDCapacity) { 2470 *(localeID + reslen) = LOCALE_SEP; 2471 } 2472 reslen++; 2473 /* write out the retion in upper case */ 2474 p = subtag; 2475 while (*p) { 2476 if (reslen < localeIDCapacity) { 2477 *(localeID + reslen) = uprv_toupper(*p); 2478 } 2479 reslen++; 2480 p++; 2481 } 2482 noRegion = FALSE; 2483 } 2484 2485 /* variants */ 2486 n = ultag_getVariantsSize(lt); 2487 if (n > 0) { 2488 if (noRegion) { 2489 if (reslen < localeIDCapacity) { 2490 *(localeID + reslen) = LOCALE_SEP; 2491 } 2492 reslen++; 2493 } 2494 2495 for (i = 0; i < n; i++) { 2496 subtag = ultag_getVariant(lt, i); 2497 if (reslen < localeIDCapacity) { 2498 *(localeID + reslen) = LOCALE_SEP; 2499 } 2500 reslen++; 2501 /* write out the variant in upper case */ 2502 p = subtag; 2503 while (*p) { 2504 if (reslen < localeIDCapacity) { 2505 *(localeID + reslen) = uprv_toupper(*p); 2506 } 2507 reslen++; 2508 p++; 2509 } 2510 } 2511 } 2512 2513 /* keywords */ 2514 n = ultag_getExtensionsSize(lt); 2515 subtag = ultag_getPrivateUse(lt); 
2516 if (n > 0 || uprv_strlen(subtag) > 0) { 2517 if (reslen == 0 && n > 0) { 2518 /* need a language */ 2519 if (reslen < localeIDCapacity) { 2520 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); 2521 } 2522 reslen += LANG_UND_LEN; 2523 } 2524 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); 2525 reslen += len; 2526 } 2527 2528 ultag_close(lt); 2529 return u_terminateChars(localeID, localeIDCapacity, reslen, status); 2530 } 2531 2532 2533