/*
**********************************************************************
*   Copyright (C) 2009-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uassert.h"

/* Number of elements in a true array (not valid for pointers). */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

/* struct holding a single variant (node of a singly linked list) */
typedef struct VariantListEntry {
    const char              *variant;
    struct VariantListEntry *next;
} VariantListEntry;

/* struct holding a single attribute value (node of a singly linked list) */
typedef struct AttributeListEntry {
    const char                *attribute;
    struct AttributeListEntry *next;
} AttributeListEntry;

/* struct holding a single extension: key/value pair (node of a singly linked list) */
typedef struct ExtensionListEntry {
    const char                  *key;
    const char                  *value;
    struct ExtensionListEntry   *next;
} ExtensionListEntry;

/* maximum number of extlang subtags stored in a ULanguageTag */
#define MAXEXTLANG 3
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;
    const char          *extlang[MAXEXTLANG];
    const char          *script;
    const char          *region;
    VariantListEntry    *variants;      /* head of the variant list, or NULL */
    ExtensionListEntry  *extensions;    /* head of the extension list, or NULL */
    const char          *privateuse;
    const char          *grandfathered;
} ULanguageTag;

#define MINLEN 2
#define SEP '-'             /* subtag separator in a language tag */
#define PRIVATEUSE 'x'      /* private use singleton */
#define LDMLEXT 'u'         /* singleton used for LDML (Unicode locale) extensions */

/* separators used in locale IDs */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* length of LANG_UND, excluding the terminating NUL */
#define LANG_UND_LEN 3

/*
 * Pairs of (grandfathered tag, preferred replacement).
 * The table is terminated by a NULL, NULL pair.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};

/*
 * Pairs of (deprecated language code, replacement code).
 * There is no terminator entry; iterate with LENGTHOF() in steps of two.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};

/*
* -------------------------------------------------
*
* These ultag_ functions may be exposed as APIs later
*
* -------------------------------------------------
*/

static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

static void
ultag_close(ULanguageTag* langtag);

static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const
ULanguageTag* langtag); 150 151 static const char* 152 ultag_getVariant(const ULanguageTag* langtag, int32_t idx); 153 154 static int32_t 155 ultag_getVariantsSize(const ULanguageTag* langtag); 156 157 static const char* 158 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); 159 160 static const char* 161 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); 162 163 static int32_t 164 ultag_getExtensionsSize(const ULanguageTag* langtag); 165 166 static const char* 167 ultag_getPrivateUse(const ULanguageTag* langtag); 168 169 #if 0 170 static const char* 171 ultag_getGrandfathered(const ULanguageTag* langtag); 172 #endif 173 174 /* 175 * ------------------------------------------------- 176 * 177 * Language subtag syntax validation functions 178 * 179 * ------------------------------------------------- 180 */ 181 182 static UBool 183 _isAlphaString(const char* s, int32_t len) { 184 int32_t i; 185 for (i = 0; i < len; i++) { 186 if (!ISALPHA(*(s + i))) { 187 return FALSE; 188 } 189 } 190 return TRUE; 191 } 192 193 static UBool 194 _isNumericString(const char* s, int32_t len) { 195 int32_t i; 196 for (i = 0; i < len; i++) { 197 if (!ISNUMERIC(*(s + i))) { 198 return FALSE; 199 } 200 } 201 return TRUE; 202 } 203 204 static UBool 205 _isAlphaNumericString(const char* s, int32_t len) { 206 int32_t i; 207 for (i = 0; i < len; i++) { 208 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { 209 return FALSE; 210 } 211 } 212 return TRUE; 213 } 214 215 static UBool 216 _isLanguageSubtag(const char* s, int32_t len) { 217 /* 218 * language = 2*3ALPHA ; shortest ISO 639 code 219 * ["-" extlang] ; sometimes followed by 220 * ; extended language subtags 221 * / 4ALPHA ; or reserved for future use 222 * / 5*8ALPHA ; or registered language subtag 223 */ 224 if (len < 0) { 225 len = (int32_t)uprv_strlen(s); 226 } 227 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { 228 return TRUE; 229 } 230 return FALSE; 231 } 232 233 static UBool 234 
_isExtlangSubtag(const char* s, int32_t len) { 235 /* 236 * extlang = 3ALPHA ; selected ISO 639 codes 237 * *2("-" 3ALPHA) ; permanently reserved 238 */ 239 if (len < 0) { 240 len = (int32_t)uprv_strlen(s); 241 } 242 if (len == 3 && _isAlphaString(s, len)) { 243 return TRUE; 244 } 245 return FALSE; 246 } 247 248 static UBool 249 _isScriptSubtag(const char* s, int32_t len) { 250 /* 251 * script = 4ALPHA ; ISO 15924 code 252 */ 253 if (len < 0) { 254 len = (int32_t)uprv_strlen(s); 255 } 256 if (len == 4 && _isAlphaString(s, len)) { 257 return TRUE; 258 } 259 return FALSE; 260 } 261 262 static UBool 263 _isRegionSubtag(const char* s, int32_t len) { 264 /* 265 * region = 2ALPHA ; ISO 3166-1 code 266 * / 3DIGIT ; UN M.49 code 267 */ 268 if (len < 0) { 269 len = (int32_t)uprv_strlen(s); 270 } 271 if (len == 2 && _isAlphaString(s, len)) { 272 return TRUE; 273 } 274 if (len == 3 && _isNumericString(s, len)) { 275 return TRUE; 276 } 277 return FALSE; 278 } 279 280 static UBool 281 _isVariantSubtag(const char* s, int32_t len) { 282 /* 283 * variant = 5*8alphanum ; registered variants 284 * / (DIGIT 3alphanum) 285 */ 286 if (len < 0) { 287 len = (int32_t)uprv_strlen(s); 288 } 289 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { 290 return TRUE; 291 } 292 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { 293 return TRUE; 294 } 295 return FALSE; 296 } 297 298 static UBool 299 _isPrivateuseVariantSubtag(const char* s, int32_t len) { 300 /* 301 * variant = 1*8alphanum ; registered variants 302 * / (DIGIT 3alphanum) 303 */ 304 if (len < 0) { 305 len = (int32_t)uprv_strlen(s); 306 } 307 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 308 return TRUE; 309 } 310 return FALSE; 311 } 312 313 static UBool 314 _isExtensionSingleton(const char* s, int32_t len) { 315 /* 316 * extension = singleton 1*("-" (2*8alphanum)) 317 */ 318 if (len < 0) { 319 len = (int32_t)uprv_strlen(s); 320 } 321 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != 
PRIVATEUSE)) { 322 return TRUE; 323 } 324 return FALSE; 325 } 326 327 static UBool 328 _isExtensionSubtag(const char* s, int32_t len) { 329 /* 330 * extension = singleton 1*("-" (2*8alphanum)) 331 */ 332 if (len < 0) { 333 len = (int32_t)uprv_strlen(s); 334 } 335 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { 336 return TRUE; 337 } 338 return FALSE; 339 } 340 341 static UBool 342 _isExtensionSubtags(const char* s, int32_t len) { 343 const char *p = s; 344 const char *pSubtag = NULL; 345 346 if (len < 0) { 347 len = (int32_t)uprv_strlen(s); 348 } 349 350 while ((p - s) < len) { 351 if (*p == SEP) { 352 if (pSubtag == NULL) { 353 return FALSE; 354 } 355 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { 356 return FALSE; 357 } 358 pSubtag = NULL; 359 } else if (pSubtag == NULL) { 360 pSubtag = p; 361 } 362 p++; 363 } 364 if (pSubtag == NULL) { 365 return FALSE; 366 } 367 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); 368 } 369 370 static UBool 371 _isPrivateuseValueSubtag(const char* s, int32_t len) { 372 /* 373 * privateuse = "x" 1*("-" (1*8alphanum)) 374 */ 375 if (len < 0) { 376 len = (int32_t)uprv_strlen(s); 377 } 378 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 379 return TRUE; 380 } 381 return FALSE; 382 } 383 384 static UBool 385 _isPrivateuseValueSubtags(const char* s, int32_t len) { 386 const char *p = s; 387 const char *pSubtag = NULL; 388 389 if (len < 0) { 390 len = (int32_t)uprv_strlen(s); 391 } 392 393 while ((p - s) < len) { 394 if (*p == SEP) { 395 if (pSubtag == NULL) { 396 return FALSE; 397 } 398 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { 399 return FALSE; 400 } 401 pSubtag = NULL; 402 } else if (pSubtag == NULL) { 403 pSubtag = p; 404 } 405 p++; 406 } 407 if (pSubtag == NULL) { 408 return FALSE; 409 } 410 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); 411 } 412 413 static UBool 414 _isLDMLKey(const char* s, int32_t len) { 415 if (len < 0) { 416 len = 
(int32_t)uprv_strlen(s); 417 } 418 if (len == 2 && _isAlphaNumericString(s, len)) { 419 return TRUE; 420 } 421 return FALSE; 422 } 423 424 static UBool 425 _isLDMLType(const char* s, int32_t len) { 426 if (len < 0) { 427 len = (int32_t)uprv_strlen(s); 428 } 429 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) { 430 return TRUE; 431 } 432 return FALSE; 433 } 434 435 /* 436 * ------------------------------------------------- 437 * 438 * Helper functions 439 * 440 * ------------------------------------------------- 441 */ 442 443 static UBool 444 _addVariantToList(VariantListEntry **first, VariantListEntry *var) { 445 UBool bAdded = TRUE; 446 447 if (*first == NULL) { 448 var->next = NULL; 449 *first = var; 450 } else { 451 VariantListEntry *prev, *cur; 452 int32_t cmp; 453 454 /* variants order should be preserved */ 455 prev = NULL; 456 cur = *first; 457 while (TRUE) { 458 if (cur == NULL) { 459 prev->next = var; 460 var->next = NULL; 461 break; 462 } 463 464 /* Checking for duplicate variant */ 465 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); 466 if (cmp == 0) { 467 /* duplicated variant */ 468 bAdded = FALSE; 469 break; 470 } 471 prev = cur; 472 cur = cur->next; 473 } 474 } 475 476 return bAdded; 477 } 478 479 static UBool 480 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { 481 UBool bAdded = TRUE; 482 483 if (*first == NULL) { 484 attr->next = NULL; 485 *first = attr; 486 } else { 487 AttributeListEntry *prev, *cur; 488 int32_t cmp; 489 490 /* reorder variants in alphabetical order */ 491 prev = NULL; 492 cur = *first; 493 while (TRUE) { 494 if (cur == NULL) { 495 prev->next = attr; 496 attr->next = NULL; 497 break; 498 } 499 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); 500 if (cmp < 0) { 501 if (prev == NULL) { 502 *first = attr; 503 } else { 504 prev->next = attr; 505 } 506 attr->next = cur; 507 break; 508 } 509 if (cmp == 0) { 510 /* duplicated variant */ 511 bAdded = FALSE; 512 
break;
            }
            prev = cur;
            cur = cur->next;
        }
    }

    return bAdded;
}


/*
 * Inserts ext into the list headed by *first, keeping the list ordered by key.
 * Returns FALSE without inserting when the comparison reports an equal key
 * (duplicate); TRUE otherwise.
 *
 * When localeToBCP is FALSE, keys are compared as plain strings.
 * When localeToBCP is TRUE:
 *   - two single-letter keys compare by letter, except that the private use
 *     key ('x') always sorts after any other single-letter key;
 *   - a single-letter key compared with a longer key is compared as if the
 *     longer key were the LDML extension singleton ('u'), so a 'u' key and
 *     any longer key are reported as equal;
 *   - two longer keys are compared as plain strings.
 */
static UBool
_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
    UBool bAdded = TRUE;

    if (*first == NULL) {
        ext->next = NULL;
        *first = ext;
    } else {
        ExtensionListEntry *prev, *cur;
        int32_t cmp;

        /* keep extensions in key order */
        prev = NULL;
        cur = *first;
        while (TRUE) {
            if (cur == NULL) {
                /* reached the end: append */
                prev->next = ext;
                ext->next = NULL;
                break;
            }
            if (localeToBCP) {
                /* special handling for locale to bcp conversion */
                int32_t len, curlen;

                len = (int32_t)uprv_strlen(ext->key);
                curlen = (int32_t)uprv_strlen(cur->key);

                if (len == 1 && curlen == 1) {
                    if (*(ext->key) == *(cur->key)) {
                        cmp = 0;
                    } else if (*(ext->key) == PRIVATEUSE) {
                        cmp = 1;
                    } else if (*(cur->key) == PRIVATEUSE) {
                        cmp = -1;
                    } else {
                        cmp = *(ext->key) - *(cur->key);
                    }
                } else if (len == 1) {
                    /* cur->key is longer than one character: treat it as 'u' */
                    cmp = *(ext->key) - LDMLEXT;
                } else if (curlen == 1) {
                    /* ext->key is longer than one character: treat it as 'u' */
                    cmp = LDMLEXT - *(cur->key);
                } else {
                    cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
                }
            } else {
                cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
            }
            if (cmp < 0) {
                /* insert in front of cur */
                if (prev == NULL) {
                    *first = ext;
                } else {
                    prev->next = ext;
                }
                ext->next = cur;
                break;
            }
            if (cmp == 0) {
                /* duplicated extension key */
                bAdded = FALSE;
                break;
            }
            prev = cur;
            cur = cur->next;
        }
    }

    return bAdded;
}

/*
 * Resets every field of langtag: string fields point at EMPTY, while the
 * buffer, the extlang slots and the list heads are set to NULL.
 */
static void
_initializeULanguageTag(ULanguageTag* langtag) {
    int32_t i;

    langtag->buf = NULL;

    langtag->language = EMPTY;
    for (i = 0; i < MAXEXTLANG; i++) {
        langtag->extlang[i] = NULL;
    }

    langtag->script = EMPTY;
    langtag->region = EMPTY;

    langtag->variants = NULL;
    langtag->extensions = NULL;

    langtag->grandfathered
= EMPTY; 610 langtag->privateuse = EMPTY; 611 } 612 613 #define KEYTYPEDATA "keyTypeData" 614 #define KEYMAP "keyMap" 615 #define TYPEMAP "typeMap" 616 #define TYPEALIAS "typeAlias" 617 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */ 618 #define MAX_LDML_KEY_LEN 22 619 #define MAX_LDML_TYPE_LEN 32 620 621 static int32_t 622 _ldmlKeyToBCP47(const char* key, int32_t keyLen, 623 char* bcpKey, int32_t bcpKeyCapacity, 624 UErrorCode *status) { 625 UResourceBundle *rb; 626 char keyBuf[MAX_LDML_KEY_LEN]; 627 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 628 int32_t resultLen = 0; 629 int32_t i; 630 UErrorCode tmpStatus = U_ZERO_ERROR; 631 const UChar *uBcpKey; 632 int32_t bcpKeyLen; 633 634 if (keyLen < 0) { 635 keyLen = (int32_t)uprv_strlen(key); 636 } 637 638 if (keyLen >= sizeof(keyBuf)) { 639 /* no known valid LDML key exceeding 21 */ 640 *status = U_ILLEGAL_ARGUMENT_ERROR; 641 return 0; 642 } 643 644 uprv_memcpy(keyBuf, key, keyLen); 645 keyBuf[keyLen] = 0; 646 647 /* to lower case */ 648 for (i = 0; i < keyLen; i++) { 649 keyBuf[i] = uprv_tolower(keyBuf[i]); 650 } 651 652 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 653 ures_getByKey(rb, KEYMAP, rb, status); 654 655 if (U_FAILURE(*status)) { 656 ures_close(rb); 657 return 0; 658 } 659 660 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus); 661 if (U_SUCCESS(tmpStatus)) { 662 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen); 663 bcpKeyBuf[bcpKeyLen] = 0; 664 resultLen = bcpKeyLen; 665 } else { 666 if (_isLDMLKey(key, keyLen)) { 667 uprv_memcpy(bcpKeyBuf, key, keyLen); 668 bcpKeyBuf[keyLen] = 0; 669 resultLen = keyLen; 670 } else { 671 /* mapping not availabe */ 672 *status = U_ILLEGAL_ARGUMENT_ERROR; 673 } 674 } 675 ures_close(rb); 676 677 if (U_FAILURE(*status)) { 678 return 0; 679 } 680 681 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity)); 682 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status); 683 } 684 685 static int32_t 686 _bcp47ToLDMLKey(const char* 
bcpKey, int32_t bcpKeyLen, 687 char* key, int32_t keyCapacity, 688 UErrorCode *status) { 689 UResourceBundle *rb; 690 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 691 int32_t resultLen = 0; 692 int32_t i; 693 const char *resKey = NULL; 694 UResourceBundle *mapData; 695 696 if (bcpKeyLen < 0) { 697 bcpKeyLen = (int32_t)uprv_strlen(bcpKey); 698 } 699 700 if (bcpKeyLen >= sizeof(bcpKeyBuf)) { 701 *status = U_ILLEGAL_ARGUMENT_ERROR; 702 return 0; 703 } 704 705 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen); 706 bcpKeyBuf[bcpKeyLen] = 0; 707 708 /* to lower case */ 709 for (i = 0; i < bcpKeyLen; i++) { 710 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]); 711 } 712 713 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 714 ures_getByKey(rb, KEYMAP, rb, status); 715 if (U_FAILURE(*status)) { 716 ures_close(rb); 717 return 0; 718 } 719 720 mapData = ures_getNextResource(rb, NULL, status); 721 while (U_SUCCESS(*status)) { 722 const UChar *uBcpKey; 723 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 724 int32_t tmpBcpKeyLen; 725 726 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status); 727 if (U_FAILURE(*status)) { 728 break; 729 } 730 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen); 731 tmpBcpKeyBuf[tmpBcpKeyLen] = 0; 732 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) { 733 /* found a matching BCP47 key */ 734 resKey = ures_getKey(mapData); 735 resultLen = (int32_t)uprv_strlen(resKey); 736 break; 737 } 738 if (!ures_hasNext(rb)) { 739 break; 740 } 741 ures_getNextResource(rb, mapData, status); 742 } 743 ures_close(mapData); 744 ures_close(rb); 745 746 if (U_FAILURE(*status)) { 747 return 0; 748 } 749 750 if (resKey == NULL) { 751 resKey = bcpKeyBuf; 752 resultLen = bcpKeyLen; 753 } 754 755 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity)); 756 return u_terminateChars(key, keyCapacity, resultLen, status); 757 } 758 759 static int32_t 760 _ldmlTypeToBCP47(const char* key, int32_t keyLen, 761 const char* type, int32_t typeLen, 762 char* bcpType, int32_t bcpTypeCapacity, 
763 UErrorCode *status) { 764 UResourceBundle *rb, *keyTypeData, *typeMapForKey; 765 char keyBuf[MAX_LDML_KEY_LEN]; 766 char typeBuf[MAX_LDML_TYPE_LEN]; 767 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 768 int32_t resultLen = 0; 769 int32_t i; 770 UErrorCode tmpStatus = U_ZERO_ERROR; 771 const UChar *uBcpType, *uCanonicalType; 772 int32_t bcpTypeLen, canonicalTypeLen; 773 UBool isTimezone = FALSE; 774 775 if (keyLen < 0) { 776 keyLen = (int32_t)uprv_strlen(key); 777 } 778 if (keyLen >= sizeof(keyBuf)) { 779 /* no known valid LDML key exceeding 21 */ 780 *status = U_ILLEGAL_ARGUMENT_ERROR; 781 return 0; 782 } 783 uprv_memcpy(keyBuf, key, keyLen); 784 keyBuf[keyLen] = 0; 785 786 /* to lower case */ 787 for (i = 0; i < keyLen; i++) { 788 keyBuf[i] = uprv_tolower(keyBuf[i]); 789 } 790 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 791 isTimezone = TRUE; 792 } 793 794 if (typeLen < 0) { 795 typeLen = (int32_t)uprv_strlen(type); 796 } 797 if (typeLen >= sizeof(typeBuf)) { 798 *status = U_ILLEGAL_ARGUMENT_ERROR; 799 return 0; 800 } 801 802 if (isTimezone) { 803 /* replace '/' with ':' */ 804 for (i = 0; i < typeLen; i++) { 805 if (*(type + i) == '/') { 806 typeBuf[i] = ':'; 807 } else { 808 typeBuf[i] = *(type + i); 809 } 810 } 811 typeBuf[typeLen] = 0; 812 type = &typeBuf[0]; 813 } 814 815 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status); 816 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status); 817 if (U_FAILURE(*status)) { 818 ures_close(rb); 819 ures_close(keyTypeData); 820 return 0; 821 } 822 823 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus); 824 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus); 825 if (U_SUCCESS(tmpStatus)) { 826 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 827 resultLen = bcpTypeLen; 828 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 829 /* is this type alias? 
*/ 830 tmpStatus = U_ZERO_ERROR; 831 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus); 832 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 833 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus); 834 if (U_SUCCESS(tmpStatus)) { 835 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen); 836 if (isTimezone) { 837 /* replace '/' with ':' */ 838 for (i = 0; i < canonicalTypeLen; i++) { 839 if (typeBuf[i] == '/') { 840 typeBuf[i] = ':'; 841 } 842 } 843 } 844 typeBuf[canonicalTypeLen] = 0; 845 846 /* look up the canonical type */ 847 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus); 848 if (U_SUCCESS(tmpStatus)) { 849 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 850 resultLen = bcpTypeLen; 851 } 852 } 853 if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 854 if (_isLDMLType(type, typeLen)) { 855 uprv_memcpy(bcpTypeBuf, type, typeLen); 856 resultLen = typeLen; 857 } else { 858 /* mapping not availabe */ 859 *status = U_ILLEGAL_ARGUMENT_ERROR; 860 } 861 } 862 } else { 863 *status = tmpStatus; 864 } 865 ures_close(rb); 866 ures_close(typeMapForKey); 867 ures_close(keyTypeData); 868 869 if (U_FAILURE(*status)) { 870 return 0; 871 } 872 873 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity)); 874 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status); 875 } 876 877 static int32_t 878 _bcp47ToLDMLType(const char* key, int32_t keyLen, 879 const char* bcpType, int32_t bcpTypeLen, 880 char* type, int32_t typeCapacity, 881 UErrorCode *status) { 882 UResourceBundle *rb; 883 char keyBuf[MAX_LDML_KEY_LEN]; 884 char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. 
buddhist-greg) */ 885 int32_t resultLen = 0; 886 int32_t i, typeSize; 887 const char *resType = NULL; 888 UResourceBundle *mapData; 889 UErrorCode tmpStatus = U_ZERO_ERROR; 890 int32_t copyLen; 891 892 if (keyLen < 0) { 893 keyLen = (int32_t)uprv_strlen(key); 894 } 895 896 if (keyLen >= sizeof(keyBuf)) { 897 /* no known valid LDML key exceeding 21 */ 898 *status = U_ILLEGAL_ARGUMENT_ERROR; 899 return 0; 900 } 901 uprv_memcpy(keyBuf, key, keyLen); 902 keyBuf[keyLen] = 0; 903 904 /* to lower case */ 905 for (i = 0; i < keyLen; i++) { 906 keyBuf[i] = uprv_tolower(keyBuf[i]); 907 } 908 909 910 if (bcpTypeLen < 0) { 911 bcpTypeLen = (int32_t)uprv_strlen(bcpType); 912 } 913 914 typeSize = 0; 915 for (i = 0; i < bcpTypeLen; i++) { 916 if (bcpType[i] == SEP) { 917 if (typeSize >= MAX_BCP47_SUBTAG_LEN) { 918 *status = U_ILLEGAL_ARGUMENT_ERROR; 919 return 0; 920 } 921 typeSize = 0; 922 } else { 923 typeSize++; 924 } 925 } 926 927 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen); 928 bcpTypeBuf[bcpTypeLen] = 0; 929 930 /* to lower case */ 931 for (i = 0; i < bcpTypeLen; i++) { 932 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]); 933 } 934 935 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 936 ures_getByKey(rb, TYPEMAP, rb, status); 937 if (U_FAILURE(*status)) { 938 ures_close(rb); 939 return 0; 940 } 941 942 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 943 mapData = ures_getNextResource(rb, NULL, &tmpStatus); 944 while (U_SUCCESS(tmpStatus)) { 945 const UChar *uBcpType; 946 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 947 int32_t tmpBcpTypeLen; 948 949 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus); 950 if (U_FAILURE(tmpStatus)) { 951 break; 952 } 953 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen); 954 tmpBcpTypeBuf[tmpBcpTypeLen] = 0; 955 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) { 956 /* found a matching BCP47 type */ 957 resType = ures_getKey(mapData); 958 resultLen = (int32_t)uprv_strlen(resType); 959 break; 960 } 961 if 
(!ures_hasNext(rb)) { 962 break; 963 } 964 ures_getNextResource(rb, mapData, &tmpStatus); 965 } 966 ures_close(mapData); 967 ures_close(rb); 968 969 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) { 970 *status = tmpStatus; 971 return 0; 972 } 973 974 if (resType == NULL) { 975 resType = bcpTypeBuf; 976 resultLen = bcpTypeLen; 977 } 978 979 copyLen = uprv_min(resultLen, typeCapacity); 980 uprv_memcpy(type, resType, copyLen); 981 982 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 983 for (i = 0; i < copyLen; i++) { 984 if (*(type + i) == ':') { 985 *(type + i) = '/'; 986 } 987 } 988 } 989 990 return u_terminateChars(type, typeCapacity, resultLen, status); 991 } 992 993 static int32_t 994 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 995 char buf[ULOC_LANG_CAPACITY]; 996 UErrorCode tmpStatus = U_ZERO_ERROR; 997 int32_t len, i; 998 int32_t reslen = 0; 999 1000 if (U_FAILURE(*status)) { 1001 return 0; 1002 } 1003 1004 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); 1005 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1006 if (strict) { 1007 *status = U_ILLEGAL_ARGUMENT_ERROR; 1008 return 0; 1009 } 1010 len = 0; 1011 } 1012 1013 /* Note: returned language code is in lower case letters */ 1014 1015 if (len == 0) { 1016 if (reslen < capacity) { 1017 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 1018 } 1019 reslen += LANG_UND_LEN; 1020 } else if (!_isLanguageSubtag(buf, len)) { 1021 /* invalid language code */ 1022 if (strict) { 1023 *status = U_ILLEGAL_ARGUMENT_ERROR; 1024 return 0; 1025 } 1026 if (reslen < capacity) { 1027 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 1028 } 1029 reslen += LANG_UND_LEN; 1030 } else { 1031 /* resolve deprecated */ 1032 for (i = 0; i < LENGTHOF(DEPRECATEDLANGS); i += 2) { 1033 if (uprv_compareInvCharsAsAscii(buf, 
DEPRECATEDLANGS[i]) == 0) { 1034 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); 1035 len = (int32_t)uprv_strlen(buf); 1036 break; 1037 } 1038 } 1039 if (reslen < capacity) { 1040 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1041 } 1042 reslen += len; 1043 } 1044 u_terminateChars(appendAt, capacity, reslen, status); 1045 return reslen; 1046 } 1047 1048 static int32_t 1049 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1050 char buf[ULOC_SCRIPT_CAPACITY]; 1051 UErrorCode tmpStatus = U_ZERO_ERROR; 1052 int32_t len; 1053 int32_t reslen = 0; 1054 1055 if (U_FAILURE(*status)) { 1056 return 0; 1057 } 1058 1059 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); 1060 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1061 if (strict) { 1062 *status = U_ILLEGAL_ARGUMENT_ERROR; 1063 } 1064 return 0; 1065 } 1066 1067 if (len > 0) { 1068 if (!_isScriptSubtag(buf, len)) { 1069 /* invalid script code */ 1070 if (strict) { 1071 *status = U_ILLEGAL_ARGUMENT_ERROR; 1072 } 1073 return 0; 1074 } else { 1075 if (reslen < capacity) { 1076 *(appendAt + reslen) = SEP; 1077 } 1078 reslen++; 1079 1080 if (reslen < capacity) { 1081 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1082 } 1083 reslen += len; 1084 } 1085 } 1086 u_terminateChars(appendAt, capacity, reslen, status); 1087 return reslen; 1088 } 1089 1090 static int32_t 1091 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1092 char buf[ULOC_COUNTRY_CAPACITY]; 1093 UErrorCode tmpStatus = U_ZERO_ERROR; 1094 int32_t len; 1095 int32_t reslen = 0; 1096 1097 if (U_FAILURE(*status)) { 1098 return 0; 1099 } 1100 1101 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); 1102 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1103 if (strict) { 1104 *status = U_ILLEGAL_ARGUMENT_ERROR; 1105 
}
        return 0;
    }

    if (len > 0) {
        if (!_isRegionSubtag(buf, len)) {
            /* invalid region code */
            if (strict) {
                *status = U_ILLEGAL_ARGUMENT_ERROR;
            }
            return 0;
        } else {
            /* separator first, then as much of the region as fits */
            if (reslen < capacity) {
                *(appendAt + reslen) = SEP;
            }
            reslen++;

            if (reslen < capacity) {
                uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
            }
            reslen += len;
        }
    }
    u_terminateChars(appendAt, capacity, reslen, status);
    return reslen;
}

/*
 * Appends "-<variant>" for every valid variant of localeID to appendAt.
 *
 * The variant string of the locale is split at '-' and '_', each piece is
 * lower-cased in place and validated with _isVariantSubtag(). Valid pieces
 * are collected in a list (duplicates dropped, original order kept) and then
 * written out.
 *
 * Special cases:
 *   - when the whole variant string is exactly "posix", nothing is appended
 *     and *hadPosix is set to TRUE instead;
 *   - in strict mode an empty piece, an invalid piece or a duplicate sets
 *     *status to U_ILLEGAL_ARGUMENT_ERROR and 0 is returned;
 *   - in non-strict mode scanning stops at the first invalid piece that is a
 *     valid private use subtag, and other invalid pieces are skipped.
 *
 * *hadPosix is only ever set to TRUE here, never cleared.
 * Returns the number of chars the appended text needs, which may exceed
 * capacity; the output is terminated via u_terminateChars().
 */
static int32_t
_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
    char buf[ULOC_FULLNAME_CAPACITY];
    UErrorCode tmpStatus = U_ZERO_ERROR;
    int32_t len, i;
    int32_t reslen = 0;

    if (U_FAILURE(*status)) {
        return 0;
    }

    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
        if (strict) {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return 0;
    }

    if (len > 0) {
        char *p, *pVar;
        UBool bNext = TRUE;
        VariantListEntry *var;
        VariantListEntry *varFirst = NULL;

        /* pVar marks the start of the piece being scanned; NULL between pieces */
        pVar = NULL;
        p = buf;
        while (bNext) {
            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
                if (*p == 0) {
                    bNext = FALSE;
                } else {
                    *p = 0; /* terminate */
                }
                if (pVar == NULL) {
                    if (strict) {
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    }
                    /* ignore empty variant */
                } else {
                    /* ICU uses upper case letters for variants, but
                       the canonical format is lowercase in BCP47 */
                    for (i = 0; *(pVar + i) != 0; i++) {
                        *(pVar + i) = uprv_tolower(*(pVar + i));
                    }

                    /* validate */
                    if (_isVariantSubtag(pVar, -1)) {
                        /* len is the length of the whole variant string, so the
                           else branch is taken only when "posix" is the sole variant */
                        if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
                            /* emit the variant to the list */
                            var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
                            if (var == NULL) {
                                *status = U_MEMORY_ALLOCATION_ERROR;
                                break;
                            }
                            /* the entry points into buf; only the node itself is heap-allocated */
                            var->variant = pVar;
                            if (!_addVariantToList(&varFirst, var)) {
                                /* duplicated variant */
                                uprv_free(var);
                                if (strict) {
                                    *status = U_ILLEGAL_ARGUMENT_ERROR;
                                    break;
                                }
                            }
                        } else {
                            /* Special handling for POSIX variant, need to remember that we had it and then */
                            /* treat it like an extension later. */
                            *hadPosix = TRUE;
                        }
                    } else if (strict) {
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    } else if (_isPrivateuseValueSubtag(pVar, -1)) {
                        /* Handle private use subtags separately */
                        break;
                    }
                }
                /* reset variant starting position */
                pVar = NULL;
            } else if (pVar == NULL) {
                pVar = p;
            }
            p++;
        }

        if (U_SUCCESS(*status)) {
            if (varFirst != NULL) {
                int32_t varLen;

                /* write out validated/normalized variants to the target */
                var = varFirst;
                while (var != NULL) {
                    if (reslen < capacity) {
                        *(appendAt + reslen) = SEP;
                    }
                    reslen++;
                    varLen = (int32_t)uprv_strlen(var->variant);
                    if (reslen < capacity) {
                        uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
                    }
                    reslen += varLen;
                    var = var->next;
                }
            }
        }

        /* clean up: free the list nodes on success and on failure alike */
        var = varFirst;
        while (var != NULL) {
            VariantListEntry *tmpVar = var->next;
            uprv_free(var);
            var = tmpVar;
        }

        if (U_FAILURE(*status)) {
            return 0;
        }
    }

    u_terminateChars(appendAt, capacity, reslen, status);
    return reslen;
}

static int32_t
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
    char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
    char
attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; 1260 int32_t attrBufLength = 0; 1261 UBool isAttribute = FALSE; 1262 UEnumeration *keywordEnum = NULL; 1263 int32_t reslen = 0; 1264 1265 keywordEnum = uloc_openKeywords(localeID, status); 1266 if (U_FAILURE(*status) && !hadPosix) { 1267 uenum_close(keywordEnum); 1268 return 0; 1269 } 1270 if (keywordEnum != NULL || hadPosix) { 1271 /* reorder extensions */ 1272 int32_t len; 1273 const char *key; 1274 ExtensionListEntry *firstExt = NULL; 1275 ExtensionListEntry *ext; 1276 AttributeListEntry *firstAttr = NULL; 1277 AttributeListEntry *attr; 1278 char *attrValue; 1279 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1280 char *pExtBuf = extBuf; 1281 int32_t extBufCapacity = sizeof(extBuf); 1282 const char *bcpKey, *bcpValue; 1283 UErrorCode tmpStatus = U_ZERO_ERROR; 1284 int32_t keylen; 1285 UBool isLDMLKeyword; 1286 1287 while (TRUE) { 1288 isAttribute = FALSE; 1289 key = uenum_next(keywordEnum, NULL, status); 1290 if (key == NULL) { 1291 break; 1292 } 1293 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); 1294 if (U_FAILURE(tmpStatus)) { 1295 if (strict) { 1296 *status = U_ILLEGAL_ARGUMENT_ERROR; 1297 break; 1298 } 1299 /* ignore this keyword */ 1300 tmpStatus = U_ZERO_ERROR; 1301 continue; 1302 } 1303 1304 keylen = (int32_t)uprv_strlen(key); 1305 isLDMLKeyword = (keylen > 1); 1306 1307 /* special keyword used for representing Unicode locale attributes */ 1308 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { 1309 isAttribute = TRUE; 1310 if (len > 0) { 1311 int32_t i = 0; 1312 while (TRUE) { 1313 attrBufLength = 0; 1314 for (; i < len; i++) { 1315 if (buf[i] != '-') { 1316 attrBuf[attrBufLength++] = buf[i]; 1317 } else { 1318 i++; 1319 break; 1320 } 1321 } 1322 if (attrBufLength > 0) { 1323 attrBuf[attrBufLength] = 0; 1324 1325 } else if (i >= len){ 1326 break; 1327 } 1328 1329 /* create AttributeListEntry */ 1330 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); 1331 if 
(attr == NULL) { 1332 *status = U_MEMORY_ALLOCATION_ERROR; 1333 break; 1334 } 1335 attrValue = (char*)uprv_malloc(attrBufLength + 1); 1336 if (attrValue == NULL) { 1337 *status = U_MEMORY_ALLOCATION_ERROR; 1338 break; 1339 } 1340 uprv_strcpy(attrValue, attrBuf); 1341 attr->attribute = attrValue; 1342 1343 if (!_addAttributeToList(&firstAttr, attr)) { 1344 uprv_free(attr); 1345 uprv_free(attrValue); 1346 if (strict) { 1347 *status = U_ILLEGAL_ARGUMENT_ERROR; 1348 break; 1349 } 1350 } 1351 } 1352 } 1353 } else if (isLDMLKeyword) { 1354 int32_t modKeyLen; 1355 1356 /* transform key and value to bcp47 style */ 1357 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus); 1358 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1359 if (strict) { 1360 *status = U_ILLEGAL_ARGUMENT_ERROR; 1361 break; 1362 } 1363 tmpStatus = U_ZERO_ERROR; 1364 continue; 1365 } 1366 1367 bcpKey = pExtBuf; 1368 pExtBuf += (modKeyLen + 1); 1369 extBufCapacity -= (modKeyLen + 1); 1370 1371 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus); 1372 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1373 if (strict) { 1374 *status = U_ILLEGAL_ARGUMENT_ERROR; 1375 break; 1376 } 1377 tmpStatus = U_ZERO_ERROR; 1378 continue; 1379 } 1380 bcpValue = pExtBuf; 1381 pExtBuf += (len + 1); 1382 extBufCapacity -= (len + 1); 1383 } else { 1384 if (*key == PRIVATEUSE) { 1385 if (!_isPrivateuseValueSubtags(buf, len)) { 1386 if (strict) { 1387 *status = U_ILLEGAL_ARGUMENT_ERROR; 1388 break; 1389 } 1390 continue; 1391 } 1392 } else { 1393 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { 1394 if (strict) { 1395 *status = U_ILLEGAL_ARGUMENT_ERROR; 1396 break; 1397 } 1398 continue; 1399 } 1400 } 1401 bcpKey = key; 1402 if ((len + 1) < extBufCapacity) { 1403 uprv_memcpy(pExtBuf, buf, len); 1404 bcpValue = pExtBuf; 1405 1406 pExtBuf += len; 1407 1408 *pExtBuf = 0; 1409 pExtBuf++; 1410 1411 
extBufCapacity -= (len + 1); 1412 } else { 1413 *status = U_ILLEGAL_ARGUMENT_ERROR; 1414 break; 1415 } 1416 } 1417 1418 if (!isAttribute) { 1419 /* create ExtensionListEntry */ 1420 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1421 if (ext == NULL) { 1422 *status = U_MEMORY_ALLOCATION_ERROR; 1423 break; 1424 } 1425 ext->key = bcpKey; 1426 ext->value = bcpValue; 1427 1428 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1429 uprv_free(ext); 1430 if (strict) { 1431 *status = U_ILLEGAL_ARGUMENT_ERROR; 1432 break; 1433 } 1434 } 1435 } 1436 } 1437 1438 /* Special handling for POSIX variant - add the keywords for POSIX */ 1439 if (hadPosix) { 1440 /* create ExtensionListEntry for POSIX */ 1441 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1442 if (ext == NULL) { 1443 *status = U_MEMORY_ALLOCATION_ERROR; 1444 goto cleanup; 1445 } 1446 ext->key = POSIX_KEY; 1447 ext->value = POSIX_VALUE; 1448 1449 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1450 uprv_free(ext); 1451 } 1452 } 1453 1454 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { 1455 UBool startLDMLExtension = FALSE; 1456 1457 attr = firstAttr; 1458 ext = firstExt; 1459 do { 1460 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) { 1461 /* write LDML singleton extension */ 1462 if (reslen < capacity) { 1463 *(appendAt + reslen) = SEP; 1464 } 1465 reslen++; 1466 if (reslen < capacity) { 1467 *(appendAt + reslen) = LDMLEXT; 1468 } 1469 reslen++; 1470 1471 startLDMLExtension = TRUE; 1472 } 1473 1474 /* write out the sorted BCP47 attributes, extensions and private use */ 1475 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) { 1476 if (reslen < capacity) { 1477 *(appendAt + reslen) = SEP; 1478 } 1479 reslen++; 1480 len = (int32_t)uprv_strlen(ext->key); 1481 if (reslen < capacity) { 1482 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); 1483 } 1484 reslen += len; 1485 if (reslen < capacity) { 1486 *(appendAt 
+ reslen) = SEP; 1487 } 1488 reslen++; 1489 len = (int32_t)uprv_strlen(ext->value); 1490 if (reslen < capacity) { 1491 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); 1492 } 1493 reslen += len; 1494 1495 ext = ext->next; 1496 } else if (attr) { 1497 /* write the value for the attributes */ 1498 if (reslen < capacity) { 1499 *(appendAt + reslen) = SEP; 1500 } 1501 reslen++; 1502 len = (int32_t)uprv_strlen(attr->attribute); 1503 if (reslen < capacity) { 1504 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); 1505 } 1506 reslen += len; 1507 1508 attr = attr->next; 1509 } 1510 } while (attr != NULL || ext != NULL); 1511 } 1512 cleanup: 1513 /* clean up */ 1514 ext = firstExt; 1515 while (ext != NULL) { 1516 ExtensionListEntry *tmpExt = ext->next; 1517 uprv_free(ext); 1518 ext = tmpExt; 1519 } 1520 1521 attr = firstAttr; 1522 while (attr != NULL) { 1523 AttributeListEntry *tmpAttr = attr->next; 1524 char *pValue = (char *)attr->attribute; 1525 uprv_free(pValue); 1526 uprv_free(attr); 1527 attr = tmpAttr; 1528 } 1529 1530 uenum_close(keywordEnum); 1531 1532 if (U_FAILURE(*status)) { 1533 return 0; 1534 } 1535 } 1536 1537 return u_terminateChars(appendAt, capacity, reslen, status); 1538 } 1539 1540 /** 1541 * Append keywords parsed from LDML extension value 1542 * e.g. 
"u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} 1543 * Note: char* buf is used for storing keywords 1544 */ 1545 static void 1546 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { 1547 const char *pTag; /* beginning of current subtag */ 1548 const char *pKwds; /* beginning of key-type pairs */ 1549 UBool variantExists = *posixVariant; 1550 1551 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ 1552 ExtensionListEntry *kwd, *nextKwd; 1553 1554 AttributeListEntry *attrFirst = NULL; /* first attribute */ 1555 AttributeListEntry *attr, *nextAttr; 1556 1557 int32_t len; 1558 int32_t bufIdx = 0; 1559 1560 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1561 int32_t attrBufIdx = 0; 1562 1563 /* Reset the posixVariant value */ 1564 *posixVariant = FALSE; 1565 1566 pTag = ldmlext; 1567 pKwds = NULL; 1568 1569 /* Iterate through u extension attributes */ 1570 while (*pTag) { 1571 /* locate next separator char */ 1572 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); 1573 1574 if (_isLDMLKey(pTag, len)) { 1575 pKwds = pTag; 1576 break; 1577 } 1578 1579 /* add this attribute to the list */ 1580 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); 1581 if (attr == NULL) { 1582 *status = U_MEMORY_ALLOCATION_ERROR; 1583 goto cleanup; 1584 } 1585 1586 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { 1587 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); 1588 attrBuf[attrBufIdx + len] = 0; 1589 attr->attribute = &attrBuf[attrBufIdx]; 1590 attrBufIdx += (len + 1); 1591 } else { 1592 *status = U_ILLEGAL_ARGUMENT_ERROR; 1593 goto cleanup; 1594 } 1595 1596 if (!_addAttributeToList(&attrFirst, attr)) { 1597 *status = U_ILLEGAL_ARGUMENT_ERROR; 1598 uprv_free(attr); 1599 goto cleanup; 1600 } 1601 1602 /* next tag */ 1603 pTag += len; 1604 if (*pTag) { 1605 /* next to the separator */ 1606 pTag++; 1607 } 1608 } 1609 1610 if 
(attrFirst) { 1611 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ 1612 1613 if (attrBufIdx > bufSize) { 1614 /* attrBufIdx == <total length of attribute subtag> + 1 */ 1615 *status = U_ILLEGAL_ARGUMENT_ERROR; 1616 goto cleanup; 1617 } 1618 1619 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1620 if (kwd == NULL) { 1621 *status = U_MEMORY_ALLOCATION_ERROR; 1622 goto cleanup; 1623 } 1624 1625 kwd->key = LOCALE_ATTRIBUTE_KEY; 1626 kwd->value = buf; 1627 1628 /* attribute subtags sorted in alphabetical order as type */ 1629 attr = attrFirst; 1630 while (attr != NULL) { 1631 nextAttr = attr->next; 1632 1633 /* buffer size check is done above */ 1634 if (attr != attrFirst) { 1635 *(buf + bufIdx) = SEP; 1636 bufIdx++; 1637 } 1638 1639 len = uprv_strlen(attr->attribute); 1640 uprv_memcpy(buf + bufIdx, attr->attribute, len); 1641 bufIdx += len; 1642 1643 attr = nextAttr; 1644 } 1645 *(buf + bufIdx) = 0; 1646 bufIdx++; 1647 1648 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1649 *status = U_ILLEGAL_ARGUMENT_ERROR; 1650 uprv_free(kwd); 1651 goto cleanup; 1652 } 1653 1654 /* once keyword entry is created, delete the attribute list */ 1655 attr = attrFirst; 1656 while (attr != NULL) { 1657 nextAttr = attr->next; 1658 uprv_free(attr); 1659 attr = nextAttr; 1660 } 1661 attrFirst = NULL; 1662 } 1663 1664 if (pKwds) { 1665 const char *pBcpKey = NULL; /* u extenstion key subtag */ 1666 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ 1667 int32_t bcpKeyLen = 0; 1668 int32_t bcpTypeLen = 0; 1669 UBool isDone = FALSE; 1670 1671 pTag = pKwds; 1672 /* BCP47 representation of LDML key/type pairs */ 1673 while (!isDone) { 1674 const char *pNextBcpKey = NULL; 1675 int32_t nextBcpKeyLen; 1676 UBool emitKeyword = FALSE; 1677 1678 if (*pTag) { 1679 /* locate next separator char */ 1680 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); 1681 1682 if (_isLDMLKey(pTag, len)) { 1683 if (pBcpKey) { 1684 emitKeyword 
= TRUE; 1685 pNextBcpKey = pTag; 1686 nextBcpKeyLen = len; 1687 } else { 1688 pBcpKey = pTag; 1689 bcpKeyLen = len; 1690 } 1691 } else { 1692 U_ASSERT(pBcpKey != NULL); 1693 /* within LDML type subtags */ 1694 if (pBcpType) { 1695 bcpTypeLen += (len + 1); 1696 } else { 1697 pBcpType = pTag; 1698 bcpTypeLen = len; 1699 } 1700 } 1701 1702 /* next tag */ 1703 pTag += len; 1704 if (*pTag) { 1705 /* next to the separator */ 1706 pTag++; 1707 } 1708 } else { 1709 /* processing last one */ 1710 emitKeyword = TRUE; 1711 isDone = TRUE; 1712 } 1713 1714 if (emitKeyword) { 1715 const char *pKey = NULL; /* LDML key */ 1716 const char *pType = NULL; /* LDML type */ 1717 1718 U_ASSERT(pBcpKey != NULL); 1719 1720 /* u extension key to LDML key */ 1721 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1722 if (U_FAILURE(*status)) { 1723 goto cleanup; 1724 } 1725 pKey = buf + bufIdx; 1726 bufIdx += len; 1727 *(buf + bufIdx) = 0; 1728 bufIdx++; 1729 1730 if (pBcpType) { 1731 /* BCP type to locale type */ 1732 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1733 if (U_FAILURE(*status)) { 1734 goto cleanup; 1735 } 1736 pType = buf + bufIdx; 1737 bufIdx += len; 1738 *(buf + bufIdx) = 0; 1739 bufIdx++; 1740 } else { 1741 /* typeless - default type value is "yes" */ 1742 pType = LOCALE_TYPE_YES; 1743 } 1744 1745 /* Special handling for u-va-posix, since we want to treat this as a variant, 1746 not as a keyword */ 1747 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { 1748 *posixVariant = TRUE; 1749 } else { 1750 /* create an ExtensionListEntry for this keyword */ 1751 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1752 if (kwd == NULL) { 1753 *status = U_MEMORY_ALLOCATION_ERROR; 1754 goto cleanup; 1755 } 1756 1757 kwd->key = pKey; 1758 kwd->value = pType; 1759 1760 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1761 *status = 
U_ILLEGAL_ARGUMENT_ERROR; 1762 uprv_free(kwd); 1763 goto cleanup; 1764 } 1765 } 1766 1767 pBcpKey = pNextBcpKey; 1768 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; 1769 pBcpType = NULL; 1770 bcpTypeLen = 0; 1771 } 1772 } 1773 } 1774 1775 kwd = kwdFirst; 1776 while (kwd != NULL) { 1777 nextKwd = kwd->next; 1778 _addExtensionToList(appendTo, kwd, FALSE); 1779 kwd = nextKwd; 1780 } 1781 1782 return; 1783 1784 cleanup: 1785 attr = attrFirst; 1786 while (attr != NULL) { 1787 nextAttr = attr->next; 1788 uprv_free(attr); 1789 attr = nextAttr; 1790 } 1791 1792 kwd = kwdFirst; 1793 while (kwd != NULL) { 1794 nextKwd = kwd->next; 1795 uprv_free(kwd); 1796 kwd = nextKwd; 1797 } 1798 } 1799 1800 1801 static int32_t 1802 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { 1803 int32_t reslen = 0; 1804 int32_t i, n; 1805 int32_t len; 1806 ExtensionListEntry *kwdFirst = NULL; 1807 ExtensionListEntry *kwd; 1808 const char *key, *type; 1809 char *kwdBuf = NULL; 1810 int32_t kwdBufLength = capacity; 1811 UBool posixVariant = FALSE; 1812 1813 if (U_FAILURE(*status)) { 1814 return 0; 1815 } 1816 1817 kwdBuf = (char*)uprv_malloc(kwdBufLength); 1818 if (kwdBuf == NULL) { 1819 *status = U_MEMORY_ALLOCATION_ERROR; 1820 return 0; 1821 } 1822 1823 /* Determine if variants already exists */ 1824 if (ultag_getVariantsSize(langtag)) { 1825 posixVariant = TRUE; 1826 } 1827 1828 n = ultag_getExtensionsSize(langtag); 1829 1830 /* resolve locale keywords and reordering keys */ 1831 for (i = 0; i < n; i++) { 1832 key = ultag_getExtensionKey(langtag, i); 1833 type = ultag_getExtensionValue(langtag, i); 1834 if (*key == LDMLEXT) { 1835 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); 1836 if (U_FAILURE(*status)) { 1837 break; 1838 } 1839 } else { 1840 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1841 if (kwd == NULL) { 1842 *status = U_MEMORY_ALLOCATION_ERROR; 1843 break; 1844 } 1845 
kwd->key = key; 1846 kwd->value = type; 1847 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1848 uprv_free(kwd); 1849 *status = U_ILLEGAL_ARGUMENT_ERROR; 1850 break; 1851 } 1852 } 1853 } 1854 1855 if (U_SUCCESS(*status)) { 1856 type = ultag_getPrivateUse(langtag); 1857 if ((int32_t)uprv_strlen(type) > 0) { 1858 /* add private use as a keyword */ 1859 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 1860 if (kwd == NULL) { 1861 *status = U_MEMORY_ALLOCATION_ERROR; 1862 } else { 1863 kwd->key = PRIVATEUSE_KEY; 1864 kwd->value = type; 1865 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1866 uprv_free(kwd); 1867 *status = U_ILLEGAL_ARGUMENT_ERROR; 1868 } 1869 } 1870 } 1871 } 1872 1873 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ 1874 1875 if (U_SUCCESS(*status) && posixVariant) { 1876 len = (int32_t) uprv_strlen(_POSIX); 1877 if (reslen < capacity) { 1878 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); 1879 } 1880 reslen += len; 1881 } 1882 1883 if (U_SUCCESS(*status) && kwdFirst != NULL) { 1884 /* write out the sorted keywords */ 1885 UBool firstValue = TRUE; 1886 kwd = kwdFirst; 1887 do { 1888 if (reslen < capacity) { 1889 if (firstValue) { 1890 /* '@' */ 1891 *(appendAt + reslen) = LOCALE_EXT_SEP; 1892 firstValue = FALSE; 1893 } else { 1894 /* ';' */ 1895 *(appendAt + reslen) = LOCALE_KEYWORD_SEP; 1896 } 1897 } 1898 reslen++; 1899 1900 /* key */ 1901 len = (int32_t)uprv_strlen(kwd->key); 1902 if (reslen < capacity) { 1903 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); 1904 } 1905 reslen += len; 1906 1907 /* '=' */ 1908 if (reslen < capacity) { 1909 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; 1910 } 1911 reslen++; 1912 1913 /* type */ 1914 len = (int32_t)uprv_strlen(kwd->value); 1915 if (reslen < capacity) { 1916 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); 1917 } 1918 reslen += len; 1919 1920 kwd = kwd->next; 
1921 } while (kwd); 1922 } 1923 1924 /* clean up */ 1925 kwd = kwdFirst; 1926 while (kwd != NULL) { 1927 ExtensionListEntry *tmpKwd = kwd->next; 1928 uprv_free(kwd); 1929 kwd = tmpKwd; 1930 } 1931 1932 uprv_free(kwdBuf); 1933 1934 if (U_FAILURE(*status)) { 1935 return 0; 1936 } 1937 1938 return u_terminateChars(appendAt, capacity, reslen, status); 1939 } 1940 1941 static int32_t 1942 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 1943 char buf[ULOC_FULLNAME_CAPACITY]; 1944 char tmpAppend[ULOC_FULLNAME_CAPACITY]; 1945 UErrorCode tmpStatus = U_ZERO_ERROR; 1946 int32_t len, i; 1947 int32_t reslen = 0; 1948 1949 if (U_FAILURE(*status)) { 1950 return 0; 1951 } 1952 1953 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 1954 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1955 if (strict) { 1956 *status = U_ILLEGAL_ARGUMENT_ERROR; 1957 } 1958 return 0; 1959 } 1960 1961 if (len > 0) { 1962 char *p, *pPriv; 1963 UBool bNext = TRUE; 1964 UBool firstValue = TRUE; 1965 UBool writeValue; 1966 1967 pPriv = NULL; 1968 p = buf; 1969 while (bNext) { 1970 writeValue = FALSE; 1971 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 1972 if (*p == 0) { 1973 bNext = FALSE; 1974 } else { 1975 *p = 0; /* terminate */ 1976 } 1977 if (pPriv != NULL) { 1978 /* Private use in the canonical format is lowercase in BCP47 */ 1979 for (i = 0; *(pPriv + i) != 0; i++) { 1980 *(pPriv + i) = uprv_tolower(*(pPriv + i)); 1981 } 1982 1983 /* validate */ 1984 if (_isPrivateuseValueSubtag(pPriv, -1)) { 1985 if (firstValue) { 1986 if (!_isVariantSubtag(pPriv, -1)) { 1987 writeValue = TRUE; 1988 } 1989 } else { 1990 writeValue = TRUE; 1991 } 1992 } else if (strict) { 1993 *status = U_ILLEGAL_ARGUMENT_ERROR; 1994 break; 1995 } else { 1996 break; 1997 } 1998 1999 if (writeValue) { 2000 if (reslen < capacity) { 2001 tmpAppend[reslen++] = SEP; 2002 } 2003 2004 if (firstValue) { 2005 
if (reslen < capacity) { 2006 tmpAppend[reslen++] = *PRIVATEUSE_KEY; 2007 } 2008 2009 if (reslen < capacity) { 2010 tmpAppend[reslen++] = SEP; 2011 } 2012 2013 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); 2014 if (reslen < capacity) { 2015 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); 2016 } 2017 reslen += len; 2018 2019 if (reslen < capacity) { 2020 tmpAppend[reslen++] = SEP; 2021 } 2022 2023 firstValue = FALSE; 2024 } 2025 2026 len = (int32_t)uprv_strlen(pPriv); 2027 if (reslen < capacity) { 2028 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); 2029 } 2030 reslen += len; 2031 } 2032 } 2033 /* reset private use starting position */ 2034 pPriv = NULL; 2035 } else if (pPriv == NULL) { 2036 pPriv = p; 2037 } 2038 p++; 2039 } 2040 2041 if (U_FAILURE(*status)) { 2042 return 0; 2043 } 2044 } 2045 2046 if (U_SUCCESS(*status)) { 2047 len = reslen; 2048 if (reslen < capacity) { 2049 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); 2050 } 2051 } 2052 2053 u_terminateChars(appendAt, capacity, reslen, status); 2054 2055 return reslen; 2056 } 2057 2058 /* 2059 * ------------------------------------------------- 2060 * 2061 * ultag_ functions 2062 * 2063 * ------------------------------------------------- 2064 */ 2065 2066 /* Bit flags used by the parser */ 2067 #define LANG 0x0001 2068 #define EXTL 0x0002 2069 #define SCRT 0x0004 2070 #define REGN 0x0008 2071 #define VART 0x0010 2072 #define EXTS 0x0020 2073 #define EXTV 0x0040 2074 #define PRIV 0x0080 2075 2076 static ULanguageTag* 2077 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { 2078 ULanguageTag *t; 2079 char *tagBuf; 2080 int16_t next; 2081 char *pSubtag, *pNext, *pLastGoodPosition; 2082 int32_t subtagLen; 2083 int32_t extlangIdx; 2084 ExtensionListEntry *pExtension; 2085 AttributeListEntry *pAttribute; 2086 char *pExtValueSubtag, *pExtValueSubtagEnd; 2087 int32_t i; 2088 UBool 
isLDMLExtension, privateuseVar = FALSE; 2089 int32_t grandfatheredLen = 0; 2090 2091 if (parsedLen != NULL) { 2092 *parsedLen = 0; 2093 } 2094 2095 if (U_FAILURE(*status)) { 2096 return NULL; 2097 } 2098 2099 if (tagLen < 0) { 2100 tagLen = (int32_t)uprv_strlen(tag); 2101 } 2102 2103 /* copy the entire string */ 2104 tagBuf = (char*)uprv_malloc(tagLen + 1); 2105 if (tagBuf == NULL) { 2106 *status = U_MEMORY_ALLOCATION_ERROR; 2107 return NULL; 2108 } 2109 uprv_memcpy(tagBuf, tag, tagLen); 2110 *(tagBuf + tagLen) = 0; 2111 2112 /* create a ULanguageTag */ 2113 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); 2114 if (t == NULL) { 2115 uprv_free(tagBuf); 2116 *status = U_MEMORY_ALLOCATION_ERROR; 2117 return NULL; 2118 } 2119 _initializeULanguageTag(t); 2120 t->buf = tagBuf; 2121 2122 if (tagLen < MINLEN) { 2123 /* the input tag is too short - return empty ULanguageTag */ 2124 return t; 2125 } 2126 2127 /* check if the tag is grandfathered */ 2128 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { 2129 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { 2130 int32_t newTagLength; 2131 2132 grandfatheredLen = tagLen; /* back up for output parsedLen */ 2133 newTagLength = uprv_strlen(GRANDFATHERED[i+1]); 2134 if (tagLen < newTagLength) { 2135 uprv_free(tagBuf); 2136 tagBuf = (char*)uprv_malloc(newTagLength + 1); 2137 if (tagBuf == NULL) { 2138 *status = U_MEMORY_ALLOCATION_ERROR; 2139 return NULL; 2140 } 2141 t->buf = tagBuf; 2142 tagLen = newTagLength; 2143 } 2144 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); 2145 break; 2146 } 2147 } 2148 2149 /* 2150 * langtag = language 2151 * ["-" script] 2152 * ["-" region] 2153 * *("-" variant) 2154 * *("-" extension) 2155 * ["-" privateuse] 2156 */ 2157 2158 next = LANG | PRIV; 2159 pNext = pLastGoodPosition = tagBuf; 2160 extlangIdx = 0; 2161 pExtension = NULL; 2162 pExtValueSubtag = NULL; 2163 pExtValueSubtagEnd = NULL; 2164 pAttribute = NULL; 2165 isLDMLExtension = FALSE; 2166 2167 while (pNext) { 2168 char *pSep; 2169 2170 
pSubtag = pNext; 2171 2172 /* locate next separator char */ 2173 pSep = pSubtag; 2174 while (*pSep) { 2175 if (*pSep == SEP) { 2176 break; 2177 } 2178 pSep++; 2179 } 2180 if (*pSep == 0) { 2181 /* last subtag */ 2182 pNext = NULL; 2183 } else { 2184 pNext = pSep + 1; 2185 } 2186 subtagLen = (int32_t)(pSep - pSubtag); 2187 2188 if (next & LANG) { 2189 if (_isLanguageSubtag(pSubtag, subtagLen)) { 2190 *pSep = 0; /* terminate */ 2191 t->language = T_CString_toLowerCase(pSubtag); 2192 2193 pLastGoodPosition = pSep; 2194 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 2195 continue; 2196 } 2197 } 2198 if (next & EXTL) { 2199 if (_isExtlangSubtag(pSubtag, subtagLen)) { 2200 *pSep = 0; 2201 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); 2202 2203 pLastGoodPosition = pSep; 2204 if (extlangIdx < 3) { 2205 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 2206 } else { 2207 next = SCRT | REGN | VART | EXTS | PRIV; 2208 } 2209 continue; 2210 } 2211 } 2212 if (next & SCRT) { 2213 if (_isScriptSubtag(pSubtag, subtagLen)) { 2214 char *p = pSubtag; 2215 2216 *pSep = 0; 2217 2218 /* to title case */ 2219 *p = uprv_toupper(*p); 2220 p++; 2221 for (; *p; p++) { 2222 *p = uprv_tolower(*p); 2223 } 2224 2225 t->script = pSubtag; 2226 2227 pLastGoodPosition = pSep; 2228 next = REGN | VART | EXTS | PRIV; 2229 continue; 2230 } 2231 } 2232 if (next & REGN) { 2233 if (_isRegionSubtag(pSubtag, subtagLen)) { 2234 *pSep = 0; 2235 t->region = T_CString_toUpperCase(pSubtag); 2236 2237 pLastGoodPosition = pSep; 2238 next = VART | EXTS | PRIV; 2239 continue; 2240 } 2241 } 2242 if (next & VART) { 2243 if (_isVariantSubtag(pSubtag, subtagLen) || 2244 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { 2245 VariantListEntry *var; 2246 UBool isAdded; 2247 2248 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 2249 if (var == NULL) { 2250 *status = U_MEMORY_ALLOCATION_ERROR; 2251 goto error; 2252 } 2253 *pSep = 0; 2254 var->variant = 
T_CString_toUpperCase(pSubtag); 2255 isAdded = _addVariantToList(&(t->variants), var); 2256 if (!isAdded) { 2257 /* duplicated variant entry */ 2258 uprv_free(var); 2259 break; 2260 } 2261 pLastGoodPosition = pSep; 2262 next = VART | EXTS | PRIV; 2263 continue; 2264 } 2265 } 2266 if (next & EXTS) { 2267 if (_isExtensionSingleton(pSubtag, subtagLen)) { 2268 if (pExtension != NULL) { 2269 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2270 /* the previous extension is incomplete */ 2271 uprv_free(pExtension); 2272 pExtension = NULL; 2273 break; 2274 } 2275 2276 /* terminate the previous extension value */ 2277 *pExtValueSubtagEnd = 0; 2278 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2279 2280 /* insert the extension to the list */ 2281 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2282 pLastGoodPosition = pExtValueSubtagEnd; 2283 } else { 2284 /* stop parsing here */ 2285 uprv_free(pExtension); 2286 pExtension = NULL; 2287 break; 2288 } 2289 } 2290 2291 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT); 2292 2293 /* create a new extension */ 2294 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); 2295 if (pExtension == NULL) { 2296 *status = U_MEMORY_ALLOCATION_ERROR; 2297 goto error; 2298 } 2299 *pSep = 0; 2300 pExtension->key = T_CString_toLowerCase(pSubtag); 2301 pExtension->value = NULL; /* will be set later */ 2302 2303 /* 2304 * reset the start and the end location of extension value 2305 * subtags for this extension 2306 */ 2307 pExtValueSubtag = NULL; 2308 pExtValueSubtagEnd = NULL; 2309 2310 next = EXTV; 2311 continue; 2312 } 2313 } 2314 if (next & EXTV) { 2315 if (_isExtensionSubtag(pSubtag, subtagLen)) { 2316 if (pExtValueSubtag == NULL) { 2317 /* if the start postion of this extension's value is not yet, 2318 this one is the first value subtag */ 2319 pExtValueSubtag = pSubtag; 2320 } 2321 2322 /* Mark the end of this subtag */ 2323 pExtValueSubtagEnd = pSep; 2324 next = EXTS | 
EXTV | PRIV; 2325 2326 continue; 2327 } 2328 } 2329 if (next & PRIV) { 2330 if (uprv_tolower(*pSubtag) == PRIVATEUSE) { 2331 char *pPrivuseVal; 2332 2333 if (pExtension != NULL) { 2334 /* Process the last extension */ 2335 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2336 /* the previous extension is incomplete */ 2337 uprv_free(pExtension); 2338 pExtension = NULL; 2339 break; 2340 } else { 2341 /* terminate the previous extension value */ 2342 *pExtValueSubtagEnd = 0; 2343 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2344 2345 /* insert the extension to the list */ 2346 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2347 pLastGoodPosition = pExtValueSubtagEnd; 2348 pExtension = NULL; 2349 } else { 2350 /* stop parsing here */ 2351 uprv_free(pExtension); 2352 pExtension = NULL; 2353 break; 2354 } 2355 } 2356 } 2357 2358 /* The rest of part will be private use value subtags */ 2359 if (pNext == NULL) { 2360 /* empty private use subtag */ 2361 break; 2362 } 2363 /* back up the private use value start position */ 2364 pPrivuseVal = pNext; 2365 2366 /* validate private use value subtags */ 2367 while (pNext) { 2368 pSubtag = pNext; 2369 pSep = pSubtag; 2370 while (*pSep) { 2371 if (*pSep == SEP) { 2372 break; 2373 } 2374 pSep++; 2375 } 2376 if (*pSep == 0) { 2377 /* last subtag */ 2378 pNext = NULL; 2379 } else { 2380 pNext = pSep + 1; 2381 } 2382 subtagLen = (int32_t)(pSep - pSubtag); 2383 2384 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { 2385 *pSep = 0; 2386 next = VART; 2387 privateuseVar = TRUE; 2388 break; 2389 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { 2390 pLastGoodPosition = pSep; 2391 } else { 2392 break; 2393 } 2394 } 2395 2396 if (next == VART) { 2397 continue; 2398 } 2399 2400 if (pLastGoodPosition - pPrivuseVal > 0) { 2401 *pLastGoodPosition = 0; 2402 t->privateuse = T_CString_toLowerCase(pPrivuseVal); 2403 } 2404 /* No more subtags, exiting 
the parse loop */ 2405 break; 2406 } 2407 break; 2408 } 2409 2410 /* If we fell through here, it means this subtag is illegal - quit parsing */ 2411 break; 2412 } 2413 2414 if (pExtension != NULL) { 2415 /* Process the last extension */ 2416 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2417 /* the previous extension is incomplete */ 2418 uprv_free(pExtension); 2419 } else { 2420 /* terminate the previous extension value */ 2421 *pExtValueSubtagEnd = 0; 2422 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2423 /* insert the extension to the list */ 2424 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2425 pLastGoodPosition = pExtValueSubtagEnd; 2426 } else { 2427 uprv_free(pExtension); 2428 } 2429 } 2430 } 2431 2432 if (parsedLen != NULL) { 2433 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); 2434 } 2435 2436 return t; 2437 2438 error: 2439 uprv_free(t); 2440 return NULL; 2441 } 2442 2443 static void 2444 ultag_close(ULanguageTag* langtag) { 2445 2446 if (langtag == NULL) { 2447 return; 2448 } 2449 2450 uprv_free(langtag->buf); 2451 2452 if (langtag->variants) { 2453 VariantListEntry *curVar = langtag->variants; 2454 while (curVar) { 2455 VariantListEntry *nextVar = curVar->next; 2456 uprv_free(curVar); 2457 curVar = nextVar; 2458 } 2459 } 2460 2461 if (langtag->extensions) { 2462 ExtensionListEntry *curExt = langtag->extensions; 2463 while (curExt) { 2464 ExtensionListEntry *nextExt = curExt->next; 2465 uprv_free(curExt); 2466 curExt = nextExt; 2467 } 2468 } 2469 2470 uprv_free(langtag); 2471 } 2472 2473 static const char* 2474 ultag_getLanguage(const ULanguageTag* langtag) { 2475 return langtag->language; 2476 } 2477 2478 #if 0 2479 static const char* 2480 ultag_getJDKLanguage(const ULanguageTag* langtag) { 2481 int32_t i; 2482 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 2483 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { 2484 return 
DEPRECATEDLANGS[i + 1]; 2485 } 2486 } 2487 return langtag->language; 2488 } 2489 #endif 2490 2491 static const char* 2492 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { 2493 if (idx >= 0 && idx < MAXEXTLANG) { 2494 return langtag->extlang[idx]; 2495 } 2496 return NULL; 2497 } 2498 2499 static int32_t 2500 ultag_getExtlangSize(const ULanguageTag* langtag) { 2501 int32_t size = 0; 2502 int32_t i; 2503 for (i = 0; i < MAXEXTLANG; i++) { 2504 if (langtag->extlang[i]) { 2505 size++; 2506 } 2507 } 2508 return size; 2509 } 2510 2511 static const char* 2512 ultag_getScript(const ULanguageTag* langtag) { 2513 return langtag->script; 2514 } 2515 2516 static const char* 2517 ultag_getRegion(const ULanguageTag* langtag) { 2518 return langtag->region; 2519 } 2520 2521 static const char* 2522 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { 2523 const char *var = NULL; 2524 VariantListEntry *cur = langtag->variants; 2525 int32_t i = 0; 2526 while (cur) { 2527 if (i == idx) { 2528 var = cur->variant; 2529 break; 2530 } 2531 cur = cur->next; 2532 i++; 2533 } 2534 return var; 2535 } 2536 2537 static int32_t 2538 ultag_getVariantsSize(const ULanguageTag* langtag) { 2539 int32_t size = 0; 2540 VariantListEntry *cur = langtag->variants; 2541 while (TRUE) { 2542 if (cur == NULL) { 2543 break; 2544 } 2545 size++; 2546 cur = cur->next; 2547 } 2548 return size; 2549 } 2550 2551 static const char* 2552 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { 2553 const char *key = NULL; 2554 ExtensionListEntry *cur = langtag->extensions; 2555 int32_t i = 0; 2556 while (cur) { 2557 if (i == idx) { 2558 key = cur->key; 2559 break; 2560 } 2561 cur = cur->next; 2562 i++; 2563 } 2564 return key; 2565 } 2566 2567 static const char* 2568 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { 2569 const char *val = NULL; 2570 ExtensionListEntry *cur = langtag->extensions; 2571 int32_t i = 0; 2572 while (cur) { 2573 if (i == idx) { 2574 val = 
cur->value; 2575 break; 2576 } 2577 cur = cur->next; 2578 i++; 2579 } 2580 return val; 2581 } 2582 2583 static int32_t 2584 ultag_getExtensionsSize(const ULanguageTag* langtag) { 2585 int32_t size = 0; 2586 ExtensionListEntry *cur = langtag->extensions; 2587 while (TRUE) { 2588 if (cur == NULL) { 2589 break; 2590 } 2591 size++; 2592 cur = cur->next; 2593 } 2594 return size; 2595 } 2596 2597 static const char* 2598 ultag_getPrivateUse(const ULanguageTag* langtag) { 2599 return langtag->privateuse; 2600 } 2601 2602 #if 0 2603 static const char* 2604 ultag_getGrandfathered(const ULanguageTag* langtag) { 2605 return langtag->grandfathered; 2606 } 2607 #endif 2608 2609 2610 /* 2611 * ------------------------------------------------- 2612 * 2613 * Locale/BCP47 conversion APIs, exposed as uloc_* 2614 * 2615 * ------------------------------------------------- 2616 */ 2617 U_CAPI int32_t U_EXPORT2 2618 uloc_toLanguageTag(const char* localeID, 2619 char* langtag, 2620 int32_t langtagCapacity, 2621 UBool strict, 2622 UErrorCode* status) { 2623 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ 2624 char canonical[256]; 2625 int32_t reslen = 0; 2626 UErrorCode tmpStatus = U_ZERO_ERROR; 2627 UBool hadPosix = FALSE; 2628 const char* pKeywordStart; 2629 2630 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". 
See #6835 */ 2631 canonical[0] = 0; 2632 if (uprv_strlen(localeID) > 0) { 2633 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); 2634 if (tmpStatus != U_ZERO_ERROR) { 2635 *status = U_ILLEGAL_ARGUMENT_ERROR; 2636 return 0; 2637 } 2638 } 2639 2640 /* For handling special case - private use only tag */ 2641 pKeywordStart = locale_getKeywordsStart(canonical); 2642 if (pKeywordStart == canonical) { 2643 UEnumeration *kwdEnum; 2644 int kwdCnt = 0; 2645 UBool done = FALSE; 2646 2647 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); 2648 if (kwdEnum != NULL) { 2649 kwdCnt = uenum_count(kwdEnum, &tmpStatus); 2650 if (kwdCnt == 1) { 2651 const char *key; 2652 int32_t len = 0; 2653 2654 key = uenum_next(kwdEnum, &len, &tmpStatus); 2655 if (len == 1 && *key == PRIVATEUSE) { 2656 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2657 buf[0] = PRIVATEUSE; 2658 buf[1] = SEP; 2659 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); 2660 if (U_SUCCESS(tmpStatus)) { 2661 if (_isPrivateuseValueSubtags(&buf[2], len)) { 2662 /* return private use only tag */ 2663 reslen = len + 2; 2664 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); 2665 u_terminateChars(langtag, langtagCapacity, reslen, status); 2666 done = TRUE; 2667 } else if (strict) { 2668 *status = U_ILLEGAL_ARGUMENT_ERROR; 2669 done = TRUE; 2670 } 2671 /* if not strict mode, then "und" will be returned */ 2672 } else { 2673 *status = U_ILLEGAL_ARGUMENT_ERROR; 2674 done = TRUE; 2675 } 2676 } 2677 } 2678 uenum_close(kwdEnum); 2679 if (done) { 2680 return reslen; 2681 } 2682 } 2683 } 2684 2685 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); 2686 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2687 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2688 reslen += 
_appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); 2689 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2690 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2691 2692 return reslen; 2693 } 2694 2695 2696 U_CAPI int32_t U_EXPORT2 2697 uloc_forLanguageTag(const char* langtag, 2698 char* localeID, 2699 int32_t localeIDCapacity, 2700 int32_t* parsedLength, 2701 UErrorCode* status) { 2702 ULanguageTag *lt; 2703 int32_t reslen = 0; 2704 const char *subtag, *p; 2705 int32_t len; 2706 int32_t i, n; 2707 UBool noRegion = TRUE; 2708 2709 lt = ultag_parse(langtag, -1, parsedLength, status); 2710 if (U_FAILURE(*status)) { 2711 return 0; 2712 } 2713 2714 /* language */ 2715 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); 2716 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { 2717 len = (int32_t)uprv_strlen(subtag); 2718 if (len > 0) { 2719 if (reslen < localeIDCapacity) { 2720 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); 2721 } 2722 reslen += len; 2723 } 2724 } 2725 2726 /* script */ 2727 subtag = ultag_getScript(lt); 2728 len = (int32_t)uprv_strlen(subtag); 2729 if (len > 0) { 2730 if (reslen < localeIDCapacity) { 2731 *(localeID + reslen) = LOCALE_SEP; 2732 } 2733 reslen++; 2734 2735 /* write out the script in title case */ 2736 p = subtag; 2737 while (*p) { 2738 if (reslen < localeIDCapacity) { 2739 if (p == subtag) { 2740 *(localeID + reslen) = uprv_toupper(*p); 2741 } else { 2742 *(localeID + reslen) = *p; 2743 } 2744 } 2745 reslen++; 2746 p++; 2747 } 2748 } 2749 2750 /* region */ 2751 subtag = ultag_getRegion(lt); 2752 len = (int32_t)uprv_strlen(subtag); 2753 if (len > 0) { 2754 if (reslen < localeIDCapacity) { 2755 *(localeID + reslen) = LOCALE_SEP; 2756 } 2757 reslen++; 2758 /* write out 
the retion in upper case */ 2759 p = subtag; 2760 while (*p) { 2761 if (reslen < localeIDCapacity) { 2762 *(localeID + reslen) = uprv_toupper(*p); 2763 } 2764 reslen++; 2765 p++; 2766 } 2767 noRegion = FALSE; 2768 } 2769 2770 /* variants */ 2771 n = ultag_getVariantsSize(lt); 2772 if (n > 0) { 2773 if (noRegion) { 2774 if (reslen < localeIDCapacity) { 2775 *(localeID + reslen) = LOCALE_SEP; 2776 } 2777 reslen++; 2778 } 2779 2780 for (i = 0; i < n; i++) { 2781 subtag = ultag_getVariant(lt, i); 2782 if (reslen < localeIDCapacity) { 2783 *(localeID + reslen) = LOCALE_SEP; 2784 } 2785 reslen++; 2786 /* write out the variant in upper case */ 2787 p = subtag; 2788 while (*p) { 2789 if (reslen < localeIDCapacity) { 2790 *(localeID + reslen) = uprv_toupper(*p); 2791 } 2792 reslen++; 2793 p++; 2794 } 2795 } 2796 } 2797 2798 /* keywords */ 2799 n = ultag_getExtensionsSize(lt); 2800 subtag = ultag_getPrivateUse(lt); 2801 if (n > 0 || uprv_strlen(subtag) > 0) { 2802 if (reslen == 0 && n > 0) { 2803 /* need a language */ 2804 if (reslen < localeIDCapacity) { 2805 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); 2806 } 2807 reslen += LANG_UND_LEN; 2808 } 2809 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); 2810 reslen += len; 2811 } 2812 2813 ultag_close(lt); 2814 return u_terminateChars(localeID, localeIDCapacity, reslen, status); 2815 } 2816 2817 2818