1 /* 2 ********************************************************************** 3 * Copyright (C) 2009-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 8 #include "unicode/utypes.h" 9 #include "unicode/ures.h" 10 #include "unicode/putil.h" 11 #include "unicode/uloc.h" 12 #include "ustr_imp.h" 13 #include "cmemory.h" 14 #include "cstring.h" 15 #include "putilimp.h" 16 #include "uinvchar.h" 17 #include "ulocimp.h" 18 19 /* struct holding a single variant */ 20 typedef struct VariantListEntry { 21 const char *variant; 22 struct VariantListEntry *next; 23 } VariantListEntry; 24 25 /* struct holding a single attribute value */ 26 typedef struct AttributeListEntry { 27 const char *attribute; 28 struct AttributeListEntry *next; 29 } AttributeListEntry; 30 31 /* struct holding a single extension */ 32 typedef struct ExtensionListEntry { 33 const char *key; 34 const char *value; 35 struct ExtensionListEntry *next; 36 } ExtensionListEntry; 37 38 #define MAXEXTLANG 3 39 typedef struct ULanguageTag { 40 char *buf; /* holding parsed subtags */ 41 const char *language; 42 const char *extlang[MAXEXTLANG]; 43 const char *script; 44 const char *region; 45 VariantListEntry *variants; 46 ExtensionListEntry *extensions; 47 AttributeListEntry *attributes; 48 const char *privateuse; 49 const char *grandfathered; 50 } ULanguageTag; 51 52 #define MINLEN 2 53 #define SEP '-' 54 #define PRIVATEUSE 'x' 55 #define LDMLEXT 'u' 56 57 #define LOCALE_SEP '_' 58 #define LOCALE_EXT_SEP '@' 59 #define LOCALE_KEYWORD_SEP ';' 60 #define LOCALE_KEY_TYPE_SEP '=' 61 62 #define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z')) 63 #define ISNUMERIC(c) ((c)>='0' && (c)<='9') 64 65 static const char* EMPTY = ""; 66 static const char* LANG_UND = "und"; 67 static const char* PRIVATEUSE_KEY = "x"; 68 static const char* _POSIX = "_POSIX"; 69 static const char* POSIX_KEY = "va"; 70 static const char* POSIX_VALUE = "posix"; 71 static const char* LOCALE_ATTRIBUTE_KEY = "attribute"; 72 static const char* PRIVUSE_VARIANT_PREFIX = "lvariant"; 73 74 #define LANG_UND_LEN 3 75 76 static const char* GRANDFATHERED[] = { 77 /* grandfathered preferred */ 78 "art-lojban", "jbo", 79 "cel-gaulish", "xtg-x-cel-gaulish", 80 "en-GB-oed", "en-GB-x-oed", 81 "i-ami", "ami", 82 "i-bnn", "bnn", 83 "i-default", "en-x-i-default", 84 "i-enochian", "und-x-i-enochian", 85 "i-hak", "hak", 86 "i-klingon", "tlh", 87 "i-lux", "lb", 88 "i-mingo", "see-x-i-mingo", 89 "i-navajo", "nv", 90 "i-pwn", "pwn", 91 "i-tao", "tao", 92 "i-tay", "tay", 93 "i-tsu", "tsu", 94 "no-bok", "nb", 95 "no-nyn", "nn", 96 "sgn-be-fr", "sfb", 97 "sgn-be-nl", "vgt", 98 "sgn-ch-de", "sgg", 99 "zh-guoyu", "cmn", 100 "zh-hakka", "hak", 101 "zh-min", "nan-x-zh-min", 102 "zh-min-nan", "nan", 103 "zh-xiang", "hsn", 104 NULL, NULL 105 }; 106 107 static const char* DEPRECATEDLANGS[] = { 108 /* deprecated new */ 109 "iw", "he", 110 "ji", "yi", 111 "in", "id", 112 NULL, NULL 113 }; 114 115 /* 116 * ------------------------------------------------- 117 * 118 * These ultag_ functions may be exposed as APIs later 119 * 120 * ------------------------------------------------- 121 */ 122 123 static ULanguageTag* 124 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); 125 126 static void 127 ultag_close(ULanguageTag* langtag); 128 129 static const char* 130 ultag_getLanguage(const ULanguageTag* langtag); 131 132 #if 0 133 static const char* 134 ultag_getJDKLanguage(const ULanguageTag* langtag); 135 #endif 136 137 static const char* 138 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); 139 140 static int32_t 141 ultag_getExtlangSize(const ULanguageTag* langtag); 142 143 static const char* 144 ultag_getScript(const ULanguageTag* langtag); 145 146 static const char* 147 ultag_getRegion(const ULanguageTag* langtag); 148 149 static const char* 150 ultag_getVariant(const ULanguageTag* langtag, int32_t idx); 151 152 static int32_t 153 ultag_getVariantsSize(const ULanguageTag* langtag); 154 155 #if 0 156 /* Currently not being used. */ 157 static const char* 158 ultag_getAttribute(const ULanguageTag* langtag, int32_t idx); 159 #endif 160 161 static int32_t 162 ultag_getAttributesSize(const ULanguageTag* langtag); 163 164 static const char* 165 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); 166 167 static const char* 168 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); 169 170 static int32_t 171 ultag_getExtensionsSize(const ULanguageTag* langtag); 172 173 static const char* 174 ultag_getPrivateUse(const ULanguageTag* langtag); 175 176 #if 0 177 static const char* 178 ultag_getGrandfathered(const ULanguageTag* langtag); 179 #endif 180 181 /* 182 * ------------------------------------------------- 183 * 184 * Language subtag syntax validation functions 185 * 186 * ------------------------------------------------- 187 */ 188 189 static UBool 190 _isAlphaString(const char* s, int32_t len) { 191 int32_t i; 192 for (i = 0; i < len; i++) { 193 if (!ISALPHA(*(s + i))) { 194 return FALSE; 195 } 196 } 197 return TRUE; 198 } 199 200 static UBool 201 _isNumericString(const char* s, int32_t len) { 202 int32_t i; 203 for (i = 0; i < len; i++) { 204 if (!ISNUMERIC(*(s + i))) { 205 return FALSE; 206 } 207 } 208 return TRUE; 209 } 210 211 static UBool 212 _isAlphaNumericString(const char* s, int32_t len) { 213 int32_t i; 214 for (i = 0; i < len; i++) { 215 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { 216 return FALSE; 217 } 218 } 219 return TRUE; 220 } 221 222 static UBool 223 _isLanguageSubtag(const char* s, int32_t len) { 224 /* 225 * language = 2*3ALPHA ; shortest ISO 639 code 226 * ["-" extlang] ; sometimes followed by 227 * ; extended language subtags 228 * / 4ALPHA ; or reserved for future use 229 * / 5*8ALPHA ; or registered language subtag 230 */ 231 if (len < 0) { 232 len = (int32_t)uprv_strlen(s); 233 } 234 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { 235 return TRUE; 236 } 237 return FALSE; 238 } 239 240 static UBool 241 _isExtlangSubtag(const char* s, int32_t len) { 242 /* 243 * extlang = 3ALPHA ; selected ISO 639 codes 244 * *2("-" 3ALPHA) ; permanently reserved 245 */ 246 if (len < 0) { 247 len = (int32_t)uprv_strlen(s); 248 } 249 if (len == 3 && _isAlphaString(s, len)) { 250 return TRUE; 251 } 252 return FALSE; 253 } 254 255 static UBool 256 _isScriptSubtag(const char* s, int32_t len) { 257 /* 258 * script = 4ALPHA ; ISO 15924 code 259 */ 260 if (len < 0) { 261 len = (int32_t)uprv_strlen(s); 262 } 263 if (len == 4 && _isAlphaString(s, len)) { 264 return TRUE; 265 } 266 return FALSE; 267 } 268 269 static UBool 270 _isRegionSubtag(const char* s, int32_t len) { 271 /* 272 * region = 2ALPHA ; ISO 3166-1 code 273 * / 3DIGIT ; UN M.49 code 274 */ 275 if (len < 0) { 276 len = (int32_t)uprv_strlen(s); 277 } 278 if (len == 2 && _isAlphaString(s, len)) { 279 return TRUE; 280 } 281 if (len == 3 && _isNumericString(s, len)) { 282 return TRUE; 283 } 284 return FALSE; 285 } 286 287 static UBool 288 _isVariantSubtag(const char* s, int32_t len) { 289 /* 290 * variant = 5*8alphanum ; registered variants 291 * / (DIGIT 3alphanum) 292 */ 293 if (len < 0) { 294 len = (int32_t)uprv_strlen(s); 295 } 296 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { 297 return TRUE; 298 } 299 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { 300 return TRUE; 301 } 302 return FALSE; 303 } 304 305 static UBool 306 _isPrivateuseVariantSubtag(const char* s, int32_t len) { 307 /* 308 * variant = 1*8alphanum ; registered variants 309 * / (DIGIT 3alphanum) 310 */ 311 if (len < 0) { 312 len = (int32_t)uprv_strlen(s); 313 } 314 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 315 return TRUE; 316 } 317 return FALSE; 318 } 319 320 static UBool 321 _isAttributeSubtag(const char* s, int32_t len) { 322 /* 323 * attribute = 3*8alphanum 324 */ 325 if (len < 0) { 326 len = (int32_t)uprv_strlen(s); 327 } 328 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) { 329 return TRUE; 330 } 331 return FALSE; 332 } 333 334 static UBool 335 _isExtensionSingleton(const char* s, int32_t len) { 336 /* 337 * extension = singleton 1*("-" (2*8alphanum)) 338 */ 339 if (len < 0) { 340 len = (int32_t)uprv_strlen(s); 341 } 342 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { 343 return TRUE; 344 } 345 return FALSE; 346 } 347 348 static UBool 349 _isExtensionSubtag(const char* s, int32_t len) { 350 /* 351 * extension = singleton 1*("-" (2*8alphanum)) 352 */ 353 if (len < 0) { 354 len = (int32_t)uprv_strlen(s); 355 } 356 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { 357 return TRUE; 358 } 359 return FALSE; 360 } 361 362 static UBool 363 _isExtensionSubtags(const char* s, int32_t len) { 364 const char *p = s; 365 const char *pSubtag = NULL; 366 367 if (len < 0) { 368 len = (int32_t)uprv_strlen(s); 369 } 370 371 while ((p - s) < len) { 372 if (*p == SEP) { 373 if (pSubtag == NULL) { 374 return FALSE; 375 } 376 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { 377 return FALSE; 378 } 379 pSubtag = NULL; 380 } else if (pSubtag == NULL) { 381 pSubtag = p; 382 } 383 p++; 384 } 385 if (pSubtag == NULL) { 386 return FALSE; 387 } 388 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); 389 } 390 391 static UBool 392 _isPrivateuseValueSubtag(const char* s, int32_t len) { 393 /* 394 * privateuse = "x" 1*("-" (1*8alphanum)) 395 */ 396 if (len < 0) { 397 len = (int32_t)uprv_strlen(s); 398 } 399 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { 400 return TRUE; 401 } 402 return FALSE; 403 } 404 405 static UBool 406 _isPrivateuseValueSubtags(const char* s, int32_t len) { 407 const char *p = s; 408 const char *pSubtag = NULL; 409 410 if (len < 0) { 411 len = (int32_t)uprv_strlen(s); 412 } 413 414 while ((p - s) < len) { 415 if (*p == SEP) { 416 if (pSubtag == NULL) { 417 return FALSE; 418 } 419 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { 420 return FALSE; 421 } 422 pSubtag = NULL; 423 } else if (pSubtag == NULL) { 424 pSubtag = p; 425 } 426 p++; 427 } 428 if (pSubtag == NULL) { 429 return FALSE; 430 } 431 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); 432 } 433 434 static UBool 435 _isLDMLKey(const char* s, int32_t len) { 436 if (len < 0) { 437 len = (int32_t)uprv_strlen(s); 438 } 439 if (len == 2 && _isAlphaNumericString(s, len)) { 440 return TRUE; 441 } 442 return FALSE; 443 } 444 445 static UBool 446 _isLDMLType(const char* s, int32_t len) { 447 if (len < 0) { 448 len = (int32_t)uprv_strlen(s); 449 } 450 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) { 451 return TRUE; 452 } 453 return FALSE; 454 } 455 456 /* 457 * ------------------------------------------------- 458 * 459 * Helper functions 460 * 461 * ------------------------------------------------- 462 */ 463 464 static UBool 465 _addVariantToList(VariantListEntry **first, VariantListEntry *var) { 466 UBool bAdded = TRUE; 467 468 if (*first == NULL) { 469 var->next = NULL; 470 *first = var; 471 } else { 472 VariantListEntry *prev, *cur; 473 int32_t cmp; 474 475 /* variants order should be preserved */ 476 prev = NULL; 477 cur = *first; 478 while (TRUE) { 479 if (cur == NULL) { 480 prev->next = var; 481 var->next = NULL; 482 break; 483 } 484 485 /* Checking for duplicate variant */ 486 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); 487 if (cmp == 0) { 488 /* duplicated variant */ 489 bAdded = FALSE; 490 break; 491 } 492 prev = cur; 493 cur = cur->next; 494 } 495 } 496 497 return bAdded; 498 } 499 500 static UBool 501 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { 502 UBool bAdded = TRUE; 503 504 if (*first == NULL) { 505 attr->next = NULL; 506 *first = attr; 507 } else { 508 AttributeListEntry *prev, *cur; 509 int32_t cmp; 510 511 /* reorder variants in alphabetical order */ 512 prev = NULL; 513 cur = *first; 514 while (TRUE) { 515 if (cur == NULL) { 516 prev->next = attr; 517 attr->next = NULL; 518 break; 519 } 520 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); 521 if (cmp < 0) { 522 if (prev == NULL) { 523 *first = attr; 524 } else { 525 prev->next = attr; 526 } 527 attr->next = cur; 528 break; 529 } 530 if (cmp == 0) { 531 /* duplicated variant */ 532 bAdded = FALSE; 533 break; 534 } 535 prev = cur; 536 cur = cur->next; 537 } 538 } 539 540 return bAdded; 541 } 542 543 544 static UBool 545 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { 546 UBool bAdded = TRUE; 547 548 if (*first == NULL) { 549 ext->next = NULL; 550 *first = ext; 551 } else { 552 ExtensionListEntry *prev, *cur; 553 int32_t cmp; 554 555 /* reorder variants in alphabetical order */ 556 prev = NULL; 557 cur = *first; 558 while (TRUE) { 559 if (cur == NULL) { 560 prev->next = ext; 561 ext->next = NULL; 562 break; 563 } 564 if (localeToBCP) { 565 /* special handling for locale to bcp conversion */ 566 int32_t len, curlen; 567 568 len = (int32_t)uprv_strlen(ext->key); 569 curlen = (int32_t)uprv_strlen(cur->key); 570 571 if (len == 1 && curlen == 1) { 572 if (*(ext->key) == *(cur->key)) { 573 cmp = 0; 574 } else if (*(ext->key) == PRIVATEUSE) { 575 cmp = 1; 576 } else if (*(cur->key) == PRIVATEUSE) { 577 cmp = -1; 578 } else { 579 cmp = *(ext->key) - *(cur->key); 580 } 581 } else if (len == 1) { 582 cmp = *(ext->key) - LDMLEXT; 583 } else if (curlen == 1) { 584 cmp = LDMLEXT - *(cur->key); 585 } else { 586 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 587 } 588 } else { 589 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); 590 } 591 if (cmp < 0) { 592 if (prev == NULL) { 593 *first = ext; 594 } else { 595 prev->next = ext; 596 } 597 ext->next = cur; 598 break; 599 } 600 if (cmp == 0) { 601 /* duplicated extension key */ 602 bAdded = FALSE; 603 break; 604 } 605 prev = cur; 606 cur = cur->next; 607 } 608 } 609 610 return bAdded; 611 } 612 613 static void 614 _initializeULanguageTag(ULanguageTag* langtag) { 615 int32_t i; 616 617 langtag->buf = NULL; 618 619 langtag->language = EMPTY; 620 for (i = 0; i < MAXEXTLANG; i++) { 621 langtag->extlang[i] = NULL; 622 } 623 624 langtag->script = EMPTY; 625 langtag->region = EMPTY; 626 627 langtag->variants = NULL; 628 langtag->extensions = NULL; 629 630 langtag->attributes = NULL; 631 632 langtag->grandfathered = EMPTY; 633 langtag->privateuse = EMPTY; 634 } 635 636 #define KEYTYPEDATA "keyTypeData" 637 #define KEYMAP "keyMap" 638 #define TYPEMAP "typeMap" 639 #define TYPEALIAS "typeAlias" 640 #define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */ 641 #define MAX_LDML_KEY_LEN 22 642 #define MAX_LDML_TYPE_LEN 32 643 644 static int32_t 645 _ldmlKeyToBCP47(const char* key, int32_t keyLen, 646 char* bcpKey, int32_t bcpKeyCapacity, 647 UErrorCode *status) { 648 UResourceBundle *rb; 649 char keyBuf[MAX_LDML_KEY_LEN]; 650 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 651 int32_t resultLen = 0; 652 int32_t i; 653 UErrorCode tmpStatus = U_ZERO_ERROR; 654 const UChar *uBcpKey; 655 int32_t bcpKeyLen; 656 657 if (keyLen < 0) { 658 keyLen = (int32_t)uprv_strlen(key); 659 } 660 661 if (keyLen >= sizeof(keyBuf)) { 662 /* no known valid LDML key exceeding 21 */ 663 *status = U_ILLEGAL_ARGUMENT_ERROR; 664 return 0; 665 } 666 667 uprv_memcpy(keyBuf, key, keyLen); 668 keyBuf[keyLen] = 0; 669 670 /* to lower case */ 671 for (i = 0; i < keyLen; i++) { 672 keyBuf[i] = uprv_tolower(keyBuf[i]); 673 } 674 675 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 676 ures_getByKey(rb, KEYMAP, rb, status); 677 678 if (U_FAILURE(*status)) { 679 ures_close(rb); 680 return 0; 681 } 682 683 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus); 684 if (U_SUCCESS(tmpStatus)) { 685 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen); 686 bcpKeyBuf[bcpKeyLen] = 0; 687 resultLen = bcpKeyLen; 688 } else { 689 if (_isLDMLKey(key, keyLen)) { 690 uprv_memcpy(bcpKeyBuf, key, keyLen); 691 bcpKeyBuf[keyLen] = 0; 692 resultLen = keyLen; 693 } else { 694 /* mapping not availabe */ 695 *status = U_ILLEGAL_ARGUMENT_ERROR; 696 } 697 } 698 ures_close(rb); 699 700 if (U_FAILURE(*status)) { 701 return 0; 702 } 703 704 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity)); 705 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status); 706 } 707 708 static int32_t 709 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen, 710 char* key, int32_t keyCapacity, 711 UErrorCode *status) { 712 UResourceBundle *rb; 713 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 714 int32_t resultLen = 0; 715 int32_t i; 716 const char *resKey = NULL; 717 UResourceBundle *mapData; 718 719 if (bcpKeyLen < 0) { 720 bcpKeyLen = (int32_t)uprv_strlen(bcpKey); 721 } 722 723 if (bcpKeyLen >= sizeof(bcpKeyBuf)) { 724 *status = U_ILLEGAL_ARGUMENT_ERROR; 725 return 0; 726 } 727 728 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen); 729 bcpKeyBuf[bcpKeyLen] = 0; 730 731 /* to lower case */ 732 for (i = 0; i < bcpKeyLen; i++) { 733 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]); 734 } 735 736 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 737 ures_getByKey(rb, KEYMAP, rb, status); 738 if (U_FAILURE(*status)) { 739 ures_close(rb); 740 return 0; 741 } 742 743 mapData = ures_getNextResource(rb, NULL, status); 744 while (U_SUCCESS(*status)) { 745 const UChar *uBcpKey; 746 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; 747 int32_t tmpBcpKeyLen; 748 749 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status); 750 if (U_FAILURE(*status)) { 751 break; 752 } 753 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen); 754 tmpBcpKeyBuf[tmpBcpKeyLen] = 0; 755 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) { 756 /* found a matching BCP47 key */ 757 resKey = ures_getKey(mapData); 758 resultLen = (int32_t)uprv_strlen(resKey); 759 break; 760 } 761 if (!ures_hasNext(rb)) { 762 break; 763 } 764 ures_getNextResource(rb, mapData, status); 765 } 766 ures_close(mapData); 767 ures_close(rb); 768 769 if (U_FAILURE(*status)) { 770 return 0; 771 } 772 773 if (resKey == NULL) { 774 resKey = bcpKeyBuf; 775 resultLen = bcpKeyLen; 776 } 777 778 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity)); 779 return u_terminateChars(key, keyCapacity, resultLen, status); 780 } 781 782 static int32_t 783 _ldmlTypeToBCP47(const char* key, int32_t keyLen, 784 const char* type, int32_t typeLen, 785 char* bcpType, int32_t bcpTypeCapacity, 786 UErrorCode *status) { 787 UResourceBundle *rb, *keyTypeData, *typeMapForKey; 788 char keyBuf[MAX_LDML_KEY_LEN]; 789 char typeBuf[MAX_LDML_TYPE_LEN]; 790 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 791 int32_t resultLen = 0; 792 int32_t i; 793 UErrorCode tmpStatus = U_ZERO_ERROR; 794 const UChar *uBcpType, *uCanonicalType; 795 int32_t bcpTypeLen, canonicalTypeLen; 796 UBool isTimezone = FALSE; 797 798 if (keyLen < 0) { 799 keyLen = (int32_t)uprv_strlen(key); 800 } 801 if (keyLen >= sizeof(keyBuf)) { 802 /* no known valid LDML key exceeding 21 */ 803 *status = U_ILLEGAL_ARGUMENT_ERROR; 804 return 0; 805 } 806 uprv_memcpy(keyBuf, key, keyLen); 807 keyBuf[keyLen] = 0; 808 809 /* to lower case */ 810 for (i = 0; i < keyLen; i++) { 811 keyBuf[i] = uprv_tolower(keyBuf[i]); 812 } 813 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 814 isTimezone = TRUE; 815 } 816 817 if (typeLen < 0) { 818 typeLen = (int32_t)uprv_strlen(type); 819 } 820 if (typeLen >= sizeof(typeBuf)) { 821 *status = U_ILLEGAL_ARGUMENT_ERROR; 822 return 0; 823 } 824 825 if (isTimezone) { 826 /* replace '/' with ':' */ 827 for (i = 0; i < typeLen; i++) { 828 if (*(type + i) == '/') { 829 typeBuf[i] = ':'; 830 } else { 831 typeBuf[i] = *(type + i); 832 } 833 } 834 typeBuf[typeLen] = 0; 835 type = &typeBuf[0]; 836 } 837 838 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status); 839 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status); 840 if (U_FAILURE(*status)) { 841 ures_close(rb); 842 ures_close(keyTypeData); 843 return 0; 844 } 845 846 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus); 847 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus); 848 if (U_SUCCESS(tmpStatus)) { 849 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 850 resultLen = bcpTypeLen; 851 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 852 /* is this type alias? */ 853 tmpStatus = U_ZERO_ERROR; 854 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus); 855 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 856 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus); 857 if (U_SUCCESS(tmpStatus)) { 858 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen); 859 if (isTimezone) { 860 /* replace '/' with ':' */ 861 for (i = 0; i < canonicalTypeLen; i++) { 862 if (typeBuf[i] == '/') { 863 typeBuf[i] = ':'; 864 } 865 } 866 } 867 typeBuf[canonicalTypeLen] = 0; 868 869 /* look up the canonical type */ 870 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus); 871 if (U_SUCCESS(tmpStatus)) { 872 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); 873 resultLen = bcpTypeLen; 874 } 875 } 876 if (tmpStatus == U_MISSING_RESOURCE_ERROR) { 877 if (_isLDMLType(type, typeLen)) { 878 uprv_memcpy(bcpTypeBuf, type, typeLen); 879 resultLen = typeLen; 880 } else { 881 /* mapping not availabe */ 882 *status = U_ILLEGAL_ARGUMENT_ERROR; 883 } 884 } 885 } else { 886 *status = tmpStatus; 887 } 888 ures_close(rb); 889 ures_close(typeMapForKey); 890 ures_close(keyTypeData); 891 892 if (U_FAILURE(*status)) { 893 return 0; 894 } 895 896 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity)); 897 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status); 898 } 899 900 static int32_t 901 _bcp47ToLDMLType(const char* key, int32_t keyLen, 902 const char* bcpType, int32_t bcpTypeLen, 903 char* type, int32_t typeCapacity, 904 UErrorCode *status) { 905 UResourceBundle *rb; 906 char keyBuf[MAX_LDML_KEY_LEN]; 907 char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */ 908 int32_t resultLen = 0; 909 int32_t i, typeSize; 910 const char *resType = NULL; 911 UResourceBundle *mapData; 912 UErrorCode tmpStatus = U_ZERO_ERROR; 913 int32_t copyLen; 914 915 if (keyLen < 0) { 916 keyLen = (int32_t)uprv_strlen(key); 917 } 918 919 if (keyLen >= sizeof(keyBuf)) { 920 /* no known valid LDML key exceeding 21 */ 921 *status = U_ILLEGAL_ARGUMENT_ERROR; 922 return 0; 923 } 924 uprv_memcpy(keyBuf, key, keyLen); 925 keyBuf[keyLen] = 0; 926 927 /* to lower case */ 928 for (i = 0; i < keyLen; i++) { 929 keyBuf[i] = uprv_tolower(keyBuf[i]); 930 } 931 932 933 if (bcpTypeLen < 0) { 934 bcpTypeLen = (int32_t)uprv_strlen(bcpType); 935 } 936 937 typeSize = 0; 938 for (i = 0; i < bcpTypeLen; i++) { 939 if (bcpType[i] == SEP) { 940 if (typeSize >= MAX_BCP47_SUBTAG_LEN) { 941 *status = U_ILLEGAL_ARGUMENT_ERROR; 942 return 0; 943 } 944 typeSize = 0; 945 } else { 946 typeSize++; 947 } 948 } 949 950 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen); 951 bcpTypeBuf[bcpTypeLen] = 0; 952 953 /* to lower case */ 954 for (i = 0; i < bcpTypeLen; i++) { 955 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]); 956 } 957 958 rb = ures_openDirect(NULL, KEYTYPEDATA, status); 959 ures_getByKey(rb, TYPEMAP, rb, status); 960 if (U_FAILURE(*status)) { 961 ures_close(rb); 962 return 0; 963 } 964 965 ures_getByKey(rb, keyBuf, rb, &tmpStatus); 966 mapData = ures_getNextResource(rb, NULL, &tmpStatus); 967 while (U_SUCCESS(tmpStatus)) { 968 const UChar *uBcpType; 969 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; 970 int32_t tmpBcpTypeLen; 971 972 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus); 973 if (U_FAILURE(tmpStatus)) { 974 break; 975 } 976 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen); 977 tmpBcpTypeBuf[tmpBcpTypeLen] = 0; 978 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) { 979 /* found a matching BCP47 type */ 980 resType = ures_getKey(mapData); 981 resultLen = (int32_t)uprv_strlen(resType); 982 break; 983 } 984 if (!ures_hasNext(rb)) { 985 break; 986 } 987 ures_getNextResource(rb, mapData, &tmpStatus); 988 } 989 ures_close(mapData); 990 ures_close(rb); 991 992 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) { 993 *status = tmpStatus; 994 return 0; 995 } 996 997 if (resType == NULL) { 998 resType = bcpTypeBuf; 999 resultLen = bcpTypeLen; 1000 } 1001 1002 copyLen = uprv_min(resultLen, typeCapacity); 1003 uprv_memcpy(type, resType, copyLen); 1004 1005 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { 1006 for (i = 0; i < copyLen; i++) { 1007 if (*(type + i) == ':') { 1008 *(type + i) = '/'; 1009 } 1010 } 1011 } 1012 1013 return u_terminateChars(type, typeCapacity, resultLen, status); 1014 } 1015 1016 static int32_t 1017 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1018 char buf[ULOC_LANG_CAPACITY]; 1019 UErrorCode tmpStatus = U_ZERO_ERROR; 1020 int32_t len, i; 1021 int32_t reslen = 0; 1022 1023 if (U_FAILURE(*status)) { 1024 return 0; 1025 } 1026 1027 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); 1028 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1029 if (strict) { 1030 *status = U_ILLEGAL_ARGUMENT_ERROR; 1031 return 0; 1032 } 1033 len = 0; 1034 } 1035 1036 /* Note: returned language code is in lower case letters */ 1037 1038 if (len == 0) { 1039 if (reslen < capacity) { 1040 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 1041 } 1042 reslen += LANG_UND_LEN; 1043 } else if (!_isLanguageSubtag(buf, len)) { 1044 /* invalid language code */ 1045 if (strict) { 1046 *status = U_ILLEGAL_ARGUMENT_ERROR; 1047 return 0; 1048 } 1049 if (reslen < capacity) { 1050 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); 1051 } 1052 reslen += LANG_UND_LEN; 1053 } else { 1054 /* resolve deprecated */ 1055 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 1056 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { 1057 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); 1058 len = (int32_t)uprv_strlen(buf); 1059 break; 1060 } 1061 } 1062 if (reslen < capacity) { 1063 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1064 } 1065 reslen += len; 1066 } 1067 u_terminateChars(appendAt, capacity, reslen, status); 1068 return reslen; 1069 } 1070 1071 static int32_t 1072 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1073 char buf[ULOC_SCRIPT_CAPACITY]; 1074 UErrorCode tmpStatus = U_ZERO_ERROR; 1075 int32_t len; 1076 int32_t reslen = 0; 1077 1078 if (U_FAILURE(*status)) { 1079 return 0; 1080 } 1081 1082 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); 1083 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1084 if (strict) { 1085 *status = U_ILLEGAL_ARGUMENT_ERROR; 1086 } 1087 return 0; 1088 } 1089 1090 if (len > 0) { 1091 if (!_isScriptSubtag(buf, len)) { 1092 /* invalid script code */ 1093 if (strict) { 1094 *status = U_ILLEGAL_ARGUMENT_ERROR; 1095 } 1096 return 0; 1097 } else { 1098 if (reslen < capacity) { 1099 *(appendAt + reslen) = SEP; 1100 } 1101 reslen++; 1102 1103 if (reslen < capacity) { 1104 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1105 } 1106 reslen += len; 1107 } 1108 } 1109 u_terminateChars(appendAt, capacity, reslen, status); 1110 return reslen; 1111 } 1112 1113 static int32_t 1114 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { 1115 char buf[ULOC_COUNTRY_CAPACITY]; 1116 UErrorCode tmpStatus = U_ZERO_ERROR; 1117 int32_t len; 1118 int32_t reslen = 0; 1119 1120 if (U_FAILURE(*status)) { 1121 return 0; 1122 } 1123 1124 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); 1125 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1126 if (strict) { 1127 *status = U_ILLEGAL_ARGUMENT_ERROR; 1128 } 1129 return 0; 1130 } 1131 1132 if (len > 0) { 1133 if (!_isRegionSubtag(buf, len)) { 1134 /* invalid region code */ 1135 if (strict) { 1136 *status = U_ILLEGAL_ARGUMENT_ERROR; 1137 } 1138 return 0; 1139 } else { 1140 if (reslen < capacity) { 1141 *(appendAt + reslen) = SEP; 1142 } 1143 reslen++; 1144 1145 if (reslen < capacity) { 1146 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); 1147 } 1148 reslen += len; 1149 } 1150 } 1151 u_terminateChars(appendAt, capacity, reslen, status); 1152 return reslen; 1153 } 1154 1155 static int32_t 1156 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { 1157 char buf[ULOC_FULLNAME_CAPACITY]; 1158 UErrorCode tmpStatus = U_ZERO_ERROR; 1159 int32_t len, i; 1160 int32_t reslen = 0; 1161 1162 if (U_FAILURE(*status)) { 1163 return 0; 1164 } 1165 1166 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 1167 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1168 if (strict) { 1169 *status = U_ILLEGAL_ARGUMENT_ERROR; 1170 } 1171 return 0; 1172 } 1173 1174 if (len > 0) { 1175 char *p, *pVar; 1176 UBool bNext = TRUE; 1177 VariantListEntry *var; 1178 VariantListEntry *varFirst = NULL; 1179 1180 pVar = NULL; 1181 p = buf; 1182 while (bNext) { 1183 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 1184 if (*p == 0) { 1185 bNext = FALSE; 1186 } else { 1187 *p = 0; /* terminate */ 1188 } 1189 if (pVar == NULL) { 1190 if (strict) { 1191 *status = U_ILLEGAL_ARGUMENT_ERROR; 1192 break; 1193 } 1194 /* ignore empty variant */ 1195 } else { 1196 /* ICU uses upper case letters for variants, but 1197 the canonical format is lowercase in BCP47 */ 1198 for (i = 0; *(pVar + i) != 0; i++) { 1199 *(pVar + i) = uprv_tolower(*(pVar + i)); 1200 } 1201 1202 /* validate */ 1203 if (_isVariantSubtag(pVar, -1)) { 1204 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) { 1205 /* emit the variant to the list */ 1206 var = uprv_malloc(sizeof(VariantListEntry)); 1207 if (var == NULL) { 1208 *status = U_MEMORY_ALLOCATION_ERROR; 1209 break; 1210 } 1211 var->variant = pVar; 1212 if (!_addVariantToList(&varFirst, var)) { 1213 /* duplicated variant */ 1214 uprv_free(var); 1215 if (strict) { 1216 *status = U_ILLEGAL_ARGUMENT_ERROR; 1217 break; 1218 } 1219 } 1220 } else { 1221 /* Special handling for POSIX variant, need to remember that we had it and then */ 1222 /* treat it like an extension later. */ 1223 *hadPosix = TRUE; 1224 } 1225 } else if (strict) { 1226 *status = U_ILLEGAL_ARGUMENT_ERROR; 1227 break; 1228 } else if (_isPrivateuseValueSubtag(pVar, -1)) { 1229 /* Handle private use subtags separately */ 1230 break; 1231 } 1232 } 1233 /* reset variant starting position */ 1234 pVar = NULL; 1235 } else if (pVar == NULL) { 1236 pVar = p; 1237 } 1238 p++; 1239 } 1240 1241 if (U_SUCCESS(*status)) { 1242 if (varFirst != NULL) { 1243 int32_t varLen; 1244 1245 /* write out validated/normalized variants to the target */ 1246 var = varFirst; 1247 while (var != NULL) { 1248 if (reslen < capacity) { 1249 *(appendAt + reslen) = SEP; 1250 } 1251 reslen++; 1252 varLen = (int32_t)uprv_strlen(var->variant); 1253 if (reslen < capacity) { 1254 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); 1255 } 1256 reslen += varLen; 1257 var = var->next; 1258 } 1259 } 1260 } 1261 1262 /* clean up */ 1263 var = varFirst; 1264 while (var != NULL) { 1265 VariantListEntry *tmpVar = var->next; 1266 uprv_free(var); 1267 var = tmpVar; 1268 } 1269 1270 if (U_FAILURE(*status)) { 1271 return 0; 1272 } 1273 } 1274 1275 u_terminateChars(appendAt, capacity, reslen, status); 1276 return reslen; 1277 } 1278 1279 static int32_t 1280 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 1281 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1282 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1283 int32_t attrBufLength = 0; 1284 UBool isAttribute = FALSE; 1285 UEnumeration *keywordEnum = NULL; 1286 int32_t reslen = 0; 1287 1288 keywordEnum = uloc_openKeywords(localeID, status); 1289 if (U_FAILURE(*status) && !hadPosix) { 1290 uenum_close(keywordEnum); 1291 return 0; 1292 } 1293 if (keywordEnum != NULL || hadPosix) { 1294 /* reorder extensions */ 1295 int32_t len; 1296 const char *key; 1297 ExtensionListEntry *firstExt = NULL; 1298 ExtensionListEntry *ext; 1299 AttributeListEntry *firstAttr = NULL; 1300 AttributeListEntry *attr; 1301 char *attrValue; 1302 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 1303 char *pExtBuf = extBuf; 1304 int32_t extBufCapacity = sizeof(extBuf); 1305 const char *bcpKey, *bcpValue; 1306 UErrorCode tmpStatus = U_ZERO_ERROR; 1307 int32_t keylen; 1308 UBool isLDMLKeyword; 1309 1310 while (TRUE) { 1311 isAttribute = FALSE; 1312 key = uenum_next(keywordEnum, NULL, status); 1313 if (key == NULL) { 1314 break; 1315 } 1316 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); 1317 if (U_FAILURE(tmpStatus)) { 1318 if (strict) { 1319 *status = U_ILLEGAL_ARGUMENT_ERROR; 1320 break; 1321 } 1322 /* ignore this keyword */ 1323 tmpStatus = U_ZERO_ERROR; 1324 continue; 1325 } 1326 1327 keylen = (int32_t)uprv_strlen(key); 1328 isLDMLKeyword = (keylen > 1); 1329 1330 /* special keyword used for representing Unicode locale attributes */ 1331 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { 1332 isAttribute = TRUE; 1333 if (len > 0) { 1334 int32_t i = 0; 1335 while (TRUE) { 1336 attrBufLength = 0; 1337 for (; i < len; i++) { 1338 if (buf[i] != '-') { 1339 attrBuf[attrBufLength++] = buf[i]; 1340 } else { 1341 i++; 1342 break; 1343 } 1344 } 1345 if (attrBufLength > 0) { 1346 attrBuf[attrBufLength] = 0; 1347 1348 } else if (i >= len){ 1349 break; 1350 } 1351 1352 /* create AttributeListEntry */ 1353 attr = uprv_malloc(sizeof(AttributeListEntry)); 1354 if (attr == NULL) { 1355 *status = U_MEMORY_ALLOCATION_ERROR; 1356 break; 1357 } 1358 attrValue = uprv_malloc(attrBufLength + 1); 1359 if (attrValue == NULL) { 1360 *status = U_MEMORY_ALLOCATION_ERROR; 1361 break; 1362 } 1363 uprv_strcpy(attrValue, attrBuf); 1364 attr->attribute = attrValue; 1365 1366 if (!_addAttributeToList(&firstAttr, attr)) { 1367 uprv_free(attr); 1368 uprv_free(attrValue); 1369 if (strict) { 1370 *status = U_ILLEGAL_ARGUMENT_ERROR; 1371 break; 1372 } 1373 } 1374 } 1375 } 1376 } else if (isLDMLKeyword) { 1377 int32_t modKeyLen; 1378 1379 /* transform key and value to bcp47 style */ 1380 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus); 1381 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1382 if (strict) { 1383 *status = U_ILLEGAL_ARGUMENT_ERROR; 1384 break; 1385 } 1386 tmpStatus = U_ZERO_ERROR; 1387 continue; 1388 } 1389 1390 bcpKey = pExtBuf; 1391 pExtBuf += (modKeyLen + 1); 1392 extBufCapacity -= (modKeyLen + 1); 1393 1394 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus); 1395 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1396 if (strict) { 1397 *status = U_ILLEGAL_ARGUMENT_ERROR; 1398 break; 1399 } 1400 tmpStatus = U_ZERO_ERROR; 1401 continue; 1402 } 1403 bcpValue = pExtBuf; 1404 pExtBuf += (len + 1); 1405 extBufCapacity -= (len + 1); 1406 } else { 1407 if (*key == PRIVATEUSE) { 1408 if (!_isPrivateuseValueSubtags(buf, len)) { 1409 if (strict) { 1410 *status = U_ILLEGAL_ARGUMENT_ERROR; 1411 break; 1412 } 1413 continue; 1414 } 1415 } else { 1416 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { 1417 if (strict) { 1418 *status = U_ILLEGAL_ARGUMENT_ERROR; 1419 break; 1420 } 1421 continue; 1422 } 1423 } 1424 bcpKey = key; 1425 if ((len + 1) < extBufCapacity) { 1426 uprv_memcpy(pExtBuf, buf, len); 1427 bcpValue = pExtBuf; 1428 1429 pExtBuf += len; 1430 1431 *pExtBuf = 0; 1432 pExtBuf++; 1433 1434 extBufCapacity -= (len + 1); 1435 } else { 1436 *status = U_ILLEGAL_ARGUMENT_ERROR; 1437 break; 1438 } 1439 } 1440 1441 if (!isAttribute) { 1442 /* create ExtensionListEntry */ 1443 ext = uprv_malloc(sizeof(ExtensionListEntry)); 1444 if (ext == NULL) { 1445 *status = U_MEMORY_ALLOCATION_ERROR; 1446 break; 1447 } 1448 ext->key = bcpKey; 1449 ext->value = bcpValue; 1450 1451 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1452 uprv_free(ext); 1453 if (strict) { 1454 *status = U_ILLEGAL_ARGUMENT_ERROR; 1455 break; 1456 } 1457 } 1458 } 1459 } 1460 1461 /* Special handling for POSIX variant - add the keywords for POSIX */ 1462 if (hadPosix) { 1463 /* create ExtensionListEntry for POSIX */ 1464 ext = uprv_malloc(sizeof(ExtensionListEntry)); 1465 if (ext == NULL) { 1466 *status = U_MEMORY_ALLOCATION_ERROR; 1467 goto cleanup; 1468 } 1469 ext->key = POSIX_KEY; 1470 ext->value = POSIX_VALUE; 1471 1472 if (!_addExtensionToList(&firstExt, ext, TRUE)) { 1473 uprv_free(ext); 1474 } 1475 } 1476 1477 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { 1478 UBool startLDMLExtension = FALSE; 1479 1480 attr = firstAttr; 1481 ext = firstExt; 1482 do { 1483 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) { 1484 /* write LDML singleton extension */ 1485 if (reslen < capacity) { 1486 *(appendAt + reslen) = SEP; 1487 } 1488 reslen++; 1489 if (reslen < capacity) { 1490 *(appendAt + reslen) = LDMLEXT; 1491 } 1492 reslen++; 1493 1494 startLDMLExtension = TRUE; 1495 } 1496 1497 /* write out the sorted BCP47 attributes, extensions and private use */ 1498 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) { 1499 if (reslen < capacity) { 1500 *(appendAt + reslen) = SEP; 1501 } 1502 reslen++; 1503 len = (int32_t)uprv_strlen(ext->key); 1504 if (reslen < capacity) { 1505 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); 1506 } 1507 reslen += len; 1508 if (reslen < capacity) { 1509 *(appendAt + reslen) = SEP; 1510 } 1511 reslen++; 1512 len = (int32_t)uprv_strlen(ext->value); 1513 if (reslen < capacity) { 1514 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); 1515 } 1516 reslen += len; 1517 1518 ext = ext->next; 1519 } else if (attr) { 1520 /* write the value for the attributes */ 1521 if (reslen < capacity) { 1522 *(appendAt + reslen) = SEP; 1523 } 1524 reslen++; 1525 len = (int32_t)uprv_strlen(attr->attribute); 1526 if (reslen < capacity) { 1527 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); 1528 } 1529 reslen += len; 1530 1531 attr = attr->next; 1532 } 1533 } while (attr != NULL || ext != NULL); 1534 } 1535 cleanup: 1536 /* clean up */ 1537 ext = firstExt; 1538 while (ext != NULL) { 1539 ExtensionListEntry *tmpExt = ext->next; 1540 uprv_free(ext); 1541 ext = tmpExt; 1542 } 1543 1544 attr = firstAttr; 1545 while (attr != NULL) { 1546 AttributeListEntry *tmpAttr = attr->next; 1547 char *pValue = (char *)attr->attribute; 1548 uprv_free(pValue); 1549 uprv_free(attr); 1550 attr = tmpAttr; 1551 } 1552 1553 uenum_close(keywordEnum); 1554 1555 if (U_FAILURE(*status)) { 1556 return 0; 1557 } 1558 } 1559 1560 return u_terminateChars(appendAt, capacity, reslen, status); 1561 } 1562 1563 /** 1564 * Append keywords parsed from LDML extension value 1565 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} 1566 * Note: char* buf is used for storing keywords 1567 */ 1568 static void 1569 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { 1570 const char *p, *pNext, *pSep, *pTmp, *pTmpStart; 1571 const char *pBcpKey, *pBcpType; 1572 const char *pKey, *pType; 1573 int32_t bcpKeyLen = 0, bcpTypeLen; 1574 ExtensionListEntry *kwd, *nextKwd; 1575 ExtensionListEntry *kwdFirst = NULL; 1576 int32_t bufIdx = 0; 1577 int32_t len; 1578 UBool variantExists = *posixVariant; 1579 UBool searchFurther; 1580 1581 /* Reset the posixVariant value */ 1582 *posixVariant = FALSE; 1583 1584 pNext = ldmlext; 1585 pBcpKey = pBcpType = NULL; 1586 while (pNext) { 1587 p = pSep = pNext; 1588 1589 /* locate next separator char */ 1590 while (*pSep) { 1591 if (*pSep == SEP) { 1592 searchFurther = FALSE; 1593 if (pBcpKey != NULL) { 1594 pTmpStart = (pSep + 1); 1595 pTmp = pTmpStart; 1596 /* Look at the next subtag and see if it is part of the previous subtag or the start of new keyword */ 1597 while (*pTmp) { 1598 if (*pTmp == SEP || *(pTmp + 1) == 0) { 1599 if (!_isLDMLKey(pTmpStart, (int32_t)(pTmp - pTmpStart))) { 1600 searchFurther = TRUE; 1601 } 1602 break; 1603 } 1604 pTmp++; 1605 } 1606 } 1607 if (searchFurther) { 1608 pSep++; 1609 continue; 1610 } else { 1611 break; 1612 } 1613 } 1614 pSep++; 1615 } 1616 if (*pSep == 0) { 1617 /* last subtag */ 1618 pNext = NULL; 1619 } else { 1620 pNext = pSep + 1; 1621 } 1622 1623 if (pBcpKey == NULL) { 1624 pBcpKey = p; 1625 bcpKeyLen = (int32_t)(pSep - p); 1626 } else { 1627 pBcpType = p; 1628 bcpTypeLen = (int32_t)(pSep - p); 1629 1630 /* BCP key to locale key */ 1631 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1632 if (U_FAILURE(*status)) { 1633 goto cleanup; 1634 } 1635 pKey = buf + bufIdx; 1636 bufIdx += len; 1637 *(buf + bufIdx) = 0; 1638 bufIdx++; 1639 1640 /* BCP type to locale type */ 1641 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status); 1642 if (U_FAILURE(*status)) { 1643 goto cleanup; 1644 } 1645 pType = buf + bufIdx; 1646 bufIdx += len; 1647 *(buf + bufIdx) = 0; 1648 bufIdx++; 1649 1650 /* Special handling for u-va-posix, since we want to treat this as a variant, not */ 1651 /* as a keyword. */ 1652 1653 if (!variantExists && !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) { 1654 *posixVariant = TRUE; 1655 } else { 1656 /* create an ExtensionListEntry for this keyword */ 1657 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1658 if (kwd == NULL) { 1659 *status = U_MEMORY_ALLOCATION_ERROR; 1660 goto cleanup; 1661 } 1662 1663 kwd->key = pKey; 1664 kwd->value = pType; 1665 1666 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1667 *status = U_ILLEGAL_ARGUMENT_ERROR; 1668 uprv_free(kwd); 1669 goto cleanup; 1670 } 1671 } 1672 1673 /* for next pair */ 1674 pBcpKey = NULL; 1675 pBcpType = NULL; 1676 } 1677 } 1678 1679 if (pBcpKey != NULL) { 1680 *status = U_ILLEGAL_ARGUMENT_ERROR; 1681 goto cleanup; 1682 } 1683 1684 kwd = kwdFirst; 1685 while (kwd != NULL) { 1686 nextKwd = kwd->next; 1687 _addExtensionToList(appendTo, kwd, FALSE); 1688 kwd = nextKwd; 1689 } 1690 1691 return; 1692 1693 cleanup: 1694 kwd = kwdFirst; 1695 while (kwd != NULL) { 1696 nextKwd = kwd->next; 1697 uprv_free(kwd); 1698 kwd = nextKwd; 1699 } 1700 } 1701 1702 1703 static int32_t 1704 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { 1705 int32_t reslen = 0; 1706 int32_t i, n; 1707 int32_t len; 1708 ExtensionListEntry *kwdFirst = NULL; 1709 ExtensionListEntry *kwd; 1710 AttributeListEntry *attrFirst = NULL; 1711 AttributeListEntry *attr; 1712 const char *key, *type; 1713 char *kwdBuf = NULL; 1714 int32_t kwdBufLength = capacity; 1715 UBool posixVariant = FALSE; 1716 1717 if (U_FAILURE(*status)) { 1718 return 0; 1719 } 1720 1721 kwdBuf = (char *)uprv_malloc(kwdBufLength); 1722 if (kwdBuf == NULL) { 1723 *status = U_MEMORY_ALLOCATION_ERROR; 1724 return 0; 1725 } 1726 1727 /* Determine if variants already exists */ 1728 if (ultag_getVariantsSize(langtag)) { 1729 posixVariant = TRUE; 1730 } 1731 1732 n = ultag_getExtensionsSize(langtag); 1733 1734 /* resolve locale keywords and reordering keys */ 1735 for (i = 0; i < n; i++) { 1736 key = ultag_getExtensionKey(langtag, i); 1737 type = ultag_getExtensionValue(langtag, i); 1738 if (*key == LDMLEXT) { 1739 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); 1740 if (U_FAILURE(*status)) { 1741 break; 1742 } 1743 } else { 1744 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1745 if (kwd == NULL) { 1746 *status = U_MEMORY_ALLOCATION_ERROR; 1747 break; 1748 } 1749 kwd->key = key; 1750 kwd->value = type; 1751 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1752 uprv_free(kwd); 1753 *status = U_ILLEGAL_ARGUMENT_ERROR; 1754 break; 1755 } 1756 } 1757 } 1758 1759 if (U_SUCCESS(*status)) { 1760 type = ultag_getPrivateUse(langtag); 1761 if ((int32_t)uprv_strlen(type) > 0) { 1762 /* add private use as a keyword */ 1763 kwd = uprv_malloc(sizeof(ExtensionListEntry)); 1764 if (kwd == NULL) { 1765 *status = U_MEMORY_ALLOCATION_ERROR; 1766 } else { 1767 kwd->key = PRIVATEUSE_KEY; 1768 kwd->value = type; 1769 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { 1770 uprv_free(kwd); 1771 *status = U_ILLEGAL_ARGUMENT_ERROR; 1772 } 1773 } 1774 } 1775 } 1776 1777 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ 1778 1779 if (U_SUCCESS(*status) && posixVariant) { 1780 len = (int32_t) uprv_strlen(_POSIX); 1781 if (reslen < capacity) { 1782 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); 1783 } 1784 reslen += len; 1785 } 1786 1787 attrFirst = langtag->attributes; 1788 if (U_SUCCESS(*status) && (kwdFirst != NULL || attrFirst != NULL)) { 1789 /* write out the sorted keywords */ 1790 UBool firstValue = TRUE; 1791 UBool firstAttr = TRUE; 1792 kwd = kwdFirst; 1793 attr = attrFirst; 1794 do { 1795 if (reslen < capacity) { 1796 if (firstValue) { 1797 /* '@' */ 1798 *(appendAt + reslen) = LOCALE_EXT_SEP; 1799 firstValue = FALSE; 1800 } else if (attr) { 1801 /* '-' */ 1802 *(appendAt + reslen) = SEP; 1803 }else { 1804 /* ';' */ 1805 *(appendAt + reslen) = LOCALE_KEYWORD_SEP; 1806 } 1807 } 1808 reslen++; 1809 1810 if (attr) { 1811 if (firstAttr) { 1812 len = (int32_t)uprv_strlen(LOCALE_ATTRIBUTE_KEY); 1813 if (reslen < capacity) { 1814 uprv_memcpy(appendAt + reslen, LOCALE_ATTRIBUTE_KEY, uprv_min(len, capacity - reslen)); 1815 } 1816 reslen += len; 1817 1818 /* '=' */ 1819 if (reslen < capacity) { 1820 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; 1821 } 1822 1823 reslen++; 1824 1825 firstAttr = FALSE; 1826 } 1827 1828 len = (int32_t)uprv_strlen(attr->attribute); 1829 if (reslen < capacity) { 1830 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); 1831 } 1832 reslen += len; 1833 1834 attr = attr->next; 1835 } else if (kwd) { 1836 /* key */ 1837 len = (int32_t)uprv_strlen(kwd->key); 1838 if (reslen < capacity) { 1839 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); 1840 } 1841 reslen += len; 1842 1843 /* '=' */ 1844 if (reslen < capacity) { 1845 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; 1846 } 1847 reslen++; 1848 1849 /* type */ 1850 len = (int32_t)uprv_strlen(kwd->value); 1851 if (reslen < capacity) { 1852 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); 1853 } 1854 reslen += len; 1855 1856 kwd = kwd->next; 1857 } 1858 } while (kwd || attr); 1859 } 1860 1861 /* clean up */ 1862 kwd = kwdFirst; 1863 while (kwd != NULL) { 1864 ExtensionListEntry *tmpKwd = kwd->next; 1865 uprv_free(kwd); 1866 kwd = tmpKwd; 1867 } 1868 1869 uprv_free(kwdBuf); 1870 1871 if (U_FAILURE(*status)) { 1872 return 0; 1873 } 1874 1875 return u_terminateChars(appendAt, capacity, reslen, status); 1876 } 1877 1878 static int32_t 1879 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { 1880 char buf[ULOC_FULLNAME_CAPACITY]; 1881 char tmpAppend[ULOC_FULLNAME_CAPACITY]; 1882 UErrorCode tmpStatus = U_ZERO_ERROR; 1883 int32_t len, i; 1884 int32_t reslen = 0; 1885 1886 if (U_FAILURE(*status)) { 1887 return 0; 1888 } 1889 1890 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); 1891 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { 1892 if (strict) { 1893 *status = U_ILLEGAL_ARGUMENT_ERROR; 1894 } 1895 return 0; 1896 } 1897 1898 if (len > 0) { 1899 char *p, *pPriv; 1900 UBool bNext = TRUE; 1901 UBool firstValue = TRUE; 1902 UBool writeValue; 1903 1904 pPriv = NULL; 1905 p = buf; 1906 while (bNext) { 1907 writeValue = FALSE; 1908 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { 1909 if (*p == 0) { 1910 bNext = FALSE; 1911 } else { 1912 *p = 0; /* terminate */ 1913 } 1914 if (pPriv != NULL) { 1915 /* Private use in the canonical format is lowercase in BCP47 */ 1916 for (i = 0; *(pPriv + i) != 0; i++) { 1917 *(pPriv + i) = uprv_tolower(*(pPriv + i)); 1918 } 1919 1920 /* validate */ 1921 if (_isPrivateuseValueSubtag(pPriv, -1)) { 1922 if (firstValue) { 1923 if (!_isVariantSubtag(pPriv, -1)) { 1924 writeValue = TRUE; 1925 } 1926 } else { 1927 writeValue = TRUE; 1928 } 1929 } else if (strict) { 1930 *status = U_ILLEGAL_ARGUMENT_ERROR; 1931 break; 1932 } else { 1933 break; 1934 } 1935 1936 if (writeValue) { 1937 if (reslen < capacity) { 1938 tmpAppend[reslen++] = SEP; 1939 } 1940 1941 if (firstValue) { 1942 if (reslen < capacity) { 1943 tmpAppend[reslen++] = *PRIVATEUSE_KEY; 1944 } 1945 1946 if (reslen < capacity) { 1947 tmpAppend[reslen++] = SEP; 1948 } 1949 1950 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); 1951 if (reslen < capacity) { 1952 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); 1953 } 1954 reslen += len; 1955 1956 if (reslen < capacity) { 1957 tmpAppend[reslen++] = SEP; 1958 } 1959 1960 firstValue = FALSE; 1961 } 1962 1963 len = (int32_t)uprv_strlen(pPriv); 1964 if (reslen < capacity) { 1965 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); 1966 } 1967 reslen += len; 1968 } 1969 } 1970 /* reset private use starting position */ 1971 pPriv = NULL; 1972 } else if (pPriv == NULL) { 1973 pPriv = p; 1974 } 1975 p++; 1976 } 1977 1978 if (U_FAILURE(*status)) { 1979 return 0; 1980 } 1981 } 1982 1983 if (U_SUCCESS(*status)) { 1984 len = reslen; 1985 if (reslen < capacity) { 1986 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); 1987 } 1988 } 1989 1990 u_terminateChars(appendAt, capacity, reslen, status); 1991 1992 return reslen; 1993 } 1994 1995 /* 1996 * ------------------------------------------------- 1997 * 1998 * ultag_ functions 1999 * 2000 * ------------------------------------------------- 2001 */ 2002 2003 /* Bit flags used by the parser */ 2004 #define LANG 0x0001 2005 #define EXTL 0x0002 2006 #define SCRT 0x0004 2007 #define REGN 0x0008 2008 #define VART 0x0010 2009 #define EXTS 0x0020 2010 #define EXTV 0x0040 2011 #define PRIV 0x0080 2012 #define ATTR 0x0100 2013 2014 static ULanguageTag* 2015 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { 2016 ULanguageTag *t; 2017 char *tagBuf; 2018 int16_t next; 2019 char *pSubtag, *pNext, *pLastGoodPosition; 2020 int32_t subtagLen; 2021 int32_t extlangIdx; 2022 ExtensionListEntry *pExtension; 2023 AttributeListEntry *pAttribute; 2024 char *pExtValueSubtag, *pExtValueSubtagEnd; 2025 char *pAttrValue; 2026 int32_t i; 2027 UBool isLDMLExtension, reqLDMLType, privateuseVar = FALSE; 2028 2029 if (parsedLen != NULL) { 2030 *parsedLen = 0; 2031 } 2032 2033 if (U_FAILURE(*status)) { 2034 return NULL; 2035 } 2036 2037 if (tagLen < 0) { 2038 tagLen = (int32_t)uprv_strlen(tag); 2039 } 2040 2041 /* copy the entire string */ 2042 tagBuf = (char*)uprv_malloc(tagLen + 1); 2043 if (tagBuf == NULL) { 2044 *status = U_MEMORY_ALLOCATION_ERROR; 2045 return NULL; 2046 } 2047 uprv_memcpy(tagBuf, tag, tagLen); 2048 *(tagBuf + tagLen) = 0; 2049 2050 /* create a ULanguageTag */ 2051 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); 2052 if (t == NULL) { 2053 uprv_free(tagBuf); 2054 *status = U_MEMORY_ALLOCATION_ERROR; 2055 return NULL; 2056 } 2057 _initializeULanguageTag(t); 2058 t->buf = tagBuf; 2059 2060 if (tagLen < MINLEN) { 2061 /* the input tag is too short - return empty ULanguageTag */ 2062 return t; 2063 } 2064 2065 /* check if the tag is grandfathered */ 2066 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { 2067 if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) { 2068 /* a grandfathered tag is always longer than its preferred mapping */ 2069 int32_t newTagLength = uprv_strlen(GRANDFATHERED[i+1]); 2070 if (tagLen < newTagLength) { 2071 uprv_free(tagBuf); 2072 tagBuf = (char*)uprv_malloc(newTagLength + 1); 2073 if (tagBuf == NULL) { 2074 *status = U_MEMORY_ALLOCATION_ERROR; 2075 return NULL; 2076 } 2077 t->buf = tagBuf; 2078 tagLen = newTagLength; 2079 } 2080 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); 2081 2082 break; 2083 } 2084 } 2085 2086 /* 2087 * langtag = language 2088 * ["-" script] 2089 * ["-" region] 2090 * *("-" variant) 2091 * *("-" extension) 2092 * ["-" privateuse] 2093 */ 2094 2095 next = LANG | PRIV; 2096 pNext = pLastGoodPosition = tagBuf; 2097 extlangIdx = 0; 2098 pExtension = NULL; 2099 pExtValueSubtag = NULL; 2100 pExtValueSubtagEnd = NULL; 2101 pAttribute = NULL; 2102 pAttrValue = NULL; 2103 isLDMLExtension = FALSE; 2104 reqLDMLType = FALSE; 2105 2106 while (pNext) { 2107 char *pSep; 2108 2109 pSubtag = pNext; 2110 2111 /* locate next separator char */ 2112 pSep = pSubtag; 2113 while (*pSep) { 2114 if (*pSep == SEP) { 2115 break; 2116 } 2117 pSep++; 2118 } 2119 if (*pSep == 0) { 2120 /* last subtag */ 2121 pNext = NULL; 2122 } else { 2123 pNext = pSep + 1; 2124 } 2125 subtagLen = (int32_t)(pSep - pSubtag); 2126 2127 if (next & LANG) { 2128 if (_isLanguageSubtag(pSubtag, subtagLen)) { 2129 *pSep = 0; /* terminate */ 2130 t->language = T_CString_toLowerCase(pSubtag); 2131 2132 pLastGoodPosition = pSep; 2133 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 2134 continue; 2135 } 2136 } 2137 if (next & EXTL) { 2138 if (_isExtlangSubtag(pSubtag, subtagLen)) { 2139 *pSep = 0; 2140 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); 2141 2142 pLastGoodPosition = pSep; 2143 if (extlangIdx < 3) { 2144 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; 2145 } else { 2146 next = SCRT | REGN | VART | EXTS | PRIV; 2147 } 2148 continue; 2149 } 2150 } 2151 if (next & SCRT) { 2152 if (_isScriptSubtag(pSubtag, subtagLen)) { 2153 char *p = pSubtag; 2154 2155 *pSep = 0; 2156 2157 /* to title case */ 2158 *p = uprv_toupper(*p); 2159 p++; 2160 for (; *p; p++) { 2161 *p = uprv_tolower(*p); 2162 } 2163 2164 t->script = pSubtag; 2165 2166 pLastGoodPosition = pSep; 2167 next = REGN | VART | EXTS | PRIV; 2168 continue; 2169 } 2170 } 2171 if (next & REGN) { 2172 if (_isRegionSubtag(pSubtag, subtagLen)) { 2173 *pSep = 0; 2174 t->region = T_CString_toUpperCase(pSubtag); 2175 2176 pLastGoodPosition = pSep; 2177 next = VART | EXTS | PRIV; 2178 continue; 2179 } 2180 } 2181 if (next & VART) { 2182 if (_isVariantSubtag(pSubtag, subtagLen) || 2183 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { 2184 VariantListEntry *var; 2185 UBool isAdded; 2186 2187 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); 2188 if (var == NULL) { 2189 *status = U_MEMORY_ALLOCATION_ERROR; 2190 goto error; 2191 } 2192 *pSep = 0; 2193 var->variant = T_CString_toUpperCase(pSubtag); 2194 isAdded = _addVariantToList(&(t->variants), var); 2195 if (!isAdded) { 2196 /* duplicated variant entry */ 2197 uprv_free(var); 2198 break; 2199 } 2200 pLastGoodPosition = pSep; 2201 next = VART | EXTS | PRIV; 2202 continue; 2203 } 2204 } 2205 if (next & EXTS) { 2206 if (_isExtensionSingleton(pSubtag, subtagLen)) { 2207 if (pExtension != NULL) { 2208 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2209 /* the previous extension is incomplete */ 2210 uprv_free(pExtension); 2211 pExtension = NULL; 2212 break; 2213 } 2214 2215 /* terminate the previous extension value */ 2216 *pExtValueSubtagEnd = 0; 2217 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2218 2219 /* insert the extension to the list */ 2220 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2221 pLastGoodPosition = pExtValueSubtagEnd; 2222 } else { 2223 /* stop parsing here */ 2224 uprv_free(pExtension); 2225 pExtension = NULL; 2226 break; 2227 } 2228 2229 if (isLDMLExtension && reqLDMLType) { 2230 /* incomplete LDML extension key and type pair */ 2231 pExtension = NULL; 2232 break; 2233 } 2234 } 2235 2236 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT); 2237 2238 /* create a new extension */ 2239 pExtension = uprv_malloc(sizeof(ExtensionListEntry)); 2240 if (pExtension == NULL) { 2241 *status = U_MEMORY_ALLOCATION_ERROR; 2242 goto error; 2243 } 2244 *pSep = 0; 2245 pExtension->key = T_CString_toLowerCase(pSubtag); 2246 pExtension->value = NULL; /* will be set later */ 2247 2248 /* 2249 * reset the start and the end location of extension value 2250 * subtags for this extension 2251 */ 2252 pExtValueSubtag = NULL; 2253 pExtValueSubtagEnd = NULL; 2254 2255 next = EXTV; 2256 continue; 2257 } 2258 } 2259 if (next & EXTV) { 2260 if (_isExtensionSubtag(pSubtag, subtagLen)) { 2261 if (isLDMLExtension) { 2262 if (reqLDMLType) { 2263 /* already saw an LDML key */ 2264 if (!_isLDMLType(pSubtag, subtagLen)) { 2265 /* stop parsing here and let the valid LDML extension key/type 2266 pairs processed by the code out of this while loop */ 2267 break; 2268 } 2269 pExtValueSubtagEnd = pSep; 2270 reqLDMLType = FALSE; 2271 next = EXTS | EXTV | PRIV; 2272 } else { 2273 /* LDML key */ 2274 if (!_isLDMLKey(pSubtag, subtagLen)) { 2275 /* May be part of incomplete type */ 2276 if (pExtValueSubtag != NULL) { 2277 if (_isLDMLType(pSubtag, subtagLen)) { 2278 pExtValueSubtagEnd = pSep; 2279 reqLDMLType = FALSE; 2280 next = EXTS | EXTV | PRIV; 2281 } 2282 } else if (pExtValueSubtag == NULL && _isAttributeSubtag(pSubtag, subtagLen)) { 2283 /* Get attribute */ 2284 next = ATTR; 2285 } else { 2286 /* stop parsing here and let the valid LDML extension key/type 2287 pairs processed by the code out of this while loop */ 2288 break; 2289 } 2290 } else { 2291 reqLDMLType = TRUE; 2292 next = EXTV; 2293 } 2294 } 2295 } else { 2296 /* Mark the end of this subtag */ 2297 pExtValueSubtagEnd = pSep; 2298 next = EXTS | EXTV | PRIV; 2299 } 2300 2301 if (next != ATTR) { 2302 if (pExtValueSubtag == NULL) { 2303 /* if the start postion of this extension's value is not yet, 2304 this one is the first value subtag */ 2305 pExtValueSubtag = pSubtag; 2306 } 2307 2308 continue; 2309 } 2310 } 2311 } 2312 if (next & PRIV) { 2313 if (uprv_tolower(*pSubtag) == PRIVATEUSE) { 2314 char *pPrivuseVal; 2315 2316 if (pExtension != NULL) { 2317 /* Process the last extension */ 2318 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2319 /* the previous extension is incomplete */ 2320 uprv_free(pExtension); 2321 pExtension = NULL; 2322 break; 2323 } else { 2324 /* terminate the previous extension value */ 2325 *pExtValueSubtagEnd = 0; 2326 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2327 2328 /* insert the extension to the list */ 2329 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2330 pLastGoodPosition = pExtValueSubtagEnd; 2331 pExtension = NULL; 2332 } else { 2333 /* stop parsing here */ 2334 uprv_free(pExtension); 2335 pExtension = NULL; 2336 break; 2337 } 2338 } 2339 } 2340 2341 /* The rest of part will be private use value subtags */ 2342 if (pNext == NULL) { 2343 /* empty private use subtag */ 2344 break; 2345 } 2346 /* back up the private use value start position */ 2347 pPrivuseVal = pNext; 2348 2349 /* validate private use value subtags */ 2350 while (pNext) { 2351 pSubtag = pNext; 2352 pSep = pSubtag; 2353 while (*pSep) { 2354 if (*pSep == SEP) { 2355 break; 2356 } 2357 pSep++; 2358 } 2359 if (*pSep == 0) { 2360 /* last subtag */ 2361 pNext = NULL; 2362 } else { 2363 pNext = pSep + 1; 2364 } 2365 subtagLen = (int32_t)(pSep - pSubtag); 2366 2367 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { 2368 *pSep = 0; 2369 next = VART; 2370 privateuseVar = TRUE; 2371 break; 2372 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { 2373 pLastGoodPosition = pSep; 2374 } else { 2375 break; 2376 } 2377 } 2378 2379 if (next == VART) { 2380 continue; 2381 } 2382 2383 if (pLastGoodPosition - pPrivuseVal > 0) { 2384 *pLastGoodPosition = 0; 2385 t->privateuse = T_CString_toLowerCase(pPrivuseVal); 2386 } 2387 /* No more subtags, exiting the parse loop */ 2388 break; 2389 } 2390 break; 2391 } 2392 2393 if (next & ATTR) { 2394 /* create a new attribute */ 2395 pAttribute = uprv_malloc(sizeof(AttributeListEntry)); 2396 if (pAttribute == NULL) { 2397 *status = U_MEMORY_ALLOCATION_ERROR; 2398 goto error; 2399 } 2400 2401 *pSep = 0; 2402 pAttribute->attribute =T_CString_toLowerCase(pSubtag); 2403 2404 if (!_addAttributeToList(&(t->attributes), pAttribute)) { 2405 uprv_free(pAttribute); 2406 } 2407 2408 next = EXTS | EXTV | PRIV; 2409 continue; 2410 } 2411 /* If we fell through here, it means this subtag is illegal - quit parsing */ 2412 break; 2413 } 2414 2415 if (pExtension != NULL) { 2416 /* Process the last extension */ 2417 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { 2418 /* the previous extension is incomplete */ 2419 uprv_free(pExtension); 2420 } else { 2421 /* terminate the previous extension value */ 2422 *pExtValueSubtagEnd = 0; 2423 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); 2424 /* insert the extension to the list */ 2425 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { 2426 pLastGoodPosition = pExtValueSubtagEnd; 2427 } else { 2428 uprv_free(pExtension); 2429 } 2430 } 2431 } 2432 2433 if (parsedLen != NULL) { 2434 *parsedLen = (int32_t)(pLastGoodPosition - t->buf); 2435 } 2436 2437 return t; 2438 2439 error: 2440 uprv_free(t); 2441 return NULL; 2442 } 2443 2444 static void 2445 ultag_close(ULanguageTag* langtag) { 2446 2447 if (langtag == NULL) { 2448 return; 2449 } 2450 2451 uprv_free(langtag->buf); 2452 2453 if (langtag->variants) { 2454 VariantListEntry *curVar = langtag->variants; 2455 while (curVar) { 2456 VariantListEntry *nextVar = curVar->next; 2457 uprv_free(curVar); 2458 curVar = nextVar; 2459 } 2460 } 2461 2462 if (langtag->extensions) { 2463 ExtensionListEntry *curExt = langtag->extensions; 2464 while (curExt) { 2465 ExtensionListEntry *nextExt = curExt->next; 2466 uprv_free(curExt); 2467 curExt = nextExt; 2468 } 2469 } 2470 2471 if (langtag->attributes) { 2472 AttributeListEntry *curAttr = langtag->attributes; 2473 while (curAttr) { 2474 AttributeListEntry *nextAttr = curAttr->next; 2475 uprv_free(curAttr); 2476 curAttr = nextAttr; 2477 } 2478 } 2479 2480 uprv_free(langtag); 2481 } 2482 2483 static const char* 2484 ultag_getLanguage(const ULanguageTag* langtag) { 2485 return langtag->language; 2486 } 2487 2488 #if 0 2489 static const char* 2490 ultag_getJDKLanguage(const ULanguageTag* langtag) { 2491 int32_t i; 2492 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { 2493 if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { 2494 return DEPRECATEDLANGS[i + 1]; 2495 } 2496 } 2497 return langtag->language; 2498 } 2499 #endif 2500 2501 static const char* 2502 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { 2503 if (idx >= 0 && idx < MAXEXTLANG) { 2504 return langtag->extlang[idx]; 2505 } 2506 return NULL; 2507 } 2508 2509 static int32_t 2510 ultag_getExtlangSize(const ULanguageTag* langtag) { 2511 int32_t size = 0; 2512 int32_t i; 2513 for (i = 0; i < MAXEXTLANG; i++) { 2514 if (langtag->extlang[i]) { 2515 size++; 2516 } 2517 } 2518 return size; 2519 } 2520 2521 static const char* 2522 ultag_getScript(const ULanguageTag* langtag) { 2523 return langtag->script; 2524 } 2525 2526 static const char* 2527 ultag_getRegion(const ULanguageTag* langtag) { 2528 return langtag->region; 2529 } 2530 2531 static const char* 2532 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { 2533 const char *var = NULL; 2534 VariantListEntry *cur = langtag->variants; 2535 int32_t i = 0; 2536 while (cur) { 2537 if (i == idx) { 2538 var = cur->variant; 2539 break; 2540 } 2541 cur = cur->next; 2542 i++; 2543 } 2544 return var; 2545 } 2546 2547 static int32_t 2548 ultag_getVariantsSize(const ULanguageTag* langtag) { 2549 int32_t size = 0; 2550 VariantListEntry *cur = langtag->variants; 2551 while (TRUE) { 2552 if (cur == NULL) { 2553 break; 2554 } 2555 size++; 2556 cur = cur->next; 2557 } 2558 return size; 2559 } 2560 2561 #if 0 2562 /* Currently not being used. */ 2563 static const char* 2564 ultag_getAttribute(const ULanguageTag* langtag, int32_t idx) { 2565 const char *attr = NULL; 2566 AttributeListEntry *cur = langtag->attributes; 2567 int32_t i = 0; 2568 while (cur) { 2569 if (i == idx) { 2570 attr = cur->attribute; 2571 break; 2572 } 2573 cur = cur->next; 2574 i++; 2575 } 2576 return attr; 2577 } 2578 #endif 2579 2580 static int32_t 2581 ultag_getAttributesSize(const ULanguageTag* langtag) { 2582 int32_t size = 0; 2583 AttributeListEntry *cur = langtag->attributes; 2584 while (TRUE) { 2585 if (cur == NULL) { 2586 break; 2587 } 2588 size++; 2589 cur = cur->next; 2590 } 2591 return size; 2592 } 2593 2594 static const char* 2595 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { 2596 const char *key = NULL; 2597 ExtensionListEntry *cur = langtag->extensions; 2598 int32_t i = 0; 2599 while (cur) { 2600 if (i == idx) { 2601 key = cur->key; 2602 break; 2603 } 2604 cur = cur->next; 2605 i++; 2606 } 2607 return key; 2608 } 2609 2610 static const char* 2611 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { 2612 const char *val = NULL; 2613 ExtensionListEntry *cur = langtag->extensions; 2614 int32_t i = 0; 2615 while (cur) { 2616 if (i == idx) { 2617 val = cur->value; 2618 break; 2619 } 2620 cur = cur->next; 2621 i++; 2622 } 2623 return val; 2624 } 2625 2626 static int32_t 2627 ultag_getExtensionsSize(const ULanguageTag* langtag) { 2628 int32_t size = 0; 2629 ExtensionListEntry *cur = langtag->extensions; 2630 while (TRUE) { 2631 if (cur == NULL) { 2632 break; 2633 } 2634 size++; 2635 cur = cur->next; 2636 } 2637 return size; 2638 } 2639 2640 static const char* 2641 ultag_getPrivateUse(const ULanguageTag* langtag) { 2642 return langtag->privateuse; 2643 } 2644 2645 #if 0 2646 static const char* 2647 ultag_getGrandfathered(const ULanguageTag* langtag) { 2648 return langtag->grandfathered; 2649 } 2650 #endif 2651 2652 2653 /* 2654 * ------------------------------------------------- 2655 * 2656 * Locale/BCP47 conversion APIs, exposed as uloc_* 2657 * 2658 * ------------------------------------------------- 2659 */ 2660 U_DRAFT int32_t U_EXPORT2 2661 uloc_toLanguageTag(const char* localeID, 2662 char* langtag, 2663 int32_t langtagCapacity, 2664 UBool strict, 2665 UErrorCode* status) { 2666 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ 2667 char canonical[256]; 2668 int32_t reslen = 0; 2669 UErrorCode tmpStatus = U_ZERO_ERROR; 2670 UBool hadPosix = FALSE; 2671 const char* pKeywordStart; 2672 2673 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ 2674 canonical[0] = 0; 2675 if (uprv_strlen(localeID) > 0) { 2676 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); 2677 if (tmpStatus != U_ZERO_ERROR) { 2678 *status = U_ILLEGAL_ARGUMENT_ERROR; 2679 return 0; 2680 } 2681 } 2682 2683 /* For handling special case - private use only tag */ 2684 pKeywordStart = locale_getKeywordsStart(canonical); 2685 if (pKeywordStart == canonical) { 2686 UEnumeration *kwdEnum; 2687 int kwdCnt = 0; 2688 UBool done = FALSE; 2689 2690 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); 2691 if (kwdEnum != NULL) { 2692 kwdCnt = uenum_count(kwdEnum, &tmpStatus); 2693 if (kwdCnt == 1) { 2694 const char *key; 2695 int32_t len = 0; 2696 2697 key = uenum_next(kwdEnum, &len, &tmpStatus); 2698 if (len == 1 && *key == PRIVATEUSE) { 2699 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2700 buf[0] = PRIVATEUSE; 2701 buf[1] = SEP; 2702 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); 2703 if (U_SUCCESS(tmpStatus)) { 2704 if (_isPrivateuseValueSubtags(&buf[2], len)) { 2705 /* return private use only tag */ 2706 reslen = len + 2; 2707 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); 2708 u_terminateChars(langtag, langtagCapacity, reslen, status); 2709 done = TRUE; 2710 } else if (strict) { 2711 *status = U_ILLEGAL_ARGUMENT_ERROR; 2712 done = TRUE; 2713 } 2714 /* if not strict mode, then "und" will be returned */ 2715 } else { 2716 *status = U_ILLEGAL_ARGUMENT_ERROR; 2717 done = TRUE; 2718 } 2719 } 2720 } 2721 uenum_close(kwdEnum); 2722 if (done) { 2723 return reslen; 2724 } 2725 } 2726 } 2727 2728 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); 2729 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2730 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); 2731 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); 2732 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2733 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); 2734 2735 return reslen; 2736 } 2737 2738 2739 U_DRAFT int32_t U_EXPORT2 2740 uloc_forLanguageTag(const char* langtag, 2741 char* localeID, 2742 int32_t localeIDCapacity, 2743 int32_t* parsedLength, 2744 UErrorCode* status) { 2745 ULanguageTag *lt; 2746 int32_t reslen = 0; 2747 const char *subtag, *p; 2748 int32_t len; 2749 int32_t i, n, m; 2750 UBool noRegion = TRUE; 2751 2752 lt = ultag_parse(langtag, -1, parsedLength, status); 2753 if (U_FAILURE(*status)) { 2754 return 0; 2755 } 2756 2757 /* language */ 2758 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); 2759 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { 2760 len = (int32_t)uprv_strlen(subtag); 2761 if (len > 0) { 2762 if (reslen < localeIDCapacity) { 2763 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); 2764 } 2765 reslen += len; 2766 } 2767 } 2768 2769 /* script */ 2770 subtag = ultag_getScript(lt); 2771 len = (int32_t)uprv_strlen(subtag); 2772 if (len > 0) { 2773 if (reslen < localeIDCapacity) { 2774 *(localeID + reslen) = LOCALE_SEP; 2775 } 2776 reslen++; 2777 2778 /* write out the script in title case */ 2779 p = subtag; 2780 while (*p) { 2781 if (reslen < localeIDCapacity) { 2782 if (p == subtag) { 2783 *(localeID + reslen) = uprv_toupper(*p); 2784 } else { 2785 *(localeID + reslen) = *p; 2786 } 2787 } 2788 reslen++; 2789 p++; 2790 } 2791 } 2792 2793 /* region */ 2794 subtag = ultag_getRegion(lt); 2795 len = (int32_t)uprv_strlen(subtag); 2796 if (len > 0) { 2797 if (reslen < localeIDCapacity) { 2798 *(localeID + reslen) = LOCALE_SEP; 2799 } 2800 reslen++; 2801 /* write out the retion in upper case */ 2802 p = subtag; 2803 while (*p) { 2804 if (reslen < localeIDCapacity) { 2805 *(localeID + reslen) = uprv_toupper(*p); 2806 } 2807 reslen++; 2808 p++; 2809 } 2810 noRegion = FALSE; 2811 } 2812 2813 /* variants */ 2814 n = ultag_getVariantsSize(lt); 2815 if (n > 0) { 2816 if (noRegion) { 2817 if (reslen < localeIDCapacity) { 2818 *(localeID + reslen) = LOCALE_SEP; 2819 } 2820 reslen++; 2821 } 2822 2823 for (i = 0; i < n; i++) { 2824 subtag = ultag_getVariant(lt, i); 2825 if (reslen < localeIDCapacity) { 2826 *(localeID + reslen) = LOCALE_SEP; 2827 } 2828 reslen++; 2829 /* write out the variant in upper case */ 2830 p = subtag; 2831 while (*p) { 2832 if (reslen < localeIDCapacity) { 2833 *(localeID + reslen) = uprv_toupper(*p); 2834 } 2835 reslen++; 2836 p++; 2837 } 2838 } 2839 } 2840 2841 /* keywords */ 2842 n = ultag_getExtensionsSize(lt); 2843 m = ultag_getAttributesSize(lt); 2844 subtag = ultag_getPrivateUse(lt); 2845 if (n > 0 || m > 0 || uprv_strlen(subtag) > 0) { 2846 if (reslen == 0 && (n > 0 || m > 0)) { 2847 /* need a language */ 2848 if (reslen < localeIDCapacity) { 2849 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); 2850 } 2851 reslen += LANG_UND_LEN; 2852 } 2853 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); 2854 reslen += len; 2855 } 2856 2857 ultag_close(lt); 2858 return u_terminateChars(localeID, localeIDCapacity, reslen, status); 2859 } 2860 2861 2862