1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1997-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: loclikely.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010feb25 14 * created by: Markus W. Scherer 15 * 16 * Code for likely and minimized locale subtags, separated out from other .cpp files 17 * that then do not depend on resource bundle code and likely-subtags data. 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/putil.h" 22 #include "unicode/uloc.h" 23 #include "unicode/ures.h" 24 #include "cmemory.h" 25 #include "cstring.h" 26 #include "ulocimp.h" 27 #include "ustr_imp.h" 28 29 /** 30 * This function looks for the localeID in the likelySubtags resource. 31 * 32 * @param localeID The tag to find. 33 * @param buffer A buffer to hold the matching entry 34 * @param bufferLength The length of the output buffer 35 * @return A pointer to "buffer" if found, or a null pointer if not. 36 */ 37 static const char* U_CALLCONV 38 findLikelySubtags(const char* localeID, 39 char* buffer, 40 int32_t bufferLength, 41 UErrorCode* err) { 42 const char* result = NULL; 43 44 if (!U_FAILURE(*err)) { 45 int32_t resLen = 0; 46 const UChar* s = NULL; 47 UErrorCode tmpErr = U_ZERO_ERROR; 48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); 49 if (U_SUCCESS(tmpErr)) { 50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); 51 52 if (U_FAILURE(tmpErr)) { 53 /* 54 * If a resource is missing, it's not really an error, it's 55 * just that we don't have any data for that particular locale ID. 56 */ 57 if (tmpErr != U_MISSING_RESOURCE_ERROR) { 58 *err = tmpErr; 59 } 60 } 61 else if (resLen >= bufferLength) { 62 /* The buffer should never overflow. */ 63 *err = U_INTERNAL_PROGRAM_ERROR; 64 } 65 else { 66 u_UCharsToChars(s, buffer, resLen + 1); 67 result = buffer; 68 } 69 70 ures_close(subtags); 71 } else { 72 *err = tmpErr; 73 } 74 } 75 76 return result; 77 } 78 79 /** 80 * Append a tag to a buffer, adding the separator if necessary. The buffer 81 * must be large enough to contain the resulting tag plus any separator 82 * necessary. The tag must not be a zero-length string. 83 * 84 * @param tag The tag to add. 85 * @param tagLength The length of the tag. 86 * @param buffer The output buffer. 87 * @param bufferLength The length of the output buffer. This is an input/ouput parameter. 88 **/ 89 static void U_CALLCONV 90 appendTag( 91 const char* tag, 92 int32_t tagLength, 93 char* buffer, 94 int32_t* bufferLength) { 95 96 if (*bufferLength > 0) { 97 buffer[*bufferLength] = '_'; 98 ++(*bufferLength); 99 } 100 101 uprv_memmove( 102 &buffer[*bufferLength], 103 tag, 104 tagLength); 105 106 *bufferLength += tagLength; 107 } 108 109 /** 110 * These are the canonical strings for unknown languages, scripts and regions. 111 **/ 112 static const char* const unknownLanguage = "und"; 113 static const char* const unknownScript = "Zzzz"; 114 static const char* const unknownRegion = "ZZ"; 115 116 /** 117 * Create a tag string from the supplied parameters. The lang, script and region 118 * parameters may be NULL pointers. If they are, their corresponding length parameters 119 * must be less than or equal to 0. 120 * 121 * If any of the language, script or region parameters are empty, and the alternateTags 122 * parameter is not NULL, it will be parsed for potential language, script and region tags 123 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 124 * it contains no language tag, the default tag for the unknown language is used. 125 * 126 * If the length of the new string exceeds the capacity of the output buffer, 127 * the function copies as many bytes to the output buffer as it can, and returns 128 * the error U_BUFFER_OVERFLOW_ERROR. 129 * 130 * If an illegal argument is provided, the function returns the error 131 * U_ILLEGAL_ARGUMENT_ERROR. 132 * 133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if 134 * the tag string fits in the output buffer, but the null terminator doesn't. 135 * 136 * @param lang The language tag to use. 137 * @param langLength The length of the language tag. 138 * @param script The script tag to use. 139 * @param scriptLength The length of the script tag. 140 * @param region The region tag to use. 141 * @param regionLength The length of the region tag. 142 * @param trailing Any trailing data to append to the new tag. 143 * @param trailingLength The length of the trailing data. 144 * @param alternateTags A string containing any alternate tags. 145 * @param tag The output buffer. 146 * @param tagCapacity The capacity of the output buffer. 147 * @param err A pointer to a UErrorCode for error reporting. 148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 149 **/ 150 static int32_t U_CALLCONV 151 createTagStringWithAlternates( 152 const char* lang, 153 int32_t langLength, 154 const char* script, 155 int32_t scriptLength, 156 const char* region, 157 int32_t regionLength, 158 const char* trailing, 159 int32_t trailingLength, 160 const char* alternateTags, 161 char* tag, 162 int32_t tagCapacity, 163 UErrorCode* err) { 164 165 if (U_FAILURE(*err)) { 166 goto error; 167 } 168 else if (tag == NULL || 169 tagCapacity <= 0 || 170 langLength >= ULOC_LANG_CAPACITY || 171 scriptLength >= ULOC_SCRIPT_CAPACITY || 172 regionLength >= ULOC_COUNTRY_CAPACITY) { 173 goto error; 174 } 175 else { 176 /** 177 * ULOC_FULLNAME_CAPACITY will provide enough capacity 178 * that we can build a string that contains the language, 179 * script and region code without worrying about overrunning 180 * the user-supplied buffer. 181 **/ 182 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 183 int32_t tagLength = 0; 184 int32_t capacityRemaining = tagCapacity; 185 UBool regionAppended = FALSE; 186 187 if (langLength > 0) { 188 appendTag( 189 lang, 190 langLength, 191 tagBuffer, 192 &tagLength); 193 } 194 else if (alternateTags == NULL) { 195 /* 196 * Append the value for an unknown language, if 197 * we found no language. 198 */ 199 appendTag( 200 unknownLanguage, 201 (int32_t)uprv_strlen(unknownLanguage), 202 tagBuffer, 203 &tagLength); 204 } 205 else { 206 /* 207 * Parse the alternateTags string for the language. 208 */ 209 char alternateLang[ULOC_LANG_CAPACITY]; 210 int32_t alternateLangLength = sizeof(alternateLang); 211 212 alternateLangLength = 213 uloc_getLanguage( 214 alternateTags, 215 alternateLang, 216 alternateLangLength, 217 err); 218 if(U_FAILURE(*err) || 219 alternateLangLength >= ULOC_LANG_CAPACITY) { 220 goto error; 221 } 222 else if (alternateLangLength == 0) { 223 /* 224 * Append the value for an unknown language, if 225 * we found no language. 226 */ 227 appendTag( 228 unknownLanguage, 229 (int32_t)uprv_strlen(unknownLanguage), 230 tagBuffer, 231 &tagLength); 232 } 233 else { 234 appendTag( 235 alternateLang, 236 alternateLangLength, 237 tagBuffer, 238 &tagLength); 239 } 240 } 241 242 if (scriptLength > 0) { 243 appendTag( 244 script, 245 scriptLength, 246 tagBuffer, 247 &tagLength); 248 } 249 else if (alternateTags != NULL) { 250 /* 251 * Parse the alternateTags string for the script. 252 */ 253 char alternateScript[ULOC_SCRIPT_CAPACITY]; 254 255 const int32_t alternateScriptLength = 256 uloc_getScript( 257 alternateTags, 258 alternateScript, 259 sizeof(alternateScript), 260 err); 261 262 if (U_FAILURE(*err) || 263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 264 goto error; 265 } 266 else if (alternateScriptLength > 0) { 267 appendTag( 268 alternateScript, 269 alternateScriptLength, 270 tagBuffer, 271 &tagLength); 272 } 273 } 274 275 if (regionLength > 0) { 276 appendTag( 277 region, 278 regionLength, 279 tagBuffer, 280 &tagLength); 281 282 regionAppended = TRUE; 283 } 284 else if (alternateTags != NULL) { 285 /* 286 * Parse the alternateTags string for the region. 287 */ 288 char alternateRegion[ULOC_COUNTRY_CAPACITY]; 289 290 const int32_t alternateRegionLength = 291 uloc_getCountry( 292 alternateTags, 293 alternateRegion, 294 sizeof(alternateRegion), 295 err); 296 if (U_FAILURE(*err) || 297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 298 goto error; 299 } 300 else if (alternateRegionLength > 0) { 301 appendTag( 302 alternateRegion, 303 alternateRegionLength, 304 tagBuffer, 305 &tagLength); 306 307 regionAppended = TRUE; 308 } 309 } 310 311 { 312 const int32_t toCopy = 313 tagLength >= tagCapacity ? tagCapacity : tagLength; 314 315 /** 316 * Copy the partial tag from our internal buffer to the supplied 317 * target. 318 **/ 319 uprv_memcpy( 320 tag, 321 tagBuffer, 322 toCopy); 323 324 capacityRemaining -= toCopy; 325 } 326 327 if (trailingLength > 0) { 328 if (*trailing != '@' && capacityRemaining > 0) { 329 tag[tagLength++] = '_'; 330 --capacityRemaining; 331 if (capacityRemaining > 0 && !regionAppended) { 332 /* extra separator is required */ 333 tag[tagLength++] = '_'; 334 --capacityRemaining; 335 } 336 } 337 338 if (capacityRemaining > 0) { 339 /* 340 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we 341 * don't know if the user-supplied buffers overlap. 342 */ 343 const int32_t toCopy = 344 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; 345 346 uprv_memmove( 347 &tag[tagLength], 348 trailing, 349 toCopy); 350 } 351 } 352 353 tagLength += trailingLength; 354 355 return u_terminateChars( 356 tag, 357 tagCapacity, 358 tagLength, 359 err); 360 } 361 362 error: 363 364 /** 365 * An overflow indicates the locale ID passed in 366 * is ill-formed. If we got here, and there was 367 * no previous error, it's an implicit overflow. 368 **/ 369 if (*err == U_BUFFER_OVERFLOW_ERROR || 370 U_SUCCESS(*err)) { 371 *err = U_ILLEGAL_ARGUMENT_ERROR; 372 } 373 374 return -1; 375 } 376 377 /** 378 * Create a tag string from the supplied parameters. The lang, script and region 379 * parameters may be NULL pointers. If they are, their corresponding length parameters 380 * must be less than or equal to 0. If the lang parameter is an empty string, the 381 * default value for an unknown language is written to the output buffer. 382 * 383 * If the length of the new string exceeds the capacity of the output buffer, 384 * the function copies as many bytes to the output buffer as it can, and returns 385 * the error U_BUFFER_OVERFLOW_ERROR. 386 * 387 * If an illegal argument is provided, the function returns the error 388 * U_ILLEGAL_ARGUMENT_ERROR. 389 * 390 * @param lang The language tag to use. 391 * @param langLength The length of the language tag. 392 * @param script The script tag to use. 393 * @param scriptLength The length of the script tag. 394 * @param region The region tag to use. 395 * @param regionLength The length of the region tag. 396 * @param trailing Any trailing data to append to the new tag. 397 * @param trailingLength The length of the trailing data. 398 * @param tag The output buffer. 399 * @param tagCapacity The capacity of the output buffer. 400 * @param err A pointer to a UErrorCode for error reporting. 401 * @return The length of the tag string, which may be greater than tagCapacity. 402 **/ 403 static int32_t U_CALLCONV 404 createTagString( 405 const char* lang, 406 int32_t langLength, 407 const char* script, 408 int32_t scriptLength, 409 const char* region, 410 int32_t regionLength, 411 const char* trailing, 412 int32_t trailingLength, 413 char* tag, 414 int32_t tagCapacity, 415 UErrorCode* err) 416 { 417 return createTagStringWithAlternates( 418 lang, 419 langLength, 420 script, 421 scriptLength, 422 region, 423 regionLength, 424 trailing, 425 trailingLength, 426 NULL, 427 tag, 428 tagCapacity, 429 err); 430 } 431 432 /** 433 * Parse the language, script, and region subtags from a tag string, and copy the 434 * results into the corresponding output parameters. The buffers are null-terminated, 435 * unless overflow occurs. 436 * 437 * The langLength, scriptLength, and regionLength parameters are input/output 438 * parameters, and must contain the capacity of their corresponding buffers on 439 * input. On output, they will contain the actual length of the buffers, not 440 * including the null terminator. 441 * 442 * If the length of any of the output subtags exceeds the capacity of the corresponding 443 * buffer, the function copies as many bytes to the output buffer as it can, and returns 444 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 445 * occurs. 446 * 447 * If an illegal argument is provided, the function returns the error 448 * U_ILLEGAL_ARGUMENT_ERROR. 449 * 450 * @param localeID The locale ID to parse. 451 * @param lang The language tag buffer. 452 * @param langLength The length of the language tag. 453 * @param script The script tag buffer. 454 * @param scriptLength The length of the script tag. 455 * @param region The region tag buffer. 456 * @param regionLength The length of the region tag. 457 * @param err A pointer to a UErrorCode for error reporting. 458 * @return The number of chars of the localeID parameter consumed. 459 **/ 460 static int32_t U_CALLCONV 461 parseTagString( 462 const char* localeID, 463 char* lang, 464 int32_t* langLength, 465 char* script, 466 int32_t* scriptLength, 467 char* region, 468 int32_t* regionLength, 469 UErrorCode* err) 470 { 471 const char* position = localeID; 472 int32_t subtagLength = 0; 473 474 if(U_FAILURE(*err) || 475 localeID == NULL || 476 lang == NULL || 477 langLength == NULL || 478 script == NULL || 479 scriptLength == NULL || 480 region == NULL || 481 regionLength == NULL) { 482 goto error; 483 } 484 485 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); 486 u_terminateChars(lang, *langLength, subtagLength, err); 487 488 /* 489 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 490 * to be an error, because it indicates the user-supplied tag is 491 * not well-formed. 492 */ 493 if(U_FAILURE(*err)) { 494 goto error; 495 } 496 497 *langLength = subtagLength; 498 499 /* 500 * If no language was present, use the value of unknownLanguage 501 * instead. Otherwise, move past any separator. 502 */ 503 if (*langLength == 0) { 504 uprv_strcpy( 505 lang, 506 unknownLanguage); 507 *langLength = (int32_t)uprv_strlen(lang); 508 } 509 else if (_isIDSeparator(*position)) { 510 ++position; 511 } 512 513 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); 514 u_terminateChars(script, *scriptLength, subtagLength, err); 515 516 if(U_FAILURE(*err)) { 517 goto error; 518 } 519 520 *scriptLength = subtagLength; 521 522 if (*scriptLength > 0) { 523 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 524 /** 525 * If the script part is the "unknown" script, then don't return it. 526 **/ 527 *scriptLength = 0; 528 } 529 530 /* 531 * Move past any separator. 532 */ 533 if (_isIDSeparator(*position)) { 534 ++position; 535 } 536 } 537 538 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); 539 u_terminateChars(region, *regionLength, subtagLength, err); 540 541 if(U_FAILURE(*err)) { 542 goto error; 543 } 544 545 *regionLength = subtagLength; 546 547 if (*regionLength > 0) { 548 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 549 /** 550 * If the region part is the "unknown" region, then don't return it. 551 **/ 552 *regionLength = 0; 553 } 554 } else if (*position != 0 && *position != '@') { 555 /* back up over consumed trailing separator */ 556 --position; 557 } 558 559 exit: 560 561 return (int32_t)(position - localeID); 562 563 error: 564 565 /** 566 * If we get here, we have no explicit error, it's the result of an 567 * illegal argument. 568 **/ 569 if (!U_FAILURE(*err)) { 570 *err = U_ILLEGAL_ARGUMENT_ERROR; 571 } 572 573 goto exit; 574 } 575 576 static int32_t U_CALLCONV 577 createLikelySubtagsString( 578 const char* lang, 579 int32_t langLength, 580 const char* script, 581 int32_t scriptLength, 582 const char* region, 583 int32_t regionLength, 584 const char* variants, 585 int32_t variantsLength, 586 char* tag, 587 int32_t tagCapacity, 588 UErrorCode* err) 589 { 590 /** 591 * ULOC_FULLNAME_CAPACITY will provide enough capacity 592 * that we can build a string that contains the language, 593 * script and region code without worrying about overrunning 594 * the user-supplied buffer. 595 **/ 596 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 597 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 598 599 if(U_FAILURE(*err)) { 600 goto error; 601 } 602 603 /** 604 * Try the language with the script and region first. 605 **/ 606 if (scriptLength > 0 && regionLength > 0) { 607 608 const char* likelySubtags = NULL; 609 610 createTagString( 611 lang, 612 langLength, 613 script, 614 scriptLength, 615 region, 616 regionLength, 617 NULL, 618 0, 619 tagBuffer, 620 sizeof(tagBuffer), 621 err); 622 if(U_FAILURE(*err)) { 623 goto error; 624 } 625 626 likelySubtags = 627 findLikelySubtags( 628 tagBuffer, 629 likelySubtagsBuffer, 630 sizeof(likelySubtagsBuffer), 631 err); 632 if(U_FAILURE(*err)) { 633 goto error; 634 } 635 636 if (likelySubtags != NULL) { 637 /* Always use the language tag from the 638 maximal string, since it may be more 639 specific than the one provided. */ 640 return createTagStringWithAlternates( 641 NULL, 642 0, 643 NULL, 644 0, 645 NULL, 646 0, 647 variants, 648 variantsLength, 649 likelySubtags, 650 tag, 651 tagCapacity, 652 err); 653 } 654 } 655 656 /** 657 * Try the language with just the script. 658 **/ 659 if (scriptLength > 0) { 660 661 const char* likelySubtags = NULL; 662 663 createTagString( 664 lang, 665 langLength, 666 script, 667 scriptLength, 668 NULL, 669 0, 670 NULL, 671 0, 672 tagBuffer, 673 sizeof(tagBuffer), 674 err); 675 if(U_FAILURE(*err)) { 676 goto error; 677 } 678 679 likelySubtags = 680 findLikelySubtags( 681 tagBuffer, 682 likelySubtagsBuffer, 683 sizeof(likelySubtagsBuffer), 684 err); 685 if(U_FAILURE(*err)) { 686 goto error; 687 } 688 689 if (likelySubtags != NULL) { 690 /* Always use the language tag from the 691 maximal string, since it may be more 692 specific than the one provided. */ 693 return createTagStringWithAlternates( 694 NULL, 695 0, 696 NULL, 697 0, 698 region, 699 regionLength, 700 variants, 701 variantsLength, 702 likelySubtags, 703 tag, 704 tagCapacity, 705 err); 706 } 707 } 708 709 /** 710 * Try the language with just the region. 711 **/ 712 if (regionLength > 0) { 713 714 const char* likelySubtags = NULL; 715 716 createTagString( 717 lang, 718 langLength, 719 NULL, 720 0, 721 region, 722 regionLength, 723 NULL, 724 0, 725 tagBuffer, 726 sizeof(tagBuffer), 727 err); 728 if(U_FAILURE(*err)) { 729 goto error; 730 } 731 732 likelySubtags = 733 findLikelySubtags( 734 tagBuffer, 735 likelySubtagsBuffer, 736 sizeof(likelySubtagsBuffer), 737 err); 738 if(U_FAILURE(*err)) { 739 goto error; 740 } 741 742 if (likelySubtags != NULL) { 743 /* Always use the language tag from the 744 maximal string, since it may be more 745 specific than the one provided. */ 746 return createTagStringWithAlternates( 747 NULL, 748 0, 749 script, 750 scriptLength, 751 NULL, 752 0, 753 variants, 754 variantsLength, 755 likelySubtags, 756 tag, 757 tagCapacity, 758 err); 759 } 760 } 761 762 /** 763 * Finally, try just the language. 764 **/ 765 { 766 const char* likelySubtags = NULL; 767 768 createTagString( 769 lang, 770 langLength, 771 NULL, 772 0, 773 NULL, 774 0, 775 NULL, 776 0, 777 tagBuffer, 778 sizeof(tagBuffer), 779 err); 780 if(U_FAILURE(*err)) { 781 goto error; 782 } 783 784 likelySubtags = 785 findLikelySubtags( 786 tagBuffer, 787 likelySubtagsBuffer, 788 sizeof(likelySubtagsBuffer), 789 err); 790 if(U_FAILURE(*err)) { 791 goto error; 792 } 793 794 if (likelySubtags != NULL) { 795 /* Always use the language tag from the 796 maximal string, since it may be more 797 specific than the one provided. */ 798 return createTagStringWithAlternates( 799 NULL, 800 0, 801 script, 802 scriptLength, 803 region, 804 regionLength, 805 variants, 806 variantsLength, 807 likelySubtags, 808 tag, 809 tagCapacity, 810 err); 811 } 812 } 813 814 return u_terminateChars( 815 tag, 816 tagCapacity, 817 0, 818 err); 819 820 error: 821 822 if (!U_FAILURE(*err)) { 823 *err = U_ILLEGAL_ARGUMENT_ERROR; 824 } 825 826 return -1; 827 } 828 829 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ 830 { int32_t count = 0; \ 831 int32_t i; \ 832 for (i = 0; i < trailingLength; i++) { \ 833 if (trailing[i] == '-' || trailing[i] == '_') { \ 834 count = 0; \ 835 if (count > 8) { \ 836 goto error; \ 837 } \ 838 } else if (trailing[i] == '@') { \ 839 break; \ 840 } else if (count > 8) { \ 841 goto error; \ 842 } else { \ 843 count++; \ 844 } \ 845 } \ 846 } 847 848 static int32_t 849 _uloc_addLikelySubtags(const char* localeID, 850 char* maximizedLocaleID, 851 int32_t maximizedLocaleIDCapacity, 852 UErrorCode* err) 853 { 854 char lang[ULOC_LANG_CAPACITY]; 855 int32_t langLength = sizeof(lang); 856 char script[ULOC_SCRIPT_CAPACITY]; 857 int32_t scriptLength = sizeof(script); 858 char region[ULOC_COUNTRY_CAPACITY]; 859 int32_t regionLength = sizeof(region); 860 const char* trailing = ""; 861 int32_t trailingLength = 0; 862 int32_t trailingIndex = 0; 863 int32_t resultLength = 0; 864 865 if(U_FAILURE(*err)) { 866 goto error; 867 } 868 else if (localeID == NULL || 869 maximizedLocaleID == NULL || 870 maximizedLocaleIDCapacity <= 0) { 871 goto error; 872 } 873 874 trailingIndex = parseTagString( 875 localeID, 876 lang, 877 &langLength, 878 script, 879 &scriptLength, 880 region, 881 ®ionLength, 882 err); 883 if(U_FAILURE(*err)) { 884 /* Overflow indicates an illegal argument error */ 885 if (*err == U_BUFFER_OVERFLOW_ERROR) { 886 *err = U_ILLEGAL_ARGUMENT_ERROR; 887 } 888 889 goto error; 890 } 891 892 /* Find the length of the trailing portion. */ 893 while (_isIDSeparator(localeID[trailingIndex])) { 894 trailingIndex++; 895 } 896 trailing = &localeID[trailingIndex]; 897 trailingLength = (int32_t)uprv_strlen(trailing); 898 899 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 900 901 resultLength = 902 createLikelySubtagsString( 903 lang, 904 langLength, 905 script, 906 scriptLength, 907 region, 908 regionLength, 909 trailing, 910 trailingLength, 911 maximizedLocaleID, 912 maximizedLocaleIDCapacity, 913 err); 914 915 if (resultLength == 0) { 916 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); 917 918 /* 919 * If we get here, we need to return localeID. 920 */ 921 uprv_memcpy( 922 maximizedLocaleID, 923 localeID, 924 localIDLength <= maximizedLocaleIDCapacity ? 925 localIDLength : maximizedLocaleIDCapacity); 926 927 resultLength = 928 u_terminateChars( 929 maximizedLocaleID, 930 maximizedLocaleIDCapacity, 931 localIDLength, 932 err); 933 } 934 935 return resultLength; 936 937 error: 938 939 if (!U_FAILURE(*err)) { 940 *err = U_ILLEGAL_ARGUMENT_ERROR; 941 } 942 943 return -1; 944 } 945 946 static int32_t 947 _uloc_minimizeSubtags(const char* localeID, 948 char* minimizedLocaleID, 949 int32_t minimizedLocaleIDCapacity, 950 UErrorCode* err) 951 { 952 /** 953 * ULOC_FULLNAME_CAPACITY will provide enough capacity 954 * that we can build a string that contains the language, 955 * script and region code without worrying about overrunning 956 * the user-supplied buffer. 957 **/ 958 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; 959 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); 960 961 char lang[ULOC_LANG_CAPACITY]; 962 int32_t langLength = sizeof(lang); 963 char script[ULOC_SCRIPT_CAPACITY]; 964 int32_t scriptLength = sizeof(script); 965 char region[ULOC_COUNTRY_CAPACITY]; 966 int32_t regionLength = sizeof(region); 967 const char* trailing = ""; 968 int32_t trailingLength = 0; 969 int32_t trailingIndex = 0; 970 971 if(U_FAILURE(*err)) { 972 goto error; 973 } 974 else if (localeID == NULL || 975 minimizedLocaleID == NULL || 976 minimizedLocaleIDCapacity <= 0) { 977 goto error; 978 } 979 980 trailingIndex = 981 parseTagString( 982 localeID, 983 lang, 984 &langLength, 985 script, 986 &scriptLength, 987 region, 988 ®ionLength, 989 err); 990 if(U_FAILURE(*err)) { 991 992 /* Overflow indicates an illegal argument error */ 993 if (*err == U_BUFFER_OVERFLOW_ERROR) { 994 *err = U_ILLEGAL_ARGUMENT_ERROR; 995 } 996 997 goto error; 998 } 999 1000 /* Find the spot where the variants or the keywords begin, if any. */ 1001 while (_isIDSeparator(localeID[trailingIndex])) { 1002 trailingIndex++; 1003 } 1004 trailing = &localeID[trailingIndex]; 1005 trailingLength = (int32_t)uprv_strlen(trailing); 1006 1007 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 1008 1009 createTagString( 1010 lang, 1011 langLength, 1012 script, 1013 scriptLength, 1014 region, 1015 regionLength, 1016 NULL, 1017 0, 1018 maximizedTagBuffer, 1019 maximizedTagBufferLength, 1020 err); 1021 if(U_FAILURE(*err)) { 1022 goto error; 1023 } 1024 1025 /** 1026 * First, we need to first get the maximization 1027 * from AddLikelySubtags. 1028 **/ 1029 maximizedTagBufferLength = 1030 uloc_addLikelySubtags( 1031 maximizedTagBuffer, 1032 maximizedTagBuffer, 1033 maximizedTagBufferLength, 1034 err); 1035 1036 if(U_FAILURE(*err)) { 1037 goto error; 1038 } 1039 1040 /** 1041 * Start first with just the language. 1042 **/ 1043 { 1044 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1045 1046 const int32_t tagBufferLength = 1047 createLikelySubtagsString( 1048 lang, 1049 langLength, 1050 NULL, 1051 0, 1052 NULL, 1053 0, 1054 NULL, 1055 0, 1056 tagBuffer, 1057 sizeof(tagBuffer), 1058 err); 1059 1060 if(U_FAILURE(*err)) { 1061 goto error; 1062 } 1063 else if (uprv_strnicmp( 1064 maximizedTagBuffer, 1065 tagBuffer, 1066 tagBufferLength) == 0) { 1067 1068 return createTagString( 1069 lang, 1070 langLength, 1071 NULL, 1072 0, 1073 NULL, 1074 0, 1075 trailing, 1076 trailingLength, 1077 minimizedLocaleID, 1078 minimizedLocaleIDCapacity, 1079 err); 1080 } 1081 } 1082 1083 /** 1084 * Next, try the language and region. 1085 **/ 1086 if (regionLength > 0) { 1087 1088 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1089 1090 const int32_t tagBufferLength = 1091 createLikelySubtagsString( 1092 lang, 1093 langLength, 1094 NULL, 1095 0, 1096 region, 1097 regionLength, 1098 NULL, 1099 0, 1100 tagBuffer, 1101 sizeof(tagBuffer), 1102 err); 1103 1104 if(U_FAILURE(*err)) { 1105 goto error; 1106 } 1107 else if (uprv_strnicmp( 1108 maximizedTagBuffer, 1109 tagBuffer, 1110 tagBufferLength) == 0) { 1111 1112 return createTagString( 1113 lang, 1114 langLength, 1115 NULL, 1116 0, 1117 region, 1118 regionLength, 1119 trailing, 1120 trailingLength, 1121 minimizedLocaleID, 1122 minimizedLocaleIDCapacity, 1123 err); 1124 } 1125 } 1126 1127 /** 1128 * Finally, try the language and script. This is our last chance, 1129 * since trying with all three subtags would only yield the 1130 * maximal version that we already have. 1131 **/ 1132 if (scriptLength > 0 && regionLength > 0) { 1133 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1134 1135 const int32_t tagBufferLength = 1136 createLikelySubtagsString( 1137 lang, 1138 langLength, 1139 script, 1140 scriptLength, 1141 NULL, 1142 0, 1143 NULL, 1144 0, 1145 tagBuffer, 1146 sizeof(tagBuffer), 1147 err); 1148 1149 if(U_FAILURE(*err)) { 1150 goto error; 1151 } 1152 else if (uprv_strnicmp( 1153 maximizedTagBuffer, 1154 tagBuffer, 1155 tagBufferLength) == 0) { 1156 1157 return createTagString( 1158 lang, 1159 langLength, 1160 script, 1161 scriptLength, 1162 NULL, 1163 0, 1164 trailing, 1165 trailingLength, 1166 minimizedLocaleID, 1167 minimizedLocaleIDCapacity, 1168 err); 1169 } 1170 } 1171 1172 { 1173 /** 1174 * If we got here, return the locale ID parameter. 1175 **/ 1176 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); 1177 1178 uprv_memcpy( 1179 minimizedLocaleID, 1180 localeID, 1181 localeIDLength <= minimizedLocaleIDCapacity ? 1182 localeIDLength : minimizedLocaleIDCapacity); 1183 1184 return u_terminateChars( 1185 minimizedLocaleID, 1186 minimizedLocaleIDCapacity, 1187 localeIDLength, 1188 err); 1189 } 1190 1191 error: 1192 1193 if (!U_FAILURE(*err)) { 1194 *err = U_ILLEGAL_ARGUMENT_ERROR; 1195 } 1196 1197 return -1; 1198 1199 1200 } 1201 1202 static UBool 1203 do_canonicalize(const char* localeID, 1204 char* buffer, 1205 int32_t bufferCapacity, 1206 UErrorCode* err) 1207 { 1208 uloc_canonicalize( 1209 localeID, 1210 buffer, 1211 bufferCapacity, 1212 err); 1213 1214 if (*err == U_STRING_NOT_TERMINATED_WARNING || 1215 *err == U_BUFFER_OVERFLOW_ERROR) { 1216 *err = U_ILLEGAL_ARGUMENT_ERROR; 1217 1218 return FALSE; 1219 } 1220 else if (U_FAILURE(*err)) { 1221 1222 return FALSE; 1223 } 1224 else { 1225 return TRUE; 1226 } 1227 } 1228 1229 U_CAPI int32_t U_EXPORT2 1230 uloc_addLikelySubtags(const char* localeID, 1231 char* maximizedLocaleID, 1232 int32_t maximizedLocaleIDCapacity, 1233 UErrorCode* err) 1234 { 1235 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1236 1237 if (!do_canonicalize( 1238 localeID, 1239 localeBuffer, 1240 sizeof(localeBuffer), 1241 err)) { 1242 return -1; 1243 } 1244 else { 1245 return _uloc_addLikelySubtags( 1246 localeBuffer, 1247 maximizedLocaleID, 1248 maximizedLocaleIDCapacity, 1249 err); 1250 } 1251 } 1252 1253 U_CAPI int32_t U_EXPORT2 1254 uloc_minimizeSubtags(const char* localeID, 1255 char* minimizedLocaleID, 1256 int32_t minimizedLocaleIDCapacity, 1257 UErrorCode* err) 1258 { 1259 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1260 1261 if (!do_canonicalize( 1262 localeID, 1263 localeBuffer, 1264 sizeof(localeBuffer), 1265 err)) { 1266 return -1; 1267 } 1268 else { 1269 return _uloc_minimizeSubtags( 1270 localeBuffer, 1271 minimizedLocaleID, 1272 minimizedLocaleIDCapacity, 1273 err); 1274 } 1275 } 1276