1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1997-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: loclikely.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010feb25 14 * created by: Markus W. Scherer 15 * 16 * Code for likely and minimized locale subtags, separated out from other .cpp files 17 * that then do not depend on resource bundle code and likely-subtags data. 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/putil.h" 22 #include "unicode/uloc.h" 23 #include "unicode/ures.h" 24 #include "cmemory.h" 25 #include "cstring.h" 26 #include "ulocimp.h" 27 #include "ustr_imp.h" 28 29 /** 30 * This function looks for the localeID in the likelySubtags resource. 31 * 32 * @param localeID The tag to find. 33 * @param buffer A buffer to hold the matching entry 34 * @param bufferLength The length of the output buffer 35 * @return A pointer to "buffer" if found, or a null pointer if not. 36 */ 37 static const char* U_CALLCONV 38 findLikelySubtags(const char* localeID, 39 char* buffer, 40 int32_t bufferLength, 41 UErrorCode* err) { 42 const char* result = NULL; 43 44 if (!U_FAILURE(*err)) { 45 int32_t resLen = 0; 46 const UChar* s = NULL; 47 UErrorCode tmpErr = U_ZERO_ERROR; 48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); 49 if (U_SUCCESS(tmpErr)) { 50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); 51 52 if (U_FAILURE(tmpErr)) { 53 /* 54 * If a resource is missing, it's not really an error, it's 55 * just that we don't have any data for that particular locale ID. 56 */ 57 if (tmpErr != U_MISSING_RESOURCE_ERROR) { 58 *err = tmpErr; 59 } 60 } 61 else if (resLen >= bufferLength) { 62 /* The buffer should never overflow. */ 63 *err = U_INTERNAL_PROGRAM_ERROR; 64 } 65 else { 66 u_UCharsToChars(s, buffer, resLen + 1); 67 result = buffer; 68 } 69 70 ures_close(subtags); 71 } else { 72 *err = tmpErr; 73 } 74 } 75 76 return result; 77 } 78 79 /** 80 * Append a tag to a buffer, adding the separator if necessary. The buffer 81 * must be large enough to contain the resulting tag plus any separator 82 * necessary. The tag must not be a zero-length string. 83 * 84 * @param tag The tag to add. 85 * @param tagLength The length of the tag. 86 * @param buffer The output buffer. 87 * @param bufferLength The length of the output buffer. This is an input/ouput parameter. 88 **/ 89 static void U_CALLCONV 90 appendTag( 91 const char* tag, 92 int32_t tagLength, 93 char* buffer, 94 int32_t* bufferLength) { 95 96 if (*bufferLength > 0) { 97 buffer[*bufferLength] = '_'; 98 ++(*bufferLength); 99 } 100 101 uprv_memmove( 102 &buffer[*bufferLength], 103 tag, 104 tagLength); 105 106 *bufferLength += tagLength; 107 } 108 109 /** 110 * These are the canonical strings for unknown languages, scripts and regions. 111 **/ 112 static const char* const unknownLanguage = "und"; 113 static const char* const unknownScript = "Zzzz"; 114 static const char* const unknownRegion = "ZZ"; 115 116 /** 117 * Create a tag string from the supplied parameters. The lang, script and region 118 * parameters may be NULL pointers. If they are, their corresponding length parameters 119 * must be less than or equal to 0. 120 * 121 * If any of the language, script or region parameters are empty, and the alternateTags 122 * parameter is not NULL, it will be parsed for potential language, script and region tags 123 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 124 * it contains no language tag, the default tag for the unknown language is used. 125 * 126 * If the length of the new string exceeds the capacity of the output buffer, 127 * the function copies as many bytes to the output buffer as it can, and returns 128 * the error U_BUFFER_OVERFLOW_ERROR. 129 * 130 * If an illegal argument is provided, the function returns the error 131 * U_ILLEGAL_ARGUMENT_ERROR. 132 * 133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if 134 * the tag string fits in the output buffer, but the null terminator doesn't. 135 * 136 * @param lang The language tag to use. 137 * @param langLength The length of the language tag. 138 * @param script The script tag to use. 139 * @param scriptLength The length of the script tag. 140 * @param region The region tag to use. 141 * @param regionLength The length of the region tag. 142 * @param trailing Any trailing data to append to the new tag. 143 * @param trailingLength The length of the trailing data. 144 * @param alternateTags A string containing any alternate tags. 145 * @param tag The output buffer. 146 * @param tagCapacity The capacity of the output buffer. 147 * @param err A pointer to a UErrorCode for error reporting. 148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 149 **/ 150 static int32_t U_CALLCONV 151 createTagStringWithAlternates( 152 const char* lang, 153 int32_t langLength, 154 const char* script, 155 int32_t scriptLength, 156 const char* region, 157 int32_t regionLength, 158 const char* trailing, 159 int32_t trailingLength, 160 const char* alternateTags, 161 char* tag, 162 int32_t tagCapacity, 163 UErrorCode* err) { 164 165 if (U_FAILURE(*err)) { 166 goto error; 167 } 168 else if (tag == NULL || 169 tagCapacity <= 0 || 170 langLength >= ULOC_LANG_CAPACITY || 171 scriptLength >= ULOC_SCRIPT_CAPACITY || 172 regionLength >= ULOC_COUNTRY_CAPACITY) { 173 goto error; 174 } 175 else { 176 /** 177 * ULOC_FULLNAME_CAPACITY will provide enough capacity 178 * that we can build a string that contains the language, 179 * script and region code without worrying about overrunning 180 * the user-supplied buffer. 181 **/ 182 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 183 int32_t tagLength = 0; 184 int32_t capacityRemaining = tagCapacity; 185 UBool regionAppended = FALSE; 186 187 if (langLength > 0) { 188 appendTag( 189 lang, 190 langLength, 191 tagBuffer, 192 &tagLength); 193 } 194 else if (alternateTags == NULL) { 195 /* 196 * Append the value for an unknown language, if 197 * we found no language. 198 */ 199 appendTag( 200 unknownLanguage, 201 (int32_t)uprv_strlen(unknownLanguage), 202 tagBuffer, 203 &tagLength); 204 } 205 else { 206 /* 207 * Parse the alternateTags string for the language. 208 */ 209 char alternateLang[ULOC_LANG_CAPACITY]; 210 int32_t alternateLangLength = sizeof(alternateLang); 211 212 alternateLangLength = 213 uloc_getLanguage( 214 alternateTags, 215 alternateLang, 216 alternateLangLength, 217 err); 218 if(U_FAILURE(*err) || 219 alternateLangLength >= ULOC_LANG_CAPACITY) { 220 goto error; 221 } 222 else if (alternateLangLength == 0) { 223 /* 224 * Append the value for an unknown language, if 225 * we found no language. 226 */ 227 appendTag( 228 unknownLanguage, 229 (int32_t)uprv_strlen(unknownLanguage), 230 tagBuffer, 231 &tagLength); 232 } 233 else { 234 appendTag( 235 alternateLang, 236 alternateLangLength, 237 tagBuffer, 238 &tagLength); 239 } 240 } 241 242 if (scriptLength > 0) { 243 appendTag( 244 script, 245 scriptLength, 246 tagBuffer, 247 &tagLength); 248 } 249 else if (alternateTags != NULL) { 250 /* 251 * Parse the alternateTags string for the script. 252 */ 253 char alternateScript[ULOC_SCRIPT_CAPACITY]; 254 255 const int32_t alternateScriptLength = 256 uloc_getScript( 257 alternateTags, 258 alternateScript, 259 sizeof(alternateScript), 260 err); 261 262 if (U_FAILURE(*err) || 263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 264 goto error; 265 } 266 else if (alternateScriptLength > 0) { 267 appendTag( 268 alternateScript, 269 alternateScriptLength, 270 tagBuffer, 271 &tagLength); 272 } 273 } 274 275 if (regionLength > 0) { 276 appendTag( 277 region, 278 regionLength, 279 tagBuffer, 280 &tagLength); 281 282 regionAppended = TRUE; 283 } 284 else if (alternateTags != NULL) { 285 /* 286 * Parse the alternateTags string for the region. 287 */ 288 char alternateRegion[ULOC_COUNTRY_CAPACITY]; 289 290 const int32_t alternateRegionLength = 291 uloc_getCountry( 292 alternateTags, 293 alternateRegion, 294 sizeof(alternateRegion), 295 err); 296 if (U_FAILURE(*err) || 297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 298 goto error; 299 } 300 else if (alternateRegionLength > 0) { 301 appendTag( 302 alternateRegion, 303 alternateRegionLength, 304 tagBuffer, 305 &tagLength); 306 307 regionAppended = TRUE; 308 } 309 } 310 311 { 312 const int32_t toCopy = 313 tagLength >= tagCapacity ? tagCapacity : tagLength; 314 315 /** 316 * Copy the partial tag from our internal buffer to the supplied 317 * target. 318 **/ 319 uprv_memcpy( 320 tag, 321 tagBuffer, 322 toCopy); 323 324 capacityRemaining -= toCopy; 325 } 326 327 if (trailingLength > 0) { 328 if (capacityRemaining > 0 && !regionAppended) { 329 tag[tagLength++] = '_'; 330 --capacityRemaining; 331 } 332 333 if (capacityRemaining > 0) { 334 /* 335 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we 336 * don't know if the user-supplied buffers overlap. 337 */ 338 const int32_t toCopy = 339 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; 340 341 uprv_memmove( 342 &tag[tagLength], 343 trailing, 344 toCopy); 345 } 346 } 347 348 tagLength += trailingLength; 349 350 return u_terminateChars( 351 tag, 352 tagCapacity, 353 tagLength, 354 err); 355 } 356 357 error: 358 359 /** 360 * An overflow indicates the locale ID passed in 361 * is ill-formed. If we got here, and there was 362 * no previous error, it's an implicit overflow. 363 **/ 364 if (*err == U_BUFFER_OVERFLOW_ERROR || 365 U_SUCCESS(*err)) { 366 *err = U_ILLEGAL_ARGUMENT_ERROR; 367 } 368 369 return -1; 370 } 371 372 /** 373 * Create a tag string from the supplied parameters. The lang, script and region 374 * parameters may be NULL pointers. If they are, their corresponding length parameters 375 * must be less than or equal to 0. If the lang parameter is an empty string, the 376 * default value for an unknown language is written to the output buffer. 377 * 378 * If the length of the new string exceeds the capacity of the output buffer, 379 * the function copies as many bytes to the output buffer as it can, and returns 380 * the error U_BUFFER_OVERFLOW_ERROR. 381 * 382 * If an illegal argument is provided, the function returns the error 383 * U_ILLEGAL_ARGUMENT_ERROR. 384 * 385 * @param lang The language tag to use. 386 * @param langLength The length of the language tag. 387 * @param script The script tag to use. 388 * @param scriptLength The length of the script tag. 389 * @param region The region tag to use. 390 * @param regionLength The length of the region tag. 391 * @param trailing Any trailing data to append to the new tag. 392 * @param trailingLength The length of the trailing data. 393 * @param tag The output buffer. 394 * @param tagCapacity The capacity of the output buffer. 395 * @param err A pointer to a UErrorCode for error reporting. 396 * @return The length of the tag string, which may be greater than tagCapacity. 397 **/ 398 static int32_t U_CALLCONV 399 createTagString( 400 const char* lang, 401 int32_t langLength, 402 const char* script, 403 int32_t scriptLength, 404 const char* region, 405 int32_t regionLength, 406 const char* trailing, 407 int32_t trailingLength, 408 char* tag, 409 int32_t tagCapacity, 410 UErrorCode* err) 411 { 412 return createTagStringWithAlternates( 413 lang, 414 langLength, 415 script, 416 scriptLength, 417 region, 418 regionLength, 419 trailing, 420 trailingLength, 421 NULL, 422 tag, 423 tagCapacity, 424 err); 425 } 426 427 /** 428 * Parse the language, script, and region subtags from a tag string, and copy the 429 * results into the corresponding output parameters. The buffers are null-terminated, 430 * unless overflow occurs. 431 * 432 * The langLength, scriptLength, and regionLength parameters are input/output 433 * parameters, and must contain the capacity of their corresponding buffers on 434 * input. On output, they will contain the actual length of the buffers, not 435 * including the null terminator. 436 * 437 * If the length of any of the output subtags exceeds the capacity of the corresponding 438 * buffer, the function copies as many bytes to the output buffer as it can, and returns 439 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 440 * occurs. 441 * 442 * If an illegal argument is provided, the function returns the error 443 * U_ILLEGAL_ARGUMENT_ERROR. 444 * 445 * @param localeID The locale ID to parse. 446 * @param lang The language tag buffer. 447 * @param langLength The length of the language tag. 448 * @param script The script tag buffer. 449 * @param scriptLength The length of the script tag. 450 * @param region The region tag buffer. 451 * @param regionLength The length of the region tag. 452 * @param err A pointer to a UErrorCode for error reporting. 453 * @return The number of chars of the localeID parameter consumed. 454 **/ 455 static int32_t U_CALLCONV 456 parseTagString( 457 const char* localeID, 458 char* lang, 459 int32_t* langLength, 460 char* script, 461 int32_t* scriptLength, 462 char* region, 463 int32_t* regionLength, 464 UErrorCode* err) 465 { 466 const char* position = localeID; 467 int32_t subtagLength = 0; 468 469 if(U_FAILURE(*err) || 470 localeID == NULL || 471 lang == NULL || 472 langLength == NULL || 473 script == NULL || 474 scriptLength == NULL || 475 region == NULL || 476 regionLength == NULL) { 477 goto error; 478 } 479 480 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); 481 u_terminateChars(lang, *langLength, subtagLength, err); 482 483 /* 484 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 485 * to be an error, because it indicates the user-supplied tag is 486 * not well-formed. 487 */ 488 if(U_FAILURE(*err)) { 489 goto error; 490 } 491 492 *langLength = subtagLength; 493 494 /* 495 * If no language was present, use the value of unknownLanguage 496 * instead. Otherwise, move past any separator. 497 */ 498 if (*langLength == 0) { 499 uprv_strcpy( 500 lang, 501 unknownLanguage); 502 *langLength = (int32_t)uprv_strlen(lang); 503 } 504 else if (_isIDSeparator(*position)) { 505 ++position; 506 } 507 508 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); 509 u_terminateChars(script, *scriptLength, subtagLength, err); 510 511 if(U_FAILURE(*err)) { 512 goto error; 513 } 514 515 *scriptLength = subtagLength; 516 517 if (*scriptLength > 0) { 518 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 519 /** 520 * If the script part is the "unknown" script, then don't return it. 521 **/ 522 *scriptLength = 0; 523 } 524 525 /* 526 * Move past any separator. 527 */ 528 if (_isIDSeparator(*position)) { 529 ++position; 530 } 531 } 532 533 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); 534 u_terminateChars(region, *regionLength, subtagLength, err); 535 536 if(U_FAILURE(*err)) { 537 goto error; 538 } 539 540 *regionLength = subtagLength; 541 542 if (*regionLength > 0) { 543 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 544 /** 545 * If the region part is the "unknown" region, then don't return it. 546 **/ 547 *regionLength = 0; 548 } 549 } 550 551 exit: 552 553 return (int32_t)(position - localeID); 554 555 error: 556 557 /** 558 * If we get here, we have no explicit error, it's the result of an 559 * illegal argument. 560 **/ 561 if (!U_FAILURE(*err)) { 562 *err = U_ILLEGAL_ARGUMENT_ERROR; 563 } 564 565 goto exit; 566 } 567 568 static int32_t U_CALLCONV 569 createLikelySubtagsString( 570 const char* lang, 571 int32_t langLength, 572 const char* script, 573 int32_t scriptLength, 574 const char* region, 575 int32_t regionLength, 576 const char* variants, 577 int32_t variantsLength, 578 char* tag, 579 int32_t tagCapacity, 580 UErrorCode* err) 581 { 582 /** 583 * ULOC_FULLNAME_CAPACITY will provide enough capacity 584 * that we can build a string that contains the language, 585 * script and region code without worrying about overrunning 586 * the user-supplied buffer. 587 **/ 588 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 589 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 590 int32_t tagBufferLength = 0; 591 592 if(U_FAILURE(*err)) { 593 goto error; 594 } 595 596 /** 597 * Try the language with the script and region first. 598 **/ 599 if (scriptLength > 0 && regionLength > 0) { 600 601 const char* likelySubtags = NULL; 602 603 tagBufferLength = createTagString( 604 lang, 605 langLength, 606 script, 607 scriptLength, 608 region, 609 regionLength, 610 NULL, 611 0, 612 tagBuffer, 613 sizeof(tagBuffer), 614 err); 615 if(U_FAILURE(*err)) { 616 goto error; 617 } 618 619 likelySubtags = 620 findLikelySubtags( 621 tagBuffer, 622 likelySubtagsBuffer, 623 sizeof(likelySubtagsBuffer), 624 err); 625 if(U_FAILURE(*err)) { 626 goto error; 627 } 628 629 if (likelySubtags != NULL) { 630 /* Always use the language tag from the 631 maximal string, since it may be more 632 specific than the one provided. */ 633 return createTagStringWithAlternates( 634 NULL, 635 0, 636 NULL, 637 0, 638 NULL, 639 0, 640 variants, 641 variantsLength, 642 likelySubtags, 643 tag, 644 tagCapacity, 645 err); 646 } 647 } 648 649 /** 650 * Try the language with just the script. 651 **/ 652 if (scriptLength > 0) { 653 654 const char* likelySubtags = NULL; 655 656 tagBufferLength = createTagString( 657 lang, 658 langLength, 659 script, 660 scriptLength, 661 NULL, 662 0, 663 NULL, 664 0, 665 tagBuffer, 666 sizeof(tagBuffer), 667 err); 668 if(U_FAILURE(*err)) { 669 goto error; 670 } 671 672 likelySubtags = 673 findLikelySubtags( 674 tagBuffer, 675 likelySubtagsBuffer, 676 sizeof(likelySubtagsBuffer), 677 err); 678 if(U_FAILURE(*err)) { 679 goto error; 680 } 681 682 if (likelySubtags != NULL) { 683 /* Always use the language tag from the 684 maximal string, since it may be more 685 specific than the one provided. */ 686 return createTagStringWithAlternates( 687 NULL, 688 0, 689 NULL, 690 0, 691 region, 692 regionLength, 693 variants, 694 variantsLength, 695 likelySubtags, 696 tag, 697 tagCapacity, 698 err); 699 } 700 } 701 702 /** 703 * Try the language with just the region. 704 **/ 705 if (regionLength > 0) { 706 707 const char* likelySubtags = NULL; 708 709 createTagString( 710 lang, 711 langLength, 712 NULL, 713 0, 714 region, 715 regionLength, 716 NULL, 717 0, 718 tagBuffer, 719 sizeof(tagBuffer), 720 err); 721 if(U_FAILURE(*err)) { 722 goto error; 723 } 724 725 likelySubtags = 726 findLikelySubtags( 727 tagBuffer, 728 likelySubtagsBuffer, 729 sizeof(likelySubtagsBuffer), 730 err); 731 if(U_FAILURE(*err)) { 732 goto error; 733 } 734 735 if (likelySubtags != NULL) { 736 /* Always use the language tag from the 737 maximal string, since it may be more 738 specific than the one provided. */ 739 return createTagStringWithAlternates( 740 NULL, 741 0, 742 script, 743 scriptLength, 744 NULL, 745 0, 746 variants, 747 variantsLength, 748 likelySubtags, 749 tag, 750 tagCapacity, 751 err); 752 } 753 } 754 755 /** 756 * Finally, try just the language. 757 **/ 758 { 759 const char* likelySubtags = NULL; 760 761 createTagString( 762 lang, 763 langLength, 764 NULL, 765 0, 766 NULL, 767 0, 768 NULL, 769 0, 770 tagBuffer, 771 sizeof(tagBuffer), 772 err); 773 if(U_FAILURE(*err)) { 774 goto error; 775 } 776 777 likelySubtags = 778 findLikelySubtags( 779 tagBuffer, 780 likelySubtagsBuffer, 781 sizeof(likelySubtagsBuffer), 782 err); 783 if(U_FAILURE(*err)) { 784 goto error; 785 } 786 787 if (likelySubtags != NULL) { 788 /* Always use the language tag from the 789 maximal string, since it may be more 790 specific than the one provided. */ 791 return createTagStringWithAlternates( 792 NULL, 793 0, 794 script, 795 scriptLength, 796 region, 797 regionLength, 798 variants, 799 variantsLength, 800 likelySubtags, 801 tag, 802 tagCapacity, 803 err); 804 } 805 } 806 807 return u_terminateChars( 808 tag, 809 tagCapacity, 810 0, 811 err); 812 813 error: 814 815 if (!U_FAILURE(*err)) { 816 *err = U_ILLEGAL_ARGUMENT_ERROR; 817 } 818 819 return -1; 820 } 821 822 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ 823 { int32_t count = 0; \ 824 int32_t i; \ 825 for (i = 0; i < trailingLength; i++) { \ 826 if (trailing[i] == '-' || trailing[i] == '_') { \ 827 count = 0; \ 828 if (count > 8) { \ 829 goto error; \ 830 } \ 831 } else if (trailing[i] == '@') { \ 832 break; \ 833 } else if (count > 8) { \ 834 goto error; \ 835 } else { \ 836 count++; \ 837 } \ 838 } \ 839 } 840 841 static int32_t 842 _uloc_addLikelySubtags(const char* localeID, 843 char* maximizedLocaleID, 844 int32_t maximizedLocaleIDCapacity, 845 UErrorCode* err) 846 { 847 char lang[ULOC_LANG_CAPACITY]; 848 int32_t langLength = sizeof(lang); 849 char script[ULOC_SCRIPT_CAPACITY]; 850 int32_t scriptLength = sizeof(script); 851 char region[ULOC_COUNTRY_CAPACITY]; 852 int32_t regionLength = sizeof(region); 853 const char* trailing = ""; 854 int32_t trailingLength = 0; 855 int32_t trailingIndex = 0; 856 int32_t resultLength = 0; 857 858 if(U_FAILURE(*err)) { 859 goto error; 860 } 861 else if (localeID == NULL || 862 maximizedLocaleID == NULL || 863 maximizedLocaleIDCapacity <= 0) { 864 goto error; 865 } 866 867 trailingIndex = parseTagString( 868 localeID, 869 lang, 870 &langLength, 871 script, 872 &scriptLength, 873 region, 874 ®ionLength, 875 err); 876 if(U_FAILURE(*err)) { 877 /* Overflow indicates an illegal argument error */ 878 if (*err == U_BUFFER_OVERFLOW_ERROR) { 879 *err = U_ILLEGAL_ARGUMENT_ERROR; 880 } 881 882 goto error; 883 } 884 885 /* Find the length of the trailing portion. */ 886 trailing = &localeID[trailingIndex]; 887 trailingLength = (int32_t)uprv_strlen(trailing); 888 889 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 890 891 resultLength = 892 createLikelySubtagsString( 893 lang, 894 langLength, 895 script, 896 scriptLength, 897 region, 898 regionLength, 899 trailing, 900 trailingLength, 901 maximizedLocaleID, 902 maximizedLocaleIDCapacity, 903 err); 904 905 if (resultLength == 0) { 906 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); 907 908 /* 909 * If we get here, we need to return localeID. 910 */ 911 uprv_memcpy( 912 maximizedLocaleID, 913 localeID, 914 localIDLength <= maximizedLocaleIDCapacity ? 915 localIDLength : maximizedLocaleIDCapacity); 916 917 resultLength = 918 u_terminateChars( 919 maximizedLocaleID, 920 maximizedLocaleIDCapacity, 921 localIDLength, 922 err); 923 } 924 925 return resultLength; 926 927 error: 928 929 if (!U_FAILURE(*err)) { 930 *err = U_ILLEGAL_ARGUMENT_ERROR; 931 } 932 933 return -1; 934 } 935 936 static int32_t 937 _uloc_minimizeSubtags(const char* localeID, 938 char* minimizedLocaleID, 939 int32_t minimizedLocaleIDCapacity, 940 UErrorCode* err) 941 { 942 /** 943 * ULOC_FULLNAME_CAPACITY will provide enough capacity 944 * that we can build a string that contains the language, 945 * script and region code without worrying about overrunning 946 * the user-supplied buffer. 947 **/ 948 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; 949 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); 950 951 char lang[ULOC_LANG_CAPACITY]; 952 int32_t langLength = sizeof(lang); 953 char script[ULOC_SCRIPT_CAPACITY]; 954 int32_t scriptLength = sizeof(script); 955 char region[ULOC_COUNTRY_CAPACITY]; 956 int32_t regionLength = sizeof(region); 957 const char* trailing = ""; 958 int32_t trailingLength = 0; 959 int32_t trailingIndex = 0; 960 961 if(U_FAILURE(*err)) { 962 goto error; 963 } 964 else if (localeID == NULL || 965 minimizedLocaleID == NULL || 966 minimizedLocaleIDCapacity <= 0) { 967 goto error; 968 } 969 970 trailingIndex = 971 parseTagString( 972 localeID, 973 lang, 974 &langLength, 975 script, 976 &scriptLength, 977 region, 978 ®ionLength, 979 err); 980 if(U_FAILURE(*err)) { 981 982 /* Overflow indicates an illegal argument error */ 983 if (*err == U_BUFFER_OVERFLOW_ERROR) { 984 *err = U_ILLEGAL_ARGUMENT_ERROR; 985 } 986 987 goto error; 988 } 989 990 /* Find the spot where the variants begin, if any. */ 991 trailing = &localeID[trailingIndex]; 992 trailingLength = (int32_t)uprv_strlen(trailing); 993 994 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 995 996 createTagString( 997 lang, 998 langLength, 999 script, 1000 scriptLength, 1001 region, 1002 regionLength, 1003 NULL, 1004 0, 1005 maximizedTagBuffer, 1006 maximizedTagBufferLength, 1007 err); 1008 if(U_FAILURE(*err)) { 1009 goto error; 1010 } 1011 1012 /** 1013 * First, we need to first get the maximization 1014 * from AddLikelySubtags. 1015 **/ 1016 maximizedTagBufferLength = 1017 uloc_addLikelySubtags( 1018 maximizedTagBuffer, 1019 maximizedTagBuffer, 1020 maximizedTagBufferLength, 1021 err); 1022 1023 if(U_FAILURE(*err)) { 1024 goto error; 1025 } 1026 1027 /** 1028 * Start first with just the language. 1029 **/ 1030 { 1031 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1032 1033 const int32_t tagBufferLength = 1034 createLikelySubtagsString( 1035 lang, 1036 langLength, 1037 NULL, 1038 0, 1039 NULL, 1040 0, 1041 NULL, 1042 0, 1043 tagBuffer, 1044 sizeof(tagBuffer), 1045 err); 1046 1047 if(U_FAILURE(*err)) { 1048 goto error; 1049 } 1050 else if (uprv_strnicmp( 1051 maximizedTagBuffer, 1052 tagBuffer, 1053 tagBufferLength) == 0) { 1054 1055 return createTagString( 1056 lang, 1057 langLength, 1058 NULL, 1059 0, 1060 NULL, 1061 0, 1062 trailing, 1063 trailingLength, 1064 minimizedLocaleID, 1065 minimizedLocaleIDCapacity, 1066 err); 1067 } 1068 } 1069 1070 /** 1071 * Next, try the language and region. 1072 **/ 1073 if (regionLength > 0) { 1074 1075 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1076 1077 const int32_t tagBufferLength = 1078 createLikelySubtagsString( 1079 lang, 1080 langLength, 1081 NULL, 1082 0, 1083 region, 1084 regionLength, 1085 NULL, 1086 0, 1087 tagBuffer, 1088 sizeof(tagBuffer), 1089 err); 1090 1091 if(U_FAILURE(*err)) { 1092 goto error; 1093 } 1094 else if (uprv_strnicmp( 1095 maximizedTagBuffer, 1096 tagBuffer, 1097 tagBufferLength) == 0) { 1098 1099 return createTagString( 1100 lang, 1101 langLength, 1102 NULL, 1103 0, 1104 region, 1105 regionLength, 1106 trailing, 1107 trailingLength, 1108 minimizedLocaleID, 1109 minimizedLocaleIDCapacity, 1110 err); 1111 } 1112 } 1113 1114 /** 1115 * Finally, try the language and script. This is our last chance, 1116 * since trying with all three subtags would only yield the 1117 * maximal version that we already have. 1118 **/ 1119 if (scriptLength > 0 && regionLength > 0) { 1120 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1121 1122 const int32_t tagBufferLength = 1123 createLikelySubtagsString( 1124 lang, 1125 langLength, 1126 script, 1127 scriptLength, 1128 NULL, 1129 0, 1130 NULL, 1131 0, 1132 tagBuffer, 1133 sizeof(tagBuffer), 1134 err); 1135 1136 if(U_FAILURE(*err)) { 1137 goto error; 1138 } 1139 else if (uprv_strnicmp( 1140 maximizedTagBuffer, 1141 tagBuffer, 1142 tagBufferLength) == 0) { 1143 1144 return createTagString( 1145 lang, 1146 langLength, 1147 script, 1148 scriptLength, 1149 NULL, 1150 0, 1151 trailing, 1152 trailingLength, 1153 minimizedLocaleID, 1154 minimizedLocaleIDCapacity, 1155 err); 1156 } 1157 } 1158 1159 { 1160 /** 1161 * If we got here, return the locale ID parameter. 1162 **/ 1163 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); 1164 1165 uprv_memcpy( 1166 minimizedLocaleID, 1167 localeID, 1168 localeIDLength <= minimizedLocaleIDCapacity ? 1169 localeIDLength : minimizedLocaleIDCapacity); 1170 1171 return u_terminateChars( 1172 minimizedLocaleID, 1173 minimizedLocaleIDCapacity, 1174 localeIDLength, 1175 err); 1176 } 1177 1178 error: 1179 1180 if (!U_FAILURE(*err)) { 1181 *err = U_ILLEGAL_ARGUMENT_ERROR; 1182 } 1183 1184 return -1; 1185 1186 1187 } 1188 1189 static UBool 1190 do_canonicalize(const char* localeID, 1191 char* buffer, 1192 int32_t bufferCapacity, 1193 UErrorCode* err) 1194 { 1195 uloc_canonicalize( 1196 localeID, 1197 buffer, 1198 bufferCapacity, 1199 err); 1200 1201 if (*err == U_STRING_NOT_TERMINATED_WARNING || 1202 *err == U_BUFFER_OVERFLOW_ERROR) { 1203 *err = U_ILLEGAL_ARGUMENT_ERROR; 1204 1205 return FALSE; 1206 } 1207 else if (U_FAILURE(*err)) { 1208 1209 return FALSE; 1210 } 1211 else { 1212 return TRUE; 1213 } 1214 } 1215 1216 U_DRAFT int32_t U_EXPORT2 1217 uloc_addLikelySubtags(const char* localeID, 1218 char* maximizedLocaleID, 1219 int32_t maximizedLocaleIDCapacity, 1220 UErrorCode* err) 1221 { 1222 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1223 1224 if (!do_canonicalize( 1225 localeID, 1226 localeBuffer, 1227 sizeof(localeBuffer), 1228 err)) { 1229 return -1; 1230 } 1231 else { 1232 return _uloc_addLikelySubtags( 1233 localeBuffer, 1234 maximizedLocaleID, 1235 maximizedLocaleIDCapacity, 1236 err); 1237 } 1238 } 1239 1240 U_DRAFT int32_t U_EXPORT2 1241 uloc_minimizeSubtags(const char* localeID, 1242 char* minimizedLocaleID, 1243 int32_t minimizedLocaleIDCapacity, 1244 UErrorCode* err) 1245 { 1246 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1247 1248 if (!do_canonicalize( 1249 localeID, 1250 localeBuffer, 1251 sizeof(localeBuffer), 1252 err)) { 1253 return -1; 1254 } 1255 else { 1256 return _uloc_minimizeSubtags( 1257 localeBuffer, 1258 minimizedLocaleID, 1259 minimizedLocaleIDCapacity, 1260 err); 1261 } 1262 } 1263