1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1997-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: loclikely.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010feb25 14 * created by: Markus W. Scherer 15 * 16 * Code for likely and minimized locale subtags, separated out from other .cpp files 17 * that then do not depend on resource bundle code and likely-subtags data. 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/putil.h" 22 #include "unicode/uloc.h" 23 #include "unicode/ures.h" 24 #include "cmemory.h" 25 #include "cstring.h" 26 #include "ulocimp.h" 27 #include "ustr_imp.h" 28 29 /** 30 * This function looks for the localeID in the likelySubtags resource. 31 * 32 * @param localeID The tag to find. 33 * @param buffer A buffer to hold the matching entry 34 * @param bufferLength The length of the output buffer 35 * @return A pointer to "buffer" if found, or a null pointer if not. 36 */ 37 static const char* U_CALLCONV 38 findLikelySubtags(const char* localeID, 39 char* buffer, 40 int32_t bufferLength, 41 UErrorCode* err) { 42 const char* result = NULL; 43 44 if (!U_FAILURE(*err)) { 45 int32_t resLen = 0; 46 const UChar* s = NULL; 47 UErrorCode tmpErr = U_ZERO_ERROR; 48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); 49 if (U_SUCCESS(tmpErr)) { 50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); 51 52 if (U_FAILURE(tmpErr)) { 53 /* 54 * If a resource is missing, it's not really an error, it's 55 * just that we don't have any data for that particular locale ID. 56 */ 57 if (tmpErr != U_MISSING_RESOURCE_ERROR) { 58 *err = tmpErr; 59 } 60 } 61 else if (resLen >= bufferLength) { 62 /* The buffer should never overflow. */ 63 *err = U_INTERNAL_PROGRAM_ERROR; 64 } 65 else { 66 u_UCharsToChars(s, buffer, resLen + 1); 67 result = buffer; 68 } 69 70 ures_close(subtags); 71 } else { 72 *err = tmpErr; 73 } 74 } 75 76 return result; 77 } 78 79 /** 80 * Append a tag to a buffer, adding the separator if necessary. The buffer 81 * must be large enough to contain the resulting tag plus any separator 82 * necessary. The tag must not be a zero-length string. 83 * 84 * @param tag The tag to add. 85 * @param tagLength The length of the tag. 86 * @param buffer The output buffer. 87 * @param bufferLength The length of the output buffer. This is an input/ouput parameter. 88 **/ 89 static void U_CALLCONV 90 appendTag( 91 const char* tag, 92 int32_t tagLength, 93 char* buffer, 94 int32_t* bufferLength) { 95 96 if (*bufferLength > 0) { 97 buffer[*bufferLength] = '_'; 98 ++(*bufferLength); 99 } 100 101 uprv_memmove( 102 &buffer[*bufferLength], 103 tag, 104 tagLength); 105 106 *bufferLength += tagLength; 107 } 108 109 /** 110 * These are the canonical strings for unknown languages, scripts and regions. 111 **/ 112 static const char* const unknownLanguage = "und"; 113 static const char* const unknownScript = "Zzzz"; 114 static const char* const unknownRegion = "ZZ"; 115 116 /** 117 * Create a tag string from the supplied parameters. The lang, script and region 118 * parameters may be NULL pointers. If they are, their corresponding length parameters 119 * must be less than or equal to 0. 120 * 121 * If any of the language, script or region parameters are empty, and the alternateTags 122 * parameter is not NULL, it will be parsed for potential language, script and region tags 123 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 124 * it contains no language tag, the default tag for the unknown language is used. 125 * 126 * If the length of the new string exceeds the capacity of the output buffer, 127 * the function copies as many bytes to the output buffer as it can, and returns 128 * the error U_BUFFER_OVERFLOW_ERROR. 129 * 130 * If an illegal argument is provided, the function returns the error 131 * U_ILLEGAL_ARGUMENT_ERROR. 132 * 133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if 134 * the tag string fits in the output buffer, but the null terminator doesn't. 135 * 136 * @param lang The language tag to use. 137 * @param langLength The length of the language tag. 138 * @param script The script tag to use. 139 * @param scriptLength The length of the script tag. 140 * @param region The region tag to use. 141 * @param regionLength The length of the region tag. 142 * @param trailing Any trailing data to append to the new tag. 143 * @param trailingLength The length of the trailing data. 144 * @param alternateTags A string containing any alternate tags. 145 * @param tag The output buffer. 146 * @param tagCapacity The capacity of the output buffer. 147 * @param err A pointer to a UErrorCode for error reporting. 148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 149 **/ 150 static int32_t U_CALLCONV 151 createTagStringWithAlternates( 152 const char* lang, 153 int32_t langLength, 154 const char* script, 155 int32_t scriptLength, 156 const char* region, 157 int32_t regionLength, 158 const char* trailing, 159 int32_t trailingLength, 160 const char* alternateTags, 161 char* tag, 162 int32_t tagCapacity, 163 UErrorCode* err) { 164 165 if (U_FAILURE(*err)) { 166 goto error; 167 } 168 else if (tag == NULL || 169 tagCapacity <= 0 || 170 langLength >= ULOC_LANG_CAPACITY || 171 scriptLength >= ULOC_SCRIPT_CAPACITY || 172 regionLength >= ULOC_COUNTRY_CAPACITY) { 173 goto error; 174 } 175 else { 176 /** 177 * ULOC_FULLNAME_CAPACITY will provide enough capacity 178 * that we can build a string that contains the language, 179 * script and region code without worrying about overrunning 180 * the user-supplied buffer. 181 **/ 182 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 183 int32_t tagLength = 0; 184 int32_t capacityRemaining = tagCapacity; 185 UBool regionAppended = FALSE; 186 187 if (langLength > 0) { 188 appendTag( 189 lang, 190 langLength, 191 tagBuffer, 192 &tagLength); 193 } 194 else if (alternateTags == NULL) { 195 /* 196 * Append the value for an unknown language, if 197 * we found no language. 198 */ 199 appendTag( 200 unknownLanguage, 201 (int32_t)uprv_strlen(unknownLanguage), 202 tagBuffer, 203 &tagLength); 204 } 205 else { 206 /* 207 * Parse the alternateTags string for the language. 208 */ 209 char alternateLang[ULOC_LANG_CAPACITY]; 210 int32_t alternateLangLength = sizeof(alternateLang); 211 212 alternateLangLength = 213 uloc_getLanguage( 214 alternateTags, 215 alternateLang, 216 alternateLangLength, 217 err); 218 if(U_FAILURE(*err) || 219 alternateLangLength >= ULOC_LANG_CAPACITY) { 220 goto error; 221 } 222 else if (alternateLangLength == 0) { 223 /* 224 * Append the value for an unknown language, if 225 * we found no language. 226 */ 227 appendTag( 228 unknownLanguage, 229 (int32_t)uprv_strlen(unknownLanguage), 230 tagBuffer, 231 &tagLength); 232 } 233 else { 234 appendTag( 235 alternateLang, 236 alternateLangLength, 237 tagBuffer, 238 &tagLength); 239 } 240 } 241 242 if (scriptLength > 0) { 243 appendTag( 244 script, 245 scriptLength, 246 tagBuffer, 247 &tagLength); 248 } 249 else if (alternateTags != NULL) { 250 /* 251 * Parse the alternateTags string for the script. 252 */ 253 char alternateScript[ULOC_SCRIPT_CAPACITY]; 254 255 const int32_t alternateScriptLength = 256 uloc_getScript( 257 alternateTags, 258 alternateScript, 259 sizeof(alternateScript), 260 err); 261 262 if (U_FAILURE(*err) || 263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 264 goto error; 265 } 266 else if (alternateScriptLength > 0) { 267 appendTag( 268 alternateScript, 269 alternateScriptLength, 270 tagBuffer, 271 &tagLength); 272 } 273 } 274 275 if (regionLength > 0) { 276 appendTag( 277 region, 278 regionLength, 279 tagBuffer, 280 &tagLength); 281 282 regionAppended = TRUE; 283 } 284 else if (alternateTags != NULL) { 285 /* 286 * Parse the alternateTags string for the region. 287 */ 288 char alternateRegion[ULOC_COUNTRY_CAPACITY]; 289 290 const int32_t alternateRegionLength = 291 uloc_getCountry( 292 alternateTags, 293 alternateRegion, 294 sizeof(alternateRegion), 295 err); 296 if (U_FAILURE(*err) || 297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 298 goto error; 299 } 300 else if (alternateRegionLength > 0) { 301 appendTag( 302 alternateRegion, 303 alternateRegionLength, 304 tagBuffer, 305 &tagLength); 306 307 regionAppended = TRUE; 308 } 309 } 310 311 { 312 const int32_t toCopy = 313 tagLength >= tagCapacity ? tagCapacity : tagLength; 314 315 /** 316 * Copy the partial tag from our internal buffer to the supplied 317 * target. 318 **/ 319 uprv_memcpy( 320 tag, 321 tagBuffer, 322 toCopy); 323 324 capacityRemaining -= toCopy; 325 } 326 327 if (trailingLength > 0) { 328 if (*trailing != '@' && capacityRemaining > 0) { 329 tag[tagLength++] = '_'; 330 --capacityRemaining; 331 if (capacityRemaining > 0 && !regionAppended) { 332 /* extra separator is required */ 333 tag[tagLength++] = '_'; 334 --capacityRemaining; 335 } 336 } 337 338 if (capacityRemaining > 0) { 339 /* 340 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we 341 * don't know if the user-supplied buffers overlap. 342 */ 343 const int32_t toCopy = 344 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; 345 346 uprv_memmove( 347 &tag[tagLength], 348 trailing, 349 toCopy); 350 } 351 } 352 353 tagLength += trailingLength; 354 355 return u_terminateChars( 356 tag, 357 tagCapacity, 358 tagLength, 359 err); 360 } 361 362 error: 363 364 /** 365 * An overflow indicates the locale ID passed in 366 * is ill-formed. If we got here, and there was 367 * no previous error, it's an implicit overflow. 368 **/ 369 if (*err == U_BUFFER_OVERFLOW_ERROR || 370 U_SUCCESS(*err)) { 371 *err = U_ILLEGAL_ARGUMENT_ERROR; 372 } 373 374 return -1; 375 } 376 377 /** 378 * Create a tag string from the supplied parameters. The lang, script and region 379 * parameters may be NULL pointers. If they are, their corresponding length parameters 380 * must be less than or equal to 0. If the lang parameter is an empty string, the 381 * default value for an unknown language is written to the output buffer. 382 * 383 * If the length of the new string exceeds the capacity of the output buffer, 384 * the function copies as many bytes to the output buffer as it can, and returns 385 * the error U_BUFFER_OVERFLOW_ERROR. 386 * 387 * If an illegal argument is provided, the function returns the error 388 * U_ILLEGAL_ARGUMENT_ERROR. 389 * 390 * @param lang The language tag to use. 391 * @param langLength The length of the language tag. 392 * @param script The script tag to use. 393 * @param scriptLength The length of the script tag. 394 * @param region The region tag to use. 395 * @param regionLength The length of the region tag. 396 * @param trailing Any trailing data to append to the new tag. 397 * @param trailingLength The length of the trailing data. 398 * @param tag The output buffer. 399 * @param tagCapacity The capacity of the output buffer. 400 * @param err A pointer to a UErrorCode for error reporting. 401 * @return The length of the tag string, which may be greater than tagCapacity. 402 **/ 403 static int32_t U_CALLCONV 404 createTagString( 405 const char* lang, 406 int32_t langLength, 407 const char* script, 408 int32_t scriptLength, 409 const char* region, 410 int32_t regionLength, 411 const char* trailing, 412 int32_t trailingLength, 413 char* tag, 414 int32_t tagCapacity, 415 UErrorCode* err) 416 { 417 return createTagStringWithAlternates( 418 lang, 419 langLength, 420 script, 421 scriptLength, 422 region, 423 regionLength, 424 trailing, 425 trailingLength, 426 NULL, 427 tag, 428 tagCapacity, 429 err); 430 } 431 432 /** 433 * Parse the language, script, and region subtags from a tag string, and copy the 434 * results into the corresponding output parameters. The buffers are null-terminated, 435 * unless overflow occurs. 436 * 437 * The langLength, scriptLength, and regionLength parameters are input/output 438 * parameters, and must contain the capacity of their corresponding buffers on 439 * input. On output, they will contain the actual length of the buffers, not 440 * including the null terminator. 441 * 442 * If the length of any of the output subtags exceeds the capacity of the corresponding 443 * buffer, the function copies as many bytes to the output buffer as it can, and returns 444 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 445 * occurs. 446 * 447 * If an illegal argument is provided, the function returns the error 448 * U_ILLEGAL_ARGUMENT_ERROR. 449 * 450 * @param localeID The locale ID to parse. 451 * @param lang The language tag buffer. 452 * @param langLength The length of the language tag. 453 * @param script The script tag buffer. 454 * @param scriptLength The length of the script tag. 455 * @param region The region tag buffer. 456 * @param regionLength The length of the region tag. 457 * @param err A pointer to a UErrorCode for error reporting. 458 * @return The number of chars of the localeID parameter consumed. 459 **/ 460 static int32_t U_CALLCONV 461 parseTagString( 462 const char* localeID, 463 char* lang, 464 int32_t* langLength, 465 char* script, 466 int32_t* scriptLength, 467 char* region, 468 int32_t* regionLength, 469 UErrorCode* err) 470 { 471 const char* position = localeID; 472 int32_t subtagLength = 0; 473 474 if(U_FAILURE(*err) || 475 localeID == NULL || 476 lang == NULL || 477 langLength == NULL || 478 script == NULL || 479 scriptLength == NULL || 480 region == NULL || 481 regionLength == NULL) { 482 goto error; 483 } 484 485 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); 486 u_terminateChars(lang, *langLength, subtagLength, err); 487 488 /* 489 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 490 * to be an error, because it indicates the user-supplied tag is 491 * not well-formed. 492 */ 493 if(U_FAILURE(*err)) { 494 goto error; 495 } 496 497 *langLength = subtagLength; 498 499 /* 500 * If no language was present, use the value of unknownLanguage 501 * instead. Otherwise, move past any separator. 502 */ 503 if (*langLength == 0) { 504 uprv_strcpy( 505 lang, 506 unknownLanguage); 507 *langLength = (int32_t)uprv_strlen(lang); 508 } 509 else if (_isIDSeparator(*position)) { 510 ++position; 511 } 512 513 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); 514 u_terminateChars(script, *scriptLength, subtagLength, err); 515 516 if(U_FAILURE(*err)) { 517 goto error; 518 } 519 520 *scriptLength = subtagLength; 521 522 if (*scriptLength > 0) { 523 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 524 /** 525 * If the script part is the "unknown" script, then don't return it. 526 **/ 527 *scriptLength = 0; 528 } 529 530 /* 531 * Move past any separator. 532 */ 533 if (_isIDSeparator(*position)) { 534 ++position; 535 } 536 } 537 538 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); 539 u_terminateChars(region, *regionLength, subtagLength, err); 540 541 if(U_FAILURE(*err)) { 542 goto error; 543 } 544 545 *regionLength = subtagLength; 546 547 if (*regionLength > 0) { 548 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 549 /** 550 * If the region part is the "unknown" region, then don't return it. 551 **/ 552 *regionLength = 0; 553 } 554 } else if (*position != 0 && *position != '@') { 555 /* back up over consumed trailing separator */ 556 --position; 557 } 558 559 exit: 560 561 return (int32_t)(position - localeID); 562 563 error: 564 565 /** 566 * If we get here, we have no explicit error, it's the result of an 567 * illegal argument. 568 **/ 569 if (!U_FAILURE(*err)) { 570 *err = U_ILLEGAL_ARGUMENT_ERROR; 571 } 572 573 goto exit; 574 } 575 576 static int32_t U_CALLCONV 577 createLikelySubtagsString( 578 const char* lang, 579 int32_t langLength, 580 const char* script, 581 int32_t scriptLength, 582 const char* region, 583 int32_t regionLength, 584 const char* variants, 585 int32_t variantsLength, 586 char* tag, 587 int32_t tagCapacity, 588 UErrorCode* err) 589 { 590 /** 591 * ULOC_FULLNAME_CAPACITY will provide enough capacity 592 * that we can build a string that contains the language, 593 * script and region code without worrying about overrunning 594 * the user-supplied buffer. 595 **/ 596 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 597 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 598 int32_t tagBufferLength = 0; 599 600 if(U_FAILURE(*err)) { 601 goto error; 602 } 603 604 /** 605 * Try the language with the script and region first. 606 **/ 607 if (scriptLength > 0 && regionLength > 0) { 608 609 const char* likelySubtags = NULL; 610 611 tagBufferLength = createTagString( 612 lang, 613 langLength, 614 script, 615 scriptLength, 616 region, 617 regionLength, 618 NULL, 619 0, 620 tagBuffer, 621 sizeof(tagBuffer), 622 err); 623 if(U_FAILURE(*err)) { 624 goto error; 625 } 626 627 likelySubtags = 628 findLikelySubtags( 629 tagBuffer, 630 likelySubtagsBuffer, 631 sizeof(likelySubtagsBuffer), 632 err); 633 if(U_FAILURE(*err)) { 634 goto error; 635 } 636 637 if (likelySubtags != NULL) { 638 /* Always use the language tag from the 639 maximal string, since it may be more 640 specific than the one provided. */ 641 return createTagStringWithAlternates( 642 NULL, 643 0, 644 NULL, 645 0, 646 NULL, 647 0, 648 variants, 649 variantsLength, 650 likelySubtags, 651 tag, 652 tagCapacity, 653 err); 654 } 655 } 656 657 /** 658 * Try the language with just the script. 659 **/ 660 if (scriptLength > 0) { 661 662 const char* likelySubtags = NULL; 663 664 tagBufferLength = createTagString( 665 lang, 666 langLength, 667 script, 668 scriptLength, 669 NULL, 670 0, 671 NULL, 672 0, 673 tagBuffer, 674 sizeof(tagBuffer), 675 err); 676 if(U_FAILURE(*err)) { 677 goto error; 678 } 679 680 likelySubtags = 681 findLikelySubtags( 682 tagBuffer, 683 likelySubtagsBuffer, 684 sizeof(likelySubtagsBuffer), 685 err); 686 if(U_FAILURE(*err)) { 687 goto error; 688 } 689 690 if (likelySubtags != NULL) { 691 /* Always use the language tag from the 692 maximal string, since it may be more 693 specific than the one provided. */ 694 return createTagStringWithAlternates( 695 NULL, 696 0, 697 NULL, 698 0, 699 region, 700 regionLength, 701 variants, 702 variantsLength, 703 likelySubtags, 704 tag, 705 tagCapacity, 706 err); 707 } 708 } 709 710 /** 711 * Try the language with just the region. 712 **/ 713 if (regionLength > 0) { 714 715 const char* likelySubtags = NULL; 716 717 createTagString( 718 lang, 719 langLength, 720 NULL, 721 0, 722 region, 723 regionLength, 724 NULL, 725 0, 726 tagBuffer, 727 sizeof(tagBuffer), 728 err); 729 if(U_FAILURE(*err)) { 730 goto error; 731 } 732 733 likelySubtags = 734 findLikelySubtags( 735 tagBuffer, 736 likelySubtagsBuffer, 737 sizeof(likelySubtagsBuffer), 738 err); 739 if(U_FAILURE(*err)) { 740 goto error; 741 } 742 743 if (likelySubtags != NULL) { 744 /* Always use the language tag from the 745 maximal string, since it may be more 746 specific than the one provided. */ 747 return createTagStringWithAlternates( 748 NULL, 749 0, 750 script, 751 scriptLength, 752 NULL, 753 0, 754 variants, 755 variantsLength, 756 likelySubtags, 757 tag, 758 tagCapacity, 759 err); 760 } 761 } 762 763 /** 764 * Finally, try just the language. 765 **/ 766 { 767 const char* likelySubtags = NULL; 768 769 createTagString( 770 lang, 771 langLength, 772 NULL, 773 0, 774 NULL, 775 0, 776 NULL, 777 0, 778 tagBuffer, 779 sizeof(tagBuffer), 780 err); 781 if(U_FAILURE(*err)) { 782 goto error; 783 } 784 785 likelySubtags = 786 findLikelySubtags( 787 tagBuffer, 788 likelySubtagsBuffer, 789 sizeof(likelySubtagsBuffer), 790 err); 791 if(U_FAILURE(*err)) { 792 goto error; 793 } 794 795 if (likelySubtags != NULL) { 796 /* Always use the language tag from the 797 maximal string, since it may be more 798 specific than the one provided. */ 799 return createTagStringWithAlternates( 800 NULL, 801 0, 802 script, 803 scriptLength, 804 region, 805 regionLength, 806 variants, 807 variantsLength, 808 likelySubtags, 809 tag, 810 tagCapacity, 811 err); 812 } 813 } 814 815 return u_terminateChars( 816 tag, 817 tagCapacity, 818 0, 819 err); 820 821 error: 822 823 if (!U_FAILURE(*err)) { 824 *err = U_ILLEGAL_ARGUMENT_ERROR; 825 } 826 827 return -1; 828 } 829 830 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ 831 { int32_t count = 0; \ 832 int32_t i; \ 833 for (i = 0; i < trailingLength; i++) { \ 834 if (trailing[i] == '-' || trailing[i] == '_') { \ 835 count = 0; \ 836 if (count > 8) { \ 837 goto error; \ 838 } \ 839 } else if (trailing[i] == '@') { \ 840 break; \ 841 } else if (count > 8) { \ 842 goto error; \ 843 } else { \ 844 count++; \ 845 } \ 846 } \ 847 } 848 849 static int32_t 850 _uloc_addLikelySubtags(const char* localeID, 851 char* maximizedLocaleID, 852 int32_t maximizedLocaleIDCapacity, 853 UErrorCode* err) 854 { 855 char lang[ULOC_LANG_CAPACITY]; 856 int32_t langLength = sizeof(lang); 857 char script[ULOC_SCRIPT_CAPACITY]; 858 int32_t scriptLength = sizeof(script); 859 char region[ULOC_COUNTRY_CAPACITY]; 860 int32_t regionLength = sizeof(region); 861 const char* trailing = ""; 862 int32_t trailingLength = 0; 863 int32_t trailingIndex = 0; 864 int32_t resultLength = 0; 865 866 if(U_FAILURE(*err)) { 867 goto error; 868 } 869 else if (localeID == NULL || 870 maximizedLocaleID == NULL || 871 maximizedLocaleIDCapacity <= 0) { 872 goto error; 873 } 874 875 trailingIndex = parseTagString( 876 localeID, 877 lang, 878 &langLength, 879 script, 880 &scriptLength, 881 region, 882 ®ionLength, 883 err); 884 if(U_FAILURE(*err)) { 885 /* Overflow indicates an illegal argument error */ 886 if (*err == U_BUFFER_OVERFLOW_ERROR) { 887 *err = U_ILLEGAL_ARGUMENT_ERROR; 888 } 889 890 goto error; 891 } 892 893 /* Find the length of the trailing portion. */ 894 while (_isIDSeparator(localeID[trailingIndex])) { 895 trailingIndex++; 896 } 897 trailing = &localeID[trailingIndex]; 898 trailingLength = (int32_t)uprv_strlen(trailing); 899 900 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 901 902 resultLength = 903 createLikelySubtagsString( 904 lang, 905 langLength, 906 script, 907 scriptLength, 908 region, 909 regionLength, 910 trailing, 911 trailingLength, 912 maximizedLocaleID, 913 maximizedLocaleIDCapacity, 914 err); 915 916 if (resultLength == 0) { 917 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); 918 919 /* 920 * If we get here, we need to return localeID. 921 */ 922 uprv_memcpy( 923 maximizedLocaleID, 924 localeID, 925 localIDLength <= maximizedLocaleIDCapacity ? 926 localIDLength : maximizedLocaleIDCapacity); 927 928 resultLength = 929 u_terminateChars( 930 maximizedLocaleID, 931 maximizedLocaleIDCapacity, 932 localIDLength, 933 err); 934 } 935 936 return resultLength; 937 938 error: 939 940 if (!U_FAILURE(*err)) { 941 *err = U_ILLEGAL_ARGUMENT_ERROR; 942 } 943 944 return -1; 945 } 946 947 static int32_t 948 _uloc_minimizeSubtags(const char* localeID, 949 char* minimizedLocaleID, 950 int32_t minimizedLocaleIDCapacity, 951 UErrorCode* err) 952 { 953 /** 954 * ULOC_FULLNAME_CAPACITY will provide enough capacity 955 * that we can build a string that contains the language, 956 * script and region code without worrying about overrunning 957 * the user-supplied buffer. 958 **/ 959 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; 960 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); 961 962 char lang[ULOC_LANG_CAPACITY]; 963 int32_t langLength = sizeof(lang); 964 char script[ULOC_SCRIPT_CAPACITY]; 965 int32_t scriptLength = sizeof(script); 966 char region[ULOC_COUNTRY_CAPACITY]; 967 int32_t regionLength = sizeof(region); 968 const char* trailing = ""; 969 int32_t trailingLength = 0; 970 int32_t trailingIndex = 0; 971 972 if(U_FAILURE(*err)) { 973 goto error; 974 } 975 else if (localeID == NULL || 976 minimizedLocaleID == NULL || 977 minimizedLocaleIDCapacity <= 0) { 978 goto error; 979 } 980 981 trailingIndex = 982 parseTagString( 983 localeID, 984 lang, 985 &langLength, 986 script, 987 &scriptLength, 988 region, 989 ®ionLength, 990 err); 991 if(U_FAILURE(*err)) { 992 993 /* Overflow indicates an illegal argument error */ 994 if (*err == U_BUFFER_OVERFLOW_ERROR) { 995 *err = U_ILLEGAL_ARGUMENT_ERROR; 996 } 997 998 goto error; 999 } 1000 1001 /* Find the spot where the variants or the keywords begin, if any. */ 1002 while (_isIDSeparator(localeID[trailingIndex])) { 1003 trailingIndex++; 1004 } 1005 trailing = &localeID[trailingIndex]; 1006 trailingLength = (int32_t)uprv_strlen(trailing); 1007 1008 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 1009 1010 createTagString( 1011 lang, 1012 langLength, 1013 script, 1014 scriptLength, 1015 region, 1016 regionLength, 1017 NULL, 1018 0, 1019 maximizedTagBuffer, 1020 maximizedTagBufferLength, 1021 err); 1022 if(U_FAILURE(*err)) { 1023 goto error; 1024 } 1025 1026 /** 1027 * First, we need to first get the maximization 1028 * from AddLikelySubtags. 1029 **/ 1030 maximizedTagBufferLength = 1031 uloc_addLikelySubtags( 1032 maximizedTagBuffer, 1033 maximizedTagBuffer, 1034 maximizedTagBufferLength, 1035 err); 1036 1037 if(U_FAILURE(*err)) { 1038 goto error; 1039 } 1040 1041 /** 1042 * Start first with just the language. 1043 **/ 1044 { 1045 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1046 1047 const int32_t tagBufferLength = 1048 createLikelySubtagsString( 1049 lang, 1050 langLength, 1051 NULL, 1052 0, 1053 NULL, 1054 0, 1055 NULL, 1056 0, 1057 tagBuffer, 1058 sizeof(tagBuffer), 1059 err); 1060 1061 if(U_FAILURE(*err)) { 1062 goto error; 1063 } 1064 else if (uprv_strnicmp( 1065 maximizedTagBuffer, 1066 tagBuffer, 1067 tagBufferLength) == 0) { 1068 1069 return createTagString( 1070 lang, 1071 langLength, 1072 NULL, 1073 0, 1074 NULL, 1075 0, 1076 trailing, 1077 trailingLength, 1078 minimizedLocaleID, 1079 minimizedLocaleIDCapacity, 1080 err); 1081 } 1082 } 1083 1084 /** 1085 * Next, try the language and region. 1086 **/ 1087 if (regionLength > 0) { 1088 1089 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1090 1091 const int32_t tagBufferLength = 1092 createLikelySubtagsString( 1093 lang, 1094 langLength, 1095 NULL, 1096 0, 1097 region, 1098 regionLength, 1099 NULL, 1100 0, 1101 tagBuffer, 1102 sizeof(tagBuffer), 1103 err); 1104 1105 if(U_FAILURE(*err)) { 1106 goto error; 1107 } 1108 else if (uprv_strnicmp( 1109 maximizedTagBuffer, 1110 tagBuffer, 1111 tagBufferLength) == 0) { 1112 1113 return createTagString( 1114 lang, 1115 langLength, 1116 NULL, 1117 0, 1118 region, 1119 regionLength, 1120 trailing, 1121 trailingLength, 1122 minimizedLocaleID, 1123 minimizedLocaleIDCapacity, 1124 err); 1125 } 1126 } 1127 1128 /** 1129 * Finally, try the language and script. This is our last chance, 1130 * since trying with all three subtags would only yield the 1131 * maximal version that we already have. 1132 **/ 1133 if (scriptLength > 0 && regionLength > 0) { 1134 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1135 1136 const int32_t tagBufferLength = 1137 createLikelySubtagsString( 1138 lang, 1139 langLength, 1140 script, 1141 scriptLength, 1142 NULL, 1143 0, 1144 NULL, 1145 0, 1146 tagBuffer, 1147 sizeof(tagBuffer), 1148 err); 1149 1150 if(U_FAILURE(*err)) { 1151 goto error; 1152 } 1153 else if (uprv_strnicmp( 1154 maximizedTagBuffer, 1155 tagBuffer, 1156 tagBufferLength) == 0) { 1157 1158 return createTagString( 1159 lang, 1160 langLength, 1161 script, 1162 scriptLength, 1163 NULL, 1164 0, 1165 trailing, 1166 trailingLength, 1167 minimizedLocaleID, 1168 minimizedLocaleIDCapacity, 1169 err); 1170 } 1171 } 1172 1173 { 1174 /** 1175 * If we got here, return the locale ID parameter. 1176 **/ 1177 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); 1178 1179 uprv_memcpy( 1180 minimizedLocaleID, 1181 localeID, 1182 localeIDLength <= minimizedLocaleIDCapacity ? 1183 localeIDLength : minimizedLocaleIDCapacity); 1184 1185 return u_terminateChars( 1186 minimizedLocaleID, 1187 minimizedLocaleIDCapacity, 1188 localeIDLength, 1189 err); 1190 } 1191 1192 error: 1193 1194 if (!U_FAILURE(*err)) { 1195 *err = U_ILLEGAL_ARGUMENT_ERROR; 1196 } 1197 1198 return -1; 1199 1200 1201 } 1202 1203 static UBool 1204 do_canonicalize(const char* localeID, 1205 char* buffer, 1206 int32_t bufferCapacity, 1207 UErrorCode* err) 1208 { 1209 uloc_canonicalize( 1210 localeID, 1211 buffer, 1212 bufferCapacity, 1213 err); 1214 1215 if (*err == U_STRING_NOT_TERMINATED_WARNING || 1216 *err == U_BUFFER_OVERFLOW_ERROR) { 1217 *err = U_ILLEGAL_ARGUMENT_ERROR; 1218 1219 return FALSE; 1220 } 1221 else if (U_FAILURE(*err)) { 1222 1223 return FALSE; 1224 } 1225 else { 1226 return TRUE; 1227 } 1228 } 1229 1230 U_DRAFT int32_t U_EXPORT2 1231 uloc_addLikelySubtags(const char* localeID, 1232 char* maximizedLocaleID, 1233 int32_t maximizedLocaleIDCapacity, 1234 UErrorCode* err) 1235 { 1236 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1237 1238 if (!do_canonicalize( 1239 localeID, 1240 localeBuffer, 1241 sizeof(localeBuffer), 1242 err)) { 1243 return -1; 1244 } 1245 else { 1246 return _uloc_addLikelySubtags( 1247 localeBuffer, 1248 maximizedLocaleID, 1249 maximizedLocaleIDCapacity, 1250 err); 1251 } 1252 } 1253 1254 U_DRAFT int32_t U_EXPORT2 1255 uloc_minimizeSubtags(const char* localeID, 1256 char* minimizedLocaleID, 1257 int32_t minimizedLocaleIDCapacity, 1258 UErrorCode* err) 1259 { 1260 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1261 1262 if (!do_canonicalize( 1263 localeID, 1264 localeBuffer, 1265 sizeof(localeBuffer), 1266 err)) { 1267 return -1; 1268 } 1269 else { 1270 return _uloc_minimizeSubtags( 1271 localeBuffer, 1272 minimizedLocaleID, 1273 minimizedLocaleIDCapacity, 1274 err); 1275 } 1276 } 1277