1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1997-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: loclikely.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010feb25 14 * created by: Markus W. Scherer 15 * 16 * Code for likely and minimized locale subtags, separated out from other .cpp files 17 * that then do not depend on resource bundle code and likely-subtags data. 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/locid.h" 22 #include "unicode/putil.h" 23 #include "unicode/uloc.h" 24 #include "unicode/ures.h" 25 #include "unicode/uscript.h" 26 #include "cmemory.h" 27 #include "cstring.h" 28 #include "ulocimp.h" 29 #include "ustr_imp.h" 30 31 /** 32 * This function looks for the localeID in the likelySubtags resource. 33 * 34 * @param localeID The tag to find. 35 * @param buffer A buffer to hold the matching entry 36 * @param bufferLength The length of the output buffer 37 * @return A pointer to "buffer" if found, or a null pointer if not. 38 */ 39 static const char* U_CALLCONV 40 findLikelySubtags(const char* localeID, 41 char* buffer, 42 int32_t bufferLength, 43 UErrorCode* err) { 44 const char* result = NULL; 45 46 if (!U_FAILURE(*err)) { 47 int32_t resLen = 0; 48 const UChar* s = NULL; 49 UErrorCode tmpErr = U_ZERO_ERROR; 50 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); 51 if (U_SUCCESS(tmpErr)) { 52 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); 53 54 if (U_FAILURE(tmpErr)) { 55 /* 56 * If a resource is missing, it's not really an error, it's 57 * just that we don't have any data for that particular locale ID. 
58 */ 59 if (tmpErr != U_MISSING_RESOURCE_ERROR) { 60 *err = tmpErr; 61 } 62 } 63 else if (resLen >= bufferLength) { 64 /* The buffer should never overflow. */ 65 *err = U_INTERNAL_PROGRAM_ERROR; 66 } 67 else { 68 u_UCharsToChars(s, buffer, resLen + 1); 69 result = buffer; 70 } 71 72 ures_close(subtags); 73 } else { 74 *err = tmpErr; 75 } 76 } 77 78 return result; 79 } 80 81 /** 82 * Append a tag to a buffer, adding the separator if necessary. The buffer 83 * must be large enough to contain the resulting tag plus any separator 84 * necessary. The tag must not be a zero-length string. 85 * 86 * @param tag The tag to add. 87 * @param tagLength The length of the tag. 88 * @param buffer The output buffer. 89 * @param bufferLength The length of the output buffer. This is an input/ouput parameter. 90 **/ 91 static void U_CALLCONV 92 appendTag( 93 const char* tag, 94 int32_t tagLength, 95 char* buffer, 96 int32_t* bufferLength) { 97 98 if (*bufferLength > 0) { 99 buffer[*bufferLength] = '_'; 100 ++(*bufferLength); 101 } 102 103 uprv_memmove( 104 &buffer[*bufferLength], 105 tag, 106 tagLength); 107 108 *bufferLength += tagLength; 109 } 110 111 /** 112 * These are the canonical strings for unknown languages, scripts and regions. 113 **/ 114 static const char* const unknownLanguage = "und"; 115 static const char* const unknownScript = "Zzzz"; 116 static const char* const unknownRegion = "ZZ"; 117 118 /** 119 * Create a tag string from the supplied parameters. The lang, script and region 120 * parameters may be NULL pointers. If they are, their corresponding length parameters 121 * must be less than or equal to 0. 122 * 123 * If any of the language, script or region parameters are empty, and the alternateTags 124 * parameter is not NULL, it will be parsed for potential language, script and region tags 125 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 126 * it contains no language tag, the default tag for the unknown language is used. 
 *
 * If the length of the new string exceeds the capacity of the output buffer,
 * the function copies as many bytes to the output buffer as it can, and returns
 * the error U_BUFFER_OVERFLOW_ERROR.
 *
 * If an illegal argument is provided, the function returns the error
 * U_ILLEGAL_ARGUMENT_ERROR.
 *
 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
 * the tag string fits in the output buffer, but the null terminator doesn't.
 *
 * @param lang The language tag to use.
 * @param langLength The length of the language tag.
 * @param script The script tag to use.
 * @param scriptLength The length of the script tag.
 * @param region The region tag to use.
 * @param regionLength The length of the region tag.
 * @param trailing Any trailing data to append to the new tag.
 * @param trailingLength The length of the trailing data.
 * @param alternateTags A string containing any alternate tags.
 * @param tag The output buffer.
 * @param tagCapacity The capacity of the output buffer.
 * @param err A pointer to a UErrorCode for error reporting.
 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
151 **/ 152 static int32_t U_CALLCONV 153 createTagStringWithAlternates( 154 const char* lang, 155 int32_t langLength, 156 const char* script, 157 int32_t scriptLength, 158 const char* region, 159 int32_t regionLength, 160 const char* trailing, 161 int32_t trailingLength, 162 const char* alternateTags, 163 char* tag, 164 int32_t tagCapacity, 165 UErrorCode* err) { 166 167 if (U_FAILURE(*err)) { 168 goto error; 169 } 170 else if (tag == NULL || 171 tagCapacity <= 0 || 172 langLength >= ULOC_LANG_CAPACITY || 173 scriptLength >= ULOC_SCRIPT_CAPACITY || 174 regionLength >= ULOC_COUNTRY_CAPACITY) { 175 goto error; 176 } 177 else { 178 /** 179 * ULOC_FULLNAME_CAPACITY will provide enough capacity 180 * that we can build a string that contains the language, 181 * script and region code without worrying about overrunning 182 * the user-supplied buffer. 183 **/ 184 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 185 int32_t tagLength = 0; 186 int32_t capacityRemaining = tagCapacity; 187 UBool regionAppended = FALSE; 188 189 if (langLength > 0) { 190 appendTag( 191 lang, 192 langLength, 193 tagBuffer, 194 &tagLength); 195 } 196 else if (alternateTags == NULL) { 197 /* 198 * Append the value for an unknown language, if 199 * we found no language. 200 */ 201 appendTag( 202 unknownLanguage, 203 (int32_t)uprv_strlen(unknownLanguage), 204 tagBuffer, 205 &tagLength); 206 } 207 else { 208 /* 209 * Parse the alternateTags string for the language. 210 */ 211 char alternateLang[ULOC_LANG_CAPACITY]; 212 int32_t alternateLangLength = sizeof(alternateLang); 213 214 alternateLangLength = 215 uloc_getLanguage( 216 alternateTags, 217 alternateLang, 218 alternateLangLength, 219 err); 220 if(U_FAILURE(*err) || 221 alternateLangLength >= ULOC_LANG_CAPACITY) { 222 goto error; 223 } 224 else if (alternateLangLength == 0) { 225 /* 226 * Append the value for an unknown language, if 227 * we found no language. 
228 */ 229 appendTag( 230 unknownLanguage, 231 (int32_t)uprv_strlen(unknownLanguage), 232 tagBuffer, 233 &tagLength); 234 } 235 else { 236 appendTag( 237 alternateLang, 238 alternateLangLength, 239 tagBuffer, 240 &tagLength); 241 } 242 } 243 244 if (scriptLength > 0) { 245 appendTag( 246 script, 247 scriptLength, 248 tagBuffer, 249 &tagLength); 250 } 251 else if (alternateTags != NULL) { 252 /* 253 * Parse the alternateTags string for the script. 254 */ 255 char alternateScript[ULOC_SCRIPT_CAPACITY]; 256 257 const int32_t alternateScriptLength = 258 uloc_getScript( 259 alternateTags, 260 alternateScript, 261 sizeof(alternateScript), 262 err); 263 264 if (U_FAILURE(*err) || 265 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 266 goto error; 267 } 268 else if (alternateScriptLength > 0) { 269 appendTag( 270 alternateScript, 271 alternateScriptLength, 272 tagBuffer, 273 &tagLength); 274 } 275 } 276 277 if (regionLength > 0) { 278 appendTag( 279 region, 280 regionLength, 281 tagBuffer, 282 &tagLength); 283 284 regionAppended = TRUE; 285 } 286 else if (alternateTags != NULL) { 287 /* 288 * Parse the alternateTags string for the region. 289 */ 290 char alternateRegion[ULOC_COUNTRY_CAPACITY]; 291 292 const int32_t alternateRegionLength = 293 uloc_getCountry( 294 alternateTags, 295 alternateRegion, 296 sizeof(alternateRegion), 297 err); 298 if (U_FAILURE(*err) || 299 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 300 goto error; 301 } 302 else if (alternateRegionLength > 0) { 303 appendTag( 304 alternateRegion, 305 alternateRegionLength, 306 tagBuffer, 307 &tagLength); 308 309 regionAppended = TRUE; 310 } 311 } 312 313 { 314 const int32_t toCopy = 315 tagLength >= tagCapacity ? tagCapacity : tagLength; 316 317 /** 318 * Copy the partial tag from our internal buffer to the supplied 319 * target. 
320 **/ 321 uprv_memcpy( 322 tag, 323 tagBuffer, 324 toCopy); 325 326 capacityRemaining -= toCopy; 327 } 328 329 if (trailingLength > 0) { 330 if (*trailing != '@' && capacityRemaining > 0) { 331 tag[tagLength++] = '_'; 332 --capacityRemaining; 333 if (capacityRemaining > 0 && !regionAppended) { 334 /* extra separator is required */ 335 tag[tagLength++] = '_'; 336 --capacityRemaining; 337 } 338 } 339 340 if (capacityRemaining > 0) { 341 /* 342 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we 343 * don't know if the user-supplied buffers overlap. 344 */ 345 const int32_t toCopy = 346 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; 347 348 uprv_memmove( 349 &tag[tagLength], 350 trailing, 351 toCopy); 352 } 353 } 354 355 tagLength += trailingLength; 356 357 return u_terminateChars( 358 tag, 359 tagCapacity, 360 tagLength, 361 err); 362 } 363 364 error: 365 366 /** 367 * An overflow indicates the locale ID passed in 368 * is ill-formed. If we got here, and there was 369 * no previous error, it's an implicit overflow. 370 **/ 371 if (*err == U_BUFFER_OVERFLOW_ERROR || 372 U_SUCCESS(*err)) { 373 *err = U_ILLEGAL_ARGUMENT_ERROR; 374 } 375 376 return -1; 377 } 378 379 /** 380 * Create a tag string from the supplied parameters. The lang, script and region 381 * parameters may be NULL pointers. If they are, their corresponding length parameters 382 * must be less than or equal to 0. If the lang parameter is an empty string, the 383 * default value for an unknown language is written to the output buffer. 384 * 385 * If the length of the new string exceeds the capacity of the output buffer, 386 * the function copies as many bytes to the output buffer as it can, and returns 387 * the error U_BUFFER_OVERFLOW_ERROR. 388 * 389 * If an illegal argument is provided, the function returns the error 390 * U_ILLEGAL_ARGUMENT_ERROR. 391 * 392 * @param lang The language tag to use. 
393 * @param langLength The length of the language tag. 394 * @param script The script tag to use. 395 * @param scriptLength The length of the script tag. 396 * @param region The region tag to use. 397 * @param regionLength The length of the region tag. 398 * @param trailing Any trailing data to append to the new tag. 399 * @param trailingLength The length of the trailing data. 400 * @param tag The output buffer. 401 * @param tagCapacity The capacity of the output buffer. 402 * @param err A pointer to a UErrorCode for error reporting. 403 * @return The length of the tag string, which may be greater than tagCapacity. 404 **/ 405 static int32_t U_CALLCONV 406 createTagString( 407 const char* lang, 408 int32_t langLength, 409 const char* script, 410 int32_t scriptLength, 411 const char* region, 412 int32_t regionLength, 413 const char* trailing, 414 int32_t trailingLength, 415 char* tag, 416 int32_t tagCapacity, 417 UErrorCode* err) 418 { 419 return createTagStringWithAlternates( 420 lang, 421 langLength, 422 script, 423 scriptLength, 424 region, 425 regionLength, 426 trailing, 427 trailingLength, 428 NULL, 429 tag, 430 tagCapacity, 431 err); 432 } 433 434 /** 435 * Parse the language, script, and region subtags from a tag string, and copy the 436 * results into the corresponding output parameters. The buffers are null-terminated, 437 * unless overflow occurs. 438 * 439 * The langLength, scriptLength, and regionLength parameters are input/output 440 * parameters, and must contain the capacity of their corresponding buffers on 441 * input. On output, they will contain the actual length of the buffers, not 442 * including the null terminator. 443 * 444 * If the length of any of the output subtags exceeds the capacity of the corresponding 445 * buffer, the function copies as many bytes to the output buffer as it can, and returns 446 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 447 * occurs. 
448 * 449 * If an illegal argument is provided, the function returns the error 450 * U_ILLEGAL_ARGUMENT_ERROR. 451 * 452 * @param localeID The locale ID to parse. 453 * @param lang The language tag buffer. 454 * @param langLength The length of the language tag. 455 * @param script The script tag buffer. 456 * @param scriptLength The length of the script tag. 457 * @param region The region tag buffer. 458 * @param regionLength The length of the region tag. 459 * @param err A pointer to a UErrorCode for error reporting. 460 * @return The number of chars of the localeID parameter consumed. 461 **/ 462 static int32_t U_CALLCONV 463 parseTagString( 464 const char* localeID, 465 char* lang, 466 int32_t* langLength, 467 char* script, 468 int32_t* scriptLength, 469 char* region, 470 int32_t* regionLength, 471 UErrorCode* err) 472 { 473 const char* position = localeID; 474 int32_t subtagLength = 0; 475 476 if(U_FAILURE(*err) || 477 localeID == NULL || 478 lang == NULL || 479 langLength == NULL || 480 script == NULL || 481 scriptLength == NULL || 482 region == NULL || 483 regionLength == NULL) { 484 goto error; 485 } 486 487 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); 488 u_terminateChars(lang, *langLength, subtagLength, err); 489 490 /* 491 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 492 * to be an error, because it indicates the user-supplied tag is 493 * not well-formed. 494 */ 495 if(U_FAILURE(*err)) { 496 goto error; 497 } 498 499 *langLength = subtagLength; 500 501 /* 502 * If no language was present, use the value of unknownLanguage 503 * instead. Otherwise, move past any separator. 
504 */ 505 if (*langLength == 0) { 506 uprv_strcpy( 507 lang, 508 unknownLanguage); 509 *langLength = (int32_t)uprv_strlen(lang); 510 } 511 else if (_isIDSeparator(*position)) { 512 ++position; 513 } 514 515 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); 516 u_terminateChars(script, *scriptLength, subtagLength, err); 517 518 if(U_FAILURE(*err)) { 519 goto error; 520 } 521 522 *scriptLength = subtagLength; 523 524 if (*scriptLength > 0) { 525 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 526 /** 527 * If the script part is the "unknown" script, then don't return it. 528 **/ 529 *scriptLength = 0; 530 } 531 532 /* 533 * Move past any separator. 534 */ 535 if (_isIDSeparator(*position)) { 536 ++position; 537 } 538 } 539 540 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); 541 u_terminateChars(region, *regionLength, subtagLength, err); 542 543 if(U_FAILURE(*err)) { 544 goto error; 545 } 546 547 *regionLength = subtagLength; 548 549 if (*regionLength > 0) { 550 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 551 /** 552 * If the region part is the "unknown" region, then don't return it. 553 **/ 554 *regionLength = 0; 555 } 556 } else if (*position != 0 && *position != '@') { 557 /* back up over consumed trailing separator */ 558 --position; 559 } 560 561 exit: 562 563 return (int32_t)(position - localeID); 564 565 error: 566 567 /** 568 * If we get here, we have no explicit error, it's the result of an 569 * illegal argument. 
570 **/ 571 if (!U_FAILURE(*err)) { 572 *err = U_ILLEGAL_ARGUMENT_ERROR; 573 } 574 575 goto exit; 576 } 577 578 static int32_t U_CALLCONV 579 createLikelySubtagsString( 580 const char* lang, 581 int32_t langLength, 582 const char* script, 583 int32_t scriptLength, 584 const char* region, 585 int32_t regionLength, 586 const char* variants, 587 int32_t variantsLength, 588 char* tag, 589 int32_t tagCapacity, 590 UErrorCode* err) 591 { 592 /** 593 * ULOC_FULLNAME_CAPACITY will provide enough capacity 594 * that we can build a string that contains the language, 595 * script and region code without worrying about overrunning 596 * the user-supplied buffer. 597 **/ 598 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 599 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 600 601 if(U_FAILURE(*err)) { 602 goto error; 603 } 604 605 /** 606 * Try the language with the script and region first. 607 **/ 608 if (scriptLength > 0 && regionLength > 0) { 609 610 const char* likelySubtags = NULL; 611 612 createTagString( 613 lang, 614 langLength, 615 script, 616 scriptLength, 617 region, 618 regionLength, 619 NULL, 620 0, 621 tagBuffer, 622 sizeof(tagBuffer), 623 err); 624 if(U_FAILURE(*err)) { 625 goto error; 626 } 627 628 likelySubtags = 629 findLikelySubtags( 630 tagBuffer, 631 likelySubtagsBuffer, 632 sizeof(likelySubtagsBuffer), 633 err); 634 if(U_FAILURE(*err)) { 635 goto error; 636 } 637 638 if (likelySubtags != NULL) { 639 /* Always use the language tag from the 640 maximal string, since it may be more 641 specific than the one provided. */ 642 return createTagStringWithAlternates( 643 NULL, 644 0, 645 NULL, 646 0, 647 NULL, 648 0, 649 variants, 650 variantsLength, 651 likelySubtags, 652 tag, 653 tagCapacity, 654 err); 655 } 656 } 657 658 /** 659 * Try the language with just the script. 
660 **/ 661 if (scriptLength > 0) { 662 663 const char* likelySubtags = NULL; 664 665 createTagString( 666 lang, 667 langLength, 668 script, 669 scriptLength, 670 NULL, 671 0, 672 NULL, 673 0, 674 tagBuffer, 675 sizeof(tagBuffer), 676 err); 677 if(U_FAILURE(*err)) { 678 goto error; 679 } 680 681 likelySubtags = 682 findLikelySubtags( 683 tagBuffer, 684 likelySubtagsBuffer, 685 sizeof(likelySubtagsBuffer), 686 err); 687 if(U_FAILURE(*err)) { 688 goto error; 689 } 690 691 if (likelySubtags != NULL) { 692 /* Always use the language tag from the 693 maximal string, since it may be more 694 specific than the one provided. */ 695 return createTagStringWithAlternates( 696 NULL, 697 0, 698 NULL, 699 0, 700 region, 701 regionLength, 702 variants, 703 variantsLength, 704 likelySubtags, 705 tag, 706 tagCapacity, 707 err); 708 } 709 } 710 711 /** 712 * Try the language with just the region. 713 **/ 714 if (regionLength > 0) { 715 716 const char* likelySubtags = NULL; 717 718 createTagString( 719 lang, 720 langLength, 721 NULL, 722 0, 723 region, 724 regionLength, 725 NULL, 726 0, 727 tagBuffer, 728 sizeof(tagBuffer), 729 err); 730 if(U_FAILURE(*err)) { 731 goto error; 732 } 733 734 likelySubtags = 735 findLikelySubtags( 736 tagBuffer, 737 likelySubtagsBuffer, 738 sizeof(likelySubtagsBuffer), 739 err); 740 if(U_FAILURE(*err)) { 741 goto error; 742 } 743 744 if (likelySubtags != NULL) { 745 /* Always use the language tag from the 746 maximal string, since it may be more 747 specific than the one provided. */ 748 return createTagStringWithAlternates( 749 NULL, 750 0, 751 script, 752 scriptLength, 753 NULL, 754 0, 755 variants, 756 variantsLength, 757 likelySubtags, 758 tag, 759 tagCapacity, 760 err); 761 } 762 } 763 764 /** 765 * Finally, try just the language. 
766 **/ 767 { 768 const char* likelySubtags = NULL; 769 770 createTagString( 771 lang, 772 langLength, 773 NULL, 774 0, 775 NULL, 776 0, 777 NULL, 778 0, 779 tagBuffer, 780 sizeof(tagBuffer), 781 err); 782 if(U_FAILURE(*err)) { 783 goto error; 784 } 785 786 likelySubtags = 787 findLikelySubtags( 788 tagBuffer, 789 likelySubtagsBuffer, 790 sizeof(likelySubtagsBuffer), 791 err); 792 if(U_FAILURE(*err)) { 793 goto error; 794 } 795 796 if (likelySubtags != NULL) { 797 /* Always use the language tag from the 798 maximal string, since it may be more 799 specific than the one provided. */ 800 return createTagStringWithAlternates( 801 NULL, 802 0, 803 script, 804 scriptLength, 805 region, 806 regionLength, 807 variants, 808 variantsLength, 809 likelySubtags, 810 tag, 811 tagCapacity, 812 err); 813 } 814 } 815 816 return u_terminateChars( 817 tag, 818 tagCapacity, 819 0, 820 err); 821 822 error: 823 824 if (!U_FAILURE(*err)) { 825 *err = U_ILLEGAL_ARGUMENT_ERROR; 826 } 827 828 return -1; 829 } 830 831 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ 832 { int32_t count = 0; \ 833 int32_t i; \ 834 for (i = 0; i < trailingLength; i++) { \ 835 if (trailing[i] == '-' || trailing[i] == '_') { \ 836 count = 0; \ 837 if (count > 8) { \ 838 goto error; \ 839 } \ 840 } else if (trailing[i] == '@') { \ 841 break; \ 842 } else if (count > 8) { \ 843 goto error; \ 844 } else { \ 845 count++; \ 846 } \ 847 } \ 848 } 849 850 static int32_t 851 _uloc_addLikelySubtags(const char* localeID, 852 char* maximizedLocaleID, 853 int32_t maximizedLocaleIDCapacity, 854 UErrorCode* err) 855 { 856 char lang[ULOC_LANG_CAPACITY]; 857 int32_t langLength = sizeof(lang); 858 char script[ULOC_SCRIPT_CAPACITY]; 859 int32_t scriptLength = sizeof(script); 860 char region[ULOC_COUNTRY_CAPACITY]; 861 int32_t regionLength = sizeof(region); 862 const char* trailing = ""; 863 int32_t trailingLength = 0; 864 int32_t trailingIndex = 0; 865 int32_t resultLength = 0; 866 867 if(U_FAILURE(*err)) { 868 
goto error; 869 } 870 else if (localeID == NULL || 871 maximizedLocaleID == NULL || 872 maximizedLocaleIDCapacity <= 0) { 873 goto error; 874 } 875 876 trailingIndex = parseTagString( 877 localeID, 878 lang, 879 &langLength, 880 script, 881 &scriptLength, 882 region, 883 ®ionLength, 884 err); 885 if(U_FAILURE(*err)) { 886 /* Overflow indicates an illegal argument error */ 887 if (*err == U_BUFFER_OVERFLOW_ERROR) { 888 *err = U_ILLEGAL_ARGUMENT_ERROR; 889 } 890 891 goto error; 892 } 893 894 /* Find the length of the trailing portion. */ 895 while (_isIDSeparator(localeID[trailingIndex])) { 896 trailingIndex++; 897 } 898 trailing = &localeID[trailingIndex]; 899 trailingLength = (int32_t)uprv_strlen(trailing); 900 901 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 902 903 resultLength = 904 createLikelySubtagsString( 905 lang, 906 langLength, 907 script, 908 scriptLength, 909 region, 910 regionLength, 911 trailing, 912 trailingLength, 913 maximizedLocaleID, 914 maximizedLocaleIDCapacity, 915 err); 916 917 if (resultLength == 0) { 918 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); 919 920 /* 921 * If we get here, we need to return localeID. 922 */ 923 uprv_memcpy( 924 maximizedLocaleID, 925 localeID, 926 localIDLength <= maximizedLocaleIDCapacity ? 
927 localIDLength : maximizedLocaleIDCapacity); 928 929 resultLength = 930 u_terminateChars( 931 maximizedLocaleID, 932 maximizedLocaleIDCapacity, 933 localIDLength, 934 err); 935 } 936 937 return resultLength; 938 939 error: 940 941 if (!U_FAILURE(*err)) { 942 *err = U_ILLEGAL_ARGUMENT_ERROR; 943 } 944 945 return -1; 946 } 947 948 static int32_t 949 _uloc_minimizeSubtags(const char* localeID, 950 char* minimizedLocaleID, 951 int32_t minimizedLocaleIDCapacity, 952 UErrorCode* err) 953 { 954 /** 955 * ULOC_FULLNAME_CAPACITY will provide enough capacity 956 * that we can build a string that contains the language, 957 * script and region code without worrying about overrunning 958 * the user-supplied buffer. 959 **/ 960 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; 961 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); 962 963 char lang[ULOC_LANG_CAPACITY]; 964 int32_t langLength = sizeof(lang); 965 char script[ULOC_SCRIPT_CAPACITY]; 966 int32_t scriptLength = sizeof(script); 967 char region[ULOC_COUNTRY_CAPACITY]; 968 int32_t regionLength = sizeof(region); 969 const char* trailing = ""; 970 int32_t trailingLength = 0; 971 int32_t trailingIndex = 0; 972 973 if(U_FAILURE(*err)) { 974 goto error; 975 } 976 else if (localeID == NULL || 977 minimizedLocaleID == NULL || 978 minimizedLocaleIDCapacity <= 0) { 979 goto error; 980 } 981 982 trailingIndex = 983 parseTagString( 984 localeID, 985 lang, 986 &langLength, 987 script, 988 &scriptLength, 989 region, 990 ®ionLength, 991 err); 992 if(U_FAILURE(*err)) { 993 994 /* Overflow indicates an illegal argument error */ 995 if (*err == U_BUFFER_OVERFLOW_ERROR) { 996 *err = U_ILLEGAL_ARGUMENT_ERROR; 997 } 998 999 goto error; 1000 } 1001 1002 /* Find the spot where the variants or the keywords begin, if any. 
*/ 1003 while (_isIDSeparator(localeID[trailingIndex])) { 1004 trailingIndex++; 1005 } 1006 trailing = &localeID[trailingIndex]; 1007 trailingLength = (int32_t)uprv_strlen(trailing); 1008 1009 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 1010 1011 createTagString( 1012 lang, 1013 langLength, 1014 script, 1015 scriptLength, 1016 region, 1017 regionLength, 1018 NULL, 1019 0, 1020 maximizedTagBuffer, 1021 maximizedTagBufferLength, 1022 err); 1023 if(U_FAILURE(*err)) { 1024 goto error; 1025 } 1026 1027 /** 1028 * First, we need to first get the maximization 1029 * from AddLikelySubtags. 1030 **/ 1031 maximizedTagBufferLength = 1032 uloc_addLikelySubtags( 1033 maximizedTagBuffer, 1034 maximizedTagBuffer, 1035 maximizedTagBufferLength, 1036 err); 1037 1038 if(U_FAILURE(*err)) { 1039 goto error; 1040 } 1041 1042 /** 1043 * Start first with just the language. 1044 **/ 1045 { 1046 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1047 1048 const int32_t tagBufferLength = 1049 createLikelySubtagsString( 1050 lang, 1051 langLength, 1052 NULL, 1053 0, 1054 NULL, 1055 0, 1056 NULL, 1057 0, 1058 tagBuffer, 1059 sizeof(tagBuffer), 1060 err); 1061 1062 if(U_FAILURE(*err)) { 1063 goto error; 1064 } 1065 else if (uprv_strnicmp( 1066 maximizedTagBuffer, 1067 tagBuffer, 1068 tagBufferLength) == 0) { 1069 1070 return createTagString( 1071 lang, 1072 langLength, 1073 NULL, 1074 0, 1075 NULL, 1076 0, 1077 trailing, 1078 trailingLength, 1079 minimizedLocaleID, 1080 minimizedLocaleIDCapacity, 1081 err); 1082 } 1083 } 1084 1085 /** 1086 * Next, try the language and region. 
1087 **/ 1088 if (regionLength > 0) { 1089 1090 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1091 1092 const int32_t tagBufferLength = 1093 createLikelySubtagsString( 1094 lang, 1095 langLength, 1096 NULL, 1097 0, 1098 region, 1099 regionLength, 1100 NULL, 1101 0, 1102 tagBuffer, 1103 sizeof(tagBuffer), 1104 err); 1105 1106 if(U_FAILURE(*err)) { 1107 goto error; 1108 } 1109 else if (uprv_strnicmp( 1110 maximizedTagBuffer, 1111 tagBuffer, 1112 tagBufferLength) == 0) { 1113 1114 return createTagString( 1115 lang, 1116 langLength, 1117 NULL, 1118 0, 1119 region, 1120 regionLength, 1121 trailing, 1122 trailingLength, 1123 minimizedLocaleID, 1124 minimizedLocaleIDCapacity, 1125 err); 1126 } 1127 } 1128 1129 /** 1130 * Finally, try the language and script. This is our last chance, 1131 * since trying with all three subtags would only yield the 1132 * maximal version that we already have. 1133 **/ 1134 if (scriptLength > 0 && regionLength > 0) { 1135 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1136 1137 const int32_t tagBufferLength = 1138 createLikelySubtagsString( 1139 lang, 1140 langLength, 1141 script, 1142 scriptLength, 1143 NULL, 1144 0, 1145 NULL, 1146 0, 1147 tagBuffer, 1148 sizeof(tagBuffer), 1149 err); 1150 1151 if(U_FAILURE(*err)) { 1152 goto error; 1153 } 1154 else if (uprv_strnicmp( 1155 maximizedTagBuffer, 1156 tagBuffer, 1157 tagBufferLength) == 0) { 1158 1159 return createTagString( 1160 lang, 1161 langLength, 1162 script, 1163 scriptLength, 1164 NULL, 1165 0, 1166 trailing, 1167 trailingLength, 1168 minimizedLocaleID, 1169 minimizedLocaleIDCapacity, 1170 err); 1171 } 1172 } 1173 1174 { 1175 /** 1176 * If we got here, return the locale ID parameter. 1177 **/ 1178 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); 1179 1180 uprv_memcpy( 1181 minimizedLocaleID, 1182 localeID, 1183 localeIDLength <= minimizedLocaleIDCapacity ? 
1184 localeIDLength : minimizedLocaleIDCapacity); 1185 1186 return u_terminateChars( 1187 minimizedLocaleID, 1188 minimizedLocaleIDCapacity, 1189 localeIDLength, 1190 err); 1191 } 1192 1193 error: 1194 1195 if (!U_FAILURE(*err)) { 1196 *err = U_ILLEGAL_ARGUMENT_ERROR; 1197 } 1198 1199 return -1; 1200 1201 1202 } 1203 1204 static UBool 1205 do_canonicalize(const char* localeID, 1206 char* buffer, 1207 int32_t bufferCapacity, 1208 UErrorCode* err) 1209 { 1210 uloc_canonicalize( 1211 localeID, 1212 buffer, 1213 bufferCapacity, 1214 err); 1215 1216 if (*err == U_STRING_NOT_TERMINATED_WARNING || 1217 *err == U_BUFFER_OVERFLOW_ERROR) { 1218 *err = U_ILLEGAL_ARGUMENT_ERROR; 1219 1220 return FALSE; 1221 } 1222 else if (U_FAILURE(*err)) { 1223 1224 return FALSE; 1225 } 1226 else { 1227 return TRUE; 1228 } 1229 } 1230 1231 U_CAPI int32_t U_EXPORT2 1232 uloc_addLikelySubtags(const char* localeID, 1233 char* maximizedLocaleID, 1234 int32_t maximizedLocaleIDCapacity, 1235 UErrorCode* err) 1236 { 1237 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1238 1239 if (!do_canonicalize( 1240 localeID, 1241 localeBuffer, 1242 sizeof(localeBuffer), 1243 err)) { 1244 return -1; 1245 } 1246 else { 1247 return _uloc_addLikelySubtags( 1248 localeBuffer, 1249 maximizedLocaleID, 1250 maximizedLocaleIDCapacity, 1251 err); 1252 } 1253 } 1254 1255 U_CAPI int32_t U_EXPORT2 1256 uloc_minimizeSubtags(const char* localeID, 1257 char* minimizedLocaleID, 1258 int32_t minimizedLocaleIDCapacity, 1259 UErrorCode* err) 1260 { 1261 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1262 1263 if (!do_canonicalize( 1264 localeID, 1265 localeBuffer, 1266 sizeof(localeBuffer), 1267 err)) { 1268 return -1; 1269 } 1270 else { 1271 return _uloc_minimizeSubtags( 1272 localeBuffer, 1273 minimizedLocaleID, 1274 minimizedLocaleIDCapacity, 1275 err); 1276 } 1277 } 1278 1279 // Pairs of (language subtag, + or -) for finding out fast if common languages 1280 // are LTR (minus) or RTL (plus). 
1281 static const char* LANG_DIR_STRING = 1282 "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; 1283 1284 // Implemented here because this calls uloc_addLikelySubtags(). 1285 U_CAPI UBool U_EXPORT2 1286 uloc_isRightToLeft(const char *locale) { 1287 UErrorCode errorCode = U_ZERO_ERROR; 1288 char script[8]; 1289 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); 1290 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1291 scriptLength == 0) { 1292 // Fastpath: We know the likely scripts and their writing direction 1293 // for some common languages. 1294 errorCode = U_ZERO_ERROR; 1295 char lang[8]; 1296 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); 1297 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1298 langLength == 0) { 1299 return FALSE; 1300 } 1301 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); 1302 if (langPtr != NULL) { 1303 switch (langPtr[langLength]) { 1304 case '-': return FALSE; 1305 case '+': return TRUE; 1306 default: break; // partial match of a longer code 1307 } 1308 } 1309 // Otherwise, find the likely script. 1310 errorCode = U_ZERO_ERROR; 1311 char likely[ULOC_FULLNAME_CAPACITY]; 1312 (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode); 1313 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { 1314 return FALSE; 1315 } 1316 scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode); 1317 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1318 scriptLength == 0) { 1319 return FALSE; 1320 } 1321 } 1322 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); 1323 return uscript_isRightToLeft(scriptCode); 1324 } 1325 1326 U_NAMESPACE_BEGIN 1327 1328 UBool 1329 Locale::isRightToLeft() const { 1330 return uloc_isRightToLeft(getBaseName()); 1331 } 1332 1333 U_NAMESPACE_END 1334