Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1997-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  loclikely.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2010feb25
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Code for likely and minimized locale subtags, separated out from other .cpp files
     19 *   that then do not depend on resource bundle code and likely-subtags data.
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/locid.h"
     24 #include "unicode/putil.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uloc.h"
     27 #include "unicode/ures.h"
     28 #include "unicode/uscript.h"
     29 #include "cmemory.h"
     30 #include "cstring.h"
     31 #include "ulocimp.h"
     32 #include "ustr_imp.h"
     33 
     34 /**
     35  * This function looks for the localeID in the likelySubtags resource.
     36  *
     37  * @param localeID The tag to find.
     38  * @param buffer A buffer to hold the matching entry
     39  * @param bufferLength The length of the output buffer
     40  * @return A pointer to "buffer" if found, or a null pointer if not.
     41  */
     42 static const char*  U_CALLCONV
     43 findLikelySubtags(const char* localeID,
     44                   char* buffer,
     45                   int32_t bufferLength,
     46                   UErrorCode* err) {
     47     const char* result = NULL;
     48 
     49     if (!U_FAILURE(*err)) {
     50         int32_t resLen = 0;
     51         const UChar* s = NULL;
     52         UErrorCode tmpErr = U_ZERO_ERROR;
     53         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
     54         if (U_SUCCESS(tmpErr)) {
     55             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
     56 
     57             if (U_FAILURE(tmpErr)) {
     58                 /*
     59                  * If a resource is missing, it's not really an error, it's
     60                  * just that we don't have any data for that particular locale ID.
     61                  */
     62                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
     63                     *err = tmpErr;
     64                 }
     65             }
     66             else if (resLen >= bufferLength) {
     67                 /* The buffer should never overflow. */
     68                 *err = U_INTERNAL_PROGRAM_ERROR;
     69             }
     70             else {
     71                 u_UCharsToChars(s, buffer, resLen + 1);
     72                 result = buffer;
     73             }
     74 
     75             ures_close(subtags);
     76         } else {
     77             *err = tmpErr;
     78         }
     79     }
     80 
     81     return result;
     82 }
     83 
     84 /**
     85  * Append a tag to a buffer, adding the separator if necessary.  The buffer
     86  * must be large enough to contain the resulting tag plus any separator
     87  * necessary. The tag must not be a zero-length string.
     88  *
     89  * @param tag The tag to add.
     90  * @param tagLength The length of the tag.
     91  * @param buffer The output buffer.
     92  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
     93  **/
     94 static void U_CALLCONV
     95 appendTag(
     96     const char* tag,
     97     int32_t tagLength,
     98     char* buffer,
     99     int32_t* bufferLength) {
    100 
    101     if (*bufferLength > 0) {
    102         buffer[*bufferLength] = '_';
    103         ++(*bufferLength);
    104     }
    105 
    106     uprv_memmove(
    107         &buffer[*bufferLength],
    108         tag,
    109         tagLength);
    110 
    111     *bufferLength += tagLength;
    112 }
    113 
/**
 * These are the canonical strings for unknown languages, scripts and regions.
 *
 * In this file, unknownLanguage is substituted when a tag has no language
 * subtag, and script/region subtags that compare equal (ignoring case) to
 * unknownScript/unknownRegion are reported by parseTagString() as absent.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
    120 
    121 /**
    122  * Create a tag string from the supplied parameters.  The lang, script and region
    123  * parameters may be NULL pointers. If they are, their corresponding length parameters
    124  * must be less than or equal to 0.
    125  *
    126  * If any of the language, script or region parameters are empty, and the alternateTags
    127  * parameter is not NULL, it will be parsed for potential language, script and region tags
    128  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
    129  * it contains no language tag, the default tag for the unknown language is used.
    130  *
    131  * If the length of the new string exceeds the capacity of the output buffer,
    132  * the function copies as many bytes to the output buffer as it can, and returns
    133  * the error U_BUFFER_OVERFLOW_ERROR.
    134  *
    135  * If an illegal argument is provided, the function returns the error
    136  * U_ILLEGAL_ARGUMENT_ERROR.
    137  *
    138  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
    139  * the tag string fits in the output buffer, but the null terminator doesn't.
    140  *
    141  * @param lang The language tag to use.
    142  * @param langLength The length of the language tag.
    143  * @param script The script tag to use.
    144  * @param scriptLength The length of the script tag.
    145  * @param region The region tag to use.
    146  * @param regionLength The length of the region tag.
    147  * @param trailing Any trailing data to append to the new tag.
    148  * @param trailingLength The length of the trailing data.
    149  * @param alternateTags A string containing any alternate tags.
    150  * @param tag The output buffer.
    151  * @param tagCapacity The capacity of the output buffer.
    152  * @param err A pointer to a UErrorCode for error reporting.
    153  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
    154  **/
    155 static int32_t U_CALLCONV
    156 createTagStringWithAlternates(
    157     const char* lang,
    158     int32_t langLength,
    159     const char* script,
    160     int32_t scriptLength,
    161     const char* region,
    162     int32_t regionLength,
    163     const char* trailing,
    164     int32_t trailingLength,
    165     const char* alternateTags,
    166     char* tag,
    167     int32_t tagCapacity,
    168     UErrorCode* err) {
    169 
    170     if (U_FAILURE(*err)) {
    171         goto error;
    172     }
    173     else if (tag == NULL ||
    174              tagCapacity <= 0 ||
    175              langLength >= ULOC_LANG_CAPACITY ||
    176              scriptLength >= ULOC_SCRIPT_CAPACITY ||
    177              regionLength >= ULOC_COUNTRY_CAPACITY) {
    178         goto error;
    179     }
    180     else {
    181         /**
    182          * ULOC_FULLNAME_CAPACITY will provide enough capacity
    183          * that we can build a string that contains the language,
    184          * script and region code without worrying about overrunning
    185          * the user-supplied buffer.
    186          **/
    187         char tagBuffer[ULOC_FULLNAME_CAPACITY];
    188         int32_t tagLength = 0;
    189         int32_t capacityRemaining = tagCapacity;
    190         UBool regionAppended = FALSE;
    191 
    192         if (langLength > 0) {
    193             appendTag(
    194                 lang,
    195                 langLength,
    196                 tagBuffer,
    197                 &tagLength);
    198         }
    199         else if (alternateTags == NULL) {
    200             /*
    201              * Append the value for an unknown language, if
    202              * we found no language.
    203              */
    204             appendTag(
    205                 unknownLanguage,
    206                 (int32_t)uprv_strlen(unknownLanguage),
    207                 tagBuffer,
    208                 &tagLength);
    209         }
    210         else {
    211             /*
    212              * Parse the alternateTags string for the language.
    213              */
    214             char alternateLang[ULOC_LANG_CAPACITY];
    215             int32_t alternateLangLength = sizeof(alternateLang);
    216 
    217             alternateLangLength =
    218                 uloc_getLanguage(
    219                     alternateTags,
    220                     alternateLang,
    221                     alternateLangLength,
    222                     err);
    223             if(U_FAILURE(*err) ||
    224                 alternateLangLength >= ULOC_LANG_CAPACITY) {
    225                 goto error;
    226             }
    227             else if (alternateLangLength == 0) {
    228                 /*
    229                  * Append the value for an unknown language, if
    230                  * we found no language.
    231                  */
    232                 appendTag(
    233                     unknownLanguage,
    234                     (int32_t)uprv_strlen(unknownLanguage),
    235                     tagBuffer,
    236                     &tagLength);
    237             }
    238             else {
    239                 appendTag(
    240                     alternateLang,
    241                     alternateLangLength,
    242                     tagBuffer,
    243                     &tagLength);
    244             }
    245         }
    246 
    247         if (scriptLength > 0) {
    248             appendTag(
    249                 script,
    250                 scriptLength,
    251                 tagBuffer,
    252                 &tagLength);
    253         }
    254         else if (alternateTags != NULL) {
    255             /*
    256              * Parse the alternateTags string for the script.
    257              */
    258             char alternateScript[ULOC_SCRIPT_CAPACITY];
    259 
    260             const int32_t alternateScriptLength =
    261                 uloc_getScript(
    262                     alternateTags,
    263                     alternateScript,
    264                     sizeof(alternateScript),
    265                     err);
    266 
    267             if (U_FAILURE(*err) ||
    268                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
    269                 goto error;
    270             }
    271             else if (alternateScriptLength > 0) {
    272                 appendTag(
    273                     alternateScript,
    274                     alternateScriptLength,
    275                     tagBuffer,
    276                     &tagLength);
    277             }
    278         }
    279 
    280         if (regionLength > 0) {
    281             appendTag(
    282                 region,
    283                 regionLength,
    284                 tagBuffer,
    285                 &tagLength);
    286 
    287             regionAppended = TRUE;
    288         }
    289         else if (alternateTags != NULL) {
    290             /*
    291              * Parse the alternateTags string for the region.
    292              */
    293             char alternateRegion[ULOC_COUNTRY_CAPACITY];
    294 
    295             const int32_t alternateRegionLength =
    296                 uloc_getCountry(
    297                     alternateTags,
    298                     alternateRegion,
    299                     sizeof(alternateRegion),
    300                     err);
    301             if (U_FAILURE(*err) ||
    302                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
    303                 goto error;
    304             }
    305             else if (alternateRegionLength > 0) {
    306                 appendTag(
    307                     alternateRegion,
    308                     alternateRegionLength,
    309                     tagBuffer,
    310                     &tagLength);
    311 
    312                 regionAppended = TRUE;
    313             }
    314         }
    315 
    316         {
    317             const int32_t toCopy =
    318                 tagLength >= tagCapacity ? tagCapacity : tagLength;
    319 
    320             /**
    321              * Copy the partial tag from our internal buffer to the supplied
    322              * target.
    323              **/
    324             uprv_memcpy(
    325                 tag,
    326                 tagBuffer,
    327                 toCopy);
    328 
    329             capacityRemaining -= toCopy;
    330         }
    331 
    332         if (trailingLength > 0) {
    333             if (*trailing != '@' && capacityRemaining > 0) {
    334                 tag[tagLength++] = '_';
    335                 --capacityRemaining;
    336                 if (capacityRemaining > 0 && !regionAppended) {
    337                     /* extra separator is required */
    338                     tag[tagLength++] = '_';
    339                     --capacityRemaining;
    340                 }
    341             }
    342 
    343             if (capacityRemaining > 0) {
    344                 /*
    345                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
    346                  * don't know if the user-supplied buffers overlap.
    347                  */
    348                 const int32_t toCopy =
    349                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
    350 
    351                 uprv_memmove(
    352                     &tag[tagLength],
    353                     trailing,
    354                     toCopy);
    355             }
    356         }
    357 
    358         tagLength += trailingLength;
    359 
    360         return u_terminateChars(
    361                     tag,
    362                     tagCapacity,
    363                     tagLength,
    364                     err);
    365     }
    366 
    367 error:
    368 
    369     /**
    370      * An overflow indicates the locale ID passed in
    371      * is ill-formed.  If we got here, and there was
    372      * no previous error, it's an implicit overflow.
    373      **/
    374     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
    375         U_SUCCESS(*err)) {
    376         *err = U_ILLEGAL_ARGUMENT_ERROR;
    377     }
    378 
    379     return -1;
    380 }
    381 
    382 /**
    383  * Create a tag string from the supplied parameters.  The lang, script and region
    384  * parameters may be NULL pointers. If they are, their corresponding length parameters
    385  * must be less than or equal to 0.  If the lang parameter is an empty string, the
    386  * default value for an unknown language is written to the output buffer.
    387  *
    388  * If the length of the new string exceeds the capacity of the output buffer,
    389  * the function copies as many bytes to the output buffer as it can, and returns
    390  * the error U_BUFFER_OVERFLOW_ERROR.
    391  *
    392  * If an illegal argument is provided, the function returns the error
    393  * U_ILLEGAL_ARGUMENT_ERROR.
    394  *
    395  * @param lang The language tag to use.
    396  * @param langLength The length of the language tag.
    397  * @param script The script tag to use.
    398  * @param scriptLength The length of the script tag.
    399  * @param region The region tag to use.
    400  * @param regionLength The length of the region tag.
    401  * @param trailing Any trailing data to append to the new tag.
    402  * @param trailingLength The length of the trailing data.
    403  * @param tag The output buffer.
    404  * @param tagCapacity The capacity of the output buffer.
    405  * @param err A pointer to a UErrorCode for error reporting.
    406  * @return The length of the tag string, which may be greater than tagCapacity.
    407  **/
    408 static int32_t U_CALLCONV
    409 createTagString(
    410     const char* lang,
    411     int32_t langLength,
    412     const char* script,
    413     int32_t scriptLength,
    414     const char* region,
    415     int32_t regionLength,
    416     const char* trailing,
    417     int32_t trailingLength,
    418     char* tag,
    419     int32_t tagCapacity,
    420     UErrorCode* err)
    421 {
    422     return createTagStringWithAlternates(
    423                 lang,
    424                 langLength,
    425                 script,
    426                 scriptLength,
    427                 region,
    428                 regionLength,
    429                 trailing,
    430                 trailingLength,
    431                 NULL,
    432                 tag,
    433                 tagCapacity,
    434                 err);
    435 }
    436 
    437 /**
    438  * Parse the language, script, and region subtags from a tag string, and copy the
    439  * results into the corresponding output parameters. The buffers are null-terminated,
    440  * unless overflow occurs.
    441  *
    442  * The langLength, scriptLength, and regionLength parameters are input/output
    443  * parameters, and must contain the capacity of their corresponding buffers on
    444  * input.  On output, they will contain the actual length of the buffers, not
    445  * including the null terminator.
    446  *
    447  * If the length of any of the output subtags exceeds the capacity of the corresponding
    448  * buffer, the function copies as many bytes to the output buffer as it can, and returns
    449  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
    450  * occurs.
    451  *
    452  * If an illegal argument is provided, the function returns the error
    453  * U_ILLEGAL_ARGUMENT_ERROR.
    454  *
    455  * @param localeID The locale ID to parse.
    456  * @param lang The language tag buffer.
    457  * @param langLength The length of the language tag.
    458  * @param script The script tag buffer.
    459  * @param scriptLength The length of the script tag.
    460  * @param region The region tag buffer.
    461  * @param regionLength The length of the region tag.
    462  * @param err A pointer to a UErrorCode for error reporting.
    463  * @return The number of chars of the localeID parameter consumed.
    464  **/
    465 static int32_t U_CALLCONV
    466 parseTagString(
    467     const char* localeID,
    468     char* lang,
    469     int32_t* langLength,
    470     char* script,
    471     int32_t* scriptLength,
    472     char* region,
    473     int32_t* regionLength,
    474     UErrorCode* err)
    475 {
    476     const char* position = localeID;
    477     int32_t subtagLength = 0;
    478 
    479     if(U_FAILURE(*err) ||
    480        localeID == NULL ||
    481        lang == NULL ||
    482        langLength == NULL ||
    483        script == NULL ||
    484        scriptLength == NULL ||
    485        region == NULL ||
    486        regionLength == NULL) {
    487         goto error;
    488     }
    489 
    490     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
    491     u_terminateChars(lang, *langLength, subtagLength, err);
    492 
    493     /*
    494      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
    495      * to be an error, because it indicates the user-supplied tag is
    496      * not well-formed.
    497      */
    498     if(U_FAILURE(*err)) {
    499         goto error;
    500     }
    501 
    502     *langLength = subtagLength;
    503 
    504     /*
    505      * If no language was present, use the value of unknownLanguage
    506      * instead.  Otherwise, move past any separator.
    507      */
    508     if (*langLength == 0) {
    509         uprv_strcpy(
    510             lang,
    511             unknownLanguage);
    512         *langLength = (int32_t)uprv_strlen(lang);
    513     }
    514     if (_isIDSeparator(*position)) {
    515         ++position;
    516     }
    517 
    518     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
    519     u_terminateChars(script, *scriptLength, subtagLength, err);
    520 
    521     if(U_FAILURE(*err)) {
    522         goto error;
    523     }
    524 
    525     *scriptLength = subtagLength;
    526 
    527     if (*scriptLength > 0) {
    528         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
    529             /**
    530              * If the script part is the "unknown" script, then don't return it.
    531              **/
    532             *scriptLength = 0;
    533         }
    534 
    535         /*
    536          * Move past any separator.
    537          */
    538         if (_isIDSeparator(*position)) {
    539             ++position;
    540         }
    541     }
    542 
    543     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
    544     u_terminateChars(region, *regionLength, subtagLength, err);
    545 
    546     if(U_FAILURE(*err)) {
    547         goto error;
    548     }
    549 
    550     *regionLength = subtagLength;
    551 
    552     if (*regionLength > 0) {
    553         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
    554             /**
    555              * If the region part is the "unknown" region, then don't return it.
    556              **/
    557             *regionLength = 0;
    558         }
    559     } else if (*position != 0 && *position != '@') {
    560         /* back up over consumed trailing separator */
    561         --position;
    562     }
    563 
    564 exit:
    565 
    566     return (int32_t)(position - localeID);
    567 
    568 error:
    569 
    570     /**
    571      * If we get here, we have no explicit error, it's the result of an
    572      * illegal argument.
    573      **/
    574     if (!U_FAILURE(*err)) {
    575         *err = U_ILLEGAL_ARGUMENT_ERROR;
    576     }
    577 
    578     goto exit;
    579 }
    580 
    581 static int32_t U_CALLCONV
    582 createLikelySubtagsString(
    583     const char* lang,
    584     int32_t langLength,
    585     const char* script,
    586     int32_t scriptLength,
    587     const char* region,
    588     int32_t regionLength,
    589     const char* variants,
    590     int32_t variantsLength,
    591     char* tag,
    592     int32_t tagCapacity,
    593     UErrorCode* err)
    594 {
    595     /**
    596      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    597      * that we can build a string that contains the language,
    598      * script and region code without worrying about overrunning
    599      * the user-supplied buffer.
    600      **/
    601     char tagBuffer[ULOC_FULLNAME_CAPACITY];
    602     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
    603 
    604     if(U_FAILURE(*err)) {
    605         goto error;
    606     }
    607 
    608     /**
    609      * Try the language with the script and region first.
    610      **/
    611     if (scriptLength > 0 && regionLength > 0) {
    612 
    613         const char* likelySubtags = NULL;
    614 
    615         createTagString(
    616             lang,
    617             langLength,
    618             script,
    619             scriptLength,
    620             region,
    621             regionLength,
    622             NULL,
    623             0,
    624             tagBuffer,
    625             sizeof(tagBuffer),
    626             err);
    627         if(U_FAILURE(*err)) {
    628             goto error;
    629         }
    630 
    631         likelySubtags =
    632             findLikelySubtags(
    633                 tagBuffer,
    634                 likelySubtagsBuffer,
    635                 sizeof(likelySubtagsBuffer),
    636                 err);
    637         if(U_FAILURE(*err)) {
    638             goto error;
    639         }
    640 
    641         if (likelySubtags != NULL) {
    642             /* Always use the language tag from the
    643                maximal string, since it may be more
    644                specific than the one provided. */
    645             return createTagStringWithAlternates(
    646                         NULL,
    647                         0,
    648                         NULL,
    649                         0,
    650                         NULL,
    651                         0,
    652                         variants,
    653                         variantsLength,
    654                         likelySubtags,
    655                         tag,
    656                         tagCapacity,
    657                         err);
    658         }
    659     }
    660 
    661     /**
    662      * Try the language with just the script.
    663      **/
    664     if (scriptLength > 0) {
    665 
    666         const char* likelySubtags = NULL;
    667 
    668         createTagString(
    669             lang,
    670             langLength,
    671             script,
    672             scriptLength,
    673             NULL,
    674             0,
    675             NULL,
    676             0,
    677             tagBuffer,
    678             sizeof(tagBuffer),
    679             err);
    680         if(U_FAILURE(*err)) {
    681             goto error;
    682         }
    683 
    684         likelySubtags =
    685             findLikelySubtags(
    686                 tagBuffer,
    687                 likelySubtagsBuffer,
    688                 sizeof(likelySubtagsBuffer),
    689                 err);
    690         if(U_FAILURE(*err)) {
    691             goto error;
    692         }
    693 
    694         if (likelySubtags != NULL) {
    695             /* Always use the language tag from the
    696                maximal string, since it may be more
    697                specific than the one provided. */
    698             return createTagStringWithAlternates(
    699                         NULL,
    700                         0,
    701                         NULL,
    702                         0,
    703                         region,
    704                         regionLength,
    705                         variants,
    706                         variantsLength,
    707                         likelySubtags,
    708                         tag,
    709                         tagCapacity,
    710                         err);
    711         }
    712     }
    713 
    714     /**
    715      * Try the language with just the region.
    716      **/
    717     if (regionLength > 0) {
    718 
    719         const char* likelySubtags = NULL;
    720 
    721         createTagString(
    722             lang,
    723             langLength,
    724             NULL,
    725             0,
    726             region,
    727             regionLength,
    728             NULL,
    729             0,
    730             tagBuffer,
    731             sizeof(tagBuffer),
    732             err);
    733         if(U_FAILURE(*err)) {
    734             goto error;
    735         }
    736 
    737         likelySubtags =
    738             findLikelySubtags(
    739                 tagBuffer,
    740                 likelySubtagsBuffer,
    741                 sizeof(likelySubtagsBuffer),
    742                 err);
    743         if(U_FAILURE(*err)) {
    744             goto error;
    745         }
    746 
    747         if (likelySubtags != NULL) {
    748             /* Always use the language tag from the
    749                maximal string, since it may be more
    750                specific than the one provided. */
    751             return createTagStringWithAlternates(
    752                         NULL,
    753                         0,
    754                         script,
    755                         scriptLength,
    756                         NULL,
    757                         0,
    758                         variants,
    759                         variantsLength,
    760                         likelySubtags,
    761                         tag,
    762                         tagCapacity,
    763                         err);
    764         }
    765     }
    766 
    767     /**
    768      * Finally, try just the language.
    769      **/
    770     {
    771         const char* likelySubtags = NULL;
    772 
    773         createTagString(
    774             lang,
    775             langLength,
    776             NULL,
    777             0,
    778             NULL,
    779             0,
    780             NULL,
    781             0,
    782             tagBuffer,
    783             sizeof(tagBuffer),
    784             err);
    785         if(U_FAILURE(*err)) {
    786             goto error;
    787         }
    788 
    789         likelySubtags =
    790             findLikelySubtags(
    791                 tagBuffer,
    792                 likelySubtagsBuffer,
    793                 sizeof(likelySubtagsBuffer),
    794                 err);
    795         if(U_FAILURE(*err)) {
    796             goto error;
    797         }
    798 
    799         if (likelySubtags != NULL) {
    800             /* Always use the language tag from the
    801                maximal string, since it may be more
    802                specific than the one provided. */
    803             return createTagStringWithAlternates(
    804                         NULL,
    805                         0,
    806                         script,
    807                         scriptLength,
    808                         region,
    809                         regionLength,
    810                         variants,
    811                         variantsLength,
    812                         likelySubtags,
    813                         tag,
    814                         tagCapacity,
    815                         err);
    816         }
    817     }
    818 
    819     return u_terminateChars(
    820                 tag,
    821                 tagCapacity,
    822                 0,
    823                 err);
    824 
    825 error:
    826 
    827     if (!U_FAILURE(*err)) {
    828         *err = U_ILLEGAL_ARGUMENT_ERROR;
    829     }
    830 
    831     return -1;
    832 }
    833 
    834 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    835     {   int32_t count = 0; \
    836         int32_t i; \
    837         for (i = 0; i < trailingLength; i++) { \
    838             if (trailing[i] == '-' || trailing[i] == '_') { \
    839                 count = 0; \
    840                 if (count > 8) { \
    841                     goto error; \
    842                 } \
    843             } else if (trailing[i] == '@') { \
    844                 break; \
    845             } else if (count > 8) { \
    846                 goto error; \
    847             } else { \
    848                 count++; \
    849             } \
    850         } \
    851     }
    852 
    853 static int32_t
    854 _uloc_addLikelySubtags(const char*    localeID,
    855          char* maximizedLocaleID,
    856          int32_t maximizedLocaleIDCapacity,
    857          UErrorCode* err)
    858 {
    859     char lang[ULOC_LANG_CAPACITY];
    860     int32_t langLength = sizeof(lang);
    861     char script[ULOC_SCRIPT_CAPACITY];
    862     int32_t scriptLength = sizeof(script);
    863     char region[ULOC_COUNTRY_CAPACITY];
    864     int32_t regionLength = sizeof(region);
    865     const char* trailing = "";
    866     int32_t trailingLength = 0;
    867     int32_t trailingIndex = 0;
    868     int32_t resultLength = 0;
    869 
    870     if(U_FAILURE(*err)) {
    871         goto error;
    872     }
    873     else if (localeID == NULL ||
    874              maximizedLocaleID == NULL ||
    875              maximizedLocaleIDCapacity <= 0) {
    876         goto error;
    877     }
    878 
    879     trailingIndex = parseTagString(
    880         localeID,
    881         lang,
    882         &langLength,
    883         script,
    884         &scriptLength,
    885         region,
    886         &regionLength,
    887         err);
    888     if(U_FAILURE(*err)) {
    889         /* Overflow indicates an illegal argument error */
    890         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    891             *err = U_ILLEGAL_ARGUMENT_ERROR;
    892         }
    893 
    894         goto error;
    895     }
    896 
    897     /* Find the length of the trailing portion. */
    898     while (_isIDSeparator(localeID[trailingIndex])) {
    899         trailingIndex++;
    900     }
    901     trailing = &localeID[trailingIndex];
    902     trailingLength = (int32_t)uprv_strlen(trailing);
    903 
    904     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
    905 
    906     resultLength =
    907         createLikelySubtagsString(
    908             lang,
    909             langLength,
    910             script,
    911             scriptLength,
    912             region,
    913             regionLength,
    914             trailing,
    915             trailingLength,
    916             maximizedLocaleID,
    917             maximizedLocaleIDCapacity,
    918             err);
    919 
    920     if (resultLength == 0) {
    921         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
    922 
    923         /*
    924          * If we get here, we need to return localeID.
    925          */
    926         uprv_memcpy(
    927             maximizedLocaleID,
    928             localeID,
    929             localIDLength <= maximizedLocaleIDCapacity ?
    930                 localIDLength : maximizedLocaleIDCapacity);
    931 
    932         resultLength =
    933             u_terminateChars(
    934                 maximizedLocaleID,
    935                 maximizedLocaleIDCapacity,
    936                 localIDLength,
    937                 err);
    938     }
    939 
    940     return resultLength;
    941 
    942 error:
    943 
    944     if (!U_FAILURE(*err)) {
    945         *err = U_ILLEGAL_ARGUMENT_ERROR;
    946     }
    947 
    948     return -1;
    949 }
    950 
    951 static int32_t
    952 _uloc_minimizeSubtags(const char*    localeID,
    953          char* minimizedLocaleID,
    954          int32_t minimizedLocaleIDCapacity,
    955          UErrorCode* err)
    956 {
    957     /**
    958      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    959      * that we can build a string that contains the language,
    960      * script and region code without worrying about overrunning
    961      * the user-supplied buffer.
    962      **/
    963     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
    964     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
    965 
    966     char lang[ULOC_LANG_CAPACITY];
    967     int32_t langLength = sizeof(lang);
    968     char script[ULOC_SCRIPT_CAPACITY];
    969     int32_t scriptLength = sizeof(script);
    970     char region[ULOC_COUNTRY_CAPACITY];
    971     int32_t regionLength = sizeof(region);
    972     const char* trailing = "";
    973     int32_t trailingLength = 0;
    974     int32_t trailingIndex = 0;
    975 
    976     if(U_FAILURE(*err)) {
    977         goto error;
    978     }
    979     else if (localeID == NULL ||
    980              minimizedLocaleID == NULL ||
    981              minimizedLocaleIDCapacity <= 0) {
    982         goto error;
    983     }
    984 
    985     trailingIndex =
    986         parseTagString(
    987             localeID,
    988             lang,
    989             &langLength,
    990             script,
    991             &scriptLength,
    992             region,
    993             &regionLength,
    994             err);
    995     if(U_FAILURE(*err)) {
    996 
    997         /* Overflow indicates an illegal argument error */
    998         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    999             *err = U_ILLEGAL_ARGUMENT_ERROR;
   1000         }
   1001 
   1002         goto error;
   1003     }
   1004 
   1005     /* Find the spot where the variants or the keywords begin, if any. */
   1006     while (_isIDSeparator(localeID[trailingIndex])) {
   1007         trailingIndex++;
   1008     }
   1009     trailing = &localeID[trailingIndex];
   1010     trailingLength = (int32_t)uprv_strlen(trailing);
   1011 
   1012     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
   1013 
   1014     createTagString(
   1015         lang,
   1016         langLength,
   1017         script,
   1018         scriptLength,
   1019         region,
   1020         regionLength,
   1021         NULL,
   1022         0,
   1023         maximizedTagBuffer,
   1024         maximizedTagBufferLength,
   1025         err);
   1026     if(U_FAILURE(*err)) {
   1027         goto error;
   1028     }
   1029 
   1030     /**
   1031      * First, we need to first get the maximization
   1032      * from AddLikelySubtags.
   1033      **/
   1034     maximizedTagBufferLength =
   1035         uloc_addLikelySubtags(
   1036             maximizedTagBuffer,
   1037             maximizedTagBuffer,
   1038             maximizedTagBufferLength,
   1039             err);
   1040 
   1041     if(U_FAILURE(*err)) {
   1042         goto error;
   1043     }
   1044 
   1045     /**
   1046      * Start first with just the language.
   1047      **/
   1048     {
   1049         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1050 
   1051         const int32_t tagBufferLength =
   1052             createLikelySubtagsString(
   1053                 lang,
   1054                 langLength,
   1055                 NULL,
   1056                 0,
   1057                 NULL,
   1058                 0,
   1059                 NULL,
   1060                 0,
   1061                 tagBuffer,
   1062                 sizeof(tagBuffer),
   1063                 err);
   1064 
   1065         if(U_FAILURE(*err)) {
   1066             goto error;
   1067         }
   1068         else if (uprv_strnicmp(
   1069                     maximizedTagBuffer,
   1070                     tagBuffer,
   1071                     tagBufferLength) == 0) {
   1072 
   1073             return createTagString(
   1074                         lang,
   1075                         langLength,
   1076                         NULL,
   1077                         0,
   1078                         NULL,
   1079                         0,
   1080                         trailing,
   1081                         trailingLength,
   1082                         minimizedLocaleID,
   1083                         minimizedLocaleIDCapacity,
   1084                         err);
   1085         }
   1086     }
   1087 
   1088     /**
   1089      * Next, try the language and region.
   1090      **/
   1091     if (regionLength > 0) {
   1092 
   1093         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1094 
   1095         const int32_t tagBufferLength =
   1096             createLikelySubtagsString(
   1097                 lang,
   1098                 langLength,
   1099                 NULL,
   1100                 0,
   1101                 region,
   1102                 regionLength,
   1103                 NULL,
   1104                 0,
   1105                 tagBuffer,
   1106                 sizeof(tagBuffer),
   1107                 err);
   1108 
   1109         if(U_FAILURE(*err)) {
   1110             goto error;
   1111         }
   1112         else if (uprv_strnicmp(
   1113                     maximizedTagBuffer,
   1114                     tagBuffer,
   1115                     tagBufferLength) == 0) {
   1116 
   1117             return createTagString(
   1118                         lang,
   1119                         langLength,
   1120                         NULL,
   1121                         0,
   1122                         region,
   1123                         regionLength,
   1124                         trailing,
   1125                         trailingLength,
   1126                         minimizedLocaleID,
   1127                         minimizedLocaleIDCapacity,
   1128                         err);
   1129         }
   1130     }
   1131 
   1132     /**
   1133      * Finally, try the language and script.  This is our last chance,
   1134      * since trying with all three subtags would only yield the
   1135      * maximal version that we already have.
   1136      **/
   1137     if (scriptLength > 0 && regionLength > 0) {
   1138         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1139 
   1140         const int32_t tagBufferLength =
   1141             createLikelySubtagsString(
   1142                 lang,
   1143                 langLength,
   1144                 script,
   1145                 scriptLength,
   1146                 NULL,
   1147                 0,
   1148                 NULL,
   1149                 0,
   1150                 tagBuffer,
   1151                 sizeof(tagBuffer),
   1152                 err);
   1153 
   1154         if(U_FAILURE(*err)) {
   1155             goto error;
   1156         }
   1157         else if (uprv_strnicmp(
   1158                     maximizedTagBuffer,
   1159                     tagBuffer,
   1160                     tagBufferLength) == 0) {
   1161 
   1162             return createTagString(
   1163                         lang,
   1164                         langLength,
   1165                         script,
   1166                         scriptLength,
   1167                         NULL,
   1168                         0,
   1169                         trailing,
   1170                         trailingLength,
   1171                         minimizedLocaleID,
   1172                         minimizedLocaleIDCapacity,
   1173                         err);
   1174         }
   1175     }
   1176 
   1177     {
   1178         /**
   1179          * If we got here, return the locale ID parameter.
   1180          **/
   1181         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
   1182 
   1183         uprv_memcpy(
   1184             minimizedLocaleID,
   1185             localeID,
   1186             localeIDLength <= minimizedLocaleIDCapacity ?
   1187                 localeIDLength : minimizedLocaleIDCapacity);
   1188 
   1189         return u_terminateChars(
   1190                     minimizedLocaleID,
   1191                     minimizedLocaleIDCapacity,
   1192                     localeIDLength,
   1193                     err);
   1194     }
   1195 
   1196 error:
   1197 
   1198     if (!U_FAILURE(*err)) {
   1199         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1200     }
   1201 
   1202     return -1;
   1203 
   1204 
   1205 }
   1206 
   1207 static UBool
   1208 do_canonicalize(const char*    localeID,
   1209          char* buffer,
   1210          int32_t bufferCapacity,
   1211          UErrorCode* err)
   1212 {
   1213     uloc_canonicalize(
   1214         localeID,
   1215         buffer,
   1216         bufferCapacity,
   1217         err);
   1218 
   1219     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
   1220         *err == U_BUFFER_OVERFLOW_ERROR) {
   1221         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1222 
   1223         return FALSE;
   1224     }
   1225     else if (U_FAILURE(*err)) {
   1226 
   1227         return FALSE;
   1228     }
   1229     else {
   1230         return TRUE;
   1231     }
   1232 }
   1233 
   1234 U_CAPI int32_t U_EXPORT2
   1235 uloc_addLikelySubtags(const char*    localeID,
   1236          char* maximizedLocaleID,
   1237          int32_t maximizedLocaleIDCapacity,
   1238          UErrorCode* err)
   1239 {
   1240     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1241 
   1242     if (!do_canonicalize(
   1243         localeID,
   1244         localeBuffer,
   1245         sizeof(localeBuffer),
   1246         err)) {
   1247         return -1;
   1248     }
   1249     else {
   1250         return _uloc_addLikelySubtags(
   1251                     localeBuffer,
   1252                     maximizedLocaleID,
   1253                     maximizedLocaleIDCapacity,
   1254                     err);
   1255     }
   1256 }
   1257 
   1258 U_CAPI int32_t U_EXPORT2
   1259 uloc_minimizeSubtags(const char*    localeID,
   1260          char* minimizedLocaleID,
   1261          int32_t minimizedLocaleIDCapacity,
   1262          UErrorCode* err)
   1263 {
   1264     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1265 
   1266     if (!do_canonicalize(
   1267         localeID,
   1268         localeBuffer,
   1269         sizeof(localeBuffer),
   1270         err)) {
   1271         return -1;
   1272     }
   1273     else {
   1274         return _uloc_minimizeSubtags(
   1275                     localeBuffer,
   1276                     minimizedLocaleID,
   1277                     minimizedLocaleIDCapacity,
   1278                     err);
   1279     }
   1280 }
   1281 
   1282 // Pairs of (language subtag, + or -) for finding out fast if common languages
   1283 // are LTR (minus) or RTL (plus).
   1284 static const char LANG_DIR_STRING[] =
   1285         "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
   1286 
   1287 // Implemented here because this calls uloc_addLikelySubtags().
   1288 U_CAPI UBool U_EXPORT2
   1289 uloc_isRightToLeft(const char *locale) {
   1290     UErrorCode errorCode = U_ZERO_ERROR;
   1291     char script[8];
   1292     int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
   1293     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
   1294             scriptLength == 0) {
   1295         // Fastpath: We know the likely scripts and their writing direction
   1296         // for some common languages.
   1297         errorCode = U_ZERO_ERROR;
   1298         char lang[8];
   1299         int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
   1300         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
   1301                 langLength == 0) {
   1302             return FALSE;
   1303         }
   1304         const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
   1305         if (langPtr != NULL) {
   1306             switch (langPtr[langLength]) {
   1307             case '-': return FALSE;
   1308             case '+': return TRUE;
   1309             default: break;  // partial match of a longer code
   1310             }
   1311         }
   1312         // Otherwise, find the likely script.
   1313         errorCode = U_ZERO_ERROR;
   1314         char likely[ULOC_FULLNAME_CAPACITY];
   1315         (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
   1316         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
   1317             return FALSE;
   1318         }
   1319         scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
   1320         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
   1321                 scriptLength == 0) {
   1322             return FALSE;
   1323         }
   1324     }
   1325     UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
   1326     return uscript_isRightToLeft(scriptCode);
   1327 }
   1328 
   1329 U_NAMESPACE_BEGIN
   1330 
   1331 UBool
   1332 Locale::isRightToLeft() const {
   1333     return uloc_isRightToLeft(getBaseName());
   1334 }
   1335 
   1336 U_NAMESPACE_END
   1337 
   1338 // The following must at least allow for rg key value (6) plus terminator (1).
   1339 #define ULOC_RG_BUFLEN 8
   1340 
   1341 U_CAPI int32_t U_EXPORT2
   1342 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
   1343                                      char *region, int32_t regionCapacity, UErrorCode* status) {
   1344     if (U_FAILURE(*status)) {
   1345         return 0;
   1346     }
   1347     char rgBuf[ULOC_RG_BUFLEN];
   1348     UErrorCode rgStatus = U_ZERO_ERROR;
   1349 
   1350     // First check for rg keyword value
   1351     int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
   1352     if (U_FAILURE(rgStatus) || rgLen != 6) {
   1353         rgLen = 0;
   1354     } else {
   1355         // rgBuf guaranteed to be zero terminated here, with text len 6
   1356         char *rgPtr = rgBuf;
   1357         for (; *rgPtr!= 0; rgPtr++) {
   1358             *rgPtr = uprv_toupper(*rgPtr);
   1359         }
   1360         rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
   1361     }
   1362 
   1363     if (rgLen == 0) {
   1364         // No valid rg keyword value, try for unicode_region_subtag
   1365         rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
   1366         if (U_FAILURE(*status)) {
   1367             rgLen = 0;
   1368         } else if (rgLen == 0 && inferRegion) {
   1369             // no unicode_region_subtag but inferRegion TRUE, try likely subtags
   1370             char locBuf[ULOC_FULLNAME_CAPACITY];
   1371             rgStatus = U_ZERO_ERROR;
   1372             (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
   1373             if (U_SUCCESS(rgStatus)) {
   1374                 rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
   1375                 if (U_FAILURE(*status)) {
   1376                     rgLen = 0;
   1377                 }
   1378             }
   1379         }
   1380     }
   1381 
   1382     rgBuf[rgLen] = 0;
   1383     uprv_strncpy(region, rgBuf, regionCapacity);
   1384     return u_terminateChars(region, regionCapacity, rgLen, status);
   1385 }
   1386 
   1387