Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1997-2012, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  loclikely.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010feb25
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
     17 *   that then do not depend on resource bundle code and likely-subtags data.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/putil.h"
     22 #include "unicode/uloc.h"
     23 #include "unicode/ures.h"
     24 #include "cmemory.h"
     25 #include "cstring.h"
     26 #include "ulocimp.h"
     27 #include "ustr_imp.h"
     28 
     29 /**
     30  * This function looks for the localeID in the likelySubtags resource.
     31  *
     32  * @param localeID The tag to find.
     33  * @param buffer A buffer to hold the matching entry
     34  * @param bufferLength The length of the output buffer
     35  * @return A pointer to "buffer" if found, or a null pointer if not.
     36  */
     37 static const char*  U_CALLCONV
     38 findLikelySubtags(const char* localeID,
     39                   char* buffer,
     40                   int32_t bufferLength,
     41                   UErrorCode* err) {
     42     const char* result = NULL;
     43 
     44     if (!U_FAILURE(*err)) {
     45         int32_t resLen = 0;
     46         const UChar* s = NULL;
     47         UErrorCode tmpErr = U_ZERO_ERROR;
     48         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
     49         if (U_SUCCESS(tmpErr)) {
     50             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
     51 
     52             if (U_FAILURE(tmpErr)) {
     53                 /*
     54                  * If a resource is missing, it's not really an error, it's
     55                  * just that we don't have any data for that particular locale ID.
     56                  */
     57                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
     58                     *err = tmpErr;
     59                 }
     60             }
     61             else if (resLen >= bufferLength) {
     62                 /* The buffer should never overflow. */
     63                 *err = U_INTERNAL_PROGRAM_ERROR;
     64             }
     65             else {
     66                 u_UCharsToChars(s, buffer, resLen + 1);
     67                 result = buffer;
     68             }
     69 
     70             ures_close(subtags);
     71         } else {
     72             *err = tmpErr;
     73         }
     74     }
     75 
     76     return result;
     77 }
     78 
     79 /**
     80  * Append a tag to a buffer, adding the separator if necessary.  The buffer
     81  * must be large enough to contain the resulting tag plus any separator
     82  * necessary. The tag must not be a zero-length string.
     83  *
     84  * @param tag The tag to add.
     85  * @param tagLength The length of the tag.
     86  * @param buffer The output buffer.
     87  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
     88  **/
     89 static void U_CALLCONV
     90 appendTag(
     91     const char* tag,
     92     int32_t tagLength,
     93     char* buffer,
     94     int32_t* bufferLength) {
     95 
     96     if (*bufferLength > 0) {
     97         buffer[*bufferLength] = '_';
     98         ++(*bufferLength);
     99     }
    100 
    101     uprv_memmove(
    102         &buffer[*bufferLength],
    103         tag,
    104         tagLength);
    105 
    106     *bufferLength += tagLength;
    107 }
    108 
    109 /**
    110  * These are the canonical strings for unknown languages, scripts and regions.
    111  **/
    112 static const char* const unknownLanguage = "und";
    113 static const char* const unknownScript = "Zzzz";
    114 static const char* const unknownRegion = "ZZ";
    115 
    116 /**
    117  * Create a tag string from the supplied parameters.  The lang, script and region
    118  * parameters may be NULL pointers. If they are, their corresponding length parameters
    119  * must be less than or equal to 0.
    120  *
    121  * If any of the language, script or region parameters are empty, and the alternateTags
    122  * parameter is not NULL, it will be parsed for potential language, script and region tags
    123  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
    124  * it contains no language tag, the default tag for the unknown language is used.
    125  *
    126  * If the length of the new string exceeds the capacity of the output buffer,
    127  * the function copies as many bytes to the output buffer as it can, and returns
    128  * the error U_BUFFER_OVERFLOW_ERROR.
    129  *
    130  * If an illegal argument is provided, the function returns the error
    131  * U_ILLEGAL_ARGUMENT_ERROR.
    132  *
    133  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
    134  * the tag string fits in the output buffer, but the null terminator doesn't.
    135  *
    136  * @param lang The language tag to use.
    137  * @param langLength The length of the language tag.
    138  * @param script The script tag to use.
    139  * @param scriptLength The length of the script tag.
    140  * @param region The region tag to use.
    141  * @param regionLength The length of the region tag.
    142  * @param trailing Any trailing data to append to the new tag.
    143  * @param trailingLength The length of the trailing data.
    144  * @param alternateTags A string containing any alternate tags.
    145  * @param tag The output buffer.
    146  * @param tagCapacity The capacity of the output buffer.
    147  * @param err A pointer to a UErrorCode for error reporting.
    148  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
    149  **/
    150 static int32_t U_CALLCONV
    151 createTagStringWithAlternates(
    152     const char* lang,
    153     int32_t langLength,
    154     const char* script,
    155     int32_t scriptLength,
    156     const char* region,
    157     int32_t regionLength,
    158     const char* trailing,
    159     int32_t trailingLength,
    160     const char* alternateTags,
    161     char* tag,
    162     int32_t tagCapacity,
    163     UErrorCode* err) {
    164 
    165     if (U_FAILURE(*err)) {
    166         goto error;
    167     }
    168     else if (tag == NULL ||
    169              tagCapacity <= 0 ||
    170              langLength >= ULOC_LANG_CAPACITY ||
    171              scriptLength >= ULOC_SCRIPT_CAPACITY ||
    172              regionLength >= ULOC_COUNTRY_CAPACITY) {
    173         goto error;
    174     }
    175     else {
    176         /**
    177          * ULOC_FULLNAME_CAPACITY will provide enough capacity
    178          * that we can build a string that contains the language,
    179          * script and region code without worrying about overrunning
    180          * the user-supplied buffer.
    181          **/
    182         char tagBuffer[ULOC_FULLNAME_CAPACITY];
    183         int32_t tagLength = 0;
    184         int32_t capacityRemaining = tagCapacity;
    185         UBool regionAppended = FALSE;
    186 
    187         if (langLength > 0) {
    188             appendTag(
    189                 lang,
    190                 langLength,
    191                 tagBuffer,
    192                 &tagLength);
    193         }
    194         else if (alternateTags == NULL) {
    195             /*
    196              * Append the value for an unknown language, if
    197              * we found no language.
    198              */
    199             appendTag(
    200                 unknownLanguage,
    201                 (int32_t)uprv_strlen(unknownLanguage),
    202                 tagBuffer,
    203                 &tagLength);
    204         }
    205         else {
    206             /*
    207              * Parse the alternateTags string for the language.
    208              */
    209             char alternateLang[ULOC_LANG_CAPACITY];
    210             int32_t alternateLangLength = sizeof(alternateLang);
    211 
    212             alternateLangLength =
    213                 uloc_getLanguage(
    214                     alternateTags,
    215                     alternateLang,
    216                     alternateLangLength,
    217                     err);
    218             if(U_FAILURE(*err) ||
    219                 alternateLangLength >= ULOC_LANG_CAPACITY) {
    220                 goto error;
    221             }
    222             else if (alternateLangLength == 0) {
    223                 /*
    224                  * Append the value for an unknown language, if
    225                  * we found no language.
    226                  */
    227                 appendTag(
    228                     unknownLanguage,
    229                     (int32_t)uprv_strlen(unknownLanguage),
    230                     tagBuffer,
    231                     &tagLength);
    232             }
    233             else {
    234                 appendTag(
    235                     alternateLang,
    236                     alternateLangLength,
    237                     tagBuffer,
    238                     &tagLength);
    239             }
    240         }
    241 
    242         if (scriptLength > 0) {
    243             appendTag(
    244                 script,
    245                 scriptLength,
    246                 tagBuffer,
    247                 &tagLength);
    248         }
    249         else if (alternateTags != NULL) {
    250             /*
    251              * Parse the alternateTags string for the script.
    252              */
    253             char alternateScript[ULOC_SCRIPT_CAPACITY];
    254 
    255             const int32_t alternateScriptLength =
    256                 uloc_getScript(
    257                     alternateTags,
    258                     alternateScript,
    259                     sizeof(alternateScript),
    260                     err);
    261 
    262             if (U_FAILURE(*err) ||
    263                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
    264                 goto error;
    265             }
    266             else if (alternateScriptLength > 0) {
    267                 appendTag(
    268                     alternateScript,
    269                     alternateScriptLength,
    270                     tagBuffer,
    271                     &tagLength);
    272             }
    273         }
    274 
    275         if (regionLength > 0) {
    276             appendTag(
    277                 region,
    278                 regionLength,
    279                 tagBuffer,
    280                 &tagLength);
    281 
    282             regionAppended = TRUE;
    283         }
    284         else if (alternateTags != NULL) {
    285             /*
    286              * Parse the alternateTags string for the region.
    287              */
    288             char alternateRegion[ULOC_COUNTRY_CAPACITY];
    289 
    290             const int32_t alternateRegionLength =
    291                 uloc_getCountry(
    292                     alternateTags,
    293                     alternateRegion,
    294                     sizeof(alternateRegion),
    295                     err);
    296             if (U_FAILURE(*err) ||
    297                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
    298                 goto error;
    299             }
    300             else if (alternateRegionLength > 0) {
    301                 appendTag(
    302                     alternateRegion,
    303                     alternateRegionLength,
    304                     tagBuffer,
    305                     &tagLength);
    306 
    307                 regionAppended = TRUE;
    308             }
    309         }
    310 
    311         {
    312             const int32_t toCopy =
    313                 tagLength >= tagCapacity ? tagCapacity : tagLength;
    314 
    315             /**
    316              * Copy the partial tag from our internal buffer to the supplied
    317              * target.
    318              **/
    319             uprv_memcpy(
    320                 tag,
    321                 tagBuffer,
    322                 toCopy);
    323 
    324             capacityRemaining -= toCopy;
    325         }
    326 
    327         if (trailingLength > 0) {
    328             if (*trailing != '@' && capacityRemaining > 0) {
    329                 tag[tagLength++] = '_';
    330                 --capacityRemaining;
    331                 if (capacityRemaining > 0 && !regionAppended) {
    332                     /* extra separator is required */
    333                     tag[tagLength++] = '_';
    334                     --capacityRemaining;
    335                 }
    336             }
    337 
    338             if (capacityRemaining > 0) {
    339                 /*
    340                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
    341                  * don't know if the user-supplied buffers overlap.
    342                  */
    343                 const int32_t toCopy =
    344                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
    345 
    346                 uprv_memmove(
    347                     &tag[tagLength],
    348                     trailing,
    349                     toCopy);
    350             }
    351         }
    352 
    353         tagLength += trailingLength;
    354 
    355         return u_terminateChars(
    356                     tag,
    357                     tagCapacity,
    358                     tagLength,
    359                     err);
    360     }
    361 
    362 error:
    363 
    364     /**
    365      * An overflow indicates the locale ID passed in
    366      * is ill-formed.  If we got here, and there was
    367      * no previous error, it's an implicit overflow.
    368      **/
    369     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
    370         U_SUCCESS(*err)) {
    371         *err = U_ILLEGAL_ARGUMENT_ERROR;
    372     }
    373 
    374     return -1;
    375 }
    376 
    377 /**
    378  * Create a tag string from the supplied parameters.  The lang, script and region
    379  * parameters may be NULL pointers. If they are, their corresponding length parameters
    380  * must be less than or equal to 0.  If the lang parameter is an empty string, the
    381  * default value for an unknown language is written to the output buffer.
    382  *
    383  * If the length of the new string exceeds the capacity of the output buffer,
    384  * the function copies as many bytes to the output buffer as it can, and returns
    385  * the error U_BUFFER_OVERFLOW_ERROR.
    386  *
    387  * If an illegal argument is provided, the function returns the error
    388  * U_ILLEGAL_ARGUMENT_ERROR.
    389  *
    390  * @param lang The language tag to use.
    391  * @param langLength The length of the language tag.
    392  * @param script The script tag to use.
    393  * @param scriptLength The length of the script tag.
    394  * @param region The region tag to use.
    395  * @param regionLength The length of the region tag.
    396  * @param trailing Any trailing data to append to the new tag.
    397  * @param trailingLength The length of the trailing data.
    398  * @param tag The output buffer.
    399  * @param tagCapacity The capacity of the output buffer.
    400  * @param err A pointer to a UErrorCode for error reporting.
    401  * @return The length of the tag string, which may be greater than tagCapacity.
    402  **/
    403 static int32_t U_CALLCONV
    404 createTagString(
    405     const char* lang,
    406     int32_t langLength,
    407     const char* script,
    408     int32_t scriptLength,
    409     const char* region,
    410     int32_t regionLength,
    411     const char* trailing,
    412     int32_t trailingLength,
    413     char* tag,
    414     int32_t tagCapacity,
    415     UErrorCode* err)
    416 {
    417     return createTagStringWithAlternates(
    418                 lang,
    419                 langLength,
    420                 script,
    421                 scriptLength,
    422                 region,
    423                 regionLength,
    424                 trailing,
    425                 trailingLength,
    426                 NULL,
    427                 tag,
    428                 tagCapacity,
    429                 err);
    430 }
    431 
    432 /**
    433  * Parse the language, script, and region subtags from a tag string, and copy the
    434  * results into the corresponding output parameters. The buffers are null-terminated,
    435  * unless overflow occurs.
    436  *
    437  * The langLength, scriptLength, and regionLength parameters are input/output
    438  * parameters, and must contain the capacity of their corresponding buffers on
    439  * input.  On output, they will contain the actual length of the buffers, not
    440  * including the null terminator.
    441  *
    442  * If the length of any of the output subtags exceeds the capacity of the corresponding
    443  * buffer, the function copies as many bytes to the output buffer as it can, and returns
    444  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
    445  * occurs.
    446  *
    447  * If an illegal argument is provided, the function returns the error
    448  * U_ILLEGAL_ARGUMENT_ERROR.
    449  *
    450  * @param localeID The locale ID to parse.
    451  * @param lang The language tag buffer.
    452  * @param langLength The length of the language tag.
    453  * @param script The script tag buffer.
    454  * @param scriptLength The length of the script tag.
    455  * @param region The region tag buffer.
    456  * @param regionLength The length of the region tag.
    457  * @param err A pointer to a UErrorCode for error reporting.
    458  * @return The number of chars of the localeID parameter consumed.
    459  **/
    460 static int32_t U_CALLCONV
    461 parseTagString(
    462     const char* localeID,
    463     char* lang,
    464     int32_t* langLength,
    465     char* script,
    466     int32_t* scriptLength,
    467     char* region,
    468     int32_t* regionLength,
    469     UErrorCode* err)
    470 {
    471     const char* position = localeID;
    472     int32_t subtagLength = 0;
    473 
    474     if(U_FAILURE(*err) ||
    475        localeID == NULL ||
    476        lang == NULL ||
    477        langLength == NULL ||
    478        script == NULL ||
    479        scriptLength == NULL ||
    480        region == NULL ||
    481        regionLength == NULL) {
    482         goto error;
    483     }
    484 
    485     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
    486     u_terminateChars(lang, *langLength, subtagLength, err);
    487 
    488     /*
    489      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
    490      * to be an error, because it indicates the user-supplied tag is
    491      * not well-formed.
    492      */
    493     if(U_FAILURE(*err)) {
    494         goto error;
    495     }
    496 
    497     *langLength = subtagLength;
    498 
    499     /*
    500      * If no language was present, use the value of unknownLanguage
    501      * instead.  Otherwise, move past any separator.
    502      */
    503     if (*langLength == 0) {
    504         uprv_strcpy(
    505             lang,
    506             unknownLanguage);
    507         *langLength = (int32_t)uprv_strlen(lang);
    508     }
    509     else if (_isIDSeparator(*position)) {
    510         ++position;
    511     }
    512 
    513     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
    514     u_terminateChars(script, *scriptLength, subtagLength, err);
    515 
    516     if(U_FAILURE(*err)) {
    517         goto error;
    518     }
    519 
    520     *scriptLength = subtagLength;
    521 
    522     if (*scriptLength > 0) {
    523         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
    524             /**
    525              * If the script part is the "unknown" script, then don't return it.
    526              **/
    527             *scriptLength = 0;
    528         }
    529 
    530         /*
    531          * Move past any separator.
    532          */
    533         if (_isIDSeparator(*position)) {
    534             ++position;
    535         }
    536     }
    537 
    538     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
    539     u_terminateChars(region, *regionLength, subtagLength, err);
    540 
    541     if(U_FAILURE(*err)) {
    542         goto error;
    543     }
    544 
    545     *regionLength = subtagLength;
    546 
    547     if (*regionLength > 0) {
    548         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
    549             /**
    550              * If the region part is the "unknown" region, then don't return it.
    551              **/
    552             *regionLength = 0;
    553         }
    554     } else if (*position != 0 && *position != '@') {
    555         /* back up over consumed trailing separator */
    556         --position;
    557     }
    558 
    559 exit:
    560 
    561     return (int32_t)(position - localeID);
    562 
    563 error:
    564 
    565     /**
    566      * If we get here, we have no explicit error, it's the result of an
    567      * illegal argument.
    568      **/
    569     if (!U_FAILURE(*err)) {
    570         *err = U_ILLEGAL_ARGUMENT_ERROR;
    571     }
    572 
    573     goto exit;
    574 }
    575 
    576 static int32_t U_CALLCONV
    577 createLikelySubtagsString(
    578     const char* lang,
    579     int32_t langLength,
    580     const char* script,
    581     int32_t scriptLength,
    582     const char* region,
    583     int32_t regionLength,
    584     const char* variants,
    585     int32_t variantsLength,
    586     char* tag,
    587     int32_t tagCapacity,
    588     UErrorCode* err)
    589 {
    590     /**
    591      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    592      * that we can build a string that contains the language,
    593      * script and region code without worrying about overrunning
    594      * the user-supplied buffer.
    595      **/
    596     char tagBuffer[ULOC_FULLNAME_CAPACITY];
    597     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
    598 
    599     if(U_FAILURE(*err)) {
    600         goto error;
    601     }
    602 
    603     /**
    604      * Try the language with the script and region first.
    605      **/
    606     if (scriptLength > 0 && regionLength > 0) {
    607 
    608         const char* likelySubtags = NULL;
    609 
    610         createTagString(
    611             lang,
    612             langLength,
    613             script,
    614             scriptLength,
    615             region,
    616             regionLength,
    617             NULL,
    618             0,
    619             tagBuffer,
    620             sizeof(tagBuffer),
    621             err);
    622         if(U_FAILURE(*err)) {
    623             goto error;
    624         }
    625 
    626         likelySubtags =
    627             findLikelySubtags(
    628                 tagBuffer,
    629                 likelySubtagsBuffer,
    630                 sizeof(likelySubtagsBuffer),
    631                 err);
    632         if(U_FAILURE(*err)) {
    633             goto error;
    634         }
    635 
    636         if (likelySubtags != NULL) {
    637             /* Always use the language tag from the
    638                maximal string, since it may be more
    639                specific than the one provided. */
    640             return createTagStringWithAlternates(
    641                         NULL,
    642                         0,
    643                         NULL,
    644                         0,
    645                         NULL,
    646                         0,
    647                         variants,
    648                         variantsLength,
    649                         likelySubtags,
    650                         tag,
    651                         tagCapacity,
    652                         err);
    653         }
    654     }
    655 
    656     /**
    657      * Try the language with just the script.
    658      **/
    659     if (scriptLength > 0) {
    660 
    661         const char* likelySubtags = NULL;
    662 
    663         createTagString(
    664             lang,
    665             langLength,
    666             script,
    667             scriptLength,
    668             NULL,
    669             0,
    670             NULL,
    671             0,
    672             tagBuffer,
    673             sizeof(tagBuffer),
    674             err);
    675         if(U_FAILURE(*err)) {
    676             goto error;
    677         }
    678 
    679         likelySubtags =
    680             findLikelySubtags(
    681                 tagBuffer,
    682                 likelySubtagsBuffer,
    683                 sizeof(likelySubtagsBuffer),
    684                 err);
    685         if(U_FAILURE(*err)) {
    686             goto error;
    687         }
    688 
    689         if (likelySubtags != NULL) {
    690             /* Always use the language tag from the
    691                maximal string, since it may be more
    692                specific than the one provided. */
    693             return createTagStringWithAlternates(
    694                         NULL,
    695                         0,
    696                         NULL,
    697                         0,
    698                         region,
    699                         regionLength,
    700                         variants,
    701                         variantsLength,
    702                         likelySubtags,
    703                         tag,
    704                         tagCapacity,
    705                         err);
    706         }
    707     }
    708 
    709     /**
    710      * Try the language with just the region.
    711      **/
    712     if (regionLength > 0) {
    713 
    714         const char* likelySubtags = NULL;
    715 
    716         createTagString(
    717             lang,
    718             langLength,
    719             NULL,
    720             0,
    721             region,
    722             regionLength,
    723             NULL,
    724             0,
    725             tagBuffer,
    726             sizeof(tagBuffer),
    727             err);
    728         if(U_FAILURE(*err)) {
    729             goto error;
    730         }
    731 
    732         likelySubtags =
    733             findLikelySubtags(
    734                 tagBuffer,
    735                 likelySubtagsBuffer,
    736                 sizeof(likelySubtagsBuffer),
    737                 err);
    738         if(U_FAILURE(*err)) {
    739             goto error;
    740         }
    741 
    742         if (likelySubtags != NULL) {
    743             /* Always use the language tag from the
    744                maximal string, since it may be more
    745                specific than the one provided. */
    746             return createTagStringWithAlternates(
    747                         NULL,
    748                         0,
    749                         script,
    750                         scriptLength,
    751                         NULL,
    752                         0,
    753                         variants,
    754                         variantsLength,
    755                         likelySubtags,
    756                         tag,
    757                         tagCapacity,
    758                         err);
    759         }
    760     }
    761 
    762     /**
    763      * Finally, try just the language.
    764      **/
    765     {
    766         const char* likelySubtags = NULL;
    767 
    768         createTagString(
    769             lang,
    770             langLength,
    771             NULL,
    772             0,
    773             NULL,
    774             0,
    775             NULL,
    776             0,
    777             tagBuffer,
    778             sizeof(tagBuffer),
    779             err);
    780         if(U_FAILURE(*err)) {
    781             goto error;
    782         }
    783 
    784         likelySubtags =
    785             findLikelySubtags(
    786                 tagBuffer,
    787                 likelySubtagsBuffer,
    788                 sizeof(likelySubtagsBuffer),
    789                 err);
    790         if(U_FAILURE(*err)) {
    791             goto error;
    792         }
    793 
    794         if (likelySubtags != NULL) {
    795             /* Always use the language tag from the
    796                maximal string, since it may be more
    797                specific than the one provided. */
    798             return createTagStringWithAlternates(
    799                         NULL,
    800                         0,
    801                         script,
    802                         scriptLength,
    803                         region,
    804                         regionLength,
    805                         variants,
    806                         variantsLength,
    807                         likelySubtags,
    808                         tag,
    809                         tagCapacity,
    810                         err);
    811         }
    812     }
    813 
    814     return u_terminateChars(
    815                 tag,
    816                 tagCapacity,
    817                 0,
    818                 err);
    819 
    820 error:
    821 
    822     if (!U_FAILURE(*err)) {
    823         *err = U_ILLEGAL_ARGUMENT_ERROR;
    824     }
    825 
    826     return -1;
    827 }
    828 
    829 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    830     {   int32_t count = 0; \
    831         int32_t i; \
    832         for (i = 0; i < trailingLength; i++) { \
    833             if (trailing[i] == '-' || trailing[i] == '_') { \
    834                 count = 0; \
    835                 if (count > 8) { \
    836                     goto error; \
    837                 } \
    838             } else if (trailing[i] == '@') { \
    839                 break; \
    840             } else if (count > 8) { \
    841                 goto error; \
    842             } else { \
    843                 count++; \
    844             } \
    845         } \
    846     }
    847 
    848 static int32_t
    849 _uloc_addLikelySubtags(const char*    localeID,
    850          char* maximizedLocaleID,
    851          int32_t maximizedLocaleIDCapacity,
    852          UErrorCode* err)
    853 {
    854     char lang[ULOC_LANG_CAPACITY];
    855     int32_t langLength = sizeof(lang);
    856     char script[ULOC_SCRIPT_CAPACITY];
    857     int32_t scriptLength = sizeof(script);
    858     char region[ULOC_COUNTRY_CAPACITY];
    859     int32_t regionLength = sizeof(region);
    860     const char* trailing = "";
    861     int32_t trailingLength = 0;
    862     int32_t trailingIndex = 0;
    863     int32_t resultLength = 0;
    864 
    865     if(U_FAILURE(*err)) {
    866         goto error;
    867     }
    868     else if (localeID == NULL ||
    869              maximizedLocaleID == NULL ||
    870              maximizedLocaleIDCapacity <= 0) {
    871         goto error;
    872     }
    873 
    874     trailingIndex = parseTagString(
    875         localeID,
    876         lang,
    877         &langLength,
    878         script,
    879         &scriptLength,
    880         region,
    881         &regionLength,
    882         err);
    883     if(U_FAILURE(*err)) {
    884         /* Overflow indicates an illegal argument error */
    885         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    886             *err = U_ILLEGAL_ARGUMENT_ERROR;
    887         }
    888 
    889         goto error;
    890     }
    891 
    892     /* Find the length of the trailing portion. */
    893     while (_isIDSeparator(localeID[trailingIndex])) {
    894         trailingIndex++;
    895     }
    896     trailing = &localeID[trailingIndex];
    897     trailingLength = (int32_t)uprv_strlen(trailing);
    898 
    899     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
    900 
    901     resultLength =
    902         createLikelySubtagsString(
    903             lang,
    904             langLength,
    905             script,
    906             scriptLength,
    907             region,
    908             regionLength,
    909             trailing,
    910             trailingLength,
    911             maximizedLocaleID,
    912             maximizedLocaleIDCapacity,
    913             err);
    914 
    915     if (resultLength == 0) {
    916         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
    917 
    918         /*
    919          * If we get here, we need to return localeID.
    920          */
    921         uprv_memcpy(
    922             maximizedLocaleID,
    923             localeID,
    924             localIDLength <= maximizedLocaleIDCapacity ?
    925                 localIDLength : maximizedLocaleIDCapacity);
    926 
    927         resultLength =
    928             u_terminateChars(
    929                 maximizedLocaleID,
    930                 maximizedLocaleIDCapacity,
    931                 localIDLength,
    932                 err);
    933     }
    934 
    935     return resultLength;
    936 
    937 error:
    938 
    939     if (!U_FAILURE(*err)) {
    940         *err = U_ILLEGAL_ARGUMENT_ERROR;
    941     }
    942 
    943     return -1;
    944 }
    945 
    946 static int32_t
    947 _uloc_minimizeSubtags(const char*    localeID,
    948          char* minimizedLocaleID,
    949          int32_t minimizedLocaleIDCapacity,
    950          UErrorCode* err)
    951 {
    952     /**
    953      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    954      * that we can build a string that contains the language,
    955      * script and region code without worrying about overrunning
    956      * the user-supplied buffer.
    957      **/
    958     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
    959     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
    960 
    961     char lang[ULOC_LANG_CAPACITY];
    962     int32_t langLength = sizeof(lang);
    963     char script[ULOC_SCRIPT_CAPACITY];
    964     int32_t scriptLength = sizeof(script);
    965     char region[ULOC_COUNTRY_CAPACITY];
    966     int32_t regionLength = sizeof(region);
    967     const char* trailing = "";
    968     int32_t trailingLength = 0;
    969     int32_t trailingIndex = 0;
    970 
    971     if(U_FAILURE(*err)) {
    972         goto error;
    973     }
    974     else if (localeID == NULL ||
    975              minimizedLocaleID == NULL ||
    976              minimizedLocaleIDCapacity <= 0) {
    977         goto error;
    978     }
    979 
    980     trailingIndex =
    981         parseTagString(
    982             localeID,
    983             lang,
    984             &langLength,
    985             script,
    986             &scriptLength,
    987             region,
    988             &regionLength,
    989             err);
    990     if(U_FAILURE(*err)) {
    991 
    992         /* Overflow indicates an illegal argument error */
    993         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    994             *err = U_ILLEGAL_ARGUMENT_ERROR;
    995         }
    996 
    997         goto error;
    998     }
    999 
   1000     /* Find the spot where the variants or the keywords begin, if any. */
   1001     while (_isIDSeparator(localeID[trailingIndex])) {
   1002         trailingIndex++;
   1003     }
   1004     trailing = &localeID[trailingIndex];
   1005     trailingLength = (int32_t)uprv_strlen(trailing);
   1006 
   1007     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
   1008 
   1009     createTagString(
   1010         lang,
   1011         langLength,
   1012         script,
   1013         scriptLength,
   1014         region,
   1015         regionLength,
   1016         NULL,
   1017         0,
   1018         maximizedTagBuffer,
   1019         maximizedTagBufferLength,
   1020         err);
   1021     if(U_FAILURE(*err)) {
   1022         goto error;
   1023     }
   1024 
   1025     /**
   1026      * First, we need to first get the maximization
   1027      * from AddLikelySubtags.
   1028      **/
   1029     maximizedTagBufferLength =
   1030         uloc_addLikelySubtags(
   1031             maximizedTagBuffer,
   1032             maximizedTagBuffer,
   1033             maximizedTagBufferLength,
   1034             err);
   1035 
   1036     if(U_FAILURE(*err)) {
   1037         goto error;
   1038     }
   1039 
   1040     /**
   1041      * Start first with just the language.
   1042      **/
   1043     {
   1044         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1045 
   1046         const int32_t tagBufferLength =
   1047             createLikelySubtagsString(
   1048                 lang,
   1049                 langLength,
   1050                 NULL,
   1051                 0,
   1052                 NULL,
   1053                 0,
   1054                 NULL,
   1055                 0,
   1056                 tagBuffer,
   1057                 sizeof(tagBuffer),
   1058                 err);
   1059 
   1060         if(U_FAILURE(*err)) {
   1061             goto error;
   1062         }
   1063         else if (uprv_strnicmp(
   1064                     maximizedTagBuffer,
   1065                     tagBuffer,
   1066                     tagBufferLength) == 0) {
   1067 
   1068             return createTagString(
   1069                         lang,
   1070                         langLength,
   1071                         NULL,
   1072                         0,
   1073                         NULL,
   1074                         0,
   1075                         trailing,
   1076                         trailingLength,
   1077                         minimizedLocaleID,
   1078                         minimizedLocaleIDCapacity,
   1079                         err);
   1080         }
   1081     }
   1082 
   1083     /**
   1084      * Next, try the language and region.
   1085      **/
   1086     if (regionLength > 0) {
   1087 
   1088         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1089 
   1090         const int32_t tagBufferLength =
   1091             createLikelySubtagsString(
   1092                 lang,
   1093                 langLength,
   1094                 NULL,
   1095                 0,
   1096                 region,
   1097                 regionLength,
   1098                 NULL,
   1099                 0,
   1100                 tagBuffer,
   1101                 sizeof(tagBuffer),
   1102                 err);
   1103 
   1104         if(U_FAILURE(*err)) {
   1105             goto error;
   1106         }
   1107         else if (uprv_strnicmp(
   1108                     maximizedTagBuffer,
   1109                     tagBuffer,
   1110                     tagBufferLength) == 0) {
   1111 
   1112             return createTagString(
   1113                         lang,
   1114                         langLength,
   1115                         NULL,
   1116                         0,
   1117                         region,
   1118                         regionLength,
   1119                         trailing,
   1120                         trailingLength,
   1121                         minimizedLocaleID,
   1122                         minimizedLocaleIDCapacity,
   1123                         err);
   1124         }
   1125     }
   1126 
   1127     /**
   1128      * Finally, try the language and script.  This is our last chance,
   1129      * since trying with all three subtags would only yield the
   1130      * maximal version that we already have.
   1131      **/
   1132     if (scriptLength > 0 && regionLength > 0) {
   1133         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1134 
   1135         const int32_t tagBufferLength =
   1136             createLikelySubtagsString(
   1137                 lang,
   1138                 langLength,
   1139                 script,
   1140                 scriptLength,
   1141                 NULL,
   1142                 0,
   1143                 NULL,
   1144                 0,
   1145                 tagBuffer,
   1146                 sizeof(tagBuffer),
   1147                 err);
   1148 
   1149         if(U_FAILURE(*err)) {
   1150             goto error;
   1151         }
   1152         else if (uprv_strnicmp(
   1153                     maximizedTagBuffer,
   1154                     tagBuffer,
   1155                     tagBufferLength) == 0) {
   1156 
   1157             return createTagString(
   1158                         lang,
   1159                         langLength,
   1160                         script,
   1161                         scriptLength,
   1162                         NULL,
   1163                         0,
   1164                         trailing,
   1165                         trailingLength,
   1166                         minimizedLocaleID,
   1167                         minimizedLocaleIDCapacity,
   1168                         err);
   1169         }
   1170     }
   1171 
   1172     {
   1173         /**
   1174          * If we got here, return the locale ID parameter.
   1175          **/
   1176         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
   1177 
   1178         uprv_memcpy(
   1179             minimizedLocaleID,
   1180             localeID,
   1181             localeIDLength <= minimizedLocaleIDCapacity ?
   1182                 localeIDLength : minimizedLocaleIDCapacity);
   1183 
   1184         return u_terminateChars(
   1185                     minimizedLocaleID,
   1186                     minimizedLocaleIDCapacity,
   1187                     localeIDLength,
   1188                     err);
   1189     }
   1190 
   1191 error:
   1192 
   1193     if (!U_FAILURE(*err)) {
   1194         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1195     }
   1196 
   1197     return -1;
   1198 
   1199 
   1200 }
   1201 
   1202 static UBool
   1203 do_canonicalize(const char*    localeID,
   1204          char* buffer,
   1205          int32_t bufferCapacity,
   1206          UErrorCode* err)
   1207 {
   1208     uloc_canonicalize(
   1209         localeID,
   1210         buffer,
   1211         bufferCapacity,
   1212         err);
   1213 
   1214     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
   1215         *err == U_BUFFER_OVERFLOW_ERROR) {
   1216         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1217 
   1218         return FALSE;
   1219     }
   1220     else if (U_FAILURE(*err)) {
   1221 
   1222         return FALSE;
   1223     }
   1224     else {
   1225         return TRUE;
   1226     }
   1227 }
   1228 
   1229 U_CAPI int32_t U_EXPORT2
   1230 uloc_addLikelySubtags(const char*    localeID,
   1231          char* maximizedLocaleID,
   1232          int32_t maximizedLocaleIDCapacity,
   1233          UErrorCode* err)
   1234 {
   1235     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1236 
   1237     if (!do_canonicalize(
   1238         localeID,
   1239         localeBuffer,
   1240         sizeof(localeBuffer),
   1241         err)) {
   1242         return -1;
   1243     }
   1244     else {
   1245         return _uloc_addLikelySubtags(
   1246                     localeBuffer,
   1247                     maximizedLocaleID,
   1248                     maximizedLocaleIDCapacity,
   1249                     err);
   1250     }
   1251 }
   1252 
   1253 U_CAPI int32_t U_EXPORT2
   1254 uloc_minimizeSubtags(const char*    localeID,
   1255          char* minimizedLocaleID,
   1256          int32_t minimizedLocaleIDCapacity,
   1257          UErrorCode* err)
   1258 {
   1259     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1260 
   1261     if (!do_canonicalize(
   1262         localeID,
   1263         localeBuffer,
   1264         sizeof(localeBuffer),
   1265         err)) {
   1266         return -1;
   1267     }
   1268     else {
   1269         return _uloc_minimizeSubtags(
   1270                     localeBuffer,
   1271                     minimizedLocaleID,
   1272                     minimizedLocaleIDCapacity,
   1273                     err);
   1274     }
   1275 }
   1276