Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1997-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  loclikely.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010feb25
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
     17 *   that then do not depend on resource bundle code and likely-subtags data.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/putil.h"
     22 #include "unicode/uloc.h"
     23 #include "unicode/ures.h"
     24 #include "cmemory.h"
     25 #include "cstring.h"
     26 #include "ulocimp.h"
     27 #include "ustr_imp.h"
     28 
     29 /**
     30  * This function looks for the localeID in the likelySubtags resource.
     31  *
     32  * @param localeID The tag to find.
     33  * @param buffer A buffer to hold the matching entry
     34  * @param bufferLength The length of the output buffer
     35  * @return A pointer to "buffer" if found, or a null pointer if not.
     36  */
     37 static const char*  U_CALLCONV
     38 findLikelySubtags(const char* localeID,
     39                   char* buffer,
     40                   int32_t bufferLength,
     41                   UErrorCode* err) {
     42     const char* result = NULL;
     43 
     44     if (!U_FAILURE(*err)) {
     45         int32_t resLen = 0;
     46         const UChar* s = NULL;
     47         UErrorCode tmpErr = U_ZERO_ERROR;
     48         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
     49         if (U_SUCCESS(tmpErr)) {
     50             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
     51 
     52             if (U_FAILURE(tmpErr)) {
     53                 /*
     54                  * If a resource is missing, it's not really an error, it's
     55                  * just that we don't have any data for that particular locale ID.
     56                  */
     57                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
     58                     *err = tmpErr;
     59                 }
     60             }
     61             else if (resLen >= bufferLength) {
     62                 /* The buffer should never overflow. */
     63                 *err = U_INTERNAL_PROGRAM_ERROR;
     64             }
     65             else {
     66                 u_UCharsToChars(s, buffer, resLen + 1);
     67                 result = buffer;
     68             }
     69 
     70             ures_close(subtags);
     71         } else {
     72             *err = tmpErr;
     73         }
     74     }
     75 
     76     return result;
     77 }
     78 
     79 /**
     80  * Append a tag to a buffer, adding the separator if necessary.  The buffer
     81  * must be large enough to contain the resulting tag plus any separator
     82  * necessary. The tag must not be a zero-length string.
     83  *
     84  * @param tag The tag to add.
     85  * @param tagLength The length of the tag.
     86  * @param buffer The output buffer.
     87  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
     88  **/
     89 static void U_CALLCONV
     90 appendTag(
     91     const char* tag,
     92     int32_t tagLength,
     93     char* buffer,
     94     int32_t* bufferLength) {
     95 
     96     if (*bufferLength > 0) {
     97         buffer[*bufferLength] = '_';
     98         ++(*bufferLength);
     99     }
    100 
    101     uprv_memmove(
    102         &buffer[*bufferLength],
    103         tag,
    104         tagLength);
    105 
    106     *bufferLength += tagLength;
    107 }
    108 
    109 /**
    110  * These are the canonical strings for unknown languages, scripts and regions.
    111  **/
    112 static const char* const unknownLanguage = "und";
    113 static const char* const unknownScript = "Zzzz";
    114 static const char* const unknownRegion = "ZZ";
    115 
    116 /**
    117  * Create a tag string from the supplied parameters.  The lang, script and region
    118  * parameters may be NULL pointers. If they are, their corresponding length parameters
    119  * must be less than or equal to 0.
    120  *
    121  * If any of the language, script or region parameters are empty, and the alternateTags
    122  * parameter is not NULL, it will be parsed for potential language, script and region tags
    123  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
    124  * it contains no language tag, the default tag for the unknown language is used.
    125  *
    126  * If the length of the new string exceeds the capacity of the output buffer,
    127  * the function copies as many bytes to the output buffer as it can, and returns
    128  * the error U_BUFFER_OVERFLOW_ERROR.
    129  *
    130  * If an illegal argument is provided, the function returns the error
    131  * U_ILLEGAL_ARGUMENT_ERROR.
    132  *
    133  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
    134  * the tag string fits in the output buffer, but the null terminator doesn't.
    135  *
    136  * @param lang The language tag to use.
    137  * @param langLength The length of the language tag.
    138  * @param script The script tag to use.
    139  * @param scriptLength The length of the script tag.
    140  * @param region The region tag to use.
    141  * @param regionLength The length of the region tag.
    142  * @param trailing Any trailing data to append to the new tag.
    143  * @param trailingLength The length of the trailing data.
    144  * @param alternateTags A string containing any alternate tags.
    145  * @param tag The output buffer.
    146  * @param tagCapacity The capacity of the output buffer.
    147  * @param err A pointer to a UErrorCode for error reporting.
    148  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
    149  **/
    150 static int32_t U_CALLCONV
    151 createTagStringWithAlternates(
    152     const char* lang,
    153     int32_t langLength,
    154     const char* script,
    155     int32_t scriptLength,
    156     const char* region,
    157     int32_t regionLength,
    158     const char* trailing,
    159     int32_t trailingLength,
    160     const char* alternateTags,
    161     char* tag,
    162     int32_t tagCapacity,
    163     UErrorCode* err) {
    164 
    165     if (U_FAILURE(*err)) {
    166         goto error;
    167     }
    168     else if (tag == NULL ||
    169              tagCapacity <= 0 ||
    170              langLength >= ULOC_LANG_CAPACITY ||
    171              scriptLength >= ULOC_SCRIPT_CAPACITY ||
    172              regionLength >= ULOC_COUNTRY_CAPACITY) {
    173         goto error;
    174     }
    175     else {
    176         /**
    177          * ULOC_FULLNAME_CAPACITY will provide enough capacity
    178          * that we can build a string that contains the language,
    179          * script and region code without worrying about overrunning
    180          * the user-supplied buffer.
    181          **/
    182         char tagBuffer[ULOC_FULLNAME_CAPACITY];
    183         int32_t tagLength = 0;
    184         int32_t capacityRemaining = tagCapacity;
    185         UBool regionAppended = FALSE;
    186 
    187         if (langLength > 0) {
    188             appendTag(
    189                 lang,
    190                 langLength,
    191                 tagBuffer,
    192                 &tagLength);
    193         }
    194         else if (alternateTags == NULL) {
    195             /*
    196              * Append the value for an unknown language, if
    197              * we found no language.
    198              */
    199             appendTag(
    200                 unknownLanguage,
    201                 (int32_t)uprv_strlen(unknownLanguage),
    202                 tagBuffer,
    203                 &tagLength);
    204         }
    205         else {
    206             /*
    207              * Parse the alternateTags string for the language.
    208              */
    209             char alternateLang[ULOC_LANG_CAPACITY];
    210             int32_t alternateLangLength = sizeof(alternateLang);
    211 
    212             alternateLangLength =
    213                 uloc_getLanguage(
    214                     alternateTags,
    215                     alternateLang,
    216                     alternateLangLength,
    217                     err);
    218             if(U_FAILURE(*err) ||
    219                 alternateLangLength >= ULOC_LANG_CAPACITY) {
    220                 goto error;
    221             }
    222             else if (alternateLangLength == 0) {
    223                 /*
    224                  * Append the value for an unknown language, if
    225                  * we found no language.
    226                  */
    227                 appendTag(
    228                     unknownLanguage,
    229                     (int32_t)uprv_strlen(unknownLanguage),
    230                     tagBuffer,
    231                     &tagLength);
    232             }
    233             else {
    234                 appendTag(
    235                     alternateLang,
    236                     alternateLangLength,
    237                     tagBuffer,
    238                     &tagLength);
    239             }
    240         }
    241 
    242         if (scriptLength > 0) {
    243             appendTag(
    244                 script,
    245                 scriptLength,
    246                 tagBuffer,
    247                 &tagLength);
    248         }
    249         else if (alternateTags != NULL) {
    250             /*
    251              * Parse the alternateTags string for the script.
    252              */
    253             char alternateScript[ULOC_SCRIPT_CAPACITY];
    254 
    255             const int32_t alternateScriptLength =
    256                 uloc_getScript(
    257                     alternateTags,
    258                     alternateScript,
    259                     sizeof(alternateScript),
    260                     err);
    261 
    262             if (U_FAILURE(*err) ||
    263                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
    264                 goto error;
    265             }
    266             else if (alternateScriptLength > 0) {
    267                 appendTag(
    268                     alternateScript,
    269                     alternateScriptLength,
    270                     tagBuffer,
    271                     &tagLength);
    272             }
    273         }
    274 
    275         if (regionLength > 0) {
    276             appendTag(
    277                 region,
    278                 regionLength,
    279                 tagBuffer,
    280                 &tagLength);
    281 
    282             regionAppended = TRUE;
    283         }
    284         else if (alternateTags != NULL) {
    285             /*
    286              * Parse the alternateTags string for the region.
    287              */
    288             char alternateRegion[ULOC_COUNTRY_CAPACITY];
    289 
    290             const int32_t alternateRegionLength =
    291                 uloc_getCountry(
    292                     alternateTags,
    293                     alternateRegion,
    294                     sizeof(alternateRegion),
    295                     err);
    296             if (U_FAILURE(*err) ||
    297                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
    298                 goto error;
    299             }
    300             else if (alternateRegionLength > 0) {
    301                 appendTag(
    302                     alternateRegion,
    303                     alternateRegionLength,
    304                     tagBuffer,
    305                     &tagLength);
    306 
    307                 regionAppended = TRUE;
    308             }
    309         }
    310 
    311         {
    312             const int32_t toCopy =
    313                 tagLength >= tagCapacity ? tagCapacity : tagLength;
    314 
    315             /**
    316              * Copy the partial tag from our internal buffer to the supplied
    317              * target.
    318              **/
    319             uprv_memcpy(
    320                 tag,
    321                 tagBuffer,
    322                 toCopy);
    323 
    324             capacityRemaining -= toCopy;
    325         }
    326 
    327         if (trailingLength > 0) {
    328             if (*trailing != '@' && capacityRemaining > 0) {
    329                 tag[tagLength++] = '_';
    330                 --capacityRemaining;
    331                 if (capacityRemaining > 0 && !regionAppended) {
    332                     /* extra separator is required */
    333                     tag[tagLength++] = '_';
    334                     --capacityRemaining;
    335                 }
    336             }
    337 
    338             if (capacityRemaining > 0) {
    339                 /*
    340                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
    341                  * don't know if the user-supplied buffers overlap.
    342                  */
    343                 const int32_t toCopy =
    344                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
    345 
    346                 uprv_memmove(
    347                     &tag[tagLength],
    348                     trailing,
    349                     toCopy);
    350             }
    351         }
    352 
    353         tagLength += trailingLength;
    354 
    355         return u_terminateChars(
    356                     tag,
    357                     tagCapacity,
    358                     tagLength,
    359                     err);
    360     }
    361 
    362 error:
    363 
    364     /**
    365      * An overflow indicates the locale ID passed in
    366      * is ill-formed.  If we got here, and there was
    367      * no previous error, it's an implicit overflow.
    368      **/
    369     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
    370         U_SUCCESS(*err)) {
    371         *err = U_ILLEGAL_ARGUMENT_ERROR;
    372     }
    373 
    374     return -1;
    375 }
    376 
    377 /**
    378  * Create a tag string from the supplied parameters.  The lang, script and region
    379  * parameters may be NULL pointers. If they are, their corresponding length parameters
    380  * must be less than or equal to 0.  If the lang parameter is an empty string, the
    381  * default value for an unknown language is written to the output buffer.
    382  *
    383  * If the length of the new string exceeds the capacity of the output buffer,
    384  * the function copies as many bytes to the output buffer as it can, and returns
    385  * the error U_BUFFER_OVERFLOW_ERROR.
    386  *
    387  * If an illegal argument is provided, the function returns the error
    388  * U_ILLEGAL_ARGUMENT_ERROR.
    389  *
    390  * @param lang The language tag to use.
    391  * @param langLength The length of the language tag.
    392  * @param script The script tag to use.
    393  * @param scriptLength The length of the script tag.
    394  * @param region The region tag to use.
    395  * @param regionLength The length of the region tag.
    396  * @param trailing Any trailing data to append to the new tag.
    397  * @param trailingLength The length of the trailing data.
    398  * @param tag The output buffer.
    399  * @param tagCapacity The capacity of the output buffer.
    400  * @param err A pointer to a UErrorCode for error reporting.
    401  * @return The length of the tag string, which may be greater than tagCapacity.
    402  **/
    403 static int32_t U_CALLCONV
    404 createTagString(
    405     const char* lang,
    406     int32_t langLength,
    407     const char* script,
    408     int32_t scriptLength,
    409     const char* region,
    410     int32_t regionLength,
    411     const char* trailing,
    412     int32_t trailingLength,
    413     char* tag,
    414     int32_t tagCapacity,
    415     UErrorCode* err)
    416 {
    417     return createTagStringWithAlternates(
    418                 lang,
    419                 langLength,
    420                 script,
    421                 scriptLength,
    422                 region,
    423                 regionLength,
    424                 trailing,
    425                 trailingLength,
    426                 NULL,
    427                 tag,
    428                 tagCapacity,
    429                 err);
    430 }
    431 
    432 /**
    433  * Parse the language, script, and region subtags from a tag string, and copy the
    434  * results into the corresponding output parameters. The buffers are null-terminated,
    435  * unless overflow occurs.
    436  *
    437  * The langLength, scriptLength, and regionLength parameters are input/output
    438  * parameters, and must contain the capacity of their corresponding buffers on
    439  * input.  On output, they will contain the actual length of the buffers, not
    440  * including the null terminator.
    441  *
    442  * If the length of any of the output subtags exceeds the capacity of the corresponding
    443  * buffer, the function copies as many bytes to the output buffer as it can, and returns
    444  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
    445  * occurs.
    446  *
    447  * If an illegal argument is provided, the function returns the error
    448  * U_ILLEGAL_ARGUMENT_ERROR.
    449  *
    450  * @param localeID The locale ID to parse.
    451  * @param lang The language tag buffer.
    452  * @param langLength The length of the language tag.
    453  * @param script The script tag buffer.
    454  * @param scriptLength The length of the script tag.
    455  * @param region The region tag buffer.
    456  * @param regionLength The length of the region tag.
    457  * @param err A pointer to a UErrorCode for error reporting.
    458  * @return The number of chars of the localeID parameter consumed.
    459  **/
    460 static int32_t U_CALLCONV
    461 parseTagString(
    462     const char* localeID,
    463     char* lang,
    464     int32_t* langLength,
    465     char* script,
    466     int32_t* scriptLength,
    467     char* region,
    468     int32_t* regionLength,
    469     UErrorCode* err)
    470 {
    471     const char* position = localeID;
    472     int32_t subtagLength = 0;
    473 
    474     if(U_FAILURE(*err) ||
    475        localeID == NULL ||
    476        lang == NULL ||
    477        langLength == NULL ||
    478        script == NULL ||
    479        scriptLength == NULL ||
    480        region == NULL ||
    481        regionLength == NULL) {
    482         goto error;
    483     }
    484 
    485     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
    486     u_terminateChars(lang, *langLength, subtagLength, err);
    487 
    488     /*
    489      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
    490      * to be an error, because it indicates the user-supplied tag is
    491      * not well-formed.
    492      */
    493     if(U_FAILURE(*err)) {
    494         goto error;
    495     }
    496 
    497     *langLength = subtagLength;
    498 
    499     /*
    500      * If no language was present, use the value of unknownLanguage
    501      * instead.  Otherwise, move past any separator.
    502      */
    503     if (*langLength == 0) {
    504         uprv_strcpy(
    505             lang,
    506             unknownLanguage);
    507         *langLength = (int32_t)uprv_strlen(lang);
    508     }
    509     else if (_isIDSeparator(*position)) {
    510         ++position;
    511     }
    512 
    513     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
    514     u_terminateChars(script, *scriptLength, subtagLength, err);
    515 
    516     if(U_FAILURE(*err)) {
    517         goto error;
    518     }
    519 
    520     *scriptLength = subtagLength;
    521 
    522     if (*scriptLength > 0) {
    523         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
    524             /**
    525              * If the script part is the "unknown" script, then don't return it.
    526              **/
    527             *scriptLength = 0;
    528         }
    529 
    530         /*
    531          * Move past any separator.
    532          */
    533         if (_isIDSeparator(*position)) {
    534             ++position;
    535         }
    536     }
    537 
    538     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
    539     u_terminateChars(region, *regionLength, subtagLength, err);
    540 
    541     if(U_FAILURE(*err)) {
    542         goto error;
    543     }
    544 
    545     *regionLength = subtagLength;
    546 
    547     if (*regionLength > 0) {
    548         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
    549             /**
    550              * If the region part is the "unknown" region, then don't return it.
    551              **/
    552             *regionLength = 0;
    553         }
    554     } else if (*position != 0 && *position != '@') {
    555         /* back up over consumed trailing separator */
    556         --position;
    557     }
    558 
    559 exit:
    560 
    561     return (int32_t)(position - localeID);
    562 
    563 error:
    564 
    565     /**
    566      * If we get here, we have no explicit error, it's the result of an
    567      * illegal argument.
    568      **/
    569     if (!U_FAILURE(*err)) {
    570         *err = U_ILLEGAL_ARGUMENT_ERROR;
    571     }
    572 
    573     goto exit;
    574 }
    575 
    576 static int32_t U_CALLCONV
    577 createLikelySubtagsString(
    578     const char* lang,
    579     int32_t langLength,
    580     const char* script,
    581     int32_t scriptLength,
    582     const char* region,
    583     int32_t regionLength,
    584     const char* variants,
    585     int32_t variantsLength,
    586     char* tag,
    587     int32_t tagCapacity,
    588     UErrorCode* err)
    589 {
    590     /**
    591      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    592      * that we can build a string that contains the language,
    593      * script and region code without worrying about overrunning
    594      * the user-supplied buffer.
    595      **/
    596     char tagBuffer[ULOC_FULLNAME_CAPACITY];
    597     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
    598     int32_t tagBufferLength = 0;
    599 
    600     if(U_FAILURE(*err)) {
    601         goto error;
    602     }
    603 
    604     /**
    605      * Try the language with the script and region first.
    606      **/
    607     if (scriptLength > 0 && regionLength > 0) {
    608 
    609         const char* likelySubtags = NULL;
    610 
    611         tagBufferLength = createTagString(
    612             lang,
    613             langLength,
    614             script,
    615             scriptLength,
    616             region,
    617             regionLength,
    618             NULL,
    619             0,
    620             tagBuffer,
    621             sizeof(tagBuffer),
    622             err);
    623         if(U_FAILURE(*err)) {
    624             goto error;
    625         }
    626 
    627         likelySubtags =
    628             findLikelySubtags(
    629                 tagBuffer,
    630                 likelySubtagsBuffer,
    631                 sizeof(likelySubtagsBuffer),
    632                 err);
    633         if(U_FAILURE(*err)) {
    634             goto error;
    635         }
    636 
    637         if (likelySubtags != NULL) {
    638             /* Always use the language tag from the
    639                maximal string, since it may be more
    640                specific than the one provided. */
    641             return createTagStringWithAlternates(
    642                         NULL,
    643                         0,
    644                         NULL,
    645                         0,
    646                         NULL,
    647                         0,
    648                         variants,
    649                         variantsLength,
    650                         likelySubtags,
    651                         tag,
    652                         tagCapacity,
    653                         err);
    654         }
    655     }
    656 
    657     /**
    658      * Try the language with just the script.
    659      **/
    660     if (scriptLength > 0) {
    661 
    662         const char* likelySubtags = NULL;
    663 
    664         tagBufferLength = createTagString(
    665             lang,
    666             langLength,
    667             script,
    668             scriptLength,
    669             NULL,
    670             0,
    671             NULL,
    672             0,
    673             tagBuffer,
    674             sizeof(tagBuffer),
    675             err);
    676         if(U_FAILURE(*err)) {
    677             goto error;
    678         }
    679 
    680         likelySubtags =
    681             findLikelySubtags(
    682                 tagBuffer,
    683                 likelySubtagsBuffer,
    684                 sizeof(likelySubtagsBuffer),
    685                 err);
    686         if(U_FAILURE(*err)) {
    687             goto error;
    688         }
    689 
    690         if (likelySubtags != NULL) {
    691             /* Always use the language tag from the
    692                maximal string, since it may be more
    693                specific than the one provided. */
    694             return createTagStringWithAlternates(
    695                         NULL,
    696                         0,
    697                         NULL,
    698                         0,
    699                         region,
    700                         regionLength,
    701                         variants,
    702                         variantsLength,
    703                         likelySubtags,
    704                         tag,
    705                         tagCapacity,
    706                         err);
    707         }
    708     }
    709 
    710     /**
    711      * Try the language with just the region.
    712      **/
    713     if (regionLength > 0) {
    714 
    715         const char* likelySubtags = NULL;
    716 
    717         createTagString(
    718             lang,
    719             langLength,
    720             NULL,
    721             0,
    722             region,
    723             regionLength,
    724             NULL,
    725             0,
    726             tagBuffer,
    727             sizeof(tagBuffer),
    728             err);
    729         if(U_FAILURE(*err)) {
    730             goto error;
    731         }
    732 
    733         likelySubtags =
    734             findLikelySubtags(
    735                 tagBuffer,
    736                 likelySubtagsBuffer,
    737                 sizeof(likelySubtagsBuffer),
    738                 err);
    739         if(U_FAILURE(*err)) {
    740             goto error;
    741         }
    742 
    743         if (likelySubtags != NULL) {
    744             /* Always use the language tag from the
    745                maximal string, since it may be more
    746                specific than the one provided. */
    747             return createTagStringWithAlternates(
    748                         NULL,
    749                         0,
    750                         script,
    751                         scriptLength,
    752                         NULL,
    753                         0,
    754                         variants,
    755                         variantsLength,
    756                         likelySubtags,
    757                         tag,
    758                         tagCapacity,
    759                         err);
    760         }
    761     }
    762 
    763     /**
    764      * Finally, try just the language.
    765      **/
    766     {
    767         const char* likelySubtags = NULL;
    768 
    769         createTagString(
    770             lang,
    771             langLength,
    772             NULL,
    773             0,
    774             NULL,
    775             0,
    776             NULL,
    777             0,
    778             tagBuffer,
    779             sizeof(tagBuffer),
    780             err);
    781         if(U_FAILURE(*err)) {
    782             goto error;
    783         }
    784 
    785         likelySubtags =
    786             findLikelySubtags(
    787                 tagBuffer,
    788                 likelySubtagsBuffer,
    789                 sizeof(likelySubtagsBuffer),
    790                 err);
    791         if(U_FAILURE(*err)) {
    792             goto error;
    793         }
    794 
    795         if (likelySubtags != NULL) {
    796             /* Always use the language tag from the
    797                maximal string, since it may be more
    798                specific than the one provided. */
    799             return createTagStringWithAlternates(
    800                         NULL,
    801                         0,
    802                         script,
    803                         scriptLength,
    804                         region,
    805                         regionLength,
    806                         variants,
    807                         variantsLength,
    808                         likelySubtags,
    809                         tag,
    810                         tagCapacity,
    811                         err);
    812         }
    813     }
    814 
    815     return u_terminateChars(
    816                 tag,
    817                 tagCapacity,
    818                 0,
    819                 err);
    820 
    821 error:
    822 
    823     if (!U_FAILURE(*err)) {
    824         *err = U_ILLEGAL_ARGUMENT_ERROR;
    825     }
    826 
    827     return -1;
    828 }
    829 
    830 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    831     {   int32_t count = 0; \
    832         int32_t i; \
    833         for (i = 0; i < trailingLength; i++) { \
    834             if (trailing[i] == '-' || trailing[i] == '_') { \
    835                 count = 0; \
    836                 if (count > 8) { \
    837                     goto error; \
    838                 } \
    839             } else if (trailing[i] == '@') { \
    840                 break; \
    841             } else if (count > 8) { \
    842                 goto error; \
    843             } else { \
    844                 count++; \
    845             } \
    846         } \
    847     }
    848 
    849 static int32_t
    850 _uloc_addLikelySubtags(const char*    localeID,
    851          char* maximizedLocaleID,
    852          int32_t maximizedLocaleIDCapacity,
    853          UErrorCode* err)
    854 {
    855     char lang[ULOC_LANG_CAPACITY];
    856     int32_t langLength = sizeof(lang);
    857     char script[ULOC_SCRIPT_CAPACITY];
    858     int32_t scriptLength = sizeof(script);
    859     char region[ULOC_COUNTRY_CAPACITY];
    860     int32_t regionLength = sizeof(region);
    861     const char* trailing = "";
    862     int32_t trailingLength = 0;
    863     int32_t trailingIndex = 0;
    864     int32_t resultLength = 0;
    865 
    866     if(U_FAILURE(*err)) {
    867         goto error;
    868     }
    869     else if (localeID == NULL ||
    870              maximizedLocaleID == NULL ||
    871              maximizedLocaleIDCapacity <= 0) {
    872         goto error;
    873     }
    874 
    875     trailingIndex = parseTagString(
    876         localeID,
    877         lang,
    878         &langLength,
    879         script,
    880         &scriptLength,
    881         region,
    882         &regionLength,
    883         err);
    884     if(U_FAILURE(*err)) {
    885         /* Overflow indicates an illegal argument error */
    886         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    887             *err = U_ILLEGAL_ARGUMENT_ERROR;
    888         }
    889 
    890         goto error;
    891     }
    892 
    893     /* Find the length of the trailing portion. */
    894     while (_isIDSeparator(localeID[trailingIndex])) {
    895         trailingIndex++;
    896     }
    897     trailing = &localeID[trailingIndex];
    898     trailingLength = (int32_t)uprv_strlen(trailing);
    899 
    900     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
    901 
    902     resultLength =
    903         createLikelySubtagsString(
    904             lang,
    905             langLength,
    906             script,
    907             scriptLength,
    908             region,
    909             regionLength,
    910             trailing,
    911             trailingLength,
    912             maximizedLocaleID,
    913             maximizedLocaleIDCapacity,
    914             err);
    915 
    916     if (resultLength == 0) {
    917         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
    918 
    919         /*
    920          * If we get here, we need to return localeID.
    921          */
    922         uprv_memcpy(
    923             maximizedLocaleID,
    924             localeID,
    925             localIDLength <= maximizedLocaleIDCapacity ?
    926                 localIDLength : maximizedLocaleIDCapacity);
    927 
    928         resultLength =
    929             u_terminateChars(
    930                 maximizedLocaleID,
    931                 maximizedLocaleIDCapacity,
    932                 localIDLength,
    933                 err);
    934     }
    935 
    936     return resultLength;
    937 
    938 error:
    939 
    940     if (!U_FAILURE(*err)) {
    941         *err = U_ILLEGAL_ARGUMENT_ERROR;
    942     }
    943 
    944     return -1;
    945 }
    946 
    947 static int32_t
    948 _uloc_minimizeSubtags(const char*    localeID,
    949          char* minimizedLocaleID,
    950          int32_t minimizedLocaleIDCapacity,
    951          UErrorCode* err)
    952 {
    953     /**
    954      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    955      * that we can build a string that contains the language,
    956      * script and region code without worrying about overrunning
    957      * the user-supplied buffer.
    958      **/
    959     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
    960     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
    961 
    962     char lang[ULOC_LANG_CAPACITY];
    963     int32_t langLength = sizeof(lang);
    964     char script[ULOC_SCRIPT_CAPACITY];
    965     int32_t scriptLength = sizeof(script);
    966     char region[ULOC_COUNTRY_CAPACITY];
    967     int32_t regionLength = sizeof(region);
    968     const char* trailing = "";
    969     int32_t trailingLength = 0;
    970     int32_t trailingIndex = 0;
    971 
    972     if(U_FAILURE(*err)) {
    973         goto error;
    974     }
    975     else if (localeID == NULL ||
    976              minimizedLocaleID == NULL ||
    977              minimizedLocaleIDCapacity <= 0) {
    978         goto error;
    979     }
    980 
    981     trailingIndex =
    982         parseTagString(
    983             localeID,
    984             lang,
    985             &langLength,
    986             script,
    987             &scriptLength,
    988             region,
    989             &regionLength,
    990             err);
    991     if(U_FAILURE(*err)) {
    992 
    993         /* Overflow indicates an illegal argument error */
    994         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    995             *err = U_ILLEGAL_ARGUMENT_ERROR;
    996         }
    997 
    998         goto error;
    999     }
   1000 
   1001     /* Find the spot where the variants or the keywords begin, if any. */
   1002     while (_isIDSeparator(localeID[trailingIndex])) {
   1003         trailingIndex++;
   1004     }
   1005     trailing = &localeID[trailingIndex];
   1006     trailingLength = (int32_t)uprv_strlen(trailing);
   1007 
   1008     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
   1009 
   1010     createTagString(
   1011         lang,
   1012         langLength,
   1013         script,
   1014         scriptLength,
   1015         region,
   1016         regionLength,
   1017         NULL,
   1018         0,
   1019         maximizedTagBuffer,
   1020         maximizedTagBufferLength,
   1021         err);
   1022     if(U_FAILURE(*err)) {
   1023         goto error;
   1024     }
   1025 
   1026     /**
   1027      * First, we need to first get the maximization
   1028      * from AddLikelySubtags.
   1029      **/
   1030     maximizedTagBufferLength =
   1031         uloc_addLikelySubtags(
   1032             maximizedTagBuffer,
   1033             maximizedTagBuffer,
   1034             maximizedTagBufferLength,
   1035             err);
   1036 
   1037     if(U_FAILURE(*err)) {
   1038         goto error;
   1039     }
   1040 
   1041     /**
   1042      * Start first with just the language.
   1043      **/
   1044     {
   1045         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1046 
   1047         const int32_t tagBufferLength =
   1048             createLikelySubtagsString(
   1049                 lang,
   1050                 langLength,
   1051                 NULL,
   1052                 0,
   1053                 NULL,
   1054                 0,
   1055                 NULL,
   1056                 0,
   1057                 tagBuffer,
   1058                 sizeof(tagBuffer),
   1059                 err);
   1060 
   1061         if(U_FAILURE(*err)) {
   1062             goto error;
   1063         }
   1064         else if (uprv_strnicmp(
   1065                     maximizedTagBuffer,
   1066                     tagBuffer,
   1067                     tagBufferLength) == 0) {
   1068 
   1069             return createTagString(
   1070                         lang,
   1071                         langLength,
   1072                         NULL,
   1073                         0,
   1074                         NULL,
   1075                         0,
   1076                         trailing,
   1077                         trailingLength,
   1078                         minimizedLocaleID,
   1079                         minimizedLocaleIDCapacity,
   1080                         err);
   1081         }
   1082     }
   1083 
   1084     /**
   1085      * Next, try the language and region.
   1086      **/
   1087     if (regionLength > 0) {
   1088 
   1089         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1090 
   1091         const int32_t tagBufferLength =
   1092             createLikelySubtagsString(
   1093                 lang,
   1094                 langLength,
   1095                 NULL,
   1096                 0,
   1097                 region,
   1098                 regionLength,
   1099                 NULL,
   1100                 0,
   1101                 tagBuffer,
   1102                 sizeof(tagBuffer),
   1103                 err);
   1104 
   1105         if(U_FAILURE(*err)) {
   1106             goto error;
   1107         }
   1108         else if (uprv_strnicmp(
   1109                     maximizedTagBuffer,
   1110                     tagBuffer,
   1111                     tagBufferLength) == 0) {
   1112 
   1113             return createTagString(
   1114                         lang,
   1115                         langLength,
   1116                         NULL,
   1117                         0,
   1118                         region,
   1119                         regionLength,
   1120                         trailing,
   1121                         trailingLength,
   1122                         minimizedLocaleID,
   1123                         minimizedLocaleIDCapacity,
   1124                         err);
   1125         }
   1126     }
   1127 
   1128     /**
   1129      * Finally, try the language and script.  This is our last chance,
   1130      * since trying with all three subtags would only yield the
   1131      * maximal version that we already have.
   1132      **/
   1133     if (scriptLength > 0 && regionLength > 0) {
   1134         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1135 
   1136         const int32_t tagBufferLength =
   1137             createLikelySubtagsString(
   1138                 lang,
   1139                 langLength,
   1140                 script,
   1141                 scriptLength,
   1142                 NULL,
   1143                 0,
   1144                 NULL,
   1145                 0,
   1146                 tagBuffer,
   1147                 sizeof(tagBuffer),
   1148                 err);
   1149 
   1150         if(U_FAILURE(*err)) {
   1151             goto error;
   1152         }
   1153         else if (uprv_strnicmp(
   1154                     maximizedTagBuffer,
   1155                     tagBuffer,
   1156                     tagBufferLength) == 0) {
   1157 
   1158             return createTagString(
   1159                         lang,
   1160                         langLength,
   1161                         script,
   1162                         scriptLength,
   1163                         NULL,
   1164                         0,
   1165                         trailing,
   1166                         trailingLength,
   1167                         minimizedLocaleID,
   1168                         minimizedLocaleIDCapacity,
   1169                         err);
   1170         }
   1171     }
   1172 
   1173     {
   1174         /**
   1175          * If we got here, return the locale ID parameter.
   1176          **/
   1177         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
   1178 
   1179         uprv_memcpy(
   1180             minimizedLocaleID,
   1181             localeID,
   1182             localeIDLength <= minimizedLocaleIDCapacity ?
   1183                 localeIDLength : minimizedLocaleIDCapacity);
   1184 
   1185         return u_terminateChars(
   1186                     minimizedLocaleID,
   1187                     minimizedLocaleIDCapacity,
   1188                     localeIDLength,
   1189                     err);
   1190     }
   1191 
   1192 error:
   1193 
   1194     if (!U_FAILURE(*err)) {
   1195         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1196     }
   1197 
   1198     return -1;
   1199 
   1200 
   1201 }
   1202 
   1203 static UBool
   1204 do_canonicalize(const char*    localeID,
   1205          char* buffer,
   1206          int32_t bufferCapacity,
   1207          UErrorCode* err)
   1208 {
   1209     uloc_canonicalize(
   1210         localeID,
   1211         buffer,
   1212         bufferCapacity,
   1213         err);
   1214 
   1215     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
   1216         *err == U_BUFFER_OVERFLOW_ERROR) {
   1217         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1218 
   1219         return FALSE;
   1220     }
   1221     else if (U_FAILURE(*err)) {
   1222 
   1223         return FALSE;
   1224     }
   1225     else {
   1226         return TRUE;
   1227     }
   1228 }
   1229 
   1230 U_DRAFT int32_t U_EXPORT2
   1231 uloc_addLikelySubtags(const char*    localeID,
   1232          char* maximizedLocaleID,
   1233          int32_t maximizedLocaleIDCapacity,
   1234          UErrorCode* err)
   1235 {
   1236     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1237 
   1238     if (!do_canonicalize(
   1239         localeID,
   1240         localeBuffer,
   1241         sizeof(localeBuffer),
   1242         err)) {
   1243         return -1;
   1244     }
   1245     else {
   1246         return _uloc_addLikelySubtags(
   1247                     localeBuffer,
   1248                     maximizedLocaleID,
   1249                     maximizedLocaleIDCapacity,
   1250                     err);
   1251     }
   1252 }
   1253 
   1254 U_DRAFT int32_t U_EXPORT2
   1255 uloc_minimizeSubtags(const char*    localeID,
   1256          char* minimizedLocaleID,
   1257          int32_t minimizedLocaleIDCapacity,
   1258          UErrorCode* err)
   1259 {
   1260     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1261 
   1262     if (!do_canonicalize(
   1263         localeID,
   1264         localeBuffer,
   1265         sizeof(localeBuffer),
   1266         err)) {
   1267         return -1;
   1268     }
   1269     else {
   1270         return _uloc_minimizeSubtags(
   1271                     localeBuffer,
   1272                     minimizedLocaleID,
   1273                     minimizedLocaleIDCapacity,
   1274                     err);
   1275     }
   1276 }
   1277