Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1997-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  loclikely.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010feb25
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
     17 *   that then do not depend on resource bundle code and likely-subtags data.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/putil.h"
     22 #include "unicode/uloc.h"
     23 #include "unicode/ures.h"
     24 #include "cmemory.h"
     25 #include "cstring.h"
     26 #include "ulocimp.h"
     27 #include "ustr_imp.h"
     28 
     29 /**
     30  * This function looks for the localeID in the likelySubtags resource.
     31  *
     32  * @param localeID The tag to find.
     33  * @param buffer A buffer to hold the matching entry
     34  * @param bufferLength The length of the output buffer
     35  * @return A pointer to "buffer" if found, or a null pointer if not.
     36  */
     37 static const char*  U_CALLCONV
     38 findLikelySubtags(const char* localeID,
     39                   char* buffer,
     40                   int32_t bufferLength,
     41                   UErrorCode* err) {
     42     const char* result = NULL;
     43 
     44     if (!U_FAILURE(*err)) {
     45         int32_t resLen = 0;
     46         const UChar* s = NULL;
     47         UErrorCode tmpErr = U_ZERO_ERROR;
     48         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
     49         if (U_SUCCESS(tmpErr)) {
     50             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
     51 
     52             if (U_FAILURE(tmpErr)) {
     53                 /*
     54                  * If a resource is missing, it's not really an error, it's
     55                  * just that we don't have any data for that particular locale ID.
     56                  */
     57                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
     58                     *err = tmpErr;
     59                 }
     60             }
     61             else if (resLen >= bufferLength) {
     62                 /* The buffer should never overflow. */
     63                 *err = U_INTERNAL_PROGRAM_ERROR;
     64             }
     65             else {
     66                 u_UCharsToChars(s, buffer, resLen + 1);
     67                 result = buffer;
     68             }
     69 
     70             ures_close(subtags);
     71         } else {
     72             *err = tmpErr;
     73         }
     74     }
     75 
     76     return result;
     77 }
     78 
     79 /**
     80  * Append a tag to a buffer, adding the separator if necessary.  The buffer
     81  * must be large enough to contain the resulting tag plus any separator
     82  * necessary. The tag must not be a zero-length string.
     83  *
     84  * @param tag The tag to add.
     85  * @param tagLength The length of the tag.
     86  * @param buffer The output buffer.
     87  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
     88  **/
     89 static void U_CALLCONV
     90 appendTag(
     91     const char* tag,
     92     int32_t tagLength,
     93     char* buffer,
     94     int32_t* bufferLength) {
     95 
     96     if (*bufferLength > 0) {
     97         buffer[*bufferLength] = '_';
     98         ++(*bufferLength);
     99     }
    100 
    101     uprv_memmove(
    102         &buffer[*bufferLength],
    103         tag,
    104         tagLength);
    105 
    106     *bufferLength += tagLength;
    107 }
    108 
    109 /**
    110  * These are the canonical strings for unknown languages, scripts and regions.
    111  **/
    112 static const char* const unknownLanguage = "und";
    113 static const char* const unknownScript = "Zzzz";
    114 static const char* const unknownRegion = "ZZ";
    115 
    116 /**
    117  * Create a tag string from the supplied parameters.  The lang, script and region
    118  * parameters may be NULL pointers. If they are, their corresponding length parameters
    119  * must be less than or equal to 0.
    120  *
    121  * If any of the language, script or region parameters are empty, and the alternateTags
    122  * parameter is not NULL, it will be parsed for potential language, script and region tags
    123  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
    124  * it contains no language tag, the default tag for the unknown language is used.
    125  *
    126  * If the length of the new string exceeds the capacity of the output buffer,
    127  * the function copies as many bytes to the output buffer as it can, and returns
    128  * the error U_BUFFER_OVERFLOW_ERROR.
    129  *
    130  * If an illegal argument is provided, the function returns the error
    131  * U_ILLEGAL_ARGUMENT_ERROR.
    132  *
    133  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
    134  * the tag string fits in the output buffer, but the null terminator doesn't.
    135  *
    136  * @param lang The language tag to use.
    137  * @param langLength The length of the language tag.
    138  * @param script The script tag to use.
    139  * @param scriptLength The length of the script tag.
    140  * @param region The region tag to use.
    141  * @param regionLength The length of the region tag.
    142  * @param trailing Any trailing data to append to the new tag.
    143  * @param trailingLength The length of the trailing data.
    144  * @param alternateTags A string containing any alternate tags.
    145  * @param tag The output buffer.
    146  * @param tagCapacity The capacity of the output buffer.
    147  * @param err A pointer to a UErrorCode for error reporting.
    148  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
    149  **/
    150 static int32_t U_CALLCONV
    151 createTagStringWithAlternates(
    152     const char* lang,
    153     int32_t langLength,
    154     const char* script,
    155     int32_t scriptLength,
    156     const char* region,
    157     int32_t regionLength,
    158     const char* trailing,
    159     int32_t trailingLength,
    160     const char* alternateTags,
    161     char* tag,
    162     int32_t tagCapacity,
    163     UErrorCode* err) {
    164 
    165     if (U_FAILURE(*err)) {
    166         goto error;
    167     }
    168     else if (tag == NULL ||
    169              tagCapacity <= 0 ||
    170              langLength >= ULOC_LANG_CAPACITY ||
    171              scriptLength >= ULOC_SCRIPT_CAPACITY ||
    172              regionLength >= ULOC_COUNTRY_CAPACITY) {
    173         goto error;
    174     }
    175     else {
    176         /**
    177          * ULOC_FULLNAME_CAPACITY will provide enough capacity
    178          * that we can build a string that contains the language,
    179          * script and region code without worrying about overrunning
    180          * the user-supplied buffer.
    181          **/
    182         char tagBuffer[ULOC_FULLNAME_CAPACITY];
    183         int32_t tagLength = 0;
    184         int32_t capacityRemaining = tagCapacity;
    185         UBool regionAppended = FALSE;
    186 
    187         if (langLength > 0) {
    188             appendTag(
    189                 lang,
    190                 langLength,
    191                 tagBuffer,
    192                 &tagLength);
    193         }
    194         else if (alternateTags == NULL) {
    195             /*
    196              * Append the value for an unknown language, if
    197              * we found no language.
    198              */
    199             appendTag(
    200                 unknownLanguage,
    201                 (int32_t)uprv_strlen(unknownLanguage),
    202                 tagBuffer,
    203                 &tagLength);
    204         }
    205         else {
    206             /*
    207              * Parse the alternateTags string for the language.
    208              */
    209             char alternateLang[ULOC_LANG_CAPACITY];
    210             int32_t alternateLangLength = sizeof(alternateLang);
    211 
    212             alternateLangLength =
    213                 uloc_getLanguage(
    214                     alternateTags,
    215                     alternateLang,
    216                     alternateLangLength,
    217                     err);
    218             if(U_FAILURE(*err) ||
    219                 alternateLangLength >= ULOC_LANG_CAPACITY) {
    220                 goto error;
    221             }
    222             else if (alternateLangLength == 0) {
    223                 /*
    224                  * Append the value for an unknown language, if
    225                  * we found no language.
    226                  */
    227                 appendTag(
    228                     unknownLanguage,
    229                     (int32_t)uprv_strlen(unknownLanguage),
    230                     tagBuffer,
    231                     &tagLength);
    232             }
    233             else {
    234                 appendTag(
    235                     alternateLang,
    236                     alternateLangLength,
    237                     tagBuffer,
    238                     &tagLength);
    239             }
    240         }
    241 
    242         if (scriptLength > 0) {
    243             appendTag(
    244                 script,
    245                 scriptLength,
    246                 tagBuffer,
    247                 &tagLength);
    248         }
    249         else if (alternateTags != NULL) {
    250             /*
    251              * Parse the alternateTags string for the script.
    252              */
    253             char alternateScript[ULOC_SCRIPT_CAPACITY];
    254 
    255             const int32_t alternateScriptLength =
    256                 uloc_getScript(
    257                     alternateTags,
    258                     alternateScript,
    259                     sizeof(alternateScript),
    260                     err);
    261 
    262             if (U_FAILURE(*err) ||
    263                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
    264                 goto error;
    265             }
    266             else if (alternateScriptLength > 0) {
    267                 appendTag(
    268                     alternateScript,
    269                     alternateScriptLength,
    270                     tagBuffer,
    271                     &tagLength);
    272             }
    273         }
    274 
    275         if (regionLength > 0) {
    276             appendTag(
    277                 region,
    278                 regionLength,
    279                 tagBuffer,
    280                 &tagLength);
    281 
    282             regionAppended = TRUE;
    283         }
    284         else if (alternateTags != NULL) {
    285             /*
    286              * Parse the alternateTags string for the region.
    287              */
    288             char alternateRegion[ULOC_COUNTRY_CAPACITY];
    289 
    290             const int32_t alternateRegionLength =
    291                 uloc_getCountry(
    292                     alternateTags,
    293                     alternateRegion,
    294                     sizeof(alternateRegion),
    295                     err);
    296             if (U_FAILURE(*err) ||
    297                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
    298                 goto error;
    299             }
    300             else if (alternateRegionLength > 0) {
    301                 appendTag(
    302                     alternateRegion,
    303                     alternateRegionLength,
    304                     tagBuffer,
    305                     &tagLength);
    306 
    307                 regionAppended = TRUE;
    308             }
    309         }
    310 
    311         {
    312             const int32_t toCopy =
    313                 tagLength >= tagCapacity ? tagCapacity : tagLength;
    314 
    315             /**
    316              * Copy the partial tag from our internal buffer to the supplied
    317              * target.
    318              **/
    319             uprv_memcpy(
    320                 tag,
    321                 tagBuffer,
    322                 toCopy);
    323 
    324             capacityRemaining -= toCopy;
    325         }
    326 
    327         if (trailingLength > 0) {
    328             if (capacityRemaining > 0 && !regionAppended) {
    329                 tag[tagLength++] = '_';
    330                 --capacityRemaining;
    331             }
    332 
    333             if (capacityRemaining > 0) {
    334                 /*
    335                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
    336                  * don't know if the user-supplied buffers overlap.
    337                  */
    338                 const int32_t toCopy =
    339                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
    340 
    341                 uprv_memmove(
    342                     &tag[tagLength],
    343                     trailing,
    344                     toCopy);
    345             }
    346         }
    347 
    348         tagLength += trailingLength;
    349 
    350         return u_terminateChars(
    351                     tag,
    352                     tagCapacity,
    353                     tagLength,
    354                     err);
    355     }
    356 
    357 error:
    358 
    359     /**
    360      * An overflow indicates the locale ID passed in
    361      * is ill-formed.  If we got here, and there was
    362      * no previous error, it's an implicit overflow.
    363      **/
    364     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
    365         U_SUCCESS(*err)) {
    366         *err = U_ILLEGAL_ARGUMENT_ERROR;
    367     }
    368 
    369     return -1;
    370 }
    371 
    372 /**
    373  * Create a tag string from the supplied parameters.  The lang, script and region
    374  * parameters may be NULL pointers. If they are, their corresponding length parameters
    375  * must be less than or equal to 0.  If the lang parameter is an empty string, the
    376  * default value for an unknown language is written to the output buffer.
    377  *
    378  * If the length of the new string exceeds the capacity of the output buffer,
    379  * the function copies as many bytes to the output buffer as it can, and returns
    380  * the error U_BUFFER_OVERFLOW_ERROR.
    381  *
    382  * If an illegal argument is provided, the function returns the error
    383  * U_ILLEGAL_ARGUMENT_ERROR.
    384  *
    385  * @param lang The language tag to use.
    386  * @param langLength The length of the language tag.
    387  * @param script The script tag to use.
    388  * @param scriptLength The length of the script tag.
    389  * @param region The region tag to use.
    390  * @param regionLength The length of the region tag.
    391  * @param trailing Any trailing data to append to the new tag.
    392  * @param trailingLength The length of the trailing data.
    393  * @param tag The output buffer.
    394  * @param tagCapacity The capacity of the output buffer.
    395  * @param err A pointer to a UErrorCode for error reporting.
    396  * @return The length of the tag string, which may be greater than tagCapacity.
    397  **/
    398 static int32_t U_CALLCONV
    399 createTagString(
    400     const char* lang,
    401     int32_t langLength,
    402     const char* script,
    403     int32_t scriptLength,
    404     const char* region,
    405     int32_t regionLength,
    406     const char* trailing,
    407     int32_t trailingLength,
    408     char* tag,
    409     int32_t tagCapacity,
    410     UErrorCode* err)
    411 {
    412     return createTagStringWithAlternates(
    413                 lang,
    414                 langLength,
    415                 script,
    416                 scriptLength,
    417                 region,
    418                 regionLength,
    419                 trailing,
    420                 trailingLength,
    421                 NULL,
    422                 tag,
    423                 tagCapacity,
    424                 err);
    425 }
    426 
    427 /**
    428  * Parse the language, script, and region subtags from a tag string, and copy the
    429  * results into the corresponding output parameters. The buffers are null-terminated,
    430  * unless overflow occurs.
    431  *
    432  * The langLength, scriptLength, and regionLength parameters are input/output
    433  * parameters, and must contain the capacity of their corresponding buffers on
    434  * input.  On output, they will contain the actual length of the buffers, not
    435  * including the null terminator.
    436  *
    437  * If the length of any of the output subtags exceeds the capacity of the corresponding
    438  * buffer, the function copies as many bytes to the output buffer as it can, and returns
    439  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
    440  * occurs.
    441  *
    442  * If an illegal argument is provided, the function returns the error
    443  * U_ILLEGAL_ARGUMENT_ERROR.
    444  *
    445  * @param localeID The locale ID to parse.
    446  * @param lang The language tag buffer.
    447  * @param langLength The length of the language tag.
    448  * @param script The script tag buffer.
    449  * @param scriptLength The length of the script tag.
    450  * @param region The region tag buffer.
    451  * @param regionLength The length of the region tag.
    452  * @param err A pointer to a UErrorCode for error reporting.
    453  * @return The number of chars of the localeID parameter consumed.
    454  **/
    455 static int32_t U_CALLCONV
    456 parseTagString(
    457     const char* localeID,
    458     char* lang,
    459     int32_t* langLength,
    460     char* script,
    461     int32_t* scriptLength,
    462     char* region,
    463     int32_t* regionLength,
    464     UErrorCode* err)
    465 {
    466     const char* position = localeID;
    467     int32_t subtagLength = 0;
    468 
    469     if(U_FAILURE(*err) ||
    470        localeID == NULL ||
    471        lang == NULL ||
    472        langLength == NULL ||
    473        script == NULL ||
    474        scriptLength == NULL ||
    475        region == NULL ||
    476        regionLength == NULL) {
    477         goto error;
    478     }
    479 
    480     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
    481     u_terminateChars(lang, *langLength, subtagLength, err);
    482 
    483     /*
    484      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
    485      * to be an error, because it indicates the user-supplied tag is
    486      * not well-formed.
    487      */
    488     if(U_FAILURE(*err)) {
    489         goto error;
    490     }
    491 
    492     *langLength = subtagLength;
    493 
    494     /*
    495      * If no language was present, use the value of unknownLanguage
    496      * instead.  Otherwise, move past any separator.
    497      */
    498     if (*langLength == 0) {
    499         uprv_strcpy(
    500             lang,
    501             unknownLanguage);
    502         *langLength = (int32_t)uprv_strlen(lang);
    503     }
    504     else if (_isIDSeparator(*position)) {
    505         ++position;
    506     }
    507 
    508     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
    509     u_terminateChars(script, *scriptLength, subtagLength, err);
    510 
    511     if(U_FAILURE(*err)) {
    512         goto error;
    513     }
    514 
    515     *scriptLength = subtagLength;
    516 
    517     if (*scriptLength > 0) {
    518         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
    519             /**
    520              * If the script part is the "unknown" script, then don't return it.
    521              **/
    522             *scriptLength = 0;
    523         }
    524 
    525         /*
    526          * Move past any separator.
    527          */
    528         if (_isIDSeparator(*position)) {
    529             ++position;
    530         }
    531     }
    532 
    533     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
    534     u_terminateChars(region, *regionLength, subtagLength, err);
    535 
    536     if(U_FAILURE(*err)) {
    537         goto error;
    538     }
    539 
    540     *regionLength = subtagLength;
    541 
    542     if (*regionLength > 0) {
    543         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
    544             /**
    545              * If the region part is the "unknown" region, then don't return it.
    546              **/
    547             *regionLength = 0;
    548         }
    549     }
    550 
    551 exit:
    552 
    553     return (int32_t)(position - localeID);
    554 
    555 error:
    556 
    557     /**
    558      * If we get here, we have no explicit error, it's the result of an
    559      * illegal argument.
    560      **/
    561     if (!U_FAILURE(*err)) {
    562         *err = U_ILLEGAL_ARGUMENT_ERROR;
    563     }
    564 
    565     goto exit;
    566 }
    567 
    568 static int32_t U_CALLCONV
    569 createLikelySubtagsString(
    570     const char* lang,
    571     int32_t langLength,
    572     const char* script,
    573     int32_t scriptLength,
    574     const char* region,
    575     int32_t regionLength,
    576     const char* variants,
    577     int32_t variantsLength,
    578     char* tag,
    579     int32_t tagCapacity,
    580     UErrorCode* err)
    581 {
    582     /**
    583      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    584      * that we can build a string that contains the language,
    585      * script and region code without worrying about overrunning
    586      * the user-supplied buffer.
    587      **/
    588     char tagBuffer[ULOC_FULLNAME_CAPACITY];
    589     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
    590     int32_t tagBufferLength = 0;
    591 
    592     if(U_FAILURE(*err)) {
    593         goto error;
    594     }
    595 
    596     /**
    597      * Try the language with the script and region first.
    598      **/
    599     if (scriptLength > 0 && regionLength > 0) {
    600 
    601         const char* likelySubtags = NULL;
    602 
    603         tagBufferLength = createTagString(
    604             lang,
    605             langLength,
    606             script,
    607             scriptLength,
    608             region,
    609             regionLength,
    610             NULL,
    611             0,
    612             tagBuffer,
    613             sizeof(tagBuffer),
    614             err);
    615         if(U_FAILURE(*err)) {
    616             goto error;
    617         }
    618 
    619         likelySubtags =
    620             findLikelySubtags(
    621                 tagBuffer,
    622                 likelySubtagsBuffer,
    623                 sizeof(likelySubtagsBuffer),
    624                 err);
    625         if(U_FAILURE(*err)) {
    626             goto error;
    627         }
    628 
    629         if (likelySubtags != NULL) {
    630             /* Always use the language tag from the
    631                maximal string, since it may be more
    632                specific than the one provided. */
    633             return createTagStringWithAlternates(
    634                         NULL,
    635                         0,
    636                         NULL,
    637                         0,
    638                         NULL,
    639                         0,
    640                         variants,
    641                         variantsLength,
    642                         likelySubtags,
    643                         tag,
    644                         tagCapacity,
    645                         err);
    646         }
    647     }
    648 
    649     /**
    650      * Try the language with just the script.
    651      **/
    652     if (scriptLength > 0) {
    653 
    654         const char* likelySubtags = NULL;
    655 
    656         tagBufferLength = createTagString(
    657             lang,
    658             langLength,
    659             script,
    660             scriptLength,
    661             NULL,
    662             0,
    663             NULL,
    664             0,
    665             tagBuffer,
    666             sizeof(tagBuffer),
    667             err);
    668         if(U_FAILURE(*err)) {
    669             goto error;
    670         }
    671 
    672         likelySubtags =
    673             findLikelySubtags(
    674                 tagBuffer,
    675                 likelySubtagsBuffer,
    676                 sizeof(likelySubtagsBuffer),
    677                 err);
    678         if(U_FAILURE(*err)) {
    679             goto error;
    680         }
    681 
    682         if (likelySubtags != NULL) {
    683             /* Always use the language tag from the
    684                maximal string, since it may be more
    685                specific than the one provided. */
    686             return createTagStringWithAlternates(
    687                         NULL,
    688                         0,
    689                         NULL,
    690                         0,
    691                         region,
    692                         regionLength,
    693                         variants,
    694                         variantsLength,
    695                         likelySubtags,
    696                         tag,
    697                         tagCapacity,
    698                         err);
    699         }
    700     }
    701 
    702     /**
    703      * Try the language with just the region.
    704      **/
    705     if (regionLength > 0) {
    706 
    707         const char* likelySubtags = NULL;
    708 
    709         createTagString(
    710             lang,
    711             langLength,
    712             NULL,
    713             0,
    714             region,
    715             regionLength,
    716             NULL,
    717             0,
    718             tagBuffer,
    719             sizeof(tagBuffer),
    720             err);
    721         if(U_FAILURE(*err)) {
    722             goto error;
    723         }
    724 
    725         likelySubtags =
    726             findLikelySubtags(
    727                 tagBuffer,
    728                 likelySubtagsBuffer,
    729                 sizeof(likelySubtagsBuffer),
    730                 err);
    731         if(U_FAILURE(*err)) {
    732             goto error;
    733         }
    734 
    735         if (likelySubtags != NULL) {
    736             /* Always use the language tag from the
    737                maximal string, since it may be more
    738                specific than the one provided. */
    739             return createTagStringWithAlternates(
    740                         NULL,
    741                         0,
    742                         script,
    743                         scriptLength,
    744                         NULL,
    745                         0,
    746                         variants,
    747                         variantsLength,
    748                         likelySubtags,
    749                         tag,
    750                         tagCapacity,
    751                         err);
    752         }
    753     }
    754 
    755     /**
    756      * Finally, try just the language.
    757      **/
    758     {
    759         const char* likelySubtags = NULL;
    760 
    761         createTagString(
    762             lang,
    763             langLength,
    764             NULL,
    765             0,
    766             NULL,
    767             0,
    768             NULL,
    769             0,
    770             tagBuffer,
    771             sizeof(tagBuffer),
    772             err);
    773         if(U_FAILURE(*err)) {
    774             goto error;
    775         }
    776 
    777         likelySubtags =
    778             findLikelySubtags(
    779                 tagBuffer,
    780                 likelySubtagsBuffer,
    781                 sizeof(likelySubtagsBuffer),
    782                 err);
    783         if(U_FAILURE(*err)) {
    784             goto error;
    785         }
    786 
    787         if (likelySubtags != NULL) {
    788             /* Always use the language tag from the
    789                maximal string, since it may be more
    790                specific than the one provided. */
    791             return createTagStringWithAlternates(
    792                         NULL,
    793                         0,
    794                         script,
    795                         scriptLength,
    796                         region,
    797                         regionLength,
    798                         variants,
    799                         variantsLength,
    800                         likelySubtags,
    801                         tag,
    802                         tagCapacity,
    803                         err);
    804         }
    805     }
    806 
    807     return u_terminateChars(
    808                 tag,
    809                 tagCapacity,
    810                 0,
    811                 err);
    812 
    813 error:
    814 
    815     if (!U_FAILURE(*err)) {
    816         *err = U_ILLEGAL_ARGUMENT_ERROR;
    817     }
    818 
    819     return -1;
    820 }
    821 
    822 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    823     {   int32_t count = 0; \
    824         int32_t i; \
    825         for (i = 0; i < trailingLength; i++) { \
    826             if (trailing[i] == '-' || trailing[i] == '_') { \
    827                 count = 0; \
    828                 if (count > 8) { \
    829                     goto error; \
    830                 } \
    831             } else if (trailing[i] == '@') { \
    832                 break; \
    833             } else if (count > 8) { \
    834                 goto error; \
    835             } else { \
    836                 count++; \
    837             } \
    838         } \
    839     }
    840 
    841 static int32_t
    842 _uloc_addLikelySubtags(const char*    localeID,
    843          char* maximizedLocaleID,
    844          int32_t maximizedLocaleIDCapacity,
    845          UErrorCode* err)
    846 {
    847     char lang[ULOC_LANG_CAPACITY];
    848     int32_t langLength = sizeof(lang);
    849     char script[ULOC_SCRIPT_CAPACITY];
    850     int32_t scriptLength = sizeof(script);
    851     char region[ULOC_COUNTRY_CAPACITY];
    852     int32_t regionLength = sizeof(region);
    853     const char* trailing = "";
    854     int32_t trailingLength = 0;
    855     int32_t trailingIndex = 0;
    856     int32_t resultLength = 0;
    857 
    858     if(U_FAILURE(*err)) {
    859         goto error;
    860     }
    861     else if (localeID == NULL ||
    862              maximizedLocaleID == NULL ||
    863              maximizedLocaleIDCapacity <= 0) {
    864         goto error;
    865     }
    866 
    867     trailingIndex = parseTagString(
    868         localeID,
    869         lang,
    870         &langLength,
    871         script,
    872         &scriptLength,
    873         region,
    874         &regionLength,
    875         err);
    876     if(U_FAILURE(*err)) {
    877         /* Overflow indicates an illegal argument error */
    878         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    879             *err = U_ILLEGAL_ARGUMENT_ERROR;
    880         }
    881 
    882         goto error;
    883     }
    884 
    885     /* Find the length of the trailing portion. */
    886     trailing = &localeID[trailingIndex];
    887     trailingLength = (int32_t)uprv_strlen(trailing);
    888 
    889     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
    890 
    891     resultLength =
    892         createLikelySubtagsString(
    893             lang,
    894             langLength,
    895             script,
    896             scriptLength,
    897             region,
    898             regionLength,
    899             trailing,
    900             trailingLength,
    901             maximizedLocaleID,
    902             maximizedLocaleIDCapacity,
    903             err);
    904 
    905     if (resultLength == 0) {
    906         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
    907 
    908         /*
    909          * If we get here, we need to return localeID.
    910          */
    911         uprv_memcpy(
    912             maximizedLocaleID,
    913             localeID,
    914             localIDLength <= maximizedLocaleIDCapacity ?
    915                 localIDLength : maximizedLocaleIDCapacity);
    916 
    917         resultLength =
    918             u_terminateChars(
    919                 maximizedLocaleID,
    920                 maximizedLocaleIDCapacity,
    921                 localIDLength,
    922                 err);
    923     }
    924 
    925     return resultLength;
    926 
    927 error:
    928 
    929     if (!U_FAILURE(*err)) {
    930         *err = U_ILLEGAL_ARGUMENT_ERROR;
    931     }
    932 
    933     return -1;
    934 }
    935 
    936 static int32_t
    937 _uloc_minimizeSubtags(const char*    localeID,
    938          char* minimizedLocaleID,
    939          int32_t minimizedLocaleIDCapacity,
    940          UErrorCode* err)
    941 {
    942     /**
    943      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    944      * that we can build a string that contains the language,
    945      * script and region code without worrying about overrunning
    946      * the user-supplied buffer.
    947      **/
    948     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
    949     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
    950 
    951     char lang[ULOC_LANG_CAPACITY];
    952     int32_t langLength = sizeof(lang);
    953     char script[ULOC_SCRIPT_CAPACITY];
    954     int32_t scriptLength = sizeof(script);
    955     char region[ULOC_COUNTRY_CAPACITY];
    956     int32_t regionLength = sizeof(region);
    957     const char* trailing = "";
    958     int32_t trailingLength = 0;
    959     int32_t trailingIndex = 0;
    960 
    961     if(U_FAILURE(*err)) {
    962         goto error;
    963     }
    964     else if (localeID == NULL ||
    965              minimizedLocaleID == NULL ||
    966              minimizedLocaleIDCapacity <= 0) {
    967         goto error;
    968     }
    969 
    970     trailingIndex =
    971         parseTagString(
    972             localeID,
    973             lang,
    974             &langLength,
    975             script,
    976             &scriptLength,
    977             region,
    978             &regionLength,
    979             err);
    980     if(U_FAILURE(*err)) {
    981 
    982         /* Overflow indicates an illegal argument error */
    983         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    984             *err = U_ILLEGAL_ARGUMENT_ERROR;
    985         }
    986 
    987         goto error;
    988     }
    989 
    990     /* Find the spot where the variants begin, if any. */
    991     trailing = &localeID[trailingIndex];
    992     trailingLength = (int32_t)uprv_strlen(trailing);
    993 
    994     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
    995 
    996     createTagString(
    997         lang,
    998         langLength,
    999         script,
   1000         scriptLength,
   1001         region,
   1002         regionLength,
   1003         NULL,
   1004         0,
   1005         maximizedTagBuffer,
   1006         maximizedTagBufferLength,
   1007         err);
   1008     if(U_FAILURE(*err)) {
   1009         goto error;
   1010     }
   1011 
   1012     /**
   1013      * First, we need to first get the maximization
   1014      * from AddLikelySubtags.
   1015      **/
   1016     maximizedTagBufferLength =
   1017         uloc_addLikelySubtags(
   1018             maximizedTagBuffer,
   1019             maximizedTagBuffer,
   1020             maximizedTagBufferLength,
   1021             err);
   1022 
   1023     if(U_FAILURE(*err)) {
   1024         goto error;
   1025     }
   1026 
   1027     /**
   1028      * Start first with just the language.
   1029      **/
   1030     {
   1031         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1032 
   1033         const int32_t tagBufferLength =
   1034             createLikelySubtagsString(
   1035                 lang,
   1036                 langLength,
   1037                 NULL,
   1038                 0,
   1039                 NULL,
   1040                 0,
   1041                 NULL,
   1042                 0,
   1043                 tagBuffer,
   1044                 sizeof(tagBuffer),
   1045                 err);
   1046 
   1047         if(U_FAILURE(*err)) {
   1048             goto error;
   1049         }
   1050         else if (uprv_strnicmp(
   1051                     maximizedTagBuffer,
   1052                     tagBuffer,
   1053                     tagBufferLength) == 0) {
   1054 
   1055             return createTagString(
   1056                         lang,
   1057                         langLength,
   1058                         NULL,
   1059                         0,
   1060                         NULL,
   1061                         0,
   1062                         trailing,
   1063                         trailingLength,
   1064                         minimizedLocaleID,
   1065                         minimizedLocaleIDCapacity,
   1066                         err);
   1067         }
   1068     }
   1069 
   1070     /**
   1071      * Next, try the language and region.
   1072      **/
   1073     if (regionLength > 0) {
   1074 
   1075         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1076 
   1077         const int32_t tagBufferLength =
   1078             createLikelySubtagsString(
   1079                 lang,
   1080                 langLength,
   1081                 NULL,
   1082                 0,
   1083                 region,
   1084                 regionLength,
   1085                 NULL,
   1086                 0,
   1087                 tagBuffer,
   1088                 sizeof(tagBuffer),
   1089                 err);
   1090 
   1091         if(U_FAILURE(*err)) {
   1092             goto error;
   1093         }
   1094         else if (uprv_strnicmp(
   1095                     maximizedTagBuffer,
   1096                     tagBuffer,
   1097                     tagBufferLength) == 0) {
   1098 
   1099             return createTagString(
   1100                         lang,
   1101                         langLength,
   1102                         NULL,
   1103                         0,
   1104                         region,
   1105                         regionLength,
   1106                         trailing,
   1107                         trailingLength,
   1108                         minimizedLocaleID,
   1109                         minimizedLocaleIDCapacity,
   1110                         err);
   1111         }
   1112     }
   1113 
   1114     /**
   1115      * Finally, try the language and script.  This is our last chance,
   1116      * since trying with all three subtags would only yield the
   1117      * maximal version that we already have.
   1118      **/
   1119     if (scriptLength > 0 && regionLength > 0) {
   1120         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1121 
   1122         const int32_t tagBufferLength =
   1123             createLikelySubtagsString(
   1124                 lang,
   1125                 langLength,
   1126                 script,
   1127                 scriptLength,
   1128                 NULL,
   1129                 0,
   1130                 NULL,
   1131                 0,
   1132                 tagBuffer,
   1133                 sizeof(tagBuffer),
   1134                 err);
   1135 
   1136         if(U_FAILURE(*err)) {
   1137             goto error;
   1138         }
   1139         else if (uprv_strnicmp(
   1140                     maximizedTagBuffer,
   1141                     tagBuffer,
   1142                     tagBufferLength) == 0) {
   1143 
   1144             return createTagString(
   1145                         lang,
   1146                         langLength,
   1147                         script,
   1148                         scriptLength,
   1149                         NULL,
   1150                         0,
   1151                         trailing,
   1152                         trailingLength,
   1153                         minimizedLocaleID,
   1154                         minimizedLocaleIDCapacity,
   1155                         err);
   1156         }
   1157     }
   1158 
   1159     {
   1160         /**
   1161          * If we got here, return the locale ID parameter.
   1162          **/
   1163         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
   1164 
   1165         uprv_memcpy(
   1166             minimizedLocaleID,
   1167             localeID,
   1168             localeIDLength <= minimizedLocaleIDCapacity ?
   1169                 localeIDLength : minimizedLocaleIDCapacity);
   1170 
   1171         return u_terminateChars(
   1172                     minimizedLocaleID,
   1173                     minimizedLocaleIDCapacity,
   1174                     localeIDLength,
   1175                     err);
   1176     }
   1177 
   1178 error:
   1179 
   1180     if (!U_FAILURE(*err)) {
   1181         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1182     }
   1183 
   1184     return -1;
   1185 
   1186 
   1187 }
   1188 
   1189 static UBool
   1190 do_canonicalize(const char*    localeID,
   1191          char* buffer,
   1192          int32_t bufferCapacity,
   1193          UErrorCode* err)
   1194 {
   1195     uloc_canonicalize(
   1196         localeID,
   1197         buffer,
   1198         bufferCapacity,
   1199         err);
   1200 
   1201     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
   1202         *err == U_BUFFER_OVERFLOW_ERROR) {
   1203         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1204 
   1205         return FALSE;
   1206     }
   1207     else if (U_FAILURE(*err)) {
   1208 
   1209         return FALSE;
   1210     }
   1211     else {
   1212         return TRUE;
   1213     }
   1214 }
   1215 
   1216 U_DRAFT int32_t U_EXPORT2
   1217 uloc_addLikelySubtags(const char*    localeID,
   1218          char* maximizedLocaleID,
   1219          int32_t maximizedLocaleIDCapacity,
   1220          UErrorCode* err)
   1221 {
   1222     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1223 
   1224     if (!do_canonicalize(
   1225         localeID,
   1226         localeBuffer,
   1227         sizeof(localeBuffer),
   1228         err)) {
   1229         return -1;
   1230     }
   1231     else {
   1232         return _uloc_addLikelySubtags(
   1233                     localeBuffer,
   1234                     maximizedLocaleID,
   1235                     maximizedLocaleIDCapacity,
   1236                     err);
   1237     }
   1238 }
   1239 
   1240 U_DRAFT int32_t U_EXPORT2
   1241 uloc_minimizeSubtags(const char*    localeID,
   1242          char* minimizedLocaleID,
   1243          int32_t minimizedLocaleIDCapacity,
   1244          UErrorCode* err)
   1245 {
   1246     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1247 
   1248     if (!do_canonicalize(
   1249         localeID,
   1250         localeBuffer,
   1251         sizeof(localeBuffer),
   1252         err)) {
   1253         return -1;
   1254     }
   1255     else {
   1256         return _uloc_minimizeSubtags(
   1257                     localeBuffer,
   1258                     minimizedLocaleID,
   1259                     minimizedLocaleIDCapacity,
   1260                     err);
   1261     }
   1262 }
   1263