Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1997-2014, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  loclikely.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010feb25
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
     17 *   that then do not depend on resource bundle code and likely-subtags data.
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/locid.h"
     22 #include "unicode/putil.h"
     23 #include "unicode/uloc.h"
     24 #include "unicode/ures.h"
     25 #include "unicode/uscript.h"
     26 #include "cmemory.h"
     27 #include "cstring.h"
     28 #include "ulocimp.h"
     29 #include "ustr_imp.h"
     30 
     31 /**
     32  * This function looks for the localeID in the likelySubtags resource.
     33  *
     34  * @param localeID The tag to find.
     35  * @param buffer A buffer to hold the matching entry
     36  * @param bufferLength The length of the output buffer
     37  * @return A pointer to "buffer" if found, or a null pointer if not.
     38  */
     39 static const char*  U_CALLCONV
     40 findLikelySubtags(const char* localeID,
     41                   char* buffer,
     42                   int32_t bufferLength,
     43                   UErrorCode* err) {
     44     const char* result = NULL;
     45 
     46     if (!U_FAILURE(*err)) {
     47         int32_t resLen = 0;
     48         const UChar* s = NULL;
     49         UErrorCode tmpErr = U_ZERO_ERROR;
     50         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
     51         if (U_SUCCESS(tmpErr)) {
     52             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
     53 
     54             if (U_FAILURE(tmpErr)) {
     55                 /*
     56                  * If a resource is missing, it's not really an error, it's
     57                  * just that we don't have any data for that particular locale ID.
     58                  */
     59                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
     60                     *err = tmpErr;
     61                 }
     62             }
     63             else if (resLen >= bufferLength) {
     64                 /* The buffer should never overflow. */
     65                 *err = U_INTERNAL_PROGRAM_ERROR;
     66             }
     67             else {
     68                 u_UCharsToChars(s, buffer, resLen + 1);
     69                 result = buffer;
     70             }
     71 
     72             ures_close(subtags);
     73         } else {
     74             *err = tmpErr;
     75         }
     76     }
     77 
     78     return result;
     79 }
     80 
     81 /**
     82  * Append a tag to a buffer, adding the separator if necessary.  The buffer
     83  * must be large enough to contain the resulting tag plus any separator
     84  * necessary. The tag must not be a zero-length string.
     85  *
     86  * @param tag The tag to add.
     87  * @param tagLength The length of the tag.
     88  * @param buffer The output buffer.
     89  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
     90  **/
     91 static void U_CALLCONV
     92 appendTag(
     93     const char* tag,
     94     int32_t tagLength,
     95     char* buffer,
     96     int32_t* bufferLength) {
     97 
     98     if (*bufferLength > 0) {
     99         buffer[*bufferLength] = '_';
    100         ++(*bufferLength);
    101     }
    102 
    103     uprv_memmove(
    104         &buffer[*bufferLength],
    105         tag,
    106         tagLength);
    107 
    108     *bufferLength += tagLength;
    109 }
    110 
/**
 * These are the canonical strings for unknown languages, scripts and regions.
 *
 * unknownLanguage is written out whenever a tag is built or parsed without a
 * language subtag.  unknownScript and unknownRegion are only compared against
 * (case-insensitively) while parsing, so that a parsed "unknown" script or
 * region is reported as absent.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
    117 
    118 /**
    119  * Create a tag string from the supplied parameters.  The lang, script and region
    120  * parameters may be NULL pointers. If they are, their corresponding length parameters
    121  * must be less than or equal to 0.
    122  *
    123  * If any of the language, script or region parameters are empty, and the alternateTags
    124  * parameter is not NULL, it will be parsed for potential language, script and region tags
    125  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
    126  * it contains no language tag, the default tag for the unknown language is used.
    127  *
    128  * If the length of the new string exceeds the capacity of the output buffer,
    129  * the function copies as many bytes to the output buffer as it can, and returns
    130  * the error U_BUFFER_OVERFLOW_ERROR.
    131  *
    132  * If an illegal argument is provided, the function returns the error
    133  * U_ILLEGAL_ARGUMENT_ERROR.
    134  *
    135  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
    136  * the tag string fits in the output buffer, but the null terminator doesn't.
    137  *
    138  * @param lang The language tag to use.
    139  * @param langLength The length of the language tag.
    140  * @param script The script tag to use.
    141  * @param scriptLength The length of the script tag.
    142  * @param region The region tag to use.
    143  * @param regionLength The length of the region tag.
    144  * @param trailing Any trailing data to append to the new tag.
    145  * @param trailingLength The length of the trailing data.
    146  * @param alternateTags A string containing any alternate tags.
    147  * @param tag The output buffer.
    148  * @param tagCapacity The capacity of the output buffer.
    149  * @param err A pointer to a UErrorCode for error reporting.
    150  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
    151  **/
    152 static int32_t U_CALLCONV
    153 createTagStringWithAlternates(
    154     const char* lang,
    155     int32_t langLength,
    156     const char* script,
    157     int32_t scriptLength,
    158     const char* region,
    159     int32_t regionLength,
    160     const char* trailing,
    161     int32_t trailingLength,
    162     const char* alternateTags,
    163     char* tag,
    164     int32_t tagCapacity,
    165     UErrorCode* err) {
    166 
    167     if (U_FAILURE(*err)) {
    168         goto error;
    169     }
    170     else if (tag == NULL ||
    171              tagCapacity <= 0 ||
    172              langLength >= ULOC_LANG_CAPACITY ||
    173              scriptLength >= ULOC_SCRIPT_CAPACITY ||
    174              regionLength >= ULOC_COUNTRY_CAPACITY) {
    175         goto error;
    176     }
    177     else {
    178         /**
    179          * ULOC_FULLNAME_CAPACITY will provide enough capacity
    180          * that we can build a string that contains the language,
    181          * script and region code without worrying about overrunning
    182          * the user-supplied buffer.
    183          **/
    184         char tagBuffer[ULOC_FULLNAME_CAPACITY];
    185         int32_t tagLength = 0;
    186         int32_t capacityRemaining = tagCapacity;
    187         UBool regionAppended = FALSE;
    188 
    189         if (langLength > 0) {
    190             appendTag(
    191                 lang,
    192                 langLength,
    193                 tagBuffer,
    194                 &tagLength);
    195         }
    196         else if (alternateTags == NULL) {
    197             /*
    198              * Append the value for an unknown language, if
    199              * we found no language.
    200              */
    201             appendTag(
    202                 unknownLanguage,
    203                 (int32_t)uprv_strlen(unknownLanguage),
    204                 tagBuffer,
    205                 &tagLength);
    206         }
    207         else {
    208             /*
    209              * Parse the alternateTags string for the language.
    210              */
    211             char alternateLang[ULOC_LANG_CAPACITY];
    212             int32_t alternateLangLength = sizeof(alternateLang);
    213 
    214             alternateLangLength =
    215                 uloc_getLanguage(
    216                     alternateTags,
    217                     alternateLang,
    218                     alternateLangLength,
    219                     err);
    220             if(U_FAILURE(*err) ||
    221                 alternateLangLength >= ULOC_LANG_CAPACITY) {
    222                 goto error;
    223             }
    224             else if (alternateLangLength == 0) {
    225                 /*
    226                  * Append the value for an unknown language, if
    227                  * we found no language.
    228                  */
    229                 appendTag(
    230                     unknownLanguage,
    231                     (int32_t)uprv_strlen(unknownLanguage),
    232                     tagBuffer,
    233                     &tagLength);
    234             }
    235             else {
    236                 appendTag(
    237                     alternateLang,
    238                     alternateLangLength,
    239                     tagBuffer,
    240                     &tagLength);
    241             }
    242         }
    243 
    244         if (scriptLength > 0) {
    245             appendTag(
    246                 script,
    247                 scriptLength,
    248                 tagBuffer,
    249                 &tagLength);
    250         }
    251         else if (alternateTags != NULL) {
    252             /*
    253              * Parse the alternateTags string for the script.
    254              */
    255             char alternateScript[ULOC_SCRIPT_CAPACITY];
    256 
    257             const int32_t alternateScriptLength =
    258                 uloc_getScript(
    259                     alternateTags,
    260                     alternateScript,
    261                     sizeof(alternateScript),
    262                     err);
    263 
    264             if (U_FAILURE(*err) ||
    265                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
    266                 goto error;
    267             }
    268             else if (alternateScriptLength > 0) {
    269                 appendTag(
    270                     alternateScript,
    271                     alternateScriptLength,
    272                     tagBuffer,
    273                     &tagLength);
    274             }
    275         }
    276 
    277         if (regionLength > 0) {
    278             appendTag(
    279                 region,
    280                 regionLength,
    281                 tagBuffer,
    282                 &tagLength);
    283 
    284             regionAppended = TRUE;
    285         }
    286         else if (alternateTags != NULL) {
    287             /*
    288              * Parse the alternateTags string for the region.
    289              */
    290             char alternateRegion[ULOC_COUNTRY_CAPACITY];
    291 
    292             const int32_t alternateRegionLength =
    293                 uloc_getCountry(
    294                     alternateTags,
    295                     alternateRegion,
    296                     sizeof(alternateRegion),
    297                     err);
    298             if (U_FAILURE(*err) ||
    299                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
    300                 goto error;
    301             }
    302             else if (alternateRegionLength > 0) {
    303                 appendTag(
    304                     alternateRegion,
    305                     alternateRegionLength,
    306                     tagBuffer,
    307                     &tagLength);
    308 
    309                 regionAppended = TRUE;
    310             }
    311         }
    312 
    313         {
    314             const int32_t toCopy =
    315                 tagLength >= tagCapacity ? tagCapacity : tagLength;
    316 
    317             /**
    318              * Copy the partial tag from our internal buffer to the supplied
    319              * target.
    320              **/
    321             uprv_memcpy(
    322                 tag,
    323                 tagBuffer,
    324                 toCopy);
    325 
    326             capacityRemaining -= toCopy;
    327         }
    328 
    329         if (trailingLength > 0) {
    330             if (*trailing != '@' && capacityRemaining > 0) {
    331                 tag[tagLength++] = '_';
    332                 --capacityRemaining;
    333                 if (capacityRemaining > 0 && !regionAppended) {
    334                     /* extra separator is required */
    335                     tag[tagLength++] = '_';
    336                     --capacityRemaining;
    337                 }
    338             }
    339 
    340             if (capacityRemaining > 0) {
    341                 /*
    342                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
    343                  * don't know if the user-supplied buffers overlap.
    344                  */
    345                 const int32_t toCopy =
    346                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
    347 
    348                 uprv_memmove(
    349                     &tag[tagLength],
    350                     trailing,
    351                     toCopy);
    352             }
    353         }
    354 
    355         tagLength += trailingLength;
    356 
    357         return u_terminateChars(
    358                     tag,
    359                     tagCapacity,
    360                     tagLength,
    361                     err);
    362     }
    363 
    364 error:
    365 
    366     /**
    367      * An overflow indicates the locale ID passed in
    368      * is ill-formed.  If we got here, and there was
    369      * no previous error, it's an implicit overflow.
    370      **/
    371     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
    372         U_SUCCESS(*err)) {
    373         *err = U_ILLEGAL_ARGUMENT_ERROR;
    374     }
    375 
    376     return -1;
    377 }
    378 
    379 /**
    380  * Create a tag string from the supplied parameters.  The lang, script and region
    381  * parameters may be NULL pointers. If they are, their corresponding length parameters
    382  * must be less than or equal to 0.  If the lang parameter is an empty string, the
    383  * default value for an unknown language is written to the output buffer.
    384  *
    385  * If the length of the new string exceeds the capacity of the output buffer,
    386  * the function copies as many bytes to the output buffer as it can, and returns
    387  * the error U_BUFFER_OVERFLOW_ERROR.
    388  *
    389  * If an illegal argument is provided, the function returns the error
    390  * U_ILLEGAL_ARGUMENT_ERROR.
    391  *
    392  * @param lang The language tag to use.
    393  * @param langLength The length of the language tag.
    394  * @param script The script tag to use.
    395  * @param scriptLength The length of the script tag.
    396  * @param region The region tag to use.
    397  * @param regionLength The length of the region tag.
    398  * @param trailing Any trailing data to append to the new tag.
    399  * @param trailingLength The length of the trailing data.
    400  * @param tag The output buffer.
    401  * @param tagCapacity The capacity of the output buffer.
    402  * @param err A pointer to a UErrorCode for error reporting.
    403  * @return The length of the tag string, which may be greater than tagCapacity.
    404  **/
    405 static int32_t U_CALLCONV
    406 createTagString(
    407     const char* lang,
    408     int32_t langLength,
    409     const char* script,
    410     int32_t scriptLength,
    411     const char* region,
    412     int32_t regionLength,
    413     const char* trailing,
    414     int32_t trailingLength,
    415     char* tag,
    416     int32_t tagCapacity,
    417     UErrorCode* err)
    418 {
    419     return createTagStringWithAlternates(
    420                 lang,
    421                 langLength,
    422                 script,
    423                 scriptLength,
    424                 region,
    425                 regionLength,
    426                 trailing,
    427                 trailingLength,
    428                 NULL,
    429                 tag,
    430                 tagCapacity,
    431                 err);
    432 }
    433 
    434 /**
    435  * Parse the language, script, and region subtags from a tag string, and copy the
    436  * results into the corresponding output parameters. The buffers are null-terminated,
    437  * unless overflow occurs.
    438  *
    439  * The langLength, scriptLength, and regionLength parameters are input/output
    440  * parameters, and must contain the capacity of their corresponding buffers on
    441  * input.  On output, they will contain the actual length of the buffers, not
    442  * including the null terminator.
    443  *
    444  * If the length of any of the output subtags exceeds the capacity of the corresponding
    445  * buffer, the function copies as many bytes to the output buffer as it can, and returns
    446  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
    447  * occurs.
    448  *
    449  * If an illegal argument is provided, the function returns the error
    450  * U_ILLEGAL_ARGUMENT_ERROR.
    451  *
    452  * @param localeID The locale ID to parse.
    453  * @param lang The language tag buffer.
    454  * @param langLength The length of the language tag.
    455  * @param script The script tag buffer.
    456  * @param scriptLength The length of the script tag.
    457  * @param region The region tag buffer.
    458  * @param regionLength The length of the region tag.
    459  * @param err A pointer to a UErrorCode for error reporting.
    460  * @return The number of chars of the localeID parameter consumed.
    461  **/
    462 static int32_t U_CALLCONV
    463 parseTagString(
    464     const char* localeID,
    465     char* lang,
    466     int32_t* langLength,
    467     char* script,
    468     int32_t* scriptLength,
    469     char* region,
    470     int32_t* regionLength,
    471     UErrorCode* err)
    472 {
    473     const char* position = localeID;
    474     int32_t subtagLength = 0;
    475 
    476     if(U_FAILURE(*err) ||
    477        localeID == NULL ||
    478        lang == NULL ||
    479        langLength == NULL ||
    480        script == NULL ||
    481        scriptLength == NULL ||
    482        region == NULL ||
    483        regionLength == NULL) {
    484         goto error;
    485     }
    486 
    487     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
    488     u_terminateChars(lang, *langLength, subtagLength, err);
    489 
    490     /*
    491      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
    492      * to be an error, because it indicates the user-supplied tag is
    493      * not well-formed.
    494      */
    495     if(U_FAILURE(*err)) {
    496         goto error;
    497     }
    498 
    499     *langLength = subtagLength;
    500 
    501     /*
    502      * If no language was present, use the value of unknownLanguage
    503      * instead.  Otherwise, move past any separator.
    504      */
    505     if (*langLength == 0) {
    506         uprv_strcpy(
    507             lang,
    508             unknownLanguage);
    509         *langLength = (int32_t)uprv_strlen(lang);
    510     }
    511     else if (_isIDSeparator(*position)) {
    512         ++position;
    513     }
    514 
    515     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
    516     u_terminateChars(script, *scriptLength, subtagLength, err);
    517 
    518     if(U_FAILURE(*err)) {
    519         goto error;
    520     }
    521 
    522     *scriptLength = subtagLength;
    523 
    524     if (*scriptLength > 0) {
    525         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
    526             /**
    527              * If the script part is the "unknown" script, then don't return it.
    528              **/
    529             *scriptLength = 0;
    530         }
    531 
    532         /*
    533          * Move past any separator.
    534          */
    535         if (_isIDSeparator(*position)) {
    536             ++position;
    537         }
    538     }
    539 
    540     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
    541     u_terminateChars(region, *regionLength, subtagLength, err);
    542 
    543     if(U_FAILURE(*err)) {
    544         goto error;
    545     }
    546 
    547     *regionLength = subtagLength;
    548 
    549     if (*regionLength > 0) {
    550         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
    551             /**
    552              * If the region part is the "unknown" region, then don't return it.
    553              **/
    554             *regionLength = 0;
    555         }
    556     } else if (*position != 0 && *position != '@') {
    557         /* back up over consumed trailing separator */
    558         --position;
    559     }
    560 
    561 exit:
    562 
    563     return (int32_t)(position - localeID);
    564 
    565 error:
    566 
    567     /**
    568      * If we get here, we have no explicit error, it's the result of an
    569      * illegal argument.
    570      **/
    571     if (!U_FAILURE(*err)) {
    572         *err = U_ILLEGAL_ARGUMENT_ERROR;
    573     }
    574 
    575     goto exit;
    576 }
    577 
    578 static int32_t U_CALLCONV
    579 createLikelySubtagsString(
    580     const char* lang,
    581     int32_t langLength,
    582     const char* script,
    583     int32_t scriptLength,
    584     const char* region,
    585     int32_t regionLength,
    586     const char* variants,
    587     int32_t variantsLength,
    588     char* tag,
    589     int32_t tagCapacity,
    590     UErrorCode* err)
    591 {
    592     /**
    593      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    594      * that we can build a string that contains the language,
    595      * script and region code without worrying about overrunning
    596      * the user-supplied buffer.
    597      **/
    598     char tagBuffer[ULOC_FULLNAME_CAPACITY];
    599     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
    600 
    601     if(U_FAILURE(*err)) {
    602         goto error;
    603     }
    604 
    605     /**
    606      * Try the language with the script and region first.
    607      **/
    608     if (scriptLength > 0 && regionLength > 0) {
    609 
    610         const char* likelySubtags = NULL;
    611 
    612         createTagString(
    613             lang,
    614             langLength,
    615             script,
    616             scriptLength,
    617             region,
    618             regionLength,
    619             NULL,
    620             0,
    621             tagBuffer,
    622             sizeof(tagBuffer),
    623             err);
    624         if(U_FAILURE(*err)) {
    625             goto error;
    626         }
    627 
    628         likelySubtags =
    629             findLikelySubtags(
    630                 tagBuffer,
    631                 likelySubtagsBuffer,
    632                 sizeof(likelySubtagsBuffer),
    633                 err);
    634         if(U_FAILURE(*err)) {
    635             goto error;
    636         }
    637 
    638         if (likelySubtags != NULL) {
    639             /* Always use the language tag from the
    640                maximal string, since it may be more
    641                specific than the one provided. */
    642             return createTagStringWithAlternates(
    643                         NULL,
    644                         0,
    645                         NULL,
    646                         0,
    647                         NULL,
    648                         0,
    649                         variants,
    650                         variantsLength,
    651                         likelySubtags,
    652                         tag,
    653                         tagCapacity,
    654                         err);
    655         }
    656     }
    657 
    658     /**
    659      * Try the language with just the script.
    660      **/
    661     if (scriptLength > 0) {
    662 
    663         const char* likelySubtags = NULL;
    664 
    665         createTagString(
    666             lang,
    667             langLength,
    668             script,
    669             scriptLength,
    670             NULL,
    671             0,
    672             NULL,
    673             0,
    674             tagBuffer,
    675             sizeof(tagBuffer),
    676             err);
    677         if(U_FAILURE(*err)) {
    678             goto error;
    679         }
    680 
    681         likelySubtags =
    682             findLikelySubtags(
    683                 tagBuffer,
    684                 likelySubtagsBuffer,
    685                 sizeof(likelySubtagsBuffer),
    686                 err);
    687         if(U_FAILURE(*err)) {
    688             goto error;
    689         }
    690 
    691         if (likelySubtags != NULL) {
    692             /* Always use the language tag from the
    693                maximal string, since it may be more
    694                specific than the one provided. */
    695             return createTagStringWithAlternates(
    696                         NULL,
    697                         0,
    698                         NULL,
    699                         0,
    700                         region,
    701                         regionLength,
    702                         variants,
    703                         variantsLength,
    704                         likelySubtags,
    705                         tag,
    706                         tagCapacity,
    707                         err);
    708         }
    709     }
    710 
    711     /**
    712      * Try the language with just the region.
    713      **/
    714     if (regionLength > 0) {
    715 
    716         const char* likelySubtags = NULL;
    717 
    718         createTagString(
    719             lang,
    720             langLength,
    721             NULL,
    722             0,
    723             region,
    724             regionLength,
    725             NULL,
    726             0,
    727             tagBuffer,
    728             sizeof(tagBuffer),
    729             err);
    730         if(U_FAILURE(*err)) {
    731             goto error;
    732         }
    733 
    734         likelySubtags =
    735             findLikelySubtags(
    736                 tagBuffer,
    737                 likelySubtagsBuffer,
    738                 sizeof(likelySubtagsBuffer),
    739                 err);
    740         if(U_FAILURE(*err)) {
    741             goto error;
    742         }
    743 
    744         if (likelySubtags != NULL) {
    745             /* Always use the language tag from the
    746                maximal string, since it may be more
    747                specific than the one provided. */
    748             return createTagStringWithAlternates(
    749                         NULL,
    750                         0,
    751                         script,
    752                         scriptLength,
    753                         NULL,
    754                         0,
    755                         variants,
    756                         variantsLength,
    757                         likelySubtags,
    758                         tag,
    759                         tagCapacity,
    760                         err);
    761         }
    762     }
    763 
    764     /**
    765      * Finally, try just the language.
    766      **/
    767     {
    768         const char* likelySubtags = NULL;
    769 
    770         createTagString(
    771             lang,
    772             langLength,
    773             NULL,
    774             0,
    775             NULL,
    776             0,
    777             NULL,
    778             0,
    779             tagBuffer,
    780             sizeof(tagBuffer),
    781             err);
    782         if(U_FAILURE(*err)) {
    783             goto error;
    784         }
    785 
    786         likelySubtags =
    787             findLikelySubtags(
    788                 tagBuffer,
    789                 likelySubtagsBuffer,
    790                 sizeof(likelySubtagsBuffer),
    791                 err);
    792         if(U_FAILURE(*err)) {
    793             goto error;
    794         }
    795 
    796         if (likelySubtags != NULL) {
    797             /* Always use the language tag from the
    798                maximal string, since it may be more
    799                specific than the one provided. */
    800             return createTagStringWithAlternates(
    801                         NULL,
    802                         0,
    803                         script,
    804                         scriptLength,
    805                         region,
    806                         regionLength,
    807                         variants,
    808                         variantsLength,
    809                         likelySubtags,
    810                         tag,
    811                         tagCapacity,
    812                         err);
    813         }
    814     }
    815 
    816     return u_terminateChars(
    817                 tag,
    818                 tagCapacity,
    819                 0,
    820                 err);
    821 
    822 error:
    823 
    824     if (!U_FAILURE(*err)) {
    825         *err = U_ILLEGAL_ARGUMENT_ERROR;
    826     }
    827 
    828     return -1;
    829 }
    830 
    831 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    832     {   int32_t count = 0; \
    833         int32_t i; \
    834         for (i = 0; i < trailingLength; i++) { \
    835             if (trailing[i] == '-' || trailing[i] == '_') { \
    836                 count = 0; \
    837                 if (count > 8) { \
    838                     goto error; \
    839                 } \
    840             } else if (trailing[i] == '@') { \
    841                 break; \
    842             } else if (count > 8) { \
    843                 goto error; \
    844             } else { \
    845                 count++; \
    846             } \
    847         } \
    848     }
    849 
    850 static int32_t
    851 _uloc_addLikelySubtags(const char*    localeID,
    852          char* maximizedLocaleID,
    853          int32_t maximizedLocaleIDCapacity,
    854          UErrorCode* err)
    855 {
    856     char lang[ULOC_LANG_CAPACITY];
    857     int32_t langLength = sizeof(lang);
    858     char script[ULOC_SCRIPT_CAPACITY];
    859     int32_t scriptLength = sizeof(script);
    860     char region[ULOC_COUNTRY_CAPACITY];
    861     int32_t regionLength = sizeof(region);
    862     const char* trailing = "";
    863     int32_t trailingLength = 0;
    864     int32_t trailingIndex = 0;
    865     int32_t resultLength = 0;
    866 
    867     if(U_FAILURE(*err)) {
    868         goto error;
    869     }
    870     else if (localeID == NULL ||
    871              maximizedLocaleID == NULL ||
    872              maximizedLocaleIDCapacity <= 0) {
    873         goto error;
    874     }
    875 
    876     trailingIndex = parseTagString(
    877         localeID,
    878         lang,
    879         &langLength,
    880         script,
    881         &scriptLength,
    882         region,
    883         &regionLength,
    884         err);
    885     if(U_FAILURE(*err)) {
    886         /* Overflow indicates an illegal argument error */
    887         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    888             *err = U_ILLEGAL_ARGUMENT_ERROR;
    889         }
    890 
    891         goto error;
    892     }
    893 
    894     /* Find the length of the trailing portion. */
    895     while (_isIDSeparator(localeID[trailingIndex])) {
    896         trailingIndex++;
    897     }
    898     trailing = &localeID[trailingIndex];
    899     trailingLength = (int32_t)uprv_strlen(trailing);
    900 
    901     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
    902 
    903     resultLength =
    904         createLikelySubtagsString(
    905             lang,
    906             langLength,
    907             script,
    908             scriptLength,
    909             region,
    910             regionLength,
    911             trailing,
    912             trailingLength,
    913             maximizedLocaleID,
    914             maximizedLocaleIDCapacity,
    915             err);
    916 
    917     if (resultLength == 0) {
    918         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
    919 
    920         /*
    921          * If we get here, we need to return localeID.
    922          */
    923         uprv_memcpy(
    924             maximizedLocaleID,
    925             localeID,
    926             localIDLength <= maximizedLocaleIDCapacity ?
    927                 localIDLength : maximizedLocaleIDCapacity);
    928 
    929         resultLength =
    930             u_terminateChars(
    931                 maximizedLocaleID,
    932                 maximizedLocaleIDCapacity,
    933                 localIDLength,
    934                 err);
    935     }
    936 
    937     return resultLength;
    938 
    939 error:
    940 
    941     if (!U_FAILURE(*err)) {
    942         *err = U_ILLEGAL_ARGUMENT_ERROR;
    943     }
    944 
    945     return -1;
    946 }
    947 
    948 static int32_t
    949 _uloc_minimizeSubtags(const char*    localeID,
    950          char* minimizedLocaleID,
    951          int32_t minimizedLocaleIDCapacity,
    952          UErrorCode* err)
    953 {
    954     /**
    955      * ULOC_FULLNAME_CAPACITY will provide enough capacity
    956      * that we can build a string that contains the language,
    957      * script and region code without worrying about overrunning
    958      * the user-supplied buffer.
    959      **/
    960     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
    961     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
    962 
    963     char lang[ULOC_LANG_CAPACITY];
    964     int32_t langLength = sizeof(lang);
    965     char script[ULOC_SCRIPT_CAPACITY];
    966     int32_t scriptLength = sizeof(script);
    967     char region[ULOC_COUNTRY_CAPACITY];
    968     int32_t regionLength = sizeof(region);
    969     const char* trailing = "";
    970     int32_t trailingLength = 0;
    971     int32_t trailingIndex = 0;
    972 
    973     if(U_FAILURE(*err)) {
    974         goto error;
    975     }
    976     else if (localeID == NULL ||
    977              minimizedLocaleID == NULL ||
    978              minimizedLocaleIDCapacity <= 0) {
    979         goto error;
    980     }
    981 
    982     trailingIndex =
    983         parseTagString(
    984             localeID,
    985             lang,
    986             &langLength,
    987             script,
    988             &scriptLength,
    989             region,
    990             &regionLength,
    991             err);
    992     if(U_FAILURE(*err)) {
    993 
    994         /* Overflow indicates an illegal argument error */
    995         if (*err == U_BUFFER_OVERFLOW_ERROR) {
    996             *err = U_ILLEGAL_ARGUMENT_ERROR;
    997         }
    998 
    999         goto error;
   1000     }
   1001 
   1002     /* Find the spot where the variants or the keywords begin, if any. */
   1003     while (_isIDSeparator(localeID[trailingIndex])) {
   1004         trailingIndex++;
   1005     }
   1006     trailing = &localeID[trailingIndex];
   1007     trailingLength = (int32_t)uprv_strlen(trailing);
   1008 
   1009     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
   1010 
   1011     createTagString(
   1012         lang,
   1013         langLength,
   1014         script,
   1015         scriptLength,
   1016         region,
   1017         regionLength,
   1018         NULL,
   1019         0,
   1020         maximizedTagBuffer,
   1021         maximizedTagBufferLength,
   1022         err);
   1023     if(U_FAILURE(*err)) {
   1024         goto error;
   1025     }
   1026 
   1027     /**
   1028      * First, we need to first get the maximization
   1029      * from AddLikelySubtags.
   1030      **/
   1031     maximizedTagBufferLength =
   1032         uloc_addLikelySubtags(
   1033             maximizedTagBuffer,
   1034             maximizedTagBuffer,
   1035             maximizedTagBufferLength,
   1036             err);
   1037 
   1038     if(U_FAILURE(*err)) {
   1039         goto error;
   1040     }
   1041 
   1042     /**
   1043      * Start first with just the language.
   1044      **/
   1045     {
   1046         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1047 
   1048         const int32_t tagBufferLength =
   1049             createLikelySubtagsString(
   1050                 lang,
   1051                 langLength,
   1052                 NULL,
   1053                 0,
   1054                 NULL,
   1055                 0,
   1056                 NULL,
   1057                 0,
   1058                 tagBuffer,
   1059                 sizeof(tagBuffer),
   1060                 err);
   1061 
   1062         if(U_FAILURE(*err)) {
   1063             goto error;
   1064         }
   1065         else if (uprv_strnicmp(
   1066                     maximizedTagBuffer,
   1067                     tagBuffer,
   1068                     tagBufferLength) == 0) {
   1069 
   1070             return createTagString(
   1071                         lang,
   1072                         langLength,
   1073                         NULL,
   1074                         0,
   1075                         NULL,
   1076                         0,
   1077                         trailing,
   1078                         trailingLength,
   1079                         minimizedLocaleID,
   1080                         minimizedLocaleIDCapacity,
   1081                         err);
   1082         }
   1083     }
   1084 
   1085     /**
   1086      * Next, try the language and region.
   1087      **/
   1088     if (regionLength > 0) {
   1089 
   1090         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1091 
   1092         const int32_t tagBufferLength =
   1093             createLikelySubtagsString(
   1094                 lang,
   1095                 langLength,
   1096                 NULL,
   1097                 0,
   1098                 region,
   1099                 regionLength,
   1100                 NULL,
   1101                 0,
   1102                 tagBuffer,
   1103                 sizeof(tagBuffer),
   1104                 err);
   1105 
   1106         if(U_FAILURE(*err)) {
   1107             goto error;
   1108         }
   1109         else if (uprv_strnicmp(
   1110                     maximizedTagBuffer,
   1111                     tagBuffer,
   1112                     tagBufferLength) == 0) {
   1113 
   1114             return createTagString(
   1115                         lang,
   1116                         langLength,
   1117                         NULL,
   1118                         0,
   1119                         region,
   1120                         regionLength,
   1121                         trailing,
   1122                         trailingLength,
   1123                         minimizedLocaleID,
   1124                         minimizedLocaleIDCapacity,
   1125                         err);
   1126         }
   1127     }
   1128 
   1129     /**
   1130      * Finally, try the language and script.  This is our last chance,
   1131      * since trying with all three subtags would only yield the
   1132      * maximal version that we already have.
   1133      **/
   1134     if (scriptLength > 0 && regionLength > 0) {
   1135         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   1136 
   1137         const int32_t tagBufferLength =
   1138             createLikelySubtagsString(
   1139                 lang,
   1140                 langLength,
   1141                 script,
   1142                 scriptLength,
   1143                 NULL,
   1144                 0,
   1145                 NULL,
   1146                 0,
   1147                 tagBuffer,
   1148                 sizeof(tagBuffer),
   1149                 err);
   1150 
   1151         if(U_FAILURE(*err)) {
   1152             goto error;
   1153         }
   1154         else if (uprv_strnicmp(
   1155                     maximizedTagBuffer,
   1156                     tagBuffer,
   1157                     tagBufferLength) == 0) {
   1158 
   1159             return createTagString(
   1160                         lang,
   1161                         langLength,
   1162                         script,
   1163                         scriptLength,
   1164                         NULL,
   1165                         0,
   1166                         trailing,
   1167                         trailingLength,
   1168                         minimizedLocaleID,
   1169                         minimizedLocaleIDCapacity,
   1170                         err);
   1171         }
   1172     }
   1173 
   1174     {
   1175         /**
   1176          * If we got here, return the locale ID parameter.
   1177          **/
   1178         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
   1179 
   1180         uprv_memcpy(
   1181             minimizedLocaleID,
   1182             localeID,
   1183             localeIDLength <= minimizedLocaleIDCapacity ?
   1184                 localeIDLength : minimizedLocaleIDCapacity);
   1185 
   1186         return u_terminateChars(
   1187                     minimizedLocaleID,
   1188                     minimizedLocaleIDCapacity,
   1189                     localeIDLength,
   1190                     err);
   1191     }
   1192 
   1193 error:
   1194 
   1195     if (!U_FAILURE(*err)) {
   1196         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1197     }
   1198 
   1199     return -1;
   1200 
   1201 
   1202 }
   1203 
   1204 static UBool
   1205 do_canonicalize(const char*    localeID,
   1206          char* buffer,
   1207          int32_t bufferCapacity,
   1208          UErrorCode* err)
   1209 {
   1210     uloc_canonicalize(
   1211         localeID,
   1212         buffer,
   1213         bufferCapacity,
   1214         err);
   1215 
   1216     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
   1217         *err == U_BUFFER_OVERFLOW_ERROR) {
   1218         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1219 
   1220         return FALSE;
   1221     }
   1222     else if (U_FAILURE(*err)) {
   1223 
   1224         return FALSE;
   1225     }
   1226     else {
   1227         return TRUE;
   1228     }
   1229 }
   1230 
   1231 U_CAPI int32_t U_EXPORT2
   1232 uloc_addLikelySubtags(const char*    localeID,
   1233          char* maximizedLocaleID,
   1234          int32_t maximizedLocaleIDCapacity,
   1235          UErrorCode* err)
   1236 {
   1237     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1238 
   1239     if (!do_canonicalize(
   1240         localeID,
   1241         localeBuffer,
   1242         sizeof(localeBuffer),
   1243         err)) {
   1244         return -1;
   1245     }
   1246     else {
   1247         return _uloc_addLikelySubtags(
   1248                     localeBuffer,
   1249                     maximizedLocaleID,
   1250                     maximizedLocaleIDCapacity,
   1251                     err);
   1252     }
   1253 }
   1254 
   1255 U_CAPI int32_t U_EXPORT2
   1256 uloc_minimizeSubtags(const char*    localeID,
   1257          char* minimizedLocaleID,
   1258          int32_t minimizedLocaleIDCapacity,
   1259          UErrorCode* err)
   1260 {
   1261     char localeBuffer[ULOC_FULLNAME_CAPACITY];
   1262 
   1263     if (!do_canonicalize(
   1264         localeID,
   1265         localeBuffer,
   1266         sizeof(localeBuffer),
   1267         err)) {
   1268         return -1;
   1269     }
   1270     else {
   1271         return _uloc_minimizeSubtags(
   1272                     localeBuffer,
   1273                     minimizedLocaleID,
   1274                     minimizedLocaleIDCapacity,
   1275                     err);
   1276     }
   1277 }
   1278 
   1279 // Pairs of (language subtag, + or -) for finding out fast if common languages
   1280 // are LTR (minus) or RTL (plus).
   1281 static const char* LANG_DIR_STRING =
   1282         "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
   1283 
   1284 // Implemented here because this calls uloc_addLikelySubtags().
   1285 U_CAPI UBool U_EXPORT2
   1286 uloc_isRightToLeft(const char *locale) {
   1287     UErrorCode errorCode = U_ZERO_ERROR;
   1288     char script[8];
   1289     int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
   1290     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
   1291             scriptLength == 0) {
   1292         // Fastpath: We know the likely scripts and their writing direction
   1293         // for some common languages.
   1294         errorCode = U_ZERO_ERROR;
   1295         char lang[8];
   1296         int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
   1297         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
   1298                 langLength == 0) {
   1299             return FALSE;
   1300         }
   1301         const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
   1302         if (langPtr != NULL) {
   1303             switch (langPtr[langLength]) {
   1304             case '-': return FALSE;
   1305             case '+': return TRUE;
   1306             default: break;  // partial match of a longer code
   1307             }
   1308         }
   1309         // Otherwise, find the likely script.
   1310         errorCode = U_ZERO_ERROR;
   1311         char likely[ULOC_FULLNAME_CAPACITY];
   1312         (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
   1313         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
   1314             return FALSE;
   1315         }
   1316         scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
   1317         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
   1318                 scriptLength == 0) {
   1319             return FALSE;
   1320         }
   1321     }
   1322     UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
   1323     return uscript_isRightToLeft(scriptCode);
   1324 }
   1325 
   1326 U_NAMESPACE_BEGIN
   1327 
   1328 UBool
   1329 Locale::isRightToLeft() const {
   1330     return uloc_isRightToLeft(getBaseName());
   1331 }
   1332 
   1333 U_NAMESPACE_END
   1334