Home | History | Annotate | Download | only in libxslt
      1 /*
      2  * xsltlocale.c: locale handling
      3  *
      4  * Reference:
      5  * RFC 3066: Tags for the Identification of Languages
      6  * http://www.ietf.org/rfc/rfc3066.txt
      7  * ISO 639-1, ISO 3166-1
      8  *
      9  * Author: Nick Wellnhofer
     10  * winapi port: Roumen Petrov
     11  */
     12 
     13 #define IN_LIBXSLT
     14 #include "libxslt.h"
     15 
     16 #include <string.h>
     17 #include <libxml/xmlmemory.h>
     18 
     19 #include "xsltlocale.h"
     20 #include "xsltutils.h"
     21 
/*
 * Compatibility shim for glibc 2.x with minor version <= 2: the locale
 * functions used in this file are mapped onto their double-underscore
 * variants and LC_COLLATE_MASK is derived from LC_COLLATE.
 * NOTE(review): presumably these releases lack the public names - confirm.
 */
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
#define newlocale __newlocale
#define freelocale __freelocale
#define strxfrm_l __strxfrm_l
#define LC_COLLATE_MASK (1 << LC_COLLATE)
#endif
     28 
     29 #define ISALPHA(c) ((c & 0xc0) == 0x40 && (unsigned)((c & 0x1f) - 1) < 26)
     30 #define TOUPPER(c) (c & ~0x20)
     31 #define TOLOWER(c) (c | 0x20)
     32 
     33 /*without terminating null character*/
     34 #define XSLTMAX_ISO639LANGLEN		8
     35 #define XSLTMAX_ISO3166CNTRYLEN		8
     36 					/* <lang>-<cntry> */
     37 #define XSLTMAX_LANGTAGLEN		(XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
     38 
/* Forward declaration: maps a language code to a default region (defined below) */
static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
     40 
     41 #ifdef XSLT_LOCALE_WINAPI
     42 xmlRMutexPtr xsltLocaleMutex = NULL;
     43 
     44 struct xsltRFC1766Info_s {
     45       /*note typedef unsigned char xmlChar !*/
     46     xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
     47       /*note typedef LCID xsltLocale !*/
     48     xsltLocale lcid;
     49 };
     50 typedef struct xsltRFC1766Info_s xsltRFC1766Info;
     51 
     52 static int xsltLocaleListSize = 0;
     53 static xsltRFC1766Info *xsltLocaleList = NULL;
     54 
     55 
     56 static xsltLocale
     57 xslt_locale_WINAPI(const xmlChar *languageTag) {
     58     int k;
     59     xsltRFC1766Info *p = xsltLocaleList;
     60 
     61     for (k=0; k<xsltLocaleListSize; k++, p++)
     62 	if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
     63     return((xsltLocale)0);
     64 }
     65 
/* Forward declaration: fills the locale table when it is still empty (defined below) */
static void xsltEnumSupportedLocales(void);
     67 #endif
     68 
     69 /**
     70  * xsltNewLocale:
     71  * @languageTag: RFC 3066 language tag
     72  *
     73  * Creates a new locale of an opaque system dependent type based on the
     74  * language tag.
     75  *
     76  * Returns the locale or NULL on error or if no matching locale was found
     77  */
     78 xsltLocale
     79 xsltNewLocale(const xmlChar *languageTag) {
     80 #ifdef XSLT_LOCALE_XLOCALE
     81     xsltLocale locale;
     82     char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
     83     const xmlChar *p = languageTag;
     84     const char *region = NULL;
     85     char *q = localeName;
     86     int i, llen;
     87 
     88     /* Convert something like "pt-br" to "pt_BR.utf8" */
     89 
     90     if (languageTag == NULL)
     91     	return(NULL);
     92 
     93     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
     94 	*q++ = TOLOWER(*p++);
     95 
     96     if (i == 0)
     97     	return(NULL);
     98 
     99     llen = i;
    100     *q++ = '_';
    101 
    102     if (*p) {
    103     	if (*p++ != '-')
    104     	    return(NULL);
    105 
    106 	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
    107 	    *q++ = TOUPPER(*p++);
    108 
    109     	if (i == 0 || *p)
    110     	    return(NULL);
    111 
    112         memcpy(q, ".utf8", 6);
    113         locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
    114         if (locale != NULL)
    115             return(locale);
    116 
    117         /* Continue without using country code */
    118 
    119         q = localeName + llen + 1;
    120     }
    121 
    122     /* Try locale without territory, e.g. for Esperanto (eo) */
    123 
    124     memcpy(q, ".utf8", 6);
    125     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
    126     if (locale != NULL)
    127         return(locale);
    128 
    129     /* Try to find most common country for language */
    130 
    131     if (llen != 2)
    132         return(NULL);
    133 
    134     region = (char *)xsltDefaultRegion((xmlChar *)localeName);
    135     if (region == NULL)
    136         return(NULL);
    137 
    138     q = localeName + llen + 1;
    139     *q++ = region[0];
    140     *q++ = region[1];
    141     memcpy(q, ".utf8", 6);
    142     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
    143 
    144     return(locale);
    145 #endif
    146 
    147 #ifdef XSLT_LOCALE_WINAPI
    148 {
    149     xsltLocale    locale = (xsltLocale)0;
    150     xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
    151     xmlChar       *q = localeName;
    152     const xmlChar *p = languageTag;
    153     int           i, llen;
    154     const xmlChar *region = NULL;
    155 
    156     if (languageTag == NULL) goto end;
    157 
    158     xsltEnumSupportedLocales();
    159 
    160     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
    161 	*q++ = TOLOWER(*p++);
    162     if (i == 0) goto end;
    163 
    164     llen = i;
    165     *q++ = '-';
    166     if (*p) { /*if country tag is given*/
    167 	if (*p++ != '-') goto end;
    168 
    169 	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
    170 	    *q++ = TOUPPER(*p++);
    171 	if (i == 0 || *p) goto end;
    172 
    173 	*q = '\0';
    174 	locale = xslt_locale_WINAPI(localeName);
    175 	if (locale != (xsltLocale)0) goto end;
    176     }
    177     /* Try to find most common country for language */
    178     region = xsltDefaultRegion(localeName);
    179     if (region == NULL) goto end;
    180 
    181     strcpy(localeName + llen + 1, region);
    182     locale = xslt_locale_WINAPI(localeName);
    183 end:
    184     return(locale);
    185 }
    186 #endif
    187 
    188 #ifdef XSLT_LOCALE_NONE
    189     return(NULL);
    190 #endif
    191 }
    192 
    193 static const xmlChar*
    194 xsltDefaultRegion(const xmlChar *localeName) {
    195     xmlChar c;
    196     /* region should be xmlChar, but gcc warns on all string assignments */
    197     const char *region = NULL;
    198 
    199     c = localeName[1];
    200     /* This is based on the locales from glibc 2.3.3 */
    201 
    202     switch (localeName[0]) {
    203         case 'a':
    204             if (c == 'a' || c == 'm') region = "ET";
    205             else if (c == 'f') region = "ZA";
    206             else if (c == 'n') region = "ES";
    207             else if (c == 'r') region = "AE";
    208             else if (c == 'z') region = "AZ";
    209             break;
    210         case 'b':
    211             if (c == 'e') region = "BY";
    212             else if (c == 'g') region = "BG";
    213             else if (c == 'n') region = "BD";
    214             else if (c == 'r') region = "FR";
    215             else if (c == 's') region = "BA";
    216             break;
    217         case 'c':
    218             if (c == 'a') region = "ES";
    219             else if (c == 's') region = "CZ";
    220             else if (c == 'y') region = "GB";
    221             break;
    222         case 'd':
    223             if (c == 'a') region = "DK";
    224             else if (c == 'e') region = "DE";
    225             break;
    226         case 'e':
    227             if (c == 'l') region = "GR";
    228             else if (c == 'n' || c == 'o') region = "US";
    229             else if (c == 's' || c == 'u') region = "ES";
    230             else if (c == 't') region = "EE";
    231             break;
    232         case 'f':
    233             if (c == 'a') region = "IR";
    234             else if (c == 'i') region = "FI";
    235             else if (c == 'o') region = "FO";
    236             else if (c == 'r') region = "FR";
    237             break;
    238         case 'g':
    239             if (c == 'a') region = "IE";
    240             else if (c == 'l') region = "ES";
    241             else if (c == 'v') region = "GB";
    242             break;
    243         case 'h':
    244             if (c == 'e') region = "IL";
    245             else if (c == 'i') region = "IN";
    246             else if (c == 'r') region = "HT";
    247             else if (c == 'u') region = "HU";
    248             break;
    249         case 'i':
    250             if (c == 'd') region = "ID";
    251             else if (c == 's') region = "IS";
    252             else if (c == 't') region = "IT";
    253             else if (c == 'w') region = "IL";
    254             break;
    255         case 'j':
    256             if (c == 'a') region = "JP";
    257             break;
    258         case 'k':
    259             if (c == 'l') region = "GL";
    260             else if (c == 'o') region = "KR";
    261             else if (c == 'w') region = "GB";
    262             break;
    263         case 'l':
    264             if (c == 't') region = "LT";
    265             else if (c == 'v') region = "LV";
    266             break;
    267         case 'm':
    268             if (c == 'k') region = "MK";
    269             else if (c == 'l' || c == 'r') region = "IN";
    270             else if (c == 'n') region = "MN";
    271             else if (c == 's') region = "MY";
    272             else if (c == 't') region = "MT";
    273             break;
    274         case 'n':
    275             if (c == 'b' || c == 'n' || c == 'o') region = "NO";
    276             else if (c == 'e') region = "NP";
    277             else if (c == 'l') region = "NL";
    278             break;
    279         case 'o':
    280             if (c == 'm') region = "ET";
    281             break;
    282         case 'p':
    283             if (c == 'a') region = "IN";
    284             else if (c == 'l') region = "PL";
    285             else if (c == 't') region = "PT";
    286             break;
    287         case 'r':
    288             if (c == 'o') region = "RO";
    289             else if (c == 'u') region = "RU";
    290             break;
    291         case 's':
    292             switch (c) {
    293                 case 'e': region = "NO"; break;
    294                 case 'h': region = "YU"; break;
    295                 case 'k': region = "SK"; break;
    296                 case 'l': region = "SI"; break;
    297                 case 'o': region = "ET"; break;
    298                 case 'q': region = "AL"; break;
    299                 case 't': region = "ZA"; break;
    300                 case 'v': region = "SE"; break;
    301             }
    302             break;
    303         case 't':
    304             if (c == 'a' || c == 'e') region = "IN";
    305             else if (c == 'h') region = "TH";
    306             else if (c == 'i') region = "ER";
    307             else if (c == 'r') region = "TR";
    308             else if (c == 't') region = "RU";
    309             break;
    310         case 'u':
    311             if (c == 'k') region = "UA";
    312             else if (c == 'r') region = "PK";
    313             break;
    314         case 'v':
    315             if (c == 'i') region = "VN";
    316             break;
    317         case 'w':
    318             if (c == 'a') region = "BE";
    319             break;
    320         case 'x':
    321             if (c == 'h') region = "ZA";
    322             break;
    323         case 'z':
    324             if (c == 'h') region = "CN";
    325             else if (c == 'u') region = "ZA";
    326             break;
    327     }
    328     return((xmlChar *)region);
    329 }
    330 
    331 /**
    332  * xsltFreeLocale:
    333  * @locale: the locale to free
    334  *
    335  * Frees a locale created with xsltNewLocale
    336  */
    337 void
    338 xsltFreeLocale(xsltLocale locale) {
    339 #ifdef XSLT_LOCALE_XLOCALE
    340     freelocale(locale);
    341 #endif
    342 }
    343 
    344 /**
    345  * xsltStrxfrm:
    346  * @locale: locale created with xsltNewLocale
    347  * @string: UTF-8 string to transform
    348  *
    349  * Transforms a string according to locale. The transformed string must then be
    350  * compared with xsltLocaleStrcmp and freed with xmlFree.
    351  *
    352  * Returns the transformed string or NULL on error
    353  */
    354 xsltLocaleChar *
    355 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
    356 {
    357 #ifdef XSLT_LOCALE_NONE
    358     return(NULL);
    359 #else
    360     size_t xstrlen, r;
    361     xsltLocaleChar *xstr;
    362 
    363 #ifdef XSLT_LOCALE_XLOCALE
    364     xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
    365     xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
    366     if (xstr == NULL) {
    367 	xsltTransformError(NULL, NULL, NULL,
    368 	    "xsltStrxfrm : out of memory error\n");
    369 	return(NULL);
    370     }
    371 
    372     r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
    373 #endif
    374 
    375 #ifdef XSLT_LOCALE_WINAPI
    376     xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
    377     if (xstrlen == 0) {
    378         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
    379         return(NULL);
    380     }
    381     xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
    382     if (xstr == NULL) {
    383         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
    384         return(NULL);
    385     }
    386     r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
    387     if (r == 0) {
    388         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
    389         xmlFree(xstr);
    390         return(NULL);
    391     }
    392     return(xstr);
    393 #endif /* XSLT_LOCALE_WINAPI */
    394 
    395     if (r >= xstrlen) {
    396 	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
    397         xmlFree(xstr);
    398         return(NULL);
    399     }
    400 
    401     return(xstr);
    402 #endif /* XSLT_LOCALE_NONE */
    403 }
    404 
    405 /**
    406  * xsltLocaleStrcmp:
    407  * @locale: a locale identifier
    408  * @str1: a string transformed with xsltStrxfrm
    409  * @str2: a string transformed with xsltStrxfrm
    410  *
    411  * Compares two strings transformed with xsltStrxfrm
    412  *
    413  * Returns a value < 0 if str1 sorts before str2,
    414  *         a value > 0 if str1 sorts after str2,
    415  *         0 if str1 and str2 are equal wrt sorting
    416  */
    417 int
    418 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
    419     (void)locale;
    420 #ifdef XSLT_LOCALE_WINAPI
    421 {
    422     int ret;
    423     if (str1 == str2) return(0);
    424     if (str1 == NULL) return(-1);
    425     if (str2 == NULL) return(1);
    426     ret = CompareStringW(locale, 0, str1, -1, str2, -1);
    427     if (ret == 0) {
    428         xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
    429         return(0);
    430     }
    431     return(ret - 2);
    432 }
    433 #else
    434     return(xmlStrcmp(str1, str2));
    435 #endif
    436 }
    437 
    438 #ifdef XSLT_LOCALE_WINAPI
    439 /**
    440  * xsltCountSupportedLocales:
    441  * @lcid: not used
    442  *
    443  * callback used to count locales
    444  *
    445  * Returns TRUE
    446  */
    447 BOOL CALLBACK
    448 xsltCountSupportedLocales(LPSTR lcid) {
    449     (void) lcid;
    450     ++xsltLocaleListSize;
    451     return(TRUE);
    452 }
    453 
    454 /**
    455  * xsltIterateSupportedLocales:
    456  * @lcid: not used
    457  *
    458  * callback used to track locales
    459  *
    460  * Returns TRUE if not at the end of the array
    461  */
    462 BOOL CALLBACK
    463 xsltIterateSupportedLocales(LPSTR lcid) {
    464     static int count = 0;
    465     xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
    466     xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
    467     int        k, l;
    468     xsltRFC1766Info *p = xsltLocaleList + count;
    469 
    470     k = sscanf(lcid, "%lx", (long*)&p->lcid);
    471     if (k < 1) goto end;
    472     /*don't count terminating null character*/
    473     k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
    474     if (--k < 1) goto end;
    475     l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
    476     if (--l < 1) goto end;
    477 
    478     {  /*fill results*/
    479 	xmlChar    *q = p->tag;
    480 	memcpy(q, iso639lang, k);
    481 	q += k;
    482 	*q++ = '-';
    483 	memcpy(q, iso3136ctry, l);
    484 	q += l;
    485 	*q = '\0';
    486     }
    487     ++count;
    488 end:
    489     return((count < xsltLocaleListSize) ? TRUE : FALSE);
    490 }
    491 
    492 
    493 static void
    494 xsltEnumSupportedLocales(void) {
    495     xmlRMutexLock(xsltLocaleMutex);
    496     if (xsltLocaleListSize <= 0) {
    497 	size_t len;
    498 
    499 	EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
    500 
    501 	len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
    502 	xsltLocaleList = xmlMalloc(len);
    503 	memset(xsltLocaleList, 0, len);
    504 	EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
    505     }
    506     xmlRMutexUnlock(xsltLocaleMutex);
    507 }
    508 
    509 #endif /*def XSLT_LOCALE_WINAPI*/
    510