Home | History | Annotate | Download | only in binutils
      1 /* winduni.c -- unicode support for the windres program.
      2    Copyright (C) 1997-2016 Free Software Foundation, Inc.
      3    Written by Ian Lance Taylor, Cygnus Support.
      4    Rewritten by Kai Tietz, Onevision.
      5 
      6    This file is part of GNU Binutils.
      7 
      8    This program is free software; you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 3 of the License, or
     11    (at your option) any later version.
     12 
     13    This program is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with this program; if not, write to the Free Software
     20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     21    02110-1301, USA.  */
     22 
     23 
     24 /* This file contains unicode support routines for the windres
     25    program.  Ideally, we would have generic unicode support which
     26    would work on all systems.  However, we don't.  Instead, on a
     27    Windows host, we are prepared to call some Windows routines.  This
     28    means that we will generate different output on Windows and Unix
     29    hosts, but that seems better than not really supporting unicode at
     30    all.  */
     31 
     32 #include "sysdep.h"
     33 #include "bfd.h"
     34 #include "libiberty.h" /* for xstrdup */
     35 #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
     37 #if defined (_WIN32) || defined (__CYGWIN__)
     38 #include <windows.h>
     39 #include <winnls.h>
     40 #endif
     41 #include "winduni.h"
     42 #include "safe-ctype.h"
     43 
     44 #if HAVE_ICONV
     45 #include <iconv.h>
     46 #endif
     47 
     48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
     49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
     50 static int unichar_isascii (const unichar *, rc_uint_type);
     51 
     52 /* Convert an ASCII string to a unicode string.  We just copy it,
     53    expanding chars to shorts, rather than doing something intelligent.  */
     54 
     55 #if !defined (_WIN32) && !defined (__CYGWIN__)
     56 
/* Codepages mapped.  Each entry pairs a Windows codepage number with
   the encoding name that wind_find_codepage_info returns for it (and
   that wind_iconv_cp passes on to iconv_open).  The table is searched
   linearly and is terminated by an entry whose codepage is
   (rc_uint_type) -1 and whose name is NULL.
   NOTE(review): entries 0 and 1 presumably stand for CP_ACP and CP_OEM
   -- confirm against the definitions in winduni.h.  */
static local_iconv_map codepages[] =
{
  { 0, "MS-ANSI" },
  { 1, "WINDOWS-1252" },
  { 437, "MS-ANSI" },
  { 737, "MS-GREEK" },
  { 775, "WINBALTRIM" },
  { 850, "MS-ANSI" },
  { 852, "MS-EE" },
  { 857, "MS-TURK" },
  { 862, "CP862" },
  { 864, "CP864" },
  { 866, "MS-CYRL" },
  { 874, "WINDOWS-874" },
  { 932, "CP932" },
  { 936, "CP936" },
  { 949, "CP949" },
  { 950, "CP950" },
  { 1250, "WINDOWS-1250" },
  { 1251, "WINDOWS-1251" },
  { 1252, "WINDOWS-1252" },
  { 1253, "WINDOWS-1253" },
  { 1254, "WINDOWS-1254" },
  { 1255, "WINDOWS-1255" },
  { 1256, "WINDOWS-1256" },
  { 1257, "WINDOWS-1257" },
  { 1258, "WINDOWS-1258" },
  { CP_UTF7, "UTF-7" },
  { CP_UTF8, "UTF-8" },
  { CP_UTF16, "UTF-16LE" },
  /* Terminator; wind_find_codepage_info stops here.  */
  { (rc_uint_type) -1, NULL }
};
     90 
/* Languages supported.  This is the lookup table behind
   wind_find_language_by_id on non-Windows hosts.  Each entry starts
   with the numeric language id that the lookup matches on, followed by
   two codepage numbers and two strings.
   NOTE(review): the remaining columns presumably are, in order, the
   doscp, wincp, name and country members of wind_language_t -- confirm
   against the structure declaration in winduni.h.
   The table is terminated by an entry whose id is (unsigned) -1.  */
static const wind_language_t languages[] =
{
  { 0x0000, 437, 1252, "Neutral", "Neutral" },
  { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
  { 0x0403, 850, 1252, "Catalan", "Spain" },	      { 0x0404, 950,  950, "Chinese", "Taiwan" },
  { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
  { 0x0407, 850, 1252, "German", "Germany" },	      { 0x0408, 737, 1253, "Greek", "Greece" },
  { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
  { 0x040B, 850, 1252, "Finnish", "Finland" },	      { 0x040C, 850, 1252, "French", "France" },
  { 0x040D, 862, 1255, "Hebrew", "Israel" },	      { 0x040E, 852, 1250, "Hungarian", "Hungary" },
  { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
  { 0x0411, 932,  932, "Japanese", "Japan" },	      { 0x0412, 949,  949, "Korean", "Korea (south)" },
  { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
  { 0x0415, 852, 1250, "Polish", "Poland" },	      { 0x0416, 850, 1252, "Portuguese", "Brazil" },
  { 0x0418, 852, 1250, "Romanian", "Romania" },	      { 0x0419, 866, 1251, "Russian", "Russia" },
  { 0x041A, 852, 1250, "Croatian", "Croatia" },	      { 0x041B, 852, 1250, "Slovak", "Slovakia" },
  { 0x041C, 852, 1250, "Albanian", "Albania" },	      { 0x041D, 850, 1252, "Swedish", "Sweden" },
  { 0x041E, 874,  874, "Thai", "Thailand" },	      { 0x041F, 857, 1254, "Turkish", "Turkey" },
  { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
  { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
  { 0x0425, 775, 1257, "Estonian", "Estonia" },	      { 0x0426, 775, 1257, "Latvian", "Latvia" },
  { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
  { 0x0429, 864, 1256, "Arabic", "Farsi" },	      { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
  { 0x042D, 850, 1252, "Basque", "Spain" },
  { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
  { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
  { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
  { 0x043C, 437, 1252, "Irish", "Ireland" },
  { 0x043E, 850, 1252, "Malay", "Malaysia" },
  { 0x0801, 864, 1256, "Arabic", "Iraq" },
  { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
  { 0x0807, 850, 1252, "German", "Switzerland" },
  { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
  { 0x080C, 850, 1252, "French", "Belgium" },
  { 0x0810, 850, 1252, "Italian", "Switzerland" },
  { 0x0813, 850, 1252, "Dutch", "Belgium" },	      { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
  { 0x0816, 850, 1252, "Portuguese", "Portugal" },
  { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
  { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
  { 0x0C01, 864, 1256, "Arabic", "Egypt" },
  { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
  { 0x0C07, 850, 1252, "German", "Austria" },
  { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
  { 0x0C0C, 850, 1252, "French", "Canada"},
  { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
  { 0x1001, 864, 1256, "Arabic", "Libya" },
  { 0x1004, 936,  936, "Chinese", "Singapore" },
  { 0x1007, 850, 1252, "German", "Luxembourg" },
  { 0x1009, 850, 1252, "English", "Canada" },
  { 0x100A, 850, 1252, "Spanish", "Guatemala" },
  { 0x100C, 850, 1252, "French", "Switzerland" },
  { 0x1401, 864, 1256, "Arabic", "Algeria" },
  { 0x1407, 850, 1252, "German", "Liechtenstein" },
  { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
  { 0x140C, 850, 1252, "French", "Luxembourg" },
  { 0x1801, 864, 1256, "Arabic", "Morocco" },
  { 0x1809, 850, 1252, "English", "Ireland" },	      { 0x180A, 850, 1252, "Spanish", "Panama" },
  { 0x180C, 850, 1252, "French", "Monaco" },
  { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
  { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
  { 0x2001, 864, 1256, "Arabic", "Oman" },
  { 0x2009, 850, 1252, "English", "Jamaica" },	      { 0x200A, 850, 1252, "Spanish", "Venezuela" },
  { 0x2401, 864, 1256, "Arabic", "Yemen" },
  { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
  { 0x2801, 864, 1256, "Arabic", "Syria" },
  { 0x2809, 850, 1252, "English", "Belize" },	      { 0x280A, 850, 1252, "Spanish", "Peru" },
  { 0x2C01, 864, 1256, "Arabic", "Jordan" },
  { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
  { 0x3001, 864, 1256, "Arabic", "Lebanon" },
  { 0x3009, 437, 1252, "English", "Zimbabwe" },	      { 0x300A, 850, 1252, "Spanish", "Ecuador" },
  { 0x3401, 864, 1256, "Arabic", "Kuwait" },
  { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
  { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
  { 0x380A, 850, 1252, "Spanish", "Uruguay" },
  { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
  { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
  { 0x4001, 864, 1256, "Arabic", "Qatar" },
  { 0x400A, 850, 1252, "Spanish", "Bolivia" },
  { 0x440A, 850, 1252, "Spanish", "El Salvador" },
  { 0x480A, 850, 1252, "Spanish", "Honduras" },
  { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
  { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
  /* Terminator; wind_find_language_by_id stops here.  */
  { (unsigned) -1,  0,      0, NULL, NULL }
};
    176 
    177 #endif
    178 
/* Specifies the default codepage to be used for unicode
   transformations.  It is initialized to CP_ACP.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  It is initialized to CP_ACP and is the codepage
   that unicode_from_ascii, unicode_from_ascii_len and
   ascii_from_unicode convert with.  */
rc_uint_type wind_current_codepage = CP_ACP;
    186 
/* Convert the NUL-terminated string ASCII to a unicode string, using
   the codepage in wind_current_codepage.  This simply forwards to
   unicode_from_codepage: the converted string is stored in *UNICODE
   and, when LENGTH is not NULL, its length is stored in *LENGTH.  */

void
unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
{
  unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
}
    195 
    196 /* Convert an ASCII string with length A_LENGTH to a unicode string.  We just
    197    copy it, expanding chars to shorts, rather than doing something intelligent.
    198    This routine converts also \0 within a string.  */
    199 
    200 void
    201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
    202 {
    203   char *tmp, *p;
    204   rc_uint_type tlen, elen, idx = 0;
    205 
    206   *unicode = NULL;
    207 
    208   if (!a_length)
    209     {
    210       if (length)
    211         *length = 0;
    212       return;
    213     }
    214 
    215   /* Make sure we have zero terminated string.  */
    216   p = tmp = (char *) xmalloc (a_length + 1);
    217   memcpy (tmp, ascii, a_length);
    218   tmp[a_length] = 0;
    219 
    220   while (a_length > 0)
    221     {
    222       unichar *utmp, *up;
    223 
    224       tlen = strlen (p);
    225 
    226       if (tlen > a_length)
    227         tlen = a_length;
    228       if (*p == 0)
    229         {
    230 	  /* Make room for one more character.  */
    231 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
    232 	  if (idx > 0)
    233 	    {
    234 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
    235 	    }
    236 	  *unicode = utmp;
    237 	  utmp[idx++] = 0;
    238 	  --a_length;
    239 	  p++;
    240 	  continue;
    241 	}
    242       utmp = NULL;
    243       elen = 0;
    244       elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
    245       if (elen)
    246 	{
    247 	  utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
    248 	  wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
    249 	  elen /= sizeof (unichar);
    250 	  elen --;
    251 	}
    252       else
    253         {
    254 	  /* Make room for one more character.  */
    255 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
    256 	  if (idx > 0)
    257 	    {
    258 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
    259 	    }
    260 	  *unicode = utmp;
    261 	  utmp[idx++] = ((unichar) *p) & 0xff;
    262 	  --a_length;
    263 	  p++;
    264 	  continue;
    265 	}
    266       p += tlen;
    267       a_length -= tlen;
    268 
    269       up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
    270       if (idx > 0)
    271 	memcpy (up, *unicode, idx * sizeof (unichar));
    272 
    273       *unicode = up;
    274       if (elen)
    275 	memcpy (&up[idx], utmp, sizeof (unichar) * elen);
    276 
    277       idx += elen;
    278     }
    279 
    280   if (length)
    281     *length = idx;
    282 
    283   free (tmp);
    284 }
    285 
/* Convert the NUL-terminated unicode string UNICODE to a string in
   the codepage given by wind_current_codepage.  This simply forwards
   to codepage_from_unicode: the converted string is stored in *ASCII
   and, when LENGTH is not NULL, its length is stored in *LENGTH.  */

void
ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
{
  codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
}
    295 
    296 /* Print the unicode string UNICODE to the file E.  LENGTH is the
    297    number of characters to print, or -1 if we should print until the
    298    end of the string.  FIXME: On a Windows host, we should be calling
    299    some Windows function, probably WideCharToMultiByte.  */
    300 
    301 void
    302 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
    303 {
    304   while (1)
    305     {
    306       unichar ch;
    307 
    308       if (length == 0)
    309 	return;
    310       if ((bfd_signed_vma) length > 0)
    311 	--length;
    312 
    313       ch = *unicode;
    314 
    315       if (ch == 0 && (bfd_signed_vma) length < 0)
    316 	return;
    317 
    318       ++unicode;
    319 
    320       if ((ch & 0x7f) == ch)
    321 	{
    322 	  if (ch == '\\')
    323 	    fputs ("\\\\", e);
    324 	  else if (ch == '"')
    325 	    fputs ("\"\"", e);
    326 	  else if (ISPRINT (ch))
    327 	    putc (ch, e);
    328 	  else
    329 	    {
    330 	      switch (ch)
    331 		{
    332 		case ESCAPE_A:
    333 		  fputs ("\\a", e);
    334 		  break;
    335 
    336 		case ESCAPE_B:
    337 		  fputs ("\\b", e);
    338 		  break;
    339 
    340 		case ESCAPE_F:
    341 		  fputs ("\\f", e);
    342 		  break;
    343 
    344 		case ESCAPE_N:
    345 		  fputs ("\\n", e);
    346 		  break;
    347 
    348 		case ESCAPE_R:
    349 		  fputs ("\\r", e);
    350 		  break;
    351 
    352 		case ESCAPE_T:
    353 		  fputs ("\\t", e);
    354 		  break;
    355 
    356 		case ESCAPE_V:
    357 		  fputs ("\\v", e);
    358 		  break;
    359 
    360 		default:
    361 		  fprintf (e, "\\%03o", (unsigned int) ch);
    362 		  break;
    363 		}
    364 	    }
    365 	}
    366       else if ((ch & 0xff) == ch)
    367 	fprintf (e, "\\%03o", (unsigned int) ch);
    368       else
    369 	fprintf (e, "\\x%04x", (unsigned int) ch);
    370     }
    371 }
    372 
    373 /* Print a unicode string to a file.  */
    374 
    375 void
    376 ascii_print (FILE *e, const char *s, rc_uint_type length)
    377 {
    378   while (1)
    379     {
    380       char ch;
    381 
    382       if (length == 0)
    383 	return;
    384       if ((bfd_signed_vma) length > 0)
    385 	--length;
    386 
    387       ch = *s;
    388 
    389       if (ch == 0 && (bfd_signed_vma) length < 0)
    390 	return;
    391 
    392       ++s;
    393 
    394       if ((ch & 0x7f) == ch)
    395 	{
    396 	  if (ch == '\\')
    397 	    fputs ("\\\\", e);
    398 	  else if (ch == '"')
    399 	    fputs ("\"\"", e);
    400 	  else if (ISPRINT (ch))
    401 	    putc (ch, e);
    402 	  else
    403 	    {
    404 	      switch (ch)
    405 		{
    406 		case ESCAPE_A:
    407 		  fputs ("\\a", e);
    408 		  break;
    409 
    410 		case ESCAPE_B:
    411 		  fputs ("\\b", e);
    412 		  break;
    413 
    414 		case ESCAPE_F:
    415 		  fputs ("\\f", e);
    416 		  break;
    417 
    418 		case ESCAPE_N:
    419 		  fputs ("\\n", e);
    420 		  break;
    421 
    422 		case ESCAPE_R:
    423 		  fputs ("\\r", e);
    424 		  break;
    425 
    426 		case ESCAPE_T:
    427 		  fputs ("\\t", e);
    428 		  break;
    429 
    430 		case ESCAPE_V:
    431 		  fputs ("\\v", e);
    432 		  break;
    433 
    434 		default:
    435 		  fprintf (e, "\\%03o", (unsigned int) ch);
    436 		  break;
    437 		}
    438 	    }
    439 	}
    440       else
    441 	fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
    442     }
    443 }
    444 
    445 rc_uint_type
    446 unichar_len (const unichar *unicode)
    447 {
    448   rc_uint_type r = 0;
    449 
    450   if (unicode)
    451     while (unicode[r] != 0)
    452       r++;
    453   else
    454     --r;
    455   return r;
    456 }
    457 
    458 unichar *
    459 unichar_dup (const unichar *unicode)
    460 {
    461   unichar *r;
    462   int len;
    463 
    464   if (! unicode)
    465     return NULL;
    466   for (len = 0; unicode[len] != 0; ++len)
    467     ;
    468   ++len;
    469   r = ((unichar *) res_alloc (len * sizeof (unichar)));
    470   memcpy (r, unicode, len * sizeof (unichar));
    471   return r;
    472 }
    473 
    474 unichar *
    475 unichar_dup_uppercase (const unichar *u)
    476 {
    477   unichar *r = unichar_dup (u);
    478   int i;
    479 
    480   if (! r)
    481     return NULL;
    482 
    483   for (i = 0; r[i] != 0; ++i)
    484     {
    485       if (r[i] >= 'a' && r[i] <= 'z')
    486 	r[i] &= 0xdf;
    487     }
    488   return r;
    489 }
    490 
    491 static int
    492 unichar_isascii (const unichar *u, rc_uint_type len)
    493 {
    494   rc_uint_type i;
    495 
    496   if ((bfd_signed_vma) len < 0)
    497     {
    498       if (u)
    499 	len = (rc_uint_type) unichar_len (u);
    500       else
    501 	len = 0;
    502     }
    503 
    504   for (i = 0; i < len; i++)
    505     if ((u[i] & 0xff80) != 0)
    506       return 0;
    507   return 1;
    508 }
    509 
    510 void
    511 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
    512 {
    513   if (! unichar_isascii (u, len))
    514     fputc ('L', e);
    515   fputc ('"', e);
    516   unicode_print (e, u, len);
    517   fputc ('"', e);
    518 }
    519 
    520 int
    521 unicode_is_valid_codepage (rc_uint_type cp)
    522 {
    523   if ((cp & 0xffff) != cp)
    524     return 0;
    525   if (cp == CP_UTF16 || cp == CP_ACP)
    526     return 1;
    527 
    528 #if !defined (_WIN32) && !defined (__CYGWIN__)
    529   if (! wind_find_codepage_info (cp))
    530     return 0;
    531   return 1;
    532 #else
    533   return !! IsValidCodePage ((UINT) cp);
    534 #endif
    535 }
    536 
    537 #if defined (_WIN32) || defined (__CYGWIN__)
    538 
    539 #define max_cp_string_len 6
    540 
    541 static unsigned int
    542 codepage_from_langid (unsigned short langid)
    543 {
    544   char cp_string [max_cp_string_len];
    545   int c;
    546 
    547   memset (cp_string, 0, max_cp_string_len);
    548   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
    549      but is unavailable on Win95.  */
    550   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    551   		      LOCALE_IDEFAULTANSICODEPAGE,
    552   		      cp_string, max_cp_string_len);
    553   /* If codepage data for an LCID is not installed on users's system,
    554      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    555      default. */
    556   if (c == 0)
    557     return CP_ACP;
    558   return strtoul (cp_string, 0, 10);
    559 }
    560 
    561 static unsigned int
    562 wincodepage_from_langid (unsigned short langid)
    563 {
    564   char cp_string [max_cp_string_len];
    565   int c;
    566 
    567   memset (cp_string, 0, max_cp_string_len);
    568   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
    569      but is unavailable on Win95.  */
    570   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    571 		      LOCALE_IDEFAULTCODEPAGE,
    572 		      cp_string, max_cp_string_len);
    573   /* If codepage data for an LCID is not installed on users's system,
    574      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    575      default. */
    576   if (c == 0)
    577     return CP_OEM;
    578   return strtoul (cp_string, 0, 10);
    579 }
    580 
    581 static char *
    582 lang_from_langid (unsigned short langid)
    583 {
    584   char cp_string[261];
    585   int c;
    586 
    587   memset (cp_string, 0, 261);
    588   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    589   		      LOCALE_SENGLANGUAGE,
    590   		      cp_string, 260);
    591   /* If codepage data for an LCID is not installed on users's system,
    592      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    593      default. */
    594   if (c == 0)
    595     strcpy (cp_string, "Neutral");
    596   return xstrdup (cp_string);
    597 }
    598 
    599 static char *
    600 country_from_langid (unsigned short langid)
    601 {
    602   char cp_string[261];
    603   int c;
    604 
    605   memset (cp_string, 0, 261);
    606   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    607   		      LOCALE_SENGCOUNTRY,
    608   		      cp_string, 260);
    609   /* If codepage data for an LCID is not installed on users's system,
    610      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    611      default. */
    612   if (c == 0)
    613     strcpy (cp_string, "Neutral");
    614   return xstrdup (cp_string);
    615 }
    616 
    617 #endif
    618 
    619 const wind_language_t *
    620 wind_find_language_by_id (unsigned id)
    621 {
    622 #if !defined (_WIN32) && !defined (__CYGWIN__)
    623   int i;
    624 
    625   if (! id)
    626     return NULL;
    627   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
    628     ;
    629   if (languages[i].id == id)
    630     return &languages[i];
    631   return NULL;
    632 #else
    633   static wind_language_t wl;
    634 
    635   wl.id = id;
    636   wl.doscp = codepage_from_langid ((unsigned short) id);
    637   wl.wincp = wincodepage_from_langid ((unsigned short) id);
    638   wl.name = lang_from_langid ((unsigned short) id);
    639   wl.country = country_from_langid ((unsigned short) id);
    640 
    641   return & wl;
    642 #endif
    643 }
    644 
    645 const local_iconv_map *
    646 wind_find_codepage_info (unsigned cp)
    647 {
    648 #if !defined (_WIN32) && !defined (__CYGWIN__)
    649   int i;
    650 
    651   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
    652     ;
    653   if (codepages[i].codepage == (rc_uint_type) -1)
    654     return NULL;
    655   return &codepages[i];
    656 #else
    657   static local_iconv_map lim;
    658   if (!unicode_is_valid_codepage (cp))
    659   	return NULL;
    660   lim.codepage = cp;
    661   lim.iconv_name = "";
    662   return & lim;
    663 #endif
    664 }
    665 
    666 /* Convert an Codepage string to a unicode string.  */
    667 
    668 void
    669 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
    670 {
    671   rc_uint_type len;
    672 
    673   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
    674   if (len)
    675     {
    676       *u = ((unichar *) res_alloc (len));
    677       wind_MultiByteToWideChar (cp, src, *u, len);
    678     }
    679   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
    680      this will set *length to -1.  */
    681   len -= sizeof (unichar);
    682 
    683   if (length != NULL)
    684     *length = len / sizeof (unichar);
    685 }
    686 
    687 /* Convert an unicode string to an codepage string.  */
    688 
    689 void
    690 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
    691 {
    692   rc_uint_type len;
    693 
    694   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
    695   if (len)
    696     {
    697       *ascii = (char *) res_alloc (len * sizeof (char));
    698       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
    699     }
    700   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
    701      this will set *length to -1.  */
    702   len--;
    703 
    704   if (length != NULL)
    705     *length = len;
    706 }
    707 
    708 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
    709 static int
    710 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
    711 {
    712   int i;
    713 
    714   for (i = 1; i <= 32; i++)
    715     {
    716       char *tmp_d = d;
    717       ICONV_CONST char *tmp_s = s;
    718       size_t ret;
    719       size_t s_left = (size_t) i;
    720       size_t d_left = (size_t) d_len;
    721 
    722       ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
    723 
    724       if (ret != (size_t) -1)
    725 	{
    726 	  *n_s = tmp_s;
    727 	  *n_d = tmp_d;
    728 	  return 0;
    729 	}
    730     }
    731 
    732   return 1;
    733 }
    734 
    735 static const char *
    736 wind_iconv_cp (rc_uint_type cp)
    737 {
    738   const local_iconv_map *lim = wind_find_codepage_info (cp);
    739 
    740   if (!lim)
    741     return NULL;
    742   return lim->iconv_name;
    743 }
    744 #endif /* HAVE_ICONV */
    745 
    746 static rc_uint_type
    747 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
    748 			  unichar *u, rc_uint_type u_len)
    749 {
    750   rc_uint_type ret = 0;
    751 
    752 #if defined (_WIN32) || defined (__CYGWIN__)
    753   rc_uint_type conv_flags = MB_PRECOMPOSED;
    754 
    755   /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
    756      MultiByteToWideChar will set the last error to
    757      ERROR_INVALID_FLAGS if we do. */
    758   if (cp == CP_UTF8 || cp == CP_UTF7)
    759     conv_flags = 0;
    760 
    761   ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
    762 					    mb, -1, u, u_len);
    763   /* Convert to bytes. */
    764   ret *= sizeof (unichar);
    765 
    766 #elif defined (HAVE_ICONV)
    767   int first = 1;
    768   char tmp[32];
    769   char *p_tmp;
    770   const char *iconv_name = wind_iconv_cp (cp);
    771 
    772   if (!mb || !iconv_name)
    773     return 0;
    774   iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
    775 
    776   while (1)
    777     {
    778       int iret;
    779       const char *n_mb = "";
    780       char *n_tmp = "";
    781 
    782       p_tmp = tmp;
    783       iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
    784       if (first)
    785 	{
    786 	  first = 0;
    787 	  continue;
    788 	}
    789       if (!iret)
    790 	{
    791 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
    792 
    793 	  if (u)
    794 	    {
    795 	      if ((size_t) u_len < l_tmp)
    796 		break;
    797 	      memcpy (u, tmp, l_tmp);
    798 	      u += l_tmp/2;
    799 	      u_len -= l_tmp;
    800 	    }
    801 	  ret += l_tmp;
    802 	}
    803       else
    804 	break;
    805       if (tmp[0] == 0 && tmp[1] == 0)
    806 	break;
    807       mb = n_mb;
    808     }
    809   iconv_close (cd);
    810 #else
    811   if (cp)
    812     ret = 0;
    813   ret = strlen (mb) + 1;
    814   ret *= sizeof (unichar);
    815   if (u != NULL && u_len != 0)
    816     {
    817       do
    818 	{
    819 	  *u++ = ((unichar) *mb) & 0xff;
    820 	  --u_len; mb++;
    821 	}
    822       while (u_len != 0 && mb[-1] != 0);
    823     }
    824   if (u != NULL && u_len != 0)
    825     *u = 0;
    826 #endif
    827   return ret;
    828 }
    829 
    830 static rc_uint_type
    831 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
    832 {
    833   rc_uint_type ret = 0;
    834 #if defined (_WIN32) || defined (__CYGWIN__)
    835   WINBOOL used_def = FALSE;
    836 
    837   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
    838 				      	    NULL, & used_def);
    839 #elif defined (HAVE_ICONV)
    840   int first = 1;
    841   char tmp[32];
    842   char *p_tmp;
    843   const char *iconv_name = wind_iconv_cp (cp);
    844 
    845   if (!u || !iconv_name)
    846     return 0;
    847   iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
    848 
    849   while (1)
    850     {
    851       int iret;
    852       const char *n_u = "";
    853       char *n_tmp = "";
    854 
    855       p_tmp = tmp;
    856       iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
    857       if (first)
    858 	{
    859 	  first = 0;
    860 	  continue;
    861 	}
    862       if (!iret)
    863 	{
    864 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
    865 
    866 	  if (mb)
    867 	    {
    868 	      if ((size_t) mb_len < l_tmp)
    869 		break;
    870 	      memcpy (mb, tmp, l_tmp);
    871 	      mb += l_tmp;
    872 	      mb_len -= l_tmp;
    873 	    }
    874 	  ret += l_tmp;
    875 	}
    876       else
    877 	break;
    878       if (u[0] == 0)
    879 	break;
    880       u = (const unichar *) n_u;
    881     }
    882   iconv_close (cd);
    883 #else
    884   if (cp)
    885     ret = 0;
    886 
    887   while (u[ret] != 0)
    888     ++ret;
    889 
    890   ++ret;
    891 
    892   if (mb)
    893     {
    894       while (*u != 0 && mb_len != 0)
    895 	{
    896 	  if (u[0] == (u[0] & 0x7f))
    897 	    *mb++ = (char) u[0];
    898 	  else
    899 	    *mb++ = '_';
    900 	  ++u; --mb_len;
    901 	}
    902       if (mb_len != 0)
    903 	*mb = 0;
    904     }
    905 #endif
    906   return ret;
    907 }
    908