Home | History | Annotate | Download | only in binutils
      1 /* winduni.c -- unicode support for the windres program.
      2    Copyright (C) 1997-2014 Free Software Foundation, Inc.
      3    Written by Ian Lance Taylor, Cygnus Support.
      4    Rewritten by Kai Tietz, Onevision.
      5 
      6    This file is part of GNU Binutils.
      7 
      8    This program is free software; you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 3 of the License, or
     11    (at your option) any later version.
     12 
     13    This program is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with this program; if not, write to the Free Software
     20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     21    02110-1301, USA.  */
     22 
     23 
     24 /* This file contains unicode support routines for the windres
     25    program.  Ideally, we would have generic unicode support which
     26    would work on all systems.  However, we don't.  Instead, on a
     27    Windows host, we are prepared to call some Windows routines.  This
     28    means that we will generate different output on Windows and Unix
     29    hosts, but that seems better than not really supporting unicode at
     30    all.  */
     31 
     32 #include "sysdep.h"
     33 #include "bfd.h"
     34 #include "libiberty.h" /* for xstrdup */
     35 #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
     37 #if defined (_WIN32) || defined (__CYGWIN__)
     38 #include <windows.h>
     39 #include <winnls.h>
     40 #endif
     41 #include "winduni.h"
     42 #include "safe-ctype.h"
     43 
     44 #if HAVE_ICONV
     45 #include <iconv.h>
     46 #endif
     47 
     48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
     49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
     50 static int unichar_isascii (const unichar *, rc_uint_type);
     51 
/* Static lookup tables used on hosts without the Windows NLS API:
   Windows codepage numbers mapped to iconv encoding names, and the
   known language identifiers with their DOS and Windows codepages.  */
     54 
     55 #if !defined (_WIN32) && !defined (__CYGWIN__)
     56 
     57 /* Codepages mapped.  */
     58 static local_iconv_map codepages[] =
     59 {
     60   { 0, "MS-ANSI" },
     61   { 1, "WINDOWS-1252" },
     62   { 437, "MS-ANSI" },
     63   { 737, "MS-GREEK" },
     64   { 775, "WINBALTRIM" },
     65   { 850, "MS-ANSI" },
     66   { 852, "MS-EE" },
     67   { 857, "MS-TURK" },
     68   { 862, "CP862" },
     69   { 864, "CP864" },
     70   { 866, "MS-CYRL" },
     71   { 874, "WINDOWS-874" },
     72   { 932, "CP932" },
     73   { 936, "CP936" },
     74   { 949, "CP949" },
     75   { 950, "CP950" },
     76   { 1250, "WINDOWS-1250" },
     77   { 1251, "WINDOWS-1251" },
     78   { 1252, "WINDOWS-1252" },
     79   { 1253, "WINDOWS-1253" },
     80   { 1254, "WINDOWS-1254" },
     81   { 1255, "WINDOWS-1255" },
     82   { 1256, "WINDOWS-1256" },
     83   { 1257, "WINDOWS-1257" },
     84   { 1258, "WINDOWS-1258" },
     85   { CP_UTF7, "UTF-7" },
     86   { CP_UTF8, "UTF-8" },
     87   { CP_UTF16, "UTF-16LE" },
     88   { (rc_uint_type) -1, NULL }
     89 };
     90 
     91 /* Languages supported.  */
     92 static const wind_language_t languages[] =
     93 {
     94   { 0x0000, 437, 1252, "Neutral", "Neutral" },
     95   { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
     96   { 0x0403, 850, 1252, "Catalan", "Spain" },	      { 0x0404, 950,  950, "Chinese", "Taiwan" },
     97   { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
     98   { 0x0407, 850, 1252, "German", "Germany" },	      { 0x0408, 737, 1253, "Greek", "Greece" },
     99   { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
    100   { 0x040B, 850, 1252, "Finnish", "Finland" },	      { 0x040C, 850, 1252, "French", "France" },
    101   { 0x040D, 862, 1255, "Hebrew", "Israel" },	      { 0x040E, 852, 1250, "Hungarian", "Hungary" },
    102   { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
    103   { 0x0411, 932,  932, "Japanese", "Japan" },	      { 0x0412, 949,  949, "Korean", "Korea (south)" },
    104   { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
    105   { 0x0415, 852, 1250, "Polish", "Poland" },	      { 0x0416, 850, 1252, "Portuguese", "Brazil" },
    106   { 0x0418, 852, 1250, "Romanian", "Romania" },	      { 0x0419, 866, 1251, "Russian", "Russia" },
    107   { 0x041A, 852, 1250, "Croatian", "Croatia" },	      { 0x041B, 852, 1250, "Slovak", "Slovakia" },
    108   { 0x041C, 852, 1250, "Albanian", "Albania" },	      { 0x041D, 850, 1252, "Swedish", "Sweden" },
    109   { 0x041E, 874,  874, "Thai", "Thailand" },	      { 0x041F, 857, 1254, "Turkish", "Turkey" },
    110   { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
    111   { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
    112   { 0x0425, 775, 1257, "Estonian", "Estonia" },	      { 0x0426, 775, 1257, "Latvian", "Latvia" },
    113   { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
    114   { 0x0429, 864, 1256, "Arabic", "Farsi" },	      { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
    115   { 0x042D, 850, 1252, "Basque", "Spain" },
    116   { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
    117   { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
    118   { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
    119   { 0x043C, 437, 1252, "Irish", "Ireland" },
    120   { 0x043E, 850, 1252, "Malay", "Malaysia" },
    121   { 0x0801, 864, 1256, "Arabic", "Iraq" },
    122   { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
    123   { 0x0807, 850, 1252, "German", "Switzerland" },
    124   { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
    125   { 0x080C, 850, 1252, "French", "Belgium" },
    126   { 0x0810, 850, 1252, "Italian", "Switzerland" },
    127   { 0x0813, 850, 1252, "Dutch", "Belgium" },	      { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
    128   { 0x0816, 850, 1252, "Portuguese", "Portugal" },
    129   { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
    130   { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
    131   { 0x0C01, 864, 1256, "Arabic", "Egypt" },
    132   { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
    133   { 0x0C07, 850, 1252, "German", "Austria" },
    134   { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
    135   { 0x0C0C, 850, 1252, "French", "Canada"},
    136   { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
    137   { 0x1001, 864, 1256, "Arabic", "Libya" },
    138   { 0x1004, 936,  936, "Chinese", "Singapore" },
    139   { 0x1007, 850, 1252, "German", "Luxembourg" },
    140   { 0x1009, 850, 1252, "English", "Canada" },
    141   { 0x100A, 850, 1252, "Spanish", "Guatemala" },
    142   { 0x100C, 850, 1252, "French", "Switzerland" },
    143   { 0x1401, 864, 1256, "Arabic", "Algeria" },
    144   { 0x1407, 850, 1252, "German", "Liechtenstein" },
    145   { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
    146   { 0x140C, 850, 1252, "French", "Luxembourg" },
    147   { 0x1801, 864, 1256, "Arabic", "Morocco" },
    148   { 0x1809, 850, 1252, "English", "Ireland" },	      { 0x180A, 850, 1252, "Spanish", "Panama" },
    149   { 0x180C, 850, 1252, "French", "Monaco" },
    150   { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
    151   { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
    152   { 0x2001, 864, 1256, "Arabic", "Oman" },
    153   { 0x2009, 850, 1252, "English", "Jamaica" },	      { 0x200A, 850, 1252, "Spanish", "Venezuela" },
    154   { 0x2401, 864, 1256, "Arabic", "Yemen" },
    155   { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
    156   { 0x2801, 864, 1256, "Arabic", "Syria" },
    157   { 0x2809, 850, 1252, "English", "Belize" },	      { 0x280A, 850, 1252, "Spanish", "Peru" },
    158   { 0x2C01, 864, 1256, "Arabic", "Jordan" },
    159   { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
    160   { 0x3001, 864, 1256, "Arabic", "Lebanon" },
    161   { 0x3009, 437, 1252, "English", "Zimbabwe" },	      { 0x300A, 850, 1252, "Spanish", "Ecuador" },
    162   { 0x3401, 864, 1256, "Arabic", "Kuwait" },
    163   { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
    164   { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
    165   { 0x380A, 850, 1252, "Spanish", "Uruguay" },
    166   { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
    167   { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
    168   { 0x4001, 864, 1256, "Arabic", "Qatar" },
    169   { 0x400A, 850, 1252, "Spanish", "Bolivia" },
    170   { 0x440A, 850, 1252, "Spanish", "El Salvador" },
    171   { 0x480A, 850, 1252, "Spanish", "Honduras" },
    172   { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
    173   { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
    174   { (unsigned) -1,  0,      0, NULL, NULL }
    175 };
    176 
    177 #endif
    178 
/* Specifies the default codepage to be used for unicode
   transformations.  By default this is CP_ACP.  It is defined here
   with external linkage but is not read by any code visible in this
   file.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  By default this is CP_ACP.  This is the codepage
   that unicode_from_ascii, unicode_from_ascii_len and
   ascii_from_unicode convert with.  */
rc_uint_type wind_current_codepage = CP_ACP;
    186 
    187 /* Convert an ASCII string to a unicode string.  We just copy it,
    188    expanding chars to shorts, rather than doing something intelligent.  */
    189 
    190 void
    191 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
    192 {
    193   unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
    194 }
    195 
    196 /* Convert an ASCII string with length A_LENGTH to a unicode string.  We just
    197    copy it, expanding chars to shorts, rather than doing something intelligent.
    198    This routine converts also \0 within a string.  */
    199 
    200 void
    201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
    202 {
    203   char *tmp, *p;
    204   rc_uint_type tlen, elen, idx = 0;
    205 
    206   *unicode = NULL;
    207 
    208   if (!a_length)
    209     {
    210       if (length)
    211         *length = 0;
    212       return;
    213     }
    214 
    215   /* Make sure we have zero terminated string.  */
    216   p = tmp = (char *) alloca (a_length + 1);
    217   memcpy (tmp, ascii, a_length);
    218   tmp[a_length] = 0;
    219 
    220   while (a_length > 0)
    221     {
    222       unichar *utmp, *up;
    223 
    224       tlen = strlen (p);
    225 
    226       if (tlen > a_length)
    227         tlen = a_length;
    228       if (*p == 0)
    229         {
    230 	  /* Make room for one more character.  */
    231 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
    232 	  if (idx > 0)
    233 	    {
    234 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
    235 	    }
    236 	  *unicode = utmp;
    237 	  utmp[idx++] = 0;
    238 	  --a_length;
    239 	  p++;
    240 	  continue;
    241 	}
    242       utmp = NULL;
    243       elen = 0;
    244       elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
    245       if (elen)
    246 	{
    247 	  utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
    248 	  wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
    249 	  elen /= sizeof (unichar);
    250 	  elen --;
    251 	}
    252       else
    253         {
    254 	  /* Make room for one more character.  */
    255 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
    256 	  if (idx > 0)
    257 	    {
    258 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
    259 	    }
    260 	  *unicode = utmp;
    261 	  utmp[idx++] = ((unichar) *p) & 0xff;
    262 	  --a_length;
    263 	  p++;
    264 	  continue;
    265 	}
    266       p += tlen;
    267       a_length -= tlen;
    268 
    269       up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
    270       if (idx > 0)
    271 	memcpy (up, *unicode, idx * sizeof (unichar));
    272 
    273       *unicode = up;
    274       if (elen)
    275 	memcpy (&up[idx], utmp, sizeof (unichar) * elen);
    276 
    277       idx += elen;
    278     }
    279 
    280   if (length)
    281     *length = idx;
    282 }
    283 
    284 /* Convert an unicode string to an ASCII string.  We just copy it,
    285    shrink shorts to chars, rather than doing something intelligent.
    286    Shorts with not within the char range are replaced by '_'.  */
    287 
    288 void
    289 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
    290 {
    291   codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
    292 }
    293 
    294 /* Print the unicode string UNICODE to the file E.  LENGTH is the
    295    number of characters to print, or -1 if we should print until the
    296    end of the string.  FIXME: On a Windows host, we should be calling
    297    some Windows function, probably WideCharToMultiByte.  */
    298 
    299 void
    300 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
    301 {
    302   while (1)
    303     {
    304       unichar ch;
    305 
    306       if (length == 0)
    307 	return;
    308       if ((bfd_signed_vma) length > 0)
    309 	--length;
    310 
    311       ch = *unicode;
    312 
    313       if (ch == 0 && (bfd_signed_vma) length < 0)
    314 	return;
    315 
    316       ++unicode;
    317 
    318       if ((ch & 0x7f) == ch)
    319 	{
    320 	  if (ch == '\\')
    321 	    fputs ("\\\\", e);
    322 	  else if (ch == '"')
    323 	    fputs ("\"\"", e);
    324 	  else if (ISPRINT (ch))
    325 	    putc (ch, e);
    326 	  else
    327 	    {
    328 	      switch (ch)
    329 		{
    330 		case ESCAPE_A:
    331 		  fputs ("\\a", e);
    332 		  break;
    333 
    334 		case ESCAPE_B:
    335 		  fputs ("\\b", e);
    336 		  break;
    337 
    338 		case ESCAPE_F:
    339 		  fputs ("\\f", e);
    340 		  break;
    341 
    342 		case ESCAPE_N:
    343 		  fputs ("\\n", e);
    344 		  break;
    345 
    346 		case ESCAPE_R:
    347 		  fputs ("\\r", e);
    348 		  break;
    349 
    350 		case ESCAPE_T:
    351 		  fputs ("\\t", e);
    352 		  break;
    353 
    354 		case ESCAPE_V:
    355 		  fputs ("\\v", e);
    356 		  break;
    357 
    358 		default:
    359 		  fprintf (e, "\\%03o", (unsigned int) ch);
    360 		  break;
    361 		}
    362 	    }
    363 	}
    364       else if ((ch & 0xff) == ch)
    365 	fprintf (e, "\\%03o", (unsigned int) ch);
    366       else
    367 	fprintf (e, "\\x%04x", (unsigned int) ch);
    368     }
    369 }
    370 
    371 /* Print a unicode string to a file.  */
    372 
    373 void
    374 ascii_print (FILE *e, const char *s, rc_uint_type length)
    375 {
    376   while (1)
    377     {
    378       char ch;
    379 
    380       if (length == 0)
    381 	return;
    382       if ((bfd_signed_vma) length > 0)
    383 	--length;
    384 
    385       ch = *s;
    386 
    387       if (ch == 0 && (bfd_signed_vma) length < 0)
    388 	return;
    389 
    390       ++s;
    391 
    392       if ((ch & 0x7f) == ch)
    393 	{
    394 	  if (ch == '\\')
    395 	    fputs ("\\\\", e);
    396 	  else if (ch == '"')
    397 	    fputs ("\"\"", e);
    398 	  else if (ISPRINT (ch))
    399 	    putc (ch, e);
    400 	  else
    401 	    {
    402 	      switch (ch)
    403 		{
    404 		case ESCAPE_A:
    405 		  fputs ("\\a", e);
    406 		  break;
    407 
    408 		case ESCAPE_B:
    409 		  fputs ("\\b", e);
    410 		  break;
    411 
    412 		case ESCAPE_F:
    413 		  fputs ("\\f", e);
    414 		  break;
    415 
    416 		case ESCAPE_N:
    417 		  fputs ("\\n", e);
    418 		  break;
    419 
    420 		case ESCAPE_R:
    421 		  fputs ("\\r", e);
    422 		  break;
    423 
    424 		case ESCAPE_T:
    425 		  fputs ("\\t", e);
    426 		  break;
    427 
    428 		case ESCAPE_V:
    429 		  fputs ("\\v", e);
    430 		  break;
    431 
    432 		default:
    433 		  fprintf (e, "\\%03o", (unsigned int) ch);
    434 		  break;
    435 		}
    436 	    }
    437 	}
    438       else
    439 	fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
    440     }
    441 }
    442 
    443 rc_uint_type
    444 unichar_len (const unichar *unicode)
    445 {
    446   rc_uint_type r = 0;
    447 
    448   if (unicode)
    449     while (unicode[r] != 0)
    450       r++;
    451   else
    452     --r;
    453   return r;
    454 }
    455 
    456 unichar *
    457 unichar_dup (const unichar *unicode)
    458 {
    459   unichar *r;
    460   int len;
    461 
    462   if (! unicode)
    463     return NULL;
    464   for (len = 0; unicode[len] != 0; ++len)
    465     ;
    466   ++len;
    467   r = ((unichar *) res_alloc (len * sizeof (unichar)));
    468   memcpy (r, unicode, len * sizeof (unichar));
    469   return r;
    470 }
    471 
    472 unichar *
    473 unichar_dup_uppercase (const unichar *u)
    474 {
    475   unichar *r = unichar_dup (u);
    476   int i;
    477 
    478   if (! r)
    479     return NULL;
    480 
    481   for (i = 0; r[i] != 0; ++i)
    482     {
    483       if (r[i] >= 'a' && r[i] <= 'z')
    484 	r[i] &= 0xdf;
    485     }
    486   return r;
    487 }
    488 
    489 static int
    490 unichar_isascii (const unichar *u, rc_uint_type len)
    491 {
    492   rc_uint_type i;
    493 
    494   if ((bfd_signed_vma) len < 0)
    495     {
    496       if (u)
    497 	len = (rc_uint_type) unichar_len (u);
    498       else
    499 	len = 0;
    500     }
    501 
    502   for (i = 0; i < len; i++)
    503     if ((u[i] & 0xff80) != 0)
    504       return 0;
    505   return 1;
    506 }
    507 
    508 void
    509 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
    510 {
    511   if (! unichar_isascii (u, len))
    512     fputc ('L', e);
    513   fputc ('"', e);
    514   unicode_print (e, u, len);
    515   fputc ('"', e);
    516 }
    517 
    518 int
    519 unicode_is_valid_codepage (rc_uint_type cp)
    520 {
    521   if ((cp & 0xffff) != cp)
    522     return 0;
    523   if (cp == CP_UTF16 || cp == CP_ACP)
    524     return 1;
    525 
    526 #if !defined (_WIN32) && !defined (__CYGWIN__)
    527   if (! wind_find_codepage_info (cp))
    528     return 0;
    529   return 1;
    530 #else
    531   return !! IsValidCodePage ((UINT) cp);
    532 #endif
    533 }
    534 
    535 #if defined (_WIN32) || defined (__CYGWIN__)
    536 
    537 #define max_cp_string_len 6
    538 
    539 static unsigned int
    540 codepage_from_langid (unsigned short langid)
    541 {
    542   char cp_string [max_cp_string_len];
    543   int c;
    544 
    545   memset (cp_string, 0, max_cp_string_len);
    546   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
    547      but is unavailable on Win95.  */
    548   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    549   		      LOCALE_IDEFAULTANSICODEPAGE,
    550   		      cp_string, max_cp_string_len);
    551   /* If codepage data for an LCID is not installed on users's system,
    552      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    553      default. */
    554   if (c == 0)
    555     return CP_ACP;
    556   return strtoul (cp_string, 0, 10);
    557 }
    558 
    559 static unsigned int
    560 wincodepage_from_langid (unsigned short langid)
    561 {
    562   char cp_string [max_cp_string_len];
    563   int c;
    564 
    565   memset (cp_string, 0, max_cp_string_len);
    566   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
    567      but is unavailable on Win95.  */
    568   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    569 		      LOCALE_IDEFAULTCODEPAGE,
    570 		      cp_string, max_cp_string_len);
    571   /* If codepage data for an LCID is not installed on users's system,
    572      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    573      default. */
    574   if (c == 0)
    575     return CP_OEM;
    576   return strtoul (cp_string, 0, 10);
    577 }
    578 
    579 static char *
    580 lang_from_langid (unsigned short langid)
    581 {
    582   char cp_string[261];
    583   int c;
    584 
    585   memset (cp_string, 0, 261);
    586   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    587   		      LOCALE_SENGLANGUAGE,
    588   		      cp_string, 260);
    589   /* If codepage data for an LCID is not installed on users's system,
    590      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    591      default. */
    592   if (c == 0)
    593     strcpy (cp_string, "Neutral");
    594   return xstrdup (cp_string);
    595 }
    596 
    597 static char *
    598 country_from_langid (unsigned short langid)
    599 {
    600   char cp_string[261];
    601   int c;
    602 
    603   memset (cp_string, 0, 261);
    604   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    605   		      LOCALE_SENGCOUNTRY,
    606   		      cp_string, 260);
    607   /* If codepage data for an LCID is not installed on users's system,
    608      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    609      default. */
    610   if (c == 0)
    611     strcpy (cp_string, "Neutral");
    612   return xstrdup (cp_string);
    613 }
    614 
    615 #endif
    616 
    617 const wind_language_t *
    618 wind_find_language_by_id (unsigned id)
    619 {
    620 #if !defined (_WIN32) && !defined (__CYGWIN__)
    621   int i;
    622 
    623   if (! id)
    624     return NULL;
    625   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
    626     ;
    627   if (languages[i].id == id)
    628     return &languages[i];
    629   return NULL;
    630 #else
    631   static wind_language_t wl;
    632 
    633   wl.id = id;
    634   wl.doscp = codepage_from_langid ((unsigned short) id);
    635   wl.wincp = wincodepage_from_langid ((unsigned short) id);
    636   wl.name = lang_from_langid ((unsigned short) id);
    637   wl.country = country_from_langid ((unsigned short) id);
    638 
    639   return & wl;
    640 #endif
    641 }
    642 
    643 const local_iconv_map *
    644 wind_find_codepage_info (unsigned cp)
    645 {
    646 #if !defined (_WIN32) && !defined (__CYGWIN__)
    647   int i;
    648 
    649   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
    650     ;
    651   if (codepages[i].codepage == (rc_uint_type) -1)
    652     return NULL;
    653   return &codepages[i];
    654 #else
    655   static local_iconv_map lim;
    656   if (!unicode_is_valid_codepage (cp))
    657   	return NULL;
    658   lim.codepage = cp;
    659   lim.iconv_name = "";
    660   return & lim;
    661 #endif
    662 }
    663 
    664 /* Convert an Codepage string to a unicode string.  */
    665 
    666 void
    667 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
    668 {
    669   rc_uint_type len;
    670 
    671   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
    672   if (len)
    673     {
    674       *u = ((unichar *) res_alloc (len));
    675       wind_MultiByteToWideChar (cp, src, *u, len);
    676     }
    677   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
    678      this will set *length to -1.  */
    679   len -= sizeof (unichar);
    680 
    681   if (length != NULL)
    682     *length = len / sizeof (unichar);
    683 }
    684 
    685 /* Convert an unicode string to an codepage string.  */
    686 
    687 void
    688 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
    689 {
    690   rc_uint_type len;
    691 
    692   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
    693   if (len)
    694     {
    695       *ascii = (char *) res_alloc (len * sizeof (char));
    696       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
    697     }
    698   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
    699      this will set *length to -1.  */
    700   len--;
    701 
    702   if (length != NULL)
    703     *length = len;
    704 }
    705 
    706 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
    707 static int
    708 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
    709 {
    710   int i;
    711 
    712   for (i = 1; i <= 32; i++)
    713     {
    714       char *tmp_d = d;
    715       ICONV_CONST char *tmp_s = s;
    716       size_t ret;
    717       size_t s_left = (size_t) i;
    718       size_t d_left = (size_t) d_len;
    719 
    720       ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
    721 
    722       if (ret != (size_t) -1)
    723 	{
    724 	  *n_s = tmp_s;
    725 	  *n_d = tmp_d;
    726 	  return 0;
    727 	}
    728     }
    729 
    730   return 1;
    731 }
    732 
    733 static const char *
    734 wind_iconv_cp (rc_uint_type cp)
    735 {
    736   const local_iconv_map *lim = wind_find_codepage_info (cp);
    737 
    738   if (!lim)
    739     return NULL;
    740   return lim->iconv_name;
    741 }
    742 #endif /* HAVE_ICONV */
    743 
    744 static rc_uint_type
    745 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
    746 			  unichar *u, rc_uint_type u_len)
    747 {
    748   rc_uint_type ret = 0;
    749 
    750 #if defined (_WIN32) || defined (__CYGWIN__)
    751   rc_uint_type conv_flags = MB_PRECOMPOSED;
    752 
    753   /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
    754      MultiByteToWideChar will set the last error to
    755      ERROR_INVALID_FLAGS if we do. */
    756   if (cp == CP_UTF8 || cp == CP_UTF7)
    757     conv_flags = 0;
    758 
    759   ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
    760 					    mb, -1, u, u_len);
    761   /* Convert to bytes. */
    762   ret *= sizeof (unichar);
    763 
    764 #elif defined (HAVE_ICONV)
    765   int first = 1;
    766   char tmp[32];
    767   char *p_tmp;
    768   const char *iconv_name = wind_iconv_cp (cp);
    769 
    770   if (!mb || !iconv_name)
    771     return 0;
    772   iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
    773 
    774   while (1)
    775     {
    776       int iret;
    777       const char *n_mb = "";
    778       char *n_tmp = "";
    779 
    780       p_tmp = tmp;
    781       iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
    782       if (first)
    783 	{
    784 	  first = 0;
    785 	  continue;
    786 	}
    787       if (!iret)
    788 	{
    789 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
    790 
    791 	  if (u)
    792 	    {
    793 	      if ((size_t) u_len < l_tmp)
    794 		break;
    795 	      memcpy (u, tmp, l_tmp);
    796 	      u += l_tmp/2;
    797 	      u_len -= l_tmp;
    798 	    }
    799 	  ret += l_tmp;
    800 	}
    801       else
    802 	break;
    803       if (tmp[0] == 0 && tmp[1] == 0)
    804 	break;
    805       mb = n_mb;
    806     }
    807   iconv_close (cd);
    808 #else
    809   if (cp)
    810     ret = 0;
    811   ret = strlen (mb) + 1;
    812   ret *= sizeof (unichar);
    813   if (u != NULL && u_len != 0)
    814     {
    815       do
    816 	{
    817 	  *u++ = ((unichar) *mb) & 0xff;
    818 	  --u_len; mb++;
    819 	}
    820       while (u_len != 0 && mb[-1] != 0);
    821     }
    822   if (u != NULL && u_len != 0)
    823     *u = 0;
    824 #endif
    825   return ret;
    826 }
    827 
    828 static rc_uint_type
    829 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
    830 {
    831   rc_uint_type ret = 0;
    832 #if defined (_WIN32) || defined (__CYGWIN__)
    833   WINBOOL used_def = FALSE;
    834 
    835   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
    836 				      	    NULL, & used_def);
    837 #elif defined (HAVE_ICONV)
    838   int first = 1;
    839   char tmp[32];
    840   char *p_tmp;
    841   const char *iconv_name = wind_iconv_cp (cp);
    842 
    843   if (!u || !iconv_name)
    844     return 0;
    845   iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
    846 
    847   while (1)
    848     {
    849       int iret;
    850       const char *n_u = "";
    851       char *n_tmp = "";
    852 
    853       p_tmp = tmp;
    854       iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
    855       if (first)
    856 	{
    857 	  first = 0;
    858 	  continue;
    859 	}
    860       if (!iret)
    861 	{
    862 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
    863 
    864 	  if (mb)
    865 	    {
    866 	      if ((size_t) mb_len < l_tmp)
    867 		break;
    868 	      memcpy (mb, tmp, l_tmp);
    869 	      mb += l_tmp;
    870 	      mb_len -= l_tmp;
    871 	    }
    872 	  ret += l_tmp;
    873 	}
    874       else
    875 	break;
    876       if (u[0] == 0)
    877 	break;
    878       u = (const unichar *) n_u;
    879     }
    880   iconv_close (cd);
    881 #else
    882   if (cp)
    883     ret = 0;
    884 
    885   while (u[ret] != 0)
    886     ++ret;
    887 
    888   ++ret;
    889 
    890   if (mb)
    891     {
    892       while (*u != 0 && mb_len != 0)
    893 	{
    894 	  if (u[0] == (u[0] & 0x7f))
    895 	    *mb++ = (char) u[0];
    896 	  else
    897 	    *mb++ = '_';
    898 	  ++u; --mb_len;
    899 	}
    900       if (mb_len != 0)
    901 	*mb = 0;
    902     }
    903 #endif
    904   return ret;
    905 }
    906