1 /* winduni.c -- unicode support for the windres program. 2 Copyright (C) 1997-2014 Free Software Foundation, Inc. 3 Written by Ian Lance Taylor, Cygnus Support. 4 Rewritten by Kai Tietz, Onevision. 5 6 This file is part of GNU Binutils. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software 20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 21 02110-1301, USA. */ 22 23 24 /* This file contains unicode support routines for the windres 25 program. Ideally, we would have generic unicode support which 26 would work on all systems. However, we don't. Instead, on a 27 Windows host, we are prepared to call some Windows routines. This 28 means that we will generate different output on Windows and Unix 29 hosts, but that seems better than not really supporting unicode at 30 all. */ 31 32 #include "sysdep.h" 33 #include "bfd.h" 34 #include "libiberty.h" /* for xstrdup */ 35 #include "bucomm.h" 36 /* Must be include before windows.h and winnls.h. 
*/ 37 #if defined (_WIN32) || defined (__CYGWIN__) 38 #include <windows.h> 39 #include <winnls.h> 40 #endif 41 #include "winduni.h" 42 #include "safe-ctype.h" 43 44 #if HAVE_ICONV 45 #include <iconv.h> 46 #endif 47 48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type); 49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type); 50 static int unichar_isascii (const unichar *, rc_uint_type); 51 52 /* Convert an ASCII string to a unicode string. We just copy it, 53 expanding chars to shorts, rather than doing something intelligent. */ 54 55 #if !defined (_WIN32) && !defined (__CYGWIN__) 56 57 /* Codepages mapped. */ 58 static local_iconv_map codepages[] = 59 { 60 { 0, "MS-ANSI" }, 61 { 1, "WINDOWS-1252" }, 62 { 437, "MS-ANSI" }, 63 { 737, "MS-GREEK" }, 64 { 775, "WINBALTRIM" }, 65 { 850, "MS-ANSI" }, 66 { 852, "MS-EE" }, 67 { 857, "MS-TURK" }, 68 { 862, "CP862" }, 69 { 864, "CP864" }, 70 { 866, "MS-CYRL" }, 71 { 874, "WINDOWS-874" }, 72 { 932, "CP932" }, 73 { 936, "CP936" }, 74 { 949, "CP949" }, 75 { 950, "CP950" }, 76 { 1250, "WINDOWS-1250" }, 77 { 1251, "WINDOWS-1251" }, 78 { 1252, "WINDOWS-1252" }, 79 { 1253, "WINDOWS-1253" }, 80 { 1254, "WINDOWS-1254" }, 81 { 1255, "WINDOWS-1255" }, 82 { 1256, "WINDOWS-1256" }, 83 { 1257, "WINDOWS-1257" }, 84 { 1258, "WINDOWS-1258" }, 85 { CP_UTF7, "UTF-7" }, 86 { CP_UTF8, "UTF-8" }, 87 { CP_UTF16, "UTF-16LE" }, 88 { (rc_uint_type) -1, NULL } 89 }; 90 91 /* Languages supported. 
*/ 92 static const wind_language_t languages[] = 93 { 94 { 0x0000, 437, 1252, "Neutral", "Neutral" }, 95 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" }, 96 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" }, 97 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" }, 98 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" }, 99 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" }, 100 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" }, 101 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" }, 102 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" }, 103 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" }, 104 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" }, 105 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" }, 106 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" }, 107 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" }, 108 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" }, 109 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" }, 110 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" }, 111 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" }, 112 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" }, 113 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" }, 114 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" }, 115 { 0x042D, 
850, 1252, "Basque", "Spain" }, 116 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" }, 117 { 0x0436, 850, 1252, "Afrikaans", "South Africa" }, 118 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" }, 119 { 0x043C, 437, 1252, "Irish", "Ireland" }, 120 { 0x043E, 850, 1252, "Malay", "Malaysia" }, 121 { 0x0801, 864, 1256, "Arabic", "Iraq" }, 122 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" }, 123 { 0x0807, 850, 1252, "German", "Switzerland" }, 124 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" }, 125 { 0x080C, 850, 1252, "French", "Belgium" }, 126 { 0x0810, 850, 1252, "Italian", "Switzerland" }, 127 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" }, 128 { 0x0816, 850, 1252, "Portuguese", "Portugal" }, 129 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" }, 130 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" }, 131 { 0x0C01, 864, 1256, "Arabic", "Egypt" }, 132 { 0x0C04, 950, 950, "Chinese", "Hong Kong" }, 133 { 0x0C07, 850, 1252, "German", "Austria" }, 134 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" }, 135 { 0x0C0C, 850, 1252, "French", "Canada"}, 136 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" }, 137 { 0x1001, 864, 1256, "Arabic", "Libya" }, 138 { 0x1004, 936, 936, "Chinese", "Singapore" }, 139 { 0x1007, 850, 1252, "German", "Luxembourg" }, 140 { 0x1009, 850, 1252, "English", "Canada" }, 141 { 0x100A, 850, 1252, "Spanish", "Guatemala" }, 142 { 0x100C, 850, 1252, "French", "Switzerland" }, 143 { 0x1401, 864, 1256, "Arabic", "Algeria" }, 144 { 0x1407, 850, 1252, "German", "Liechtenstein" }, 145 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" }, 146 { 0x140C, 850, 1252, "French", "Luxembourg" }, 147 { 0x1801, 864, 1256, "Arabic", "Morocco" }, 148 { 0x1809, 850, 1252, "English", "Ireland" 
}, { 0x180A, 850, 1252, "Spanish", "Panama" }, 149 { 0x180C, 850, 1252, "French", "Monaco" }, 150 { 0x1C01, 864, 1256, "Arabic", "Tunisia" }, 151 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" }, 152 { 0x2001, 864, 1256, "Arabic", "Oman" }, 153 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" }, 154 { 0x2401, 864, 1256, "Arabic", "Yemen" }, 155 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" }, 156 { 0x2801, 864, 1256, "Arabic", "Syria" }, 157 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" }, 158 { 0x2C01, 864, 1256, "Arabic", "Jordan" }, 159 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" }, 160 { 0x3001, 864, 1256, "Arabic", "Lebanon" }, 161 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" }, 162 { 0x3401, 864, 1256, "Arabic", "Kuwait" }, 163 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" }, 164 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" }, 165 { 0x380A, 850, 1252, "Spanish", "Uruguay" }, 166 { 0x3C01, 864, 1256, "Arabic", "Bahrain" }, 167 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" }, 168 { 0x4001, 864, 1256, "Arabic", "Qatar" }, 169 { 0x400A, 850, 1252, "Spanish", "Bolivia" }, 170 { 0x440A, 850, 1252, "Spanish", "El Salvador" }, 171 { 0x480A, 850, 1252, "Spanish", "Honduras" }, 172 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" }, 173 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" }, 174 { (unsigned) -1, 0, 0, NULL, NULL } 175 }; 176 177 #endif 178 179 /* Specifies the default codepage to be used for unicode 180 transformations. By default this is CP_ACP. */ 181 rc_uint_type wind_default_codepage = CP_ACP; 182 183 /* Specifies the currently used codepage for unicode 184 transformations. By default this is CP_ACP. 
*/ 185 rc_uint_type wind_current_codepage = CP_ACP; 186 187 /* Convert an ASCII string to a unicode string. We just copy it, 188 expanding chars to shorts, rather than doing something intelligent. */ 189 190 void 191 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii) 192 { 193 unicode_from_codepage (length, unicode, ascii, wind_current_codepage); 194 } 195 196 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just 197 copy it, expanding chars to shorts, rather than doing something intelligent. 198 This routine converts also \0 within a string. */ 199 200 void 201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length) 202 { 203 char *tmp, *p; 204 rc_uint_type tlen, elen, idx = 0; 205 206 *unicode = NULL; 207 208 if (!a_length) 209 { 210 if (length) 211 *length = 0; 212 return; 213 } 214 215 /* Make sure we have zero terminated string. */ 216 p = tmp = (char *) alloca (a_length + 1); 217 memcpy (tmp, ascii, a_length); 218 tmp[a_length] = 0; 219 220 while (a_length > 0) 221 { 222 unichar *utmp, *up; 223 224 tlen = strlen (p); 225 226 if (tlen > a_length) 227 tlen = a_length; 228 if (*p == 0) 229 { 230 /* Make room for one more character. */ 231 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); 232 if (idx > 0) 233 { 234 memcpy (utmp, *unicode, idx * sizeof (unichar)); 235 } 236 *unicode = utmp; 237 utmp[idx++] = 0; 238 --a_length; 239 p++; 240 continue; 241 } 242 utmp = NULL; 243 elen = 0; 244 elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0); 245 if (elen) 246 { 247 utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2)); 248 wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen); 249 elen /= sizeof (unichar); 250 elen --; 251 } 252 else 253 { 254 /* Make room for one more character. 
*/ 255 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); 256 if (idx > 0) 257 { 258 memcpy (utmp, *unicode, idx * sizeof (unichar)); 259 } 260 *unicode = utmp; 261 utmp[idx++] = ((unichar) *p) & 0xff; 262 --a_length; 263 p++; 264 continue; 265 } 266 p += tlen; 267 a_length -= tlen; 268 269 up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen)); 270 if (idx > 0) 271 memcpy (up, *unicode, idx * sizeof (unichar)); 272 273 *unicode = up; 274 if (elen) 275 memcpy (&up[idx], utmp, sizeof (unichar) * elen); 276 277 idx += elen; 278 } 279 280 if (length) 281 *length = idx; 282 } 283 284 /* Convert an unicode string to an ASCII string. We just copy it, 285 shrink shorts to chars, rather than doing something intelligent. 286 Shorts with not within the char range are replaced by '_'. */ 287 288 void 289 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii) 290 { 291 codepage_from_unicode (length, unicode, ascii, wind_current_codepage); 292 } 293 294 /* Print the unicode string UNICODE to the file E. LENGTH is the 295 number of characters to print, or -1 if we should print until the 296 end of the string. FIXME: On a Windows host, we should be calling 297 some Windows function, probably WideCharToMultiByte. 
*/

/* Output format: printable 7-bit characters are written as-is, except
   that a backslash is written as two backslashes and a double quote
   as two double quotes.  The control characters ESCAPE_A, ESCAPE_B,
   ESCAPE_F, ESCAPE_N, ESCAPE_R, ESCAPE_T and ESCAPE_V are written as
   their backslash-letter escapes.  Any other value up to 0xff is
   written as a three digit octal escape, and anything larger as
   \x followed by four hex digits.  */

void
unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
{
  for (;;)
    {
      const char *esc = NULL;
      unichar ch;

      /* A positive LENGTH counts down; a negative one means "until
         the terminating zero".  */
      if (length == 0)
        return;
      if ((bfd_signed_vma) length > 0)
        --length;

      ch = *unicode;
      if (ch == 0 && (bfd_signed_vma) length < 0)
        return;
      ++unicode;

      if ((ch & 0x7f) != ch)
        {
          /* Outside the 7-bit range: numeric escape only.  */
          if ((ch & 0xff) == ch)
            fprintf (e, "\\%03o", (unsigned int) ch);
          else
            fprintf (e, "\\x%04x", (unsigned int) ch);
          continue;
        }

      if (ch == '\\')
        esc = "\\\\";
      else if (ch == '"')
        esc = "\"\"";
      else if (ISPRINT (ch))
        {
          putc (ch, e);
          continue;
        }
      else
        switch (ch)
          {
          case ESCAPE_A:
            esc = "\\a";
            break;
          case ESCAPE_B:
            esc = "\\b";
            break;
          case ESCAPE_F:
            esc = "\\f";
            break;
          case ESCAPE_N:
            esc = "\\n";
            break;
          case ESCAPE_R:
            esc = "\\r";
            break;
          case ESCAPE_T:
            esc = "\\t";
            break;
          case ESCAPE_V:
            esc = "\\v";
            break;
          default:
            break;
          }

      if (esc != NULL)
        fputs (esc, e);
      else
        fprintf (e, "\\%03o", (unsigned int) ch);
    }
}

/* Print the string S to the file E, using the same escaping as
   unicode_print.  LENGTH is the number of characters to print, or -1
   if we should print until the end of the string.
*/ 372 373 void 374 ascii_print (FILE *e, const char *s, rc_uint_type length) 375 { 376 while (1) 377 { 378 char ch; 379 380 if (length == 0) 381 return; 382 if ((bfd_signed_vma) length > 0) 383 --length; 384 385 ch = *s; 386 387 if (ch == 0 && (bfd_signed_vma) length < 0) 388 return; 389 390 ++s; 391 392 if ((ch & 0x7f) == ch) 393 { 394 if (ch == '\\') 395 fputs ("\\\\", e); 396 else if (ch == '"') 397 fputs ("\"\"", e); 398 else if (ISPRINT (ch)) 399 putc (ch, e); 400 else 401 { 402 switch (ch) 403 { 404 case ESCAPE_A: 405 fputs ("\\a", e); 406 break; 407 408 case ESCAPE_B: 409 fputs ("\\b", e); 410 break; 411 412 case ESCAPE_F: 413 fputs ("\\f", e); 414 break; 415 416 case ESCAPE_N: 417 fputs ("\\n", e); 418 break; 419 420 case ESCAPE_R: 421 fputs ("\\r", e); 422 break; 423 424 case ESCAPE_T: 425 fputs ("\\t", e); 426 break; 427 428 case ESCAPE_V: 429 fputs ("\\v", e); 430 break; 431 432 default: 433 fprintf (e, "\\%03o", (unsigned int) ch); 434 break; 435 } 436 } 437 } 438 else 439 fprintf (e, "\\%03o", (unsigned int) ch & 0xff); 440 } 441 } 442 443 rc_uint_type 444 unichar_len (const unichar *unicode) 445 { 446 rc_uint_type r = 0; 447 448 if (unicode) 449 while (unicode[r] != 0) 450 r++; 451 else 452 --r; 453 return r; 454 } 455 456 unichar * 457 unichar_dup (const unichar *unicode) 458 { 459 unichar *r; 460 int len; 461 462 if (! unicode) 463 return NULL; 464 for (len = 0; unicode[len] != 0; ++len) 465 ; 466 ++len; 467 r = ((unichar *) res_alloc (len * sizeof (unichar))); 468 memcpy (r, unicode, len * sizeof (unichar)); 469 return r; 470 } 471 472 unichar * 473 unichar_dup_uppercase (const unichar *u) 474 { 475 unichar *r = unichar_dup (u); 476 int i; 477 478 if (! 
r) 479 return NULL; 480 481 for (i = 0; r[i] != 0; ++i) 482 { 483 if (r[i] >= 'a' && r[i] <= 'z') 484 r[i] &= 0xdf; 485 } 486 return r; 487 } 488 489 static int 490 unichar_isascii (const unichar *u, rc_uint_type len) 491 { 492 rc_uint_type i; 493 494 if ((bfd_signed_vma) len < 0) 495 { 496 if (u) 497 len = (rc_uint_type) unichar_len (u); 498 else 499 len = 0; 500 } 501 502 for (i = 0; i < len; i++) 503 if ((u[i] & 0xff80) != 0) 504 return 0; 505 return 1; 506 } 507 508 void 509 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len) 510 { 511 if (! unichar_isascii (u, len)) 512 fputc ('L', e); 513 fputc ('"', e); 514 unicode_print (e, u, len); 515 fputc ('"', e); 516 } 517 518 int 519 unicode_is_valid_codepage (rc_uint_type cp) 520 { 521 if ((cp & 0xffff) != cp) 522 return 0; 523 if (cp == CP_UTF16 || cp == CP_ACP) 524 return 1; 525 526 #if !defined (_WIN32) && !defined (__CYGWIN__) 527 if (! wind_find_codepage_info (cp)) 528 return 0; 529 return 1; 530 #else 531 return !! IsValidCodePage ((UINT) cp); 532 #endif 533 } 534 535 #if defined (_WIN32) || defined (__CYGWIN__) 536 537 #define max_cp_string_len 6 538 539 static unsigned int 540 codepage_from_langid (unsigned short langid) 541 { 542 char cp_string [max_cp_string_len]; 543 int c; 544 545 memset (cp_string, 0, max_cp_string_len); 546 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, 547 but is unavailable on Win95. */ 548 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 549 LOCALE_IDEFAULTANSICODEPAGE, 550 cp_string, max_cp_string_len); 551 /* If codepage data for an LCID is not installed on users's system, 552 GetLocaleInfo returns an empty string. Fall back to system ANSI 553 default. 
*/ 554 if (c == 0) 555 return CP_ACP; 556 return strtoul (cp_string, 0, 10); 557 } 558 559 static unsigned int 560 wincodepage_from_langid (unsigned short langid) 561 { 562 char cp_string [max_cp_string_len]; 563 int c; 564 565 memset (cp_string, 0, max_cp_string_len); 566 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, 567 but is unavailable on Win95. */ 568 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 569 LOCALE_IDEFAULTCODEPAGE, 570 cp_string, max_cp_string_len); 571 /* If codepage data for an LCID is not installed on users's system, 572 GetLocaleInfo returns an empty string. Fall back to system ANSI 573 default. */ 574 if (c == 0) 575 return CP_OEM; 576 return strtoul (cp_string, 0, 10); 577 } 578 579 static char * 580 lang_from_langid (unsigned short langid) 581 { 582 char cp_string[261]; 583 int c; 584 585 memset (cp_string, 0, 261); 586 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 587 LOCALE_SENGLANGUAGE, 588 cp_string, 260); 589 /* If codepage data for an LCID is not installed on users's system, 590 GetLocaleInfo returns an empty string. Fall back to system ANSI 591 default. */ 592 if (c == 0) 593 strcpy (cp_string, "Neutral"); 594 return xstrdup (cp_string); 595 } 596 597 static char * 598 country_from_langid (unsigned short langid) 599 { 600 char cp_string[261]; 601 int c; 602 603 memset (cp_string, 0, 261); 604 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 605 LOCALE_SENGCOUNTRY, 606 cp_string, 260); 607 /* If codepage data for an LCID is not installed on users's system, 608 GetLocaleInfo returns an empty string. Fall back to system ANSI 609 default. */ 610 if (c == 0) 611 strcpy (cp_string, "Neutral"); 612 return xstrdup (cp_string); 613 } 614 615 #endif 616 617 const wind_language_t * 618 wind_find_language_by_id (unsigned id) 619 { 620 #if !defined (_WIN32) && !defined (__CYGWIN__) 621 int i; 622 623 if (! 
id) 624 return NULL; 625 for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++) 626 ; 627 if (languages[i].id == id) 628 return &languages[i]; 629 return NULL; 630 #else 631 static wind_language_t wl; 632 633 wl.id = id; 634 wl.doscp = codepage_from_langid ((unsigned short) id); 635 wl.wincp = wincodepage_from_langid ((unsigned short) id); 636 wl.name = lang_from_langid ((unsigned short) id); 637 wl.country = country_from_langid ((unsigned short) id); 638 639 return & wl; 640 #endif 641 } 642 643 const local_iconv_map * 644 wind_find_codepage_info (unsigned cp) 645 { 646 #if !defined (_WIN32) && !defined (__CYGWIN__) 647 int i; 648 649 for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++) 650 ; 651 if (codepages[i].codepage == (rc_uint_type) -1) 652 return NULL; 653 return &codepages[i]; 654 #else 655 static local_iconv_map lim; 656 if (!unicode_is_valid_codepage (cp)) 657 return NULL; 658 lim.codepage = cp; 659 lim.iconv_name = ""; 660 return & lim; 661 #endif 662 } 663 664 /* Convert an Codepage string to a unicode string. */ 665 666 void 667 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp) 668 { 669 rc_uint_type len; 670 671 len = wind_MultiByteToWideChar (cp, src, NULL, 0); 672 if (len) 673 { 674 *u = ((unichar *) res_alloc (len)); 675 wind_MultiByteToWideChar (cp, src, *u, len); 676 } 677 /* Discount the trailing '/0'. If MultiByteToWideChar failed, 678 this will set *length to -1. */ 679 len -= sizeof (unichar); 680 681 if (length != NULL) 682 *length = len / sizeof (unichar); 683 } 684 685 /* Convert an unicode string to an codepage string. 
*/ 686 687 void 688 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp) 689 { 690 rc_uint_type len; 691 692 len = wind_WideCharToMultiByte (cp, unicode, NULL, 0); 693 if (len) 694 { 695 *ascii = (char *) res_alloc (len * sizeof (char)); 696 wind_WideCharToMultiByte (cp, unicode, *ascii, len); 697 } 698 /* Discount the trailing '/0'. If MultiByteToWideChar failed, 699 this will set *length to -1. */ 700 len--; 701 702 if (length != NULL) 703 *length = len; 704 } 705 706 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__) 707 static int 708 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d) 709 { 710 int i; 711 712 for (i = 1; i <= 32; i++) 713 { 714 char *tmp_d = d; 715 ICONV_CONST char *tmp_s = s; 716 size_t ret; 717 size_t s_left = (size_t) i; 718 size_t d_left = (size_t) d_len; 719 720 ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left); 721 722 if (ret != (size_t) -1) 723 { 724 *n_s = tmp_s; 725 *n_d = tmp_d; 726 return 0; 727 } 728 } 729 730 return 1; 731 } 732 733 static const char * 734 wind_iconv_cp (rc_uint_type cp) 735 { 736 const local_iconv_map *lim = wind_find_codepage_info (cp); 737 738 if (!lim) 739 return NULL; 740 return lim->iconv_name; 741 } 742 #endif /* HAVE_ICONV */ 743 744 static rc_uint_type 745 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb, 746 unichar *u, rc_uint_type u_len) 747 { 748 rc_uint_type ret = 0; 749 750 #if defined (_WIN32) || defined (__CYGWIN__) 751 rc_uint_type conv_flags = MB_PRECOMPOSED; 752 753 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8. 754 MultiByteToWideChar will set the last error to 755 ERROR_INVALID_FLAGS if we do. */ 756 if (cp == CP_UTF8 || cp == CP_UTF7) 757 conv_flags = 0; 758 759 ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags, 760 mb, -1, u, u_len); 761 /* Convert to bytes. 
*/ 762 ret *= sizeof (unichar); 763 764 #elif defined (HAVE_ICONV) 765 int first = 1; 766 char tmp[32]; 767 char *p_tmp; 768 const char *iconv_name = wind_iconv_cp (cp); 769 770 if (!mb || !iconv_name) 771 return 0; 772 iconv_t cd = iconv_open ("UTF-16LE", iconv_name); 773 774 while (1) 775 { 776 int iret; 777 const char *n_mb = ""; 778 char *n_tmp = ""; 779 780 p_tmp = tmp; 781 iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp); 782 if (first) 783 { 784 first = 0; 785 continue; 786 } 787 if (!iret) 788 { 789 size_t l_tmp = (size_t) (n_tmp - p_tmp); 790 791 if (u) 792 { 793 if ((size_t) u_len < l_tmp) 794 break; 795 memcpy (u, tmp, l_tmp); 796 u += l_tmp/2; 797 u_len -= l_tmp; 798 } 799 ret += l_tmp; 800 } 801 else 802 break; 803 if (tmp[0] == 0 && tmp[1] == 0) 804 break; 805 mb = n_mb; 806 } 807 iconv_close (cd); 808 #else 809 if (cp) 810 ret = 0; 811 ret = strlen (mb) + 1; 812 ret *= sizeof (unichar); 813 if (u != NULL && u_len != 0) 814 { 815 do 816 { 817 *u++ = ((unichar) *mb) & 0xff; 818 --u_len; mb++; 819 } 820 while (u_len != 0 && mb[-1] != 0); 821 } 822 if (u != NULL && u_len != 0) 823 *u = 0; 824 #endif 825 return ret; 826 } 827 828 static rc_uint_type 829 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len) 830 { 831 rc_uint_type ret = 0; 832 #if defined (_WIN32) || defined (__CYGWIN__) 833 WINBOOL used_def = FALSE; 834 835 ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len, 836 NULL, & used_def); 837 #elif defined (HAVE_ICONV) 838 int first = 1; 839 char tmp[32]; 840 char *p_tmp; 841 const char *iconv_name = wind_iconv_cp (cp); 842 843 if (!u || !iconv_name) 844 return 0; 845 iconv_t cd = iconv_open (iconv_name, "UTF-16LE"); 846 847 while (1) 848 { 849 int iret; 850 const char *n_u = ""; 851 char *n_tmp = ""; 852 853 p_tmp = tmp; 854 iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp); 855 if (first) 856 { 857 first = 0; 858 continue; 859 } 860 if 
(!iret) 861 { 862 size_t l_tmp = (size_t) (n_tmp - p_tmp); 863 864 if (mb) 865 { 866 if ((size_t) mb_len < l_tmp) 867 break; 868 memcpy (mb, tmp, l_tmp); 869 mb += l_tmp; 870 mb_len -= l_tmp; 871 } 872 ret += l_tmp; 873 } 874 else 875 break; 876 if (u[0] == 0) 877 break; 878 u = (const unichar *) n_u; 879 } 880 iconv_close (cd); 881 #else 882 if (cp) 883 ret = 0; 884 885 while (u[ret] != 0) 886 ++ret; 887 888 ++ret; 889 890 if (mb) 891 { 892 while (*u != 0 && mb_len != 0) 893 { 894 if (u[0] == (u[0] & 0x7f)) 895 *mb++ = (char) u[0]; 896 else 897 *mb++ = '_'; 898 ++u; --mb_len; 899 } 900 if (mb_len != 0) 901 *mb = 0; 902 } 903 #endif 904 return ret; 905 } 906