/* winduni.c -- unicode support for the windres program.
   Copyright (C) 1997-2016 Free Software Foundation, Inc.
   Written by Ian Lance Taylor, Cygnus Support.
   Rewritten by Kai Tietz, Onevision.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */


/* This file contains unicode support routines for the windres
   program.  Ideally, we would have generic unicode support which
   would work on all systems.  However, we don't.  Instead, on a
   Windows host, we are prepared to call some Windows routines.  This
   means that we will generate different output on Windows and Unix
   hosts, but that seems better than not really supporting unicode at
   all.  */

#include "sysdep.h"
#include "bfd.h"
#include "libiberty.h" /* for xstrdup */
#include "bucomm.h"
/* Must be included before windows.h and winnls.h.
*/ 37 #if defined (_WIN32) || defined (__CYGWIN__) 38 #include <windows.h> 39 #include <winnls.h> 40 #endif 41 #include "winduni.h" 42 #include "safe-ctype.h" 43 44 #if HAVE_ICONV 45 #include <iconv.h> 46 #endif 47 48 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type); 49 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type); 50 static int unichar_isascii (const unichar *, rc_uint_type); 51 52 /* Convert an ASCII string to a unicode string. We just copy it, 53 expanding chars to shorts, rather than doing something intelligent. */ 54 55 #if !defined (_WIN32) && !defined (__CYGWIN__) 56 57 /* Codepages mapped. */ 58 static local_iconv_map codepages[] = 59 { 60 { 0, "MS-ANSI" }, 61 { 1, "WINDOWS-1252" }, 62 { 437, "MS-ANSI" }, 63 { 737, "MS-GREEK" }, 64 { 775, "WINBALTRIM" }, 65 { 850, "MS-ANSI" }, 66 { 852, "MS-EE" }, 67 { 857, "MS-TURK" }, 68 { 862, "CP862" }, 69 { 864, "CP864" }, 70 { 866, "MS-CYRL" }, 71 { 874, "WINDOWS-874" }, 72 { 932, "CP932" }, 73 { 936, "CP936" }, 74 { 949, "CP949" }, 75 { 950, "CP950" }, 76 { 1250, "WINDOWS-1250" }, 77 { 1251, "WINDOWS-1251" }, 78 { 1252, "WINDOWS-1252" }, 79 { 1253, "WINDOWS-1253" }, 80 { 1254, "WINDOWS-1254" }, 81 { 1255, "WINDOWS-1255" }, 82 { 1256, "WINDOWS-1256" }, 83 { 1257, "WINDOWS-1257" }, 84 { 1258, "WINDOWS-1258" }, 85 { CP_UTF7, "UTF-7" }, 86 { CP_UTF8, "UTF-8" }, 87 { CP_UTF16, "UTF-16LE" }, 88 { (rc_uint_type) -1, NULL } 89 }; 90 91 /* Languages supported. 
*/ 92 static const wind_language_t languages[] = 93 { 94 { 0x0000, 437, 1252, "Neutral", "Neutral" }, 95 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" }, 96 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" }, 97 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" }, 98 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" }, 99 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" }, 100 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" }, 101 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" }, 102 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" }, 103 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" }, 104 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" }, 105 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" }, 106 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" }, 107 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" }, 108 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" }, 109 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" }, 110 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" }, 111 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" }, 112 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" }, 113 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" }, 114 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" }, 115 { 0x042D, 
850, 1252, "Basque", "Spain" }, 116 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" }, 117 { 0x0436, 850, 1252, "Afrikaans", "South Africa" }, 118 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" }, 119 { 0x043C, 437, 1252, "Irish", "Ireland" }, 120 { 0x043E, 850, 1252, "Malay", "Malaysia" }, 121 { 0x0801, 864, 1256, "Arabic", "Iraq" }, 122 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" }, 123 { 0x0807, 850, 1252, "German", "Switzerland" }, 124 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" }, 125 { 0x080C, 850, 1252, "French", "Belgium" }, 126 { 0x0810, 850, 1252, "Italian", "Switzerland" }, 127 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" }, 128 { 0x0816, 850, 1252, "Portuguese", "Portugal" }, 129 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" }, 130 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" }, 131 { 0x0C01, 864, 1256, "Arabic", "Egypt" }, 132 { 0x0C04, 950, 950, "Chinese", "Hong Kong" }, 133 { 0x0C07, 850, 1252, "German", "Austria" }, 134 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" }, 135 { 0x0C0C, 850, 1252, "French", "Canada"}, 136 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" }, 137 { 0x1001, 864, 1256, "Arabic", "Libya" }, 138 { 0x1004, 936, 936, "Chinese", "Singapore" }, 139 { 0x1007, 850, 1252, "German", "Luxembourg" }, 140 { 0x1009, 850, 1252, "English", "Canada" }, 141 { 0x100A, 850, 1252, "Spanish", "Guatemala" }, 142 { 0x100C, 850, 1252, "French", "Switzerland" }, 143 { 0x1401, 864, 1256, "Arabic", "Algeria" }, 144 { 0x1407, 850, 1252, "German", "Liechtenstein" }, 145 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" }, 146 { 0x140C, 850, 1252, "French", "Luxembourg" }, 147 { 0x1801, 864, 1256, "Arabic", "Morocco" }, 148 { 0x1809, 850, 1252, "English", "Ireland" 
}, { 0x180A, 850, 1252, "Spanish", "Panama" }, 149 { 0x180C, 850, 1252, "French", "Monaco" }, 150 { 0x1C01, 864, 1256, "Arabic", "Tunisia" }, 151 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" }, 152 { 0x2001, 864, 1256, "Arabic", "Oman" }, 153 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" }, 154 { 0x2401, 864, 1256, "Arabic", "Yemen" }, 155 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" }, 156 { 0x2801, 864, 1256, "Arabic", "Syria" }, 157 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" }, 158 { 0x2C01, 864, 1256, "Arabic", "Jordan" }, 159 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" }, 160 { 0x3001, 864, 1256, "Arabic", "Lebanon" }, 161 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" }, 162 { 0x3401, 864, 1256, "Arabic", "Kuwait" }, 163 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" }, 164 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" }, 165 { 0x380A, 850, 1252, "Spanish", "Uruguay" }, 166 { 0x3C01, 864, 1256, "Arabic", "Bahrain" }, 167 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" }, 168 { 0x4001, 864, 1256, "Arabic", "Qatar" }, 169 { 0x400A, 850, 1252, "Spanish", "Bolivia" }, 170 { 0x440A, 850, 1252, "Spanish", "El Salvador" }, 171 { 0x480A, 850, 1252, "Spanish", "Honduras" }, 172 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" }, 173 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" }, 174 { (unsigned) -1, 0, 0, NULL, NULL } 175 }; 176 177 #endif 178 179 /* Specifies the default codepage to be used for unicode 180 transformations. By default this is CP_ACP. */ 181 rc_uint_type wind_default_codepage = CP_ACP; 182 183 /* Specifies the currently used codepage for unicode 184 transformations. By default this is CP_ACP. 
*/ 185 rc_uint_type wind_current_codepage = CP_ACP; 186 187 /* Convert an ASCII string to a unicode string. We just copy it, 188 expanding chars to shorts, rather than doing something intelligent. */ 189 190 void 191 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii) 192 { 193 unicode_from_codepage (length, unicode, ascii, wind_current_codepage); 194 } 195 196 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just 197 copy it, expanding chars to shorts, rather than doing something intelligent. 198 This routine converts also \0 within a string. */ 199 200 void 201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length) 202 { 203 char *tmp, *p; 204 rc_uint_type tlen, elen, idx = 0; 205 206 *unicode = NULL; 207 208 if (!a_length) 209 { 210 if (length) 211 *length = 0; 212 return; 213 } 214 215 /* Make sure we have zero terminated string. */ 216 p = tmp = (char *) xmalloc (a_length + 1); 217 memcpy (tmp, ascii, a_length); 218 tmp[a_length] = 0; 219 220 while (a_length > 0) 221 { 222 unichar *utmp, *up; 223 224 tlen = strlen (p); 225 226 if (tlen > a_length) 227 tlen = a_length; 228 if (*p == 0) 229 { 230 /* Make room for one more character. */ 231 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); 232 if (idx > 0) 233 { 234 memcpy (utmp, *unicode, idx * sizeof (unichar)); 235 } 236 *unicode = utmp; 237 utmp[idx++] = 0; 238 --a_length; 239 p++; 240 continue; 241 } 242 utmp = NULL; 243 elen = 0; 244 elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0); 245 if (elen) 246 { 247 utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2)); 248 wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen); 249 elen /= sizeof (unichar); 250 elen --; 251 } 252 else 253 { 254 /* Make room for one more character. 
*/ 255 utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); 256 if (idx > 0) 257 { 258 memcpy (utmp, *unicode, idx * sizeof (unichar)); 259 } 260 *unicode = utmp; 261 utmp[idx++] = ((unichar) *p) & 0xff; 262 --a_length; 263 p++; 264 continue; 265 } 266 p += tlen; 267 a_length -= tlen; 268 269 up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen)); 270 if (idx > 0) 271 memcpy (up, *unicode, idx * sizeof (unichar)); 272 273 *unicode = up; 274 if (elen) 275 memcpy (&up[idx], utmp, sizeof (unichar) * elen); 276 277 idx += elen; 278 } 279 280 if (length) 281 *length = idx; 282 283 free (tmp); 284 } 285 286 /* Convert an unicode string to an ASCII string. We just copy it, 287 shrink shorts to chars, rather than doing something intelligent. 288 Shorts with not within the char range are replaced by '_'. */ 289 290 void 291 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii) 292 { 293 codepage_from_unicode (length, unicode, ascii, wind_current_codepage); 294 } 295 296 /* Print the unicode string UNICODE to the file E. LENGTH is the 297 number of characters to print, or -1 if we should print until the 298 end of the string. FIXME: On a Windows host, we should be calling 299 some Windows function, probably WideCharToMultiByte. 
*/

/* Output rules used below: backslash is doubled, a double quote is
   written as two double quotes, other printable 7-bit characters are
   written as-is, the ESCAPE_* control characters use their C escape,
   any other value up to 0xff is written as a three-digit octal
   escape, and everything above 0xff as "\x" plus four hex digits.  */

void
unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
{
  for (;;)
    {
      unichar ch;
      const char *esc;

      if (length == 0)
        return;
      /* Only a positive count is consumed; a negative LENGTH stays
         negative and means "stop at the terminating zero".  */
      if ((bfd_signed_vma) length > 0)
        --length;

      ch = *unicode;
      if (ch == 0 && (bfd_signed_vma) length < 0)
        return;
      ++unicode;

      /* Does not fit in 8 bits: hexadecimal escape.  */
      if ((ch & 0xff) != ch)
        {
          fprintf (e, "\\x%04x", (unsigned int) ch);
          continue;
        }

      /* Fits in 8 bits but not in 7: octal escape.  */
      if ((ch & 0x7f) != ch)
        {
          fprintf (e, "\\%03o", (unsigned int) ch);
          continue;
        }

      /* 7-bit character: pick a fixed escape string if there is one.  */
      if (ch == '\\')
        esc = "\\\\";
      else if (ch == '"')
        esc = "\"\"";
      else if (ISPRINT (ch))
        {
          putc (ch, e);
          continue;
        }
      else
        {
          switch (ch)
            {
            case ESCAPE_A: esc = "\\a"; break;
            case ESCAPE_B: esc = "\\b"; break;
            case ESCAPE_F: esc = "\\f"; break;
            case ESCAPE_N: esc = "\\n"; break;
            case ESCAPE_R: esc = "\\r"; break;
            case ESCAPE_T: esc = "\\t"; break;
            case ESCAPE_V: esc = "\\v"; break;
            default:       esc = NULL;  break;
            }
        }

      if (esc != NULL)
        fputs (esc, e);
      else
        fprintf (e, "\\%03o", (unsigned int) ch);
    }
}

/* Print an ASCII string to a file, escaping it the same way as
   unicode_print does.
*/ 374 375 void 376 ascii_print (FILE *e, const char *s, rc_uint_type length) 377 { 378 while (1) 379 { 380 char ch; 381 382 if (length == 0) 383 return; 384 if ((bfd_signed_vma) length > 0) 385 --length; 386 387 ch = *s; 388 389 if (ch == 0 && (bfd_signed_vma) length < 0) 390 return; 391 392 ++s; 393 394 if ((ch & 0x7f) == ch) 395 { 396 if (ch == '\\') 397 fputs ("\\\\", e); 398 else if (ch == '"') 399 fputs ("\"\"", e); 400 else if (ISPRINT (ch)) 401 putc (ch, e); 402 else 403 { 404 switch (ch) 405 { 406 case ESCAPE_A: 407 fputs ("\\a", e); 408 break; 409 410 case ESCAPE_B: 411 fputs ("\\b", e); 412 break; 413 414 case ESCAPE_F: 415 fputs ("\\f", e); 416 break; 417 418 case ESCAPE_N: 419 fputs ("\\n", e); 420 break; 421 422 case ESCAPE_R: 423 fputs ("\\r", e); 424 break; 425 426 case ESCAPE_T: 427 fputs ("\\t", e); 428 break; 429 430 case ESCAPE_V: 431 fputs ("\\v", e); 432 break; 433 434 default: 435 fprintf (e, "\\%03o", (unsigned int) ch); 436 break; 437 } 438 } 439 } 440 else 441 fprintf (e, "\\%03o", (unsigned int) ch & 0xff); 442 } 443 } 444 445 rc_uint_type 446 unichar_len (const unichar *unicode) 447 { 448 rc_uint_type r = 0; 449 450 if (unicode) 451 while (unicode[r] != 0) 452 r++; 453 else 454 --r; 455 return r; 456 } 457 458 unichar * 459 unichar_dup (const unichar *unicode) 460 { 461 unichar *r; 462 int len; 463 464 if (! unicode) 465 return NULL; 466 for (len = 0; unicode[len] != 0; ++len) 467 ; 468 ++len; 469 r = ((unichar *) res_alloc (len * sizeof (unichar))); 470 memcpy (r, unicode, len * sizeof (unichar)); 471 return r; 472 } 473 474 unichar * 475 unichar_dup_uppercase (const unichar *u) 476 { 477 unichar *r = unichar_dup (u); 478 int i; 479 480 if (! 
r) 481 return NULL; 482 483 for (i = 0; r[i] != 0; ++i) 484 { 485 if (r[i] >= 'a' && r[i] <= 'z') 486 r[i] &= 0xdf; 487 } 488 return r; 489 } 490 491 static int 492 unichar_isascii (const unichar *u, rc_uint_type len) 493 { 494 rc_uint_type i; 495 496 if ((bfd_signed_vma) len < 0) 497 { 498 if (u) 499 len = (rc_uint_type) unichar_len (u); 500 else 501 len = 0; 502 } 503 504 for (i = 0; i < len; i++) 505 if ((u[i] & 0xff80) != 0) 506 return 0; 507 return 1; 508 } 509 510 void 511 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len) 512 { 513 if (! unichar_isascii (u, len)) 514 fputc ('L', e); 515 fputc ('"', e); 516 unicode_print (e, u, len); 517 fputc ('"', e); 518 } 519 520 int 521 unicode_is_valid_codepage (rc_uint_type cp) 522 { 523 if ((cp & 0xffff) != cp) 524 return 0; 525 if (cp == CP_UTF16 || cp == CP_ACP) 526 return 1; 527 528 #if !defined (_WIN32) && !defined (__CYGWIN__) 529 if (! wind_find_codepage_info (cp)) 530 return 0; 531 return 1; 532 #else 533 return !! IsValidCodePage ((UINT) cp); 534 #endif 535 } 536 537 #if defined (_WIN32) || defined (__CYGWIN__) 538 539 #define max_cp_string_len 6 540 541 static unsigned int 542 codepage_from_langid (unsigned short langid) 543 { 544 char cp_string [max_cp_string_len]; 545 int c; 546 547 memset (cp_string, 0, max_cp_string_len); 548 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, 549 but is unavailable on Win95. */ 550 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 551 LOCALE_IDEFAULTANSICODEPAGE, 552 cp_string, max_cp_string_len); 553 /* If codepage data for an LCID is not installed on users's system, 554 GetLocaleInfo returns an empty string. Fall back to system ANSI 555 default. 
*/ 556 if (c == 0) 557 return CP_ACP; 558 return strtoul (cp_string, 0, 10); 559 } 560 561 static unsigned int 562 wincodepage_from_langid (unsigned short langid) 563 { 564 char cp_string [max_cp_string_len]; 565 int c; 566 567 memset (cp_string, 0, max_cp_string_len); 568 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, 569 but is unavailable on Win95. */ 570 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 571 LOCALE_IDEFAULTCODEPAGE, 572 cp_string, max_cp_string_len); 573 /* If codepage data for an LCID is not installed on users's system, 574 GetLocaleInfo returns an empty string. Fall back to system ANSI 575 default. */ 576 if (c == 0) 577 return CP_OEM; 578 return strtoul (cp_string, 0, 10); 579 } 580 581 static char * 582 lang_from_langid (unsigned short langid) 583 { 584 char cp_string[261]; 585 int c; 586 587 memset (cp_string, 0, 261); 588 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 589 LOCALE_SENGLANGUAGE, 590 cp_string, 260); 591 /* If codepage data for an LCID is not installed on users's system, 592 GetLocaleInfo returns an empty string. Fall back to system ANSI 593 default. */ 594 if (c == 0) 595 strcpy (cp_string, "Neutral"); 596 return xstrdup (cp_string); 597 } 598 599 static char * 600 country_from_langid (unsigned short langid) 601 { 602 char cp_string[261]; 603 int c; 604 605 memset (cp_string, 0, 261); 606 c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), 607 LOCALE_SENGCOUNTRY, 608 cp_string, 260); 609 /* If codepage data for an LCID is not installed on users's system, 610 GetLocaleInfo returns an empty string. Fall back to system ANSI 611 default. */ 612 if (c == 0) 613 strcpy (cp_string, "Neutral"); 614 return xstrdup (cp_string); 615 } 616 617 #endif 618 619 const wind_language_t * 620 wind_find_language_by_id (unsigned id) 621 { 622 #if !defined (_WIN32) && !defined (__CYGWIN__) 623 int i; 624 625 if (! 
id) 626 return NULL; 627 for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++) 628 ; 629 if (languages[i].id == id) 630 return &languages[i]; 631 return NULL; 632 #else 633 static wind_language_t wl; 634 635 wl.id = id; 636 wl.doscp = codepage_from_langid ((unsigned short) id); 637 wl.wincp = wincodepage_from_langid ((unsigned short) id); 638 wl.name = lang_from_langid ((unsigned short) id); 639 wl.country = country_from_langid ((unsigned short) id); 640 641 return & wl; 642 #endif 643 } 644 645 const local_iconv_map * 646 wind_find_codepage_info (unsigned cp) 647 { 648 #if !defined (_WIN32) && !defined (__CYGWIN__) 649 int i; 650 651 for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++) 652 ; 653 if (codepages[i].codepage == (rc_uint_type) -1) 654 return NULL; 655 return &codepages[i]; 656 #else 657 static local_iconv_map lim; 658 if (!unicode_is_valid_codepage (cp)) 659 return NULL; 660 lim.codepage = cp; 661 lim.iconv_name = ""; 662 return & lim; 663 #endif 664 } 665 666 /* Convert an Codepage string to a unicode string. */ 667 668 void 669 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp) 670 { 671 rc_uint_type len; 672 673 len = wind_MultiByteToWideChar (cp, src, NULL, 0); 674 if (len) 675 { 676 *u = ((unichar *) res_alloc (len)); 677 wind_MultiByteToWideChar (cp, src, *u, len); 678 } 679 /* Discount the trailing '/0'. If MultiByteToWideChar failed, 680 this will set *length to -1. */ 681 len -= sizeof (unichar); 682 683 if (length != NULL) 684 *length = len / sizeof (unichar); 685 } 686 687 /* Convert an unicode string to an codepage string. 
*/ 688 689 void 690 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp) 691 { 692 rc_uint_type len; 693 694 len = wind_WideCharToMultiByte (cp, unicode, NULL, 0); 695 if (len) 696 { 697 *ascii = (char *) res_alloc (len * sizeof (char)); 698 wind_WideCharToMultiByte (cp, unicode, *ascii, len); 699 } 700 /* Discount the trailing '/0'. If MultiByteToWideChar failed, 701 this will set *length to -1. */ 702 len--; 703 704 if (length != NULL) 705 *length = len; 706 } 707 708 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__) 709 static int 710 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d) 711 { 712 int i; 713 714 for (i = 1; i <= 32; i++) 715 { 716 char *tmp_d = d; 717 ICONV_CONST char *tmp_s = s; 718 size_t ret; 719 size_t s_left = (size_t) i; 720 size_t d_left = (size_t) d_len; 721 722 ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left); 723 724 if (ret != (size_t) -1) 725 { 726 *n_s = tmp_s; 727 *n_d = tmp_d; 728 return 0; 729 } 730 } 731 732 return 1; 733 } 734 735 static const char * 736 wind_iconv_cp (rc_uint_type cp) 737 { 738 const local_iconv_map *lim = wind_find_codepage_info (cp); 739 740 if (!lim) 741 return NULL; 742 return lim->iconv_name; 743 } 744 #endif /* HAVE_ICONV */ 745 746 static rc_uint_type 747 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb, 748 unichar *u, rc_uint_type u_len) 749 { 750 rc_uint_type ret = 0; 751 752 #if defined (_WIN32) || defined (__CYGWIN__) 753 rc_uint_type conv_flags = MB_PRECOMPOSED; 754 755 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8. 756 MultiByteToWideChar will set the last error to 757 ERROR_INVALID_FLAGS if we do. */ 758 if (cp == CP_UTF8 || cp == CP_UTF7) 759 conv_flags = 0; 760 761 ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags, 762 mb, -1, u, u_len); 763 /* Convert to bytes. 
*/ 764 ret *= sizeof (unichar); 765 766 #elif defined (HAVE_ICONV) 767 int first = 1; 768 char tmp[32]; 769 char *p_tmp; 770 const char *iconv_name = wind_iconv_cp (cp); 771 772 if (!mb || !iconv_name) 773 return 0; 774 iconv_t cd = iconv_open ("UTF-16LE", iconv_name); 775 776 while (1) 777 { 778 int iret; 779 const char *n_mb = ""; 780 char *n_tmp = ""; 781 782 p_tmp = tmp; 783 iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp); 784 if (first) 785 { 786 first = 0; 787 continue; 788 } 789 if (!iret) 790 { 791 size_t l_tmp = (size_t) (n_tmp - p_tmp); 792 793 if (u) 794 { 795 if ((size_t) u_len < l_tmp) 796 break; 797 memcpy (u, tmp, l_tmp); 798 u += l_tmp/2; 799 u_len -= l_tmp; 800 } 801 ret += l_tmp; 802 } 803 else 804 break; 805 if (tmp[0] == 0 && tmp[1] == 0) 806 break; 807 mb = n_mb; 808 } 809 iconv_close (cd); 810 #else 811 if (cp) 812 ret = 0; 813 ret = strlen (mb) + 1; 814 ret *= sizeof (unichar); 815 if (u != NULL && u_len != 0) 816 { 817 do 818 { 819 *u++ = ((unichar) *mb) & 0xff; 820 --u_len; mb++; 821 } 822 while (u_len != 0 && mb[-1] != 0); 823 } 824 if (u != NULL && u_len != 0) 825 *u = 0; 826 #endif 827 return ret; 828 } 829 830 static rc_uint_type 831 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len) 832 { 833 rc_uint_type ret = 0; 834 #if defined (_WIN32) || defined (__CYGWIN__) 835 WINBOOL used_def = FALSE; 836 837 ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len, 838 NULL, & used_def); 839 #elif defined (HAVE_ICONV) 840 int first = 1; 841 char tmp[32]; 842 char *p_tmp; 843 const char *iconv_name = wind_iconv_cp (cp); 844 845 if (!u || !iconv_name) 846 return 0; 847 iconv_t cd = iconv_open (iconv_name, "UTF-16LE"); 848 849 while (1) 850 { 851 int iret; 852 const char *n_u = ""; 853 char *n_tmp = ""; 854 855 p_tmp = tmp; 856 iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp); 857 if (first) 858 { 859 first = 0; 860 continue; 861 } 862 if 
(!iret) 863 { 864 size_t l_tmp = (size_t) (n_tmp - p_tmp); 865 866 if (mb) 867 { 868 if ((size_t) mb_len < l_tmp) 869 break; 870 memcpy (mb, tmp, l_tmp); 871 mb += l_tmp; 872 mb_len -= l_tmp; 873 } 874 ret += l_tmp; 875 } 876 else 877 break; 878 if (u[0] == 0) 879 break; 880 u = (const unichar *) n_u; 881 } 882 iconv_close (cd); 883 #else 884 if (cp) 885 ret = 0; 886 887 while (u[ret] != 0) 888 ++ret; 889 890 ++ret; 891 892 if (mb) 893 { 894 while (*u != 0 && mb_len != 0) 895 { 896 if (u[0] == (u[0] & 0x7f)) 897 *mb++ = (char) u[0]; 898 else 899 *mb++ = '_'; 900 ++u; --mb_len; 901 } 902 if (mb_len != 0) 903 *mb = 0; 904 } 905 #endif 906 return ret; 907 } 908