1 /* 2 SDL - Simple DirectMedia Layer 3 Copyright (C) 1997-2006 Sam Lantinga 4 5 This library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 This library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with this library; if not, write to the Free Software 17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 19 Sam Lantinga 20 slouken (at) libsdl.org 21 */ 22 #include "SDL_config.h" 23 24 /* This file contains portable iconv functions for SDL */ 25 26 #include "SDL_stdinc.h" 27 #include "SDL_endian.h" 28 29 #ifdef HAVE_ICONV 30 31 /* Depending on which standard the iconv() was implemented with, 32 iconv() may or may not use const char ** for the inbuf param. 33 If we get this wrong, it's just a warning, so no big deal. 34 */ 35 #if defined(_XGP6) || \ 36 defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) 37 #define ICONV_INBUF_NONCONST 38 #endif 39 40 #include <errno.h> 41 42 size_t SDL_iconv(SDL_iconv_t cd, 43 const char **inbuf, size_t *inbytesleft, 44 char **outbuf, size_t *outbytesleft) 45 { 46 size_t retCode; 47 #ifdef ICONV_INBUF_NONCONST 48 retCode = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft); 49 #else 50 retCode = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft); 51 #endif 52 if ( retCode == (size_t)-1 ) { 53 switch(errno) { 54 case E2BIG: 55 return SDL_ICONV_E2BIG; 56 case EILSEQ: 57 return SDL_ICONV_EILSEQ; 58 case EINVAL: 59 return SDL_ICONV_EINVAL; 60 default: 61 return SDL_ICONV_ERROR; 62 } 63 } 64 return retCode; 65 } 66 67 #else 68 69 /* Lots of useful information on Unicode at: 70 http://www.cl.cam.ac.uk/~mgk25/unicode.html 71 */ 72 73 #define UNICODE_BOM 0xFEFF 74 75 #define UNKNOWN_ASCII '?' 76 #define UNKNOWN_UNICODE 0xFFFD 77 78 enum { 79 ENCODING_UNKNOWN, 80 ENCODING_ASCII, 81 ENCODING_LATIN1, 82 ENCODING_UTF8, 83 ENCODING_UTF16, /* Needs byte order marker */ 84 ENCODING_UTF16BE, 85 ENCODING_UTF16LE, 86 ENCODING_UTF32, /* Needs byte order marker */ 87 ENCODING_UTF32BE, 88 ENCODING_UTF32LE, 89 ENCODING_UCS2, /* Native byte order assumed */ 90 ENCODING_UCS4, /* Native byte order assumed */ 91 }; 92 #if SDL_BYTEORDER == SDL_BIG_ENDIAN 93 #define ENCODING_UTF16NATIVE ENCODING_UTF16BE 94 #define ENCODING_UTF32NATIVE ENCODING_UTF32BE 95 #else 96 #define ENCODING_UTF16NATIVE ENCODING_UTF16LE 97 #define ENCODING_UTF32NATIVE ENCODING_UTF32LE 98 #endif 99 100 struct _SDL_iconv_t 101 { 102 int src_fmt; 103 int dst_fmt; 104 }; 105 106 static struct { 107 const char *name; 108 int format; 109 } encodings[] = { 110 { "ASCII", ENCODING_ASCII }, 111 { "US-ASCII", ENCODING_ASCII }, 112 { "8859-1", ENCODING_LATIN1 }, 113 { "ISO-8859-1", ENCODING_LATIN1 }, 114 { "UTF8", ENCODING_UTF8 }, 115 { "UTF-8", ENCODING_UTF8 }, 116 { "UTF16", ENCODING_UTF16 }, 117 { "UTF-16", ENCODING_UTF16 }, 118 { "UTF16BE", ENCODING_UTF16BE }, 119 { "UTF-16BE", ENCODING_UTF16BE }, 120 { "UTF16LE", ENCODING_UTF16LE }, 121 { "UTF-16LE", ENCODING_UTF16LE }, 122 { "UTF32", ENCODING_UTF32 }, 123 { "UTF-32", ENCODING_UTF32 }, 124 { "UTF32BE", ENCODING_UTF32BE }, 125 { "UTF-32BE", ENCODING_UTF32BE }, 126 { "UTF32LE", ENCODING_UTF32LE }, 127 { "UTF-32LE", ENCODING_UTF32LE }, 128 { "UCS2", ENCODING_UCS2 }, 129 { "UCS-2", ENCODING_UCS2 }, 130 { "UCS4", ENCODING_UCS4 }, 131 { "UCS-4", ENCODING_UCS4 }, 132 }; 133 134 static const char *getlocale(char *buffer, size_t bufsize) 135 { 136 const char *lang; 137 char *ptr; 138 139 lang = SDL_getenv("LC_ALL"); 140 if ( !lang ) { 141 lang = SDL_getenv("LC_CTYPE"); 142 } 143 if ( !lang ) { 144 lang = SDL_getenv("LC_MESSAGES"); 145 } 146 if ( !lang ) { 147 lang = SDL_getenv("LANG"); 148 } 149 if ( !lang || !*lang || SDL_strcmp(lang, "C") == 0 ) { 150 lang = "ASCII"; 151 } 152 153 /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */ 154 ptr = SDL_strchr(lang, '.'); 155 if (ptr != NULL) { 156 lang = ptr + 1; 157 } 158 159 SDL_strlcpy(buffer, lang, bufsize); 160 ptr = SDL_strchr(buffer, '@'); 161 if (ptr != NULL) { 162 *ptr = '\0'; /* chop end of string. */ 163 } 164 165 return buffer; 166 } 167 168 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode) 169 { 170 int src_fmt = ENCODING_UNKNOWN; 171 int dst_fmt = ENCODING_UNKNOWN; 172 int i; 173 char fromcode_buffer[64]; 174 char tocode_buffer[64]; 175 176 if ( !fromcode || !*fromcode ) { 177 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer)); 178 } 179 if ( !tocode || !*tocode ) { 180 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer)); 181 } 182 for ( i = 0; i < SDL_arraysize(encodings); ++i ) { 183 if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) { 184 src_fmt = encodings[i].format; 185 if ( dst_fmt != ENCODING_UNKNOWN ) { 186 break; 187 } 188 } 189 if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) { 190 dst_fmt = encodings[i].format; 191 if ( src_fmt != ENCODING_UNKNOWN ) { 192 break; 193 } 194 } 195 } 196 if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) { 197 SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd)); 198 if ( cd ) { 199 cd->src_fmt = src_fmt; 200 cd->dst_fmt = dst_fmt; 201 return cd; 202 } 203 } 204 return (SDL_iconv_t)-1; 205 } 206 207 size_t SDL_iconv(SDL_iconv_t cd, 208 const char **inbuf, size_t *inbytesleft, 209 char **outbuf, size_t *outbytesleft) 210 { 211 /* For simplicity, we'll convert everything to and from UCS-4 */ 212 const char *src; 213 char *dst; 214 size_t srclen, dstlen; 215 Uint32 ch = 0; 216 size_t total; 217 218 if ( !inbuf || !*inbuf ) { 219 /* Reset the context */ 220 return 0; 221 } 222 if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) { 223 return SDL_ICONV_E2BIG; 224 } 225 src = *inbuf; 226 srclen = (inbytesleft ? *inbytesleft : 0); 227 dst = *outbuf; 228 dstlen = *outbytesleft; 229 230 switch ( cd->src_fmt ) { 231 case ENCODING_UTF16: 232 /* Scan for a byte order marker */ 233 { 234 Uint8 *p = (Uint8 *)src; 235 size_t n = srclen / 2; 236 while ( n ) { 237 if ( p[0] == 0xFF && p[1] == 0xFE ) { 238 cd->src_fmt = ENCODING_UTF16BE; 239 break; 240 } else if ( p[0] == 0xFE && p[1] == 0xFF ) { 241 cd->src_fmt = ENCODING_UTF16LE; 242 break; 243 } 244 p += 2; 245 --n; 246 } 247 if ( n == 0 ) { 248 /* We can't tell, default to host order */ 249 cd->src_fmt = ENCODING_UTF16NATIVE; 250 } 251 } 252 break; 253 case ENCODING_UTF32: 254 /* Scan for a byte order marker */ 255 { 256 Uint8 *p = (Uint8 *)src; 257 size_t n = srclen / 4; 258 while ( n ) { 259 if ( p[0] == 0xFF && p[1] == 0xFE && 260 p[2] == 0x00 && p[3] == 0x00 ) { 261 cd->src_fmt = ENCODING_UTF32BE; 262 break; 263 } else if ( p[0] == 0x00 && p[1] == 0x00 && 264 p[2] == 0xFE && p[3] == 0xFF ) { 265 cd->src_fmt = ENCODING_UTF32LE; 266 break; 267 } 268 p += 4; 269 --n; 270 } 271 if ( n == 0 ) { 272 /* We can't tell, default to host order */ 273 cd->src_fmt = ENCODING_UTF32NATIVE; 274 } 275 } 276 break; 277 } 278 279 switch ( cd->dst_fmt ) { 280 case ENCODING_UTF16: 281 /* Default to host order, need to add byte order marker */ 282 if ( dstlen < 2 ) { 283 return SDL_ICONV_E2BIG; 284 } 285 *(Uint16 *)dst = UNICODE_BOM; 286 dst += 2; 287 dstlen -= 2; 288 cd->dst_fmt = ENCODING_UTF16NATIVE; 289 break; 290 case ENCODING_UTF32: 291 /* Default to host order, need to add byte order marker */ 292 if ( dstlen < 4 ) { 293 return SDL_ICONV_E2BIG; 294 } 295 *(Uint32 *)dst = UNICODE_BOM; 296 dst += 4; 297 dstlen -= 4; 298 cd->dst_fmt = ENCODING_UTF32NATIVE; 299 break; 300 } 301 302 total = 0; 303 while ( srclen > 0 ) { 304 /* Decode a character */ 305 switch ( cd->src_fmt ) { 306 case ENCODING_ASCII: 307 { 308 Uint8 *p = (Uint8 *)src; 309 ch = (Uint32)(p[0] & 0x7F); 310 ++src; 311 --srclen; 312 } 313 break; 314 case ENCODING_LATIN1: 315 { 316 Uint8 *p = (Uint8 *)src; 317 ch = (Uint32)p[0]; 318 ++src; 319 --srclen; 320 } 321 break; 322 case ENCODING_UTF8: /* RFC 3629 */ 323 { 324 Uint8 *p = (Uint8 *)src; 325 size_t left = 0; 326 SDL_bool overlong = SDL_FALSE; 327 if ( p[0] >= 0xFC ) { 328 if ( (p[0] & 0xFE) != 0xFC ) { 329 /* Skip illegal sequences 330 return SDL_ICONV_EILSEQ; 331 */ 332 ch = UNKNOWN_UNICODE; 333 } else { 334 if ( p[0] == 0xFC ) { 335 overlong = SDL_TRUE; 336 } 337 ch = (Uint32)(p[0] & 0x01); 338 left = 5; 339 } 340 } else if ( p[0] >= 0xF8 ) { 341 if ( (p[0] & 0xFC) != 0xF8 ) { 342 /* Skip illegal sequences 343 return SDL_ICONV_EILSEQ; 344 */ 345 ch = UNKNOWN_UNICODE; 346 } else { 347 if ( p[0] == 0xF8 ) { 348 overlong = SDL_TRUE; 349 } 350 ch = (Uint32)(p[0] & 0x03); 351 left = 4; 352 } 353 } else if ( p[0] >= 0xF0 ) { 354 if ( (p[0] & 0xF8) != 0xF0 ) { 355 /* Skip illegal sequences 356 return SDL_ICONV_EILSEQ; 357 */ 358 ch = UNKNOWN_UNICODE; 359 } else { 360 if ( p[0] == 0xF0 ) { 361 overlong = SDL_TRUE; 362 } 363 ch = (Uint32)(p[0] & 0x07); 364 left = 3; 365 } 366 } else if ( p[0] >= 0xE0 ) { 367 if ( (p[0] & 0xF0) != 0xE0 ) { 368 /* Skip illegal sequences 369 return SDL_ICONV_EILSEQ; 370 */ 371 ch = UNKNOWN_UNICODE; 372 } else { 373 if ( p[0] == 0xE0 ) { 374 overlong = SDL_TRUE; 375 } 376 ch = (Uint32)(p[0] & 0x0F); 377 left = 2; 378 } 379 } else if ( p[0] >= 0xC0 ) { 380 if ( (p[0] & 0xE0) != 0xC0 ) { 381 /* Skip illegal sequences 382 return SDL_ICONV_EILSEQ; 383 */ 384 ch = UNKNOWN_UNICODE; 385 } else { 386 if ( (p[0] & 0xCE) == 0xC0 ) { 387 overlong = SDL_TRUE; 388 } 389 ch = (Uint32)(p[0] & 0x1F); 390 left = 1; 391 } 392 } else { 393 if ( (p[0] & 0x80) != 0x00 ) { 394 /* Skip illegal sequences 395 return SDL_ICONV_EILSEQ; 396 */ 397 ch = UNKNOWN_UNICODE; 398 } else { 399 ch = (Uint32)p[0]; 400 } 401 } 402 ++src; 403 --srclen; 404 if ( srclen < left ) { 405 return SDL_ICONV_EINVAL; 406 } 407 while ( left-- ) { 408 ++p; 409 if ( (p[0] & 0xC0) != 0x80 ) { 410 /* Skip illegal sequences 411 return SDL_ICONV_EILSEQ; 412 */ 413 ch = UNKNOWN_UNICODE; 414 break; 415 } 416 ch <<= 6; 417 ch |= (p[0] & 0x3F); 418 ++src; 419 --srclen; 420 } 421 if ( overlong ) { 422 /* Potential security risk 423 return SDL_ICONV_EILSEQ; 424 */ 425 ch = UNKNOWN_UNICODE; 426 } 427 if ( (ch >= 0xD800 && ch <= 0xDFFF) || 428 (ch == 0xFFFE || ch == 0xFFFF) || 429 ch > 0x10FFFF ) { 430 /* Skip illegal sequences 431 return SDL_ICONV_EILSEQ; 432 */ 433 ch = UNKNOWN_UNICODE; 434 } 435 } 436 break; 437 case ENCODING_UTF16BE: /* RFC 2781 */ 438 { 439 Uint8 *p = (Uint8 *)src; 440 Uint16 W1, W2; 441 if ( srclen < 2 ) { 442 return SDL_ICONV_EINVAL; 443 } 444 W1 = ((Uint16)p[0] << 8) | 445 (Uint16)p[1]; 446 src += 2; 447 srclen -= 2; 448 if ( W1 < 0xD800 || W1 > 0xDFFF ) { 449 ch = (Uint32)W1; 450 break; 451 } 452 if ( W1 > 0xDBFF ) { 453 /* Skip illegal sequences 454 return SDL_ICONV_EILSEQ; 455 */ 456 ch = UNKNOWN_UNICODE; 457 break; 458 } 459 if ( srclen < 2 ) { 460 return SDL_ICONV_EINVAL; 461 } 462 p = (Uint8 *)src; 463 W2 = ((Uint16)p[0] << 8) | 464 (Uint16)p[1]; 465 src += 2; 466 srclen -= 2; 467 if ( W2 < 0xDC00 || W2 > 0xDFFF ) { 468 /* Skip illegal sequences 469 return SDL_ICONV_EILSEQ; 470 */ 471 ch = UNKNOWN_UNICODE; 472 break; 473 } 474 ch = (((Uint32)(W1 & 0x3FF) << 10) | 475 (Uint32)(W2 & 0x3FF)) + 0x10000; 476 } 477 break; 478 case ENCODING_UTF16LE: /* RFC 2781 */ 479 { 480 Uint8 *p = (Uint8 *)src; 481 Uint16 W1, W2; 482 if ( srclen < 2 ) { 483 return SDL_ICONV_EINVAL; 484 } 485 W1 = ((Uint16)p[1] << 8) | 486 (Uint16)p[0]; 487 src += 2; 488 srclen -= 2; 489 if ( W1 < 0xD800 || W1 > 0xDFFF ) { 490 ch = (Uint32)W1; 491 break; 492 } 493 if ( W1 > 0xDBFF ) { 494 /* Skip illegal sequences 495 return SDL_ICONV_EILSEQ; 496 */ 497 ch = UNKNOWN_UNICODE; 498 break; 499 } 500 if ( srclen < 2 ) { 501 return SDL_ICONV_EINVAL; 502 } 503 p = (Uint8 *)src; 504 W2 = ((Uint16)p[1] << 8) | 505 (Uint16)p[0]; 506 src += 2; 507 srclen -= 2; 508 if ( W2 < 0xDC00 || W2 > 0xDFFF ) { 509 /* Skip illegal sequences 510 return SDL_ICONV_EILSEQ; 511 */ 512 ch = UNKNOWN_UNICODE; 513 break; 514 } 515 ch = (((Uint32)(W1 & 0x3FF) << 10) | 516 (Uint32)(W2 & 0x3FF)) + 0x10000; 517 } 518 break; 519 case ENCODING_UTF32BE: 520 { 521 Uint8 *p = (Uint8 *)src; 522 if ( srclen < 4 ) { 523 return SDL_ICONV_EINVAL; 524 } 525 ch = ((Uint32)p[0] << 24) | 526 ((Uint32)p[1] << 16) | 527 ((Uint32)p[2] << 8) | 528 (Uint32)p[3]; 529 src += 4; 530 srclen -= 4; 531 } 532 break; 533 case ENCODING_UTF32LE: 534 { 535 Uint8 *p = (Uint8 *)src; 536 if ( srclen < 4 ) { 537 return SDL_ICONV_EINVAL; 538 } 539 ch = ((Uint32)p[3] << 24) | 540 ((Uint32)p[2] << 16) | 541 ((Uint32)p[1] << 8) | 542 (Uint32)p[0]; 543 src += 4; 544 srclen -= 4; 545 } 546 break; 547 case ENCODING_UCS2: 548 { 549 Uint16 *p = (Uint16 *)src; 550 if ( srclen < 2 ) { 551 return SDL_ICONV_EINVAL; 552 } 553 ch = *p; 554 src += 2; 555 srclen -= 2; 556 } 557 break; 558 case ENCODING_UCS4: 559 { 560 Uint32 *p = (Uint32 *)src; 561 if ( srclen < 4 ) { 562 return SDL_ICONV_EINVAL; 563 } 564 ch = *p; 565 src += 4; 566 srclen -= 4; 567 } 568 break; 569 } 570 571 /* Encode a character */ 572 switch ( cd->dst_fmt ) { 573 case ENCODING_ASCII: 574 { 575 Uint8 *p = (Uint8 *)dst; 576 if ( dstlen < 1 ) { 577 return SDL_ICONV_E2BIG; 578 } 579 if ( ch > 0x7F ) { 580 *p = UNKNOWN_ASCII; 581 } else { 582 *p = (Uint8)ch; 583 } 584 ++dst; 585 --dstlen; 586 } 587 break; 588 case ENCODING_LATIN1: 589 { 590 Uint8 *p = (Uint8 *)dst; 591 if ( dstlen < 1 ) { 592 return SDL_ICONV_E2BIG; 593 } 594 if ( ch > 0xFF ) { 595 *p = UNKNOWN_ASCII; 596 } else { 597 *p = (Uint8)ch; 598 } 599 ++dst; 600 --dstlen; 601 } 602 break; 603 case ENCODING_UTF8: /* RFC 3629 */ 604 { 605 Uint8 *p = (Uint8 *)dst; 606 if ( ch > 0x10FFFF ) { 607 ch = UNKNOWN_UNICODE; 608 } 609 if ( ch <= 0x7F ) { 610 if ( dstlen < 1 ) { 611 return SDL_ICONV_E2BIG; 612 } 613 *p = (Uint8)ch; 614 ++dst; 615 --dstlen; 616 } else if ( ch <= 0x7FF ) { 617 if ( dstlen < 2 ) { 618 return SDL_ICONV_E2BIG; 619 } 620 p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F); 621 p[1] = 0x80 | (Uint8)(ch & 0x3F); 622 dst += 2; 623 dstlen -= 2; 624 } else if ( ch <= 0xFFFF ) { 625 if ( dstlen < 3 ) { 626 return SDL_ICONV_E2BIG; 627 } 628 p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F); 629 p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F); 630 p[2] = 0x80 | (Uint8)(ch & 0x3F); 631 dst += 3; 632 dstlen -= 3; 633 } else if ( ch <= 0x1FFFFF ) { 634 if ( dstlen < 4 ) { 635 return SDL_ICONV_E2BIG; 636 } 637 p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07); 638 p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F); 639 p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F); 640 p[3] = 0x80 | (Uint8)(ch & 0x3F); 641 dst += 4; 642 dstlen -= 4; 643 } else if ( ch <= 0x3FFFFFF ) { 644 if ( dstlen < 5 ) { 645 return SDL_ICONV_E2BIG; 646 } 647 p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03); 648 p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F); 649 p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F); 650 p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F); 651 p[4] = 0x80 | (Uint8)(ch & 0x3F); 652 dst += 5; 653 dstlen -= 5; 654 } else { 655 if ( dstlen < 6 ) { 656 return SDL_ICONV_E2BIG; 657 } 658 p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01); 659 p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F); 660 p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F); 661 p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F); 662 p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F); 663 p[5] = 0x80 | (Uint8)(ch & 0x3F); 664 dst += 6; 665 dstlen -= 6; 666 } 667 } 668 break; 669 case ENCODING_UTF16BE: /* RFC 2781 */ 670 { 671 Uint8 *p = (Uint8 *)dst; 672 if ( ch > 0x10FFFF ) { 673 ch = UNKNOWN_UNICODE; 674 } 675 if ( ch < 0x10000 ) { 676 if ( dstlen < 2 ) { 677 return SDL_ICONV_E2BIG; 678 } 679 p[0] = (Uint8)(ch >> 8); 680 p[1] = (Uint8)ch; 681 dst += 2; 682 dstlen -= 2; 683 } else { 684 Uint16 W1, W2; 685 if ( dstlen < 4 ) { 686 return SDL_ICONV_E2BIG; 687 } 688 ch = ch - 0x10000; 689 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); 690 W2 = 0xDC00 | (Uint16)(ch & 0x3FF); 691 p[0] = (Uint8)(W1 >> 8); 692 p[1] = (Uint8)W1; 693 p[2] = (Uint8)(W2 >> 8); 694 p[3] = (Uint8)W2; 695 dst += 4; 696 dstlen -= 4; 697 } 698 } 699 break; 700 case ENCODING_UTF16LE: /* RFC 2781 */ 701 { 702 Uint8 *p = (Uint8 *)dst; 703 if ( ch > 0x10FFFF ) { 704 ch = UNKNOWN_UNICODE; 705 } 706 if ( ch < 0x10000 ) { 707 if ( dstlen < 2 ) { 708 return SDL_ICONV_E2BIG; 709 } 710 p[1] = (Uint8)(ch >> 8); 711 p[0] = (Uint8)ch; 712 dst += 2; 713 dstlen -= 2; 714 } else { 715 Uint16 W1, W2; 716 if ( dstlen < 4 ) { 717 return SDL_ICONV_E2BIG; 718 } 719 ch = ch - 0x10000; 720 W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF); 721 W2 = 0xDC00 | (Uint16)(ch & 0x3FF); 722 p[1] = (Uint8)(W1 >> 8); 723 p[0] = (Uint8)W1; 724 p[3] = (Uint8)(W2 >> 8); 725 p[2] = (Uint8)W2; 726 dst += 4; 727 dstlen -= 4; 728 } 729 } 730 break; 731 case ENCODING_UTF32BE: 732 { 733 Uint8 *p = (Uint8 *)dst; 734 if ( ch > 0x10FFFF ) { 735 ch = UNKNOWN_UNICODE; 736 } 737 if ( dstlen < 4 ) { 738 return SDL_ICONV_E2BIG; 739 } 740 p[0] = (Uint8)(ch >> 24); 741 p[1] = (Uint8)(ch >> 16); 742 p[2] = (Uint8)(ch >> 8); 743 p[3] = (Uint8)ch; 744 dst += 4; 745 dstlen -= 4; 746 } 747 break; 748 case ENCODING_UTF32LE: 749 { 750 Uint8 *p = (Uint8 *)dst; 751 if ( ch > 0x10FFFF ) { 752 ch = UNKNOWN_UNICODE; 753 } 754 if ( dstlen < 4 ) { 755 return SDL_ICONV_E2BIG; 756 } 757 p[3] = (Uint8)(ch >> 24); 758 p[2] = (Uint8)(ch >> 16); 759 p[1] = (Uint8)(ch >> 8); 760 p[0] = (Uint8)ch; 761 dst += 4; 762 dstlen -= 4; 763 } 764 break; 765 case ENCODING_UCS2: 766 { 767 Uint16 *p = (Uint16 *)dst; 768 if ( ch > 0xFFFF ) { 769 ch = UNKNOWN_UNICODE; 770 } 771 if ( dstlen < 2 ) { 772 return SDL_ICONV_E2BIG; 773 } 774 *p = (Uint16)ch; 775 dst += 2; 776 dstlen -= 2; 777 } 778 break; 779 case ENCODING_UCS4: 780 { 781 Uint32 *p = (Uint32 *)dst; 782 if ( ch > 0x7FFFFFFF ) { 783 ch = UNKNOWN_UNICODE; 784 } 785 if ( dstlen < 4 ) { 786 return SDL_ICONV_E2BIG; 787 } 788 *p = ch; 789 dst += 4; 790 dstlen -= 4; 791 } 792 break; 793 } 794 795 /* Update state */ 796 *inbuf = src; 797 *inbytesleft = srclen; 798 *outbuf = dst; 799 *outbytesleft = dstlen; 800 ++total; 801 } 802 return total; 803 } 804 805 int SDL_iconv_close(SDL_iconv_t cd) 806 { 807 if ( cd && cd != (SDL_iconv_t)-1 ) { 808 SDL_free(cd); 809 } 810 return 0; 811 } 812 813 #endif /* !HAVE_ICONV */ 814 815 char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft) 816 { 817 SDL_iconv_t cd; 818 char *string; 819 size_t stringsize; 820 char *outbuf; 821 size_t outbytesleft; 822 size_t retCode = 0; 823 824 cd = SDL_iconv_open(tocode, fromcode); 825 if ( cd == (SDL_iconv_t)-1 ) { 826 /* See if we can recover here (fixes iconv on Solaris 11) */ 827 if ( !tocode || !*tocode ) { 828 tocode = "UTF-8"; 829 } 830 if ( !fromcode || !*fromcode ) { 831 tocode = "UTF-8"; 832 } 833 cd = SDL_iconv_open(tocode, fromcode); 834 } 835 if ( cd == (SDL_iconv_t)-1 ) { 836 return NULL; 837 } 838 839 stringsize = inbytesleft > 4 ? inbytesleft : 4; 840 string = SDL_malloc(stringsize); 841 if ( !string ) { 842 SDL_iconv_close(cd); 843 return NULL; 844 } 845 outbuf = string; 846 outbytesleft = stringsize; 847 SDL_memset(outbuf, 0, 4); 848 849 while ( inbytesleft > 0 ) { 850 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); 851 switch (retCode) { 852 case SDL_ICONV_E2BIG: 853 { 854 char *oldstring = string; 855 stringsize *= 2; 856 string = SDL_realloc(string, stringsize); 857 if ( !string ) { 858 SDL_iconv_close(cd); 859 return NULL; 860 } 861 outbuf = string + (outbuf - oldstring); 862 outbytesleft = stringsize - (outbuf - string); 863 SDL_memset(outbuf, 0, 4); 864 } 865 break; 866 case SDL_ICONV_EILSEQ: 867 /* Try skipping some input data - not perfect, but... */ 868 ++inbuf; 869 --inbytesleft; 870 break; 871 case SDL_ICONV_EINVAL: 872 case SDL_ICONV_ERROR: 873 /* We can't continue... */ 874 inbytesleft = 0; 875 break; 876 } 877 } 878 SDL_iconv_close(cd); 879 880 return string; 881 } 882