/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel (at) veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst (at) w3.org>
 */

#define IN_LIBXML
#include "libxml.h"

#include <string.h>

#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef LIBXML_ICONV_ENABLED
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#endif
#include <libxml/encoding.h>
#include <libxml/xmlmemory.h>
#ifdef LIBXML_HTML_ENABLED
#include <libxml/HTMLparser.h>
#endif
#include <libxml/globals.h>
#include <libxml/xmlerror.h>

/* File-scope slots for the UTF-16 handlers; both start out NULL. */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/* One entry of the alias table: @alias is looked up, @name is returned. */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
};

/* Alias table, number of entries in use, and allocated capacity. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Byte-order flag consulted by the UTF-16 converters in this file.
 * It defaults to 1 (little endian).
 * NOTE(review): presumably re-evaluated during handler initialisation,
 * which is outside this part of the file -- confirm.
 */
static int xmlLittleEndian = 1;

/**
 * xmlEncodingErrMemory:
 * @extra:  extra information
 *
 * Handle an out of memory condition: reports XML_ERR_NO_MEMORY in the
 * XML_FROM_I18N domain through __xmlSimpleError().
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}

/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message
 * @val:  a string forwarded together with @msg (presumably the value
 *        substituted into the message format -- confirm against
 *        __xmlRaiseError)
 *
 * Raise an encoding error: the error is reported in the XML_FROM_I18N
 * domain with level XML_ERR_FATAL through __xmlRaiseError().
 */
static void
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}

#ifdef LIBXML_ICU_ENABLED
/*
 * openIcuConverter:
 * @name:  name of the ICU converter to open
 * @toUnicode:  non-zero to install the "stop" callback on the to-Unicode
 *              direction, zero to install it on the from-Unicode direction
 *
 * Allocate a uconv_t and open two ICU converters in it: one for @name and
 * one for "UTF-8".
 *
 * Returns the new uconv_t, or NULL if the allocation or any ICU call failed
 * (in which case everything acquired so far is released).
 */
static uconv_t*
openIcuConverter(const char* name, int toUnicode)
{
    UErrorCode status = U_ZERO_ERROR;
    uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
    if (conv == NULL)
        return NULL;

    conv->uconv = ucnv_open(name, &status);
    if (U_FAILURE(status))
        goto error;

    status = U_ZERO_ERROR;
    if (toUnicode) {
        ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
                            NULL, NULL, NULL, &status);
    }
    else {
        ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
                              NULL, NULL, NULL, &status);
    }
    if (U_FAILURE(status))
        goto error;

    status = U_ZERO_ERROR;
    conv->utf8 = ucnv_open("UTF-8", &status);
    if (U_SUCCESS(status))
        return conv;

error:
    /* conv->utf8 is never open when we get here; only uconv may be */
    if (conv->uconv)
        ucnv_close(conv->uconv);
    xmlFree(conv);
    return NULL;
}

/*
 * closeIcuConverter:
 * @conv:  a converter pair obtained from openIcuConverter(), or NULL
 *
 * Close both ICU converters and free @conv. A NULL @conv is ignored.
 */
static void
closeIcuConverter(uconv_t *conv)
{
    if (conv != NULL) {
        ucnv_close(conv->uconv);
        ucnv_close(conv->utf8);
        xmlFree(conv);
    }
}
#endif /* LIBXML_ICU_ENABLED */

/************************************************************************
 *                                                                      *
 *              Conversions To/From UTF8 encoding                       *
 *                                                                      *
 ************************************************************************/

/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a subset of UTF-8, so every accepted byte
 * is copied unchanged. Conversion stops as soon as fewer than six bytes
 * of room remain in @out.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int srclen = *inlen;
    const int dstlen = *outlen;
    int done = 0;       /* one output byte per input byte */
    int bad = 0;

    /* the "+ 5" slack in the output bound is part of the existing contract */
    while ((done < srclen) && (done + 5 < dstlen)) {
        unsigned char c = in[done];

        if (c >= 0x80) {
            /* not ASCII: report what was converted so far and fail */
            bad = 1;
            break;
        }
        out[done] = c;
        done++;
    }

    *outlen = done;
    *inlen = done;
    return(bad ? -1 : done);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Only single-byte sequences can be represented in ASCII. Any complete
 * multi-byte sequence is therefore a transcoding failure: it is either a
 * character above 0x7F, an overlong encoding of an ASCII character, or a
 * malformed sequence. A multi-byte sequence cut short by the end of @in
 * is not an error: conversion stops in front of it so that the caller can
 * supply the missing bytes.
 *
 * A NULL @in is the "initialization" call: nothing is written.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails, or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int d;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d >= 0x80) {
            int trailing = 0;

            if ((d >= 0xC0) && (d < 0xE0)) trailing = 1;
            else if ((d >= 0xE0) && (d < 0xF0)) trailing = 2;
            else if ((d >= 0xF0) && (d < 0xF8)) trailing = 3;

            /* valid lead byte but the sequence is truncated: wait for more */
            if ((trailing > 0) && (inend - in < trailing))
                break;

            /*
             * Stray continuation byte, invalid lead byte, or a complete
             * multi-byte sequence: no chance for this in ASCII.
             */
            *outlen = out - outstart;
            *inlen = processed - instart;
            return(-2);
        }

        if (out >= outend)
            break;
        *out++ = (unsigned char) d;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, the others are
 * expanded to a two-byte UTF-8 sequence. Conversion stops in front of the
 * first character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 * arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    unsigned char *dst;
    unsigned char *dstend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    src = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (src < srcend) {
        unsigned int c = *src;

        if (c < 0x80) {
            if (dst >= dstend)
                break;
            *dst++ = (unsigned char) c;
        } else {
            /* both bytes of the sequence must fit */
            if (dstend - dst < 2)
                break;
            *dst++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            *dst++ = (unsigned char) ((c & 0x3F) | 0x80);
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}

/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as both
 * buffers allow, i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 * the number of bytes to copy would be negative.
 * The values of *@inlenb and *@outlen after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}


#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A sequence cut short by the end of @in is left
 * unconsumed; a NULL @in is the "initialization" call and writes nothing.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails (malformed sequence or character above 0xFF), or -1 if @out,
 * @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    const unsigned char *mark;      /* end of the last fully converted char */
    unsigned char *dst;
    const unsigned char *dstend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    mark = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (src < srcend) {
        unsigned int lead = *src++;
        unsigned int cp;
        int extra;

        if (lead < 0x80) {
            cp = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* trailing byte in leading position, or impossible lead byte */
            goto fail;
        } else if (lead < 0xE0) {
            cp = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            cp = lead & 0x0F;
            extra = 2;
        } else {
            cp = lead & 0x07;
            extra = 3;
        }

        /* incomplete sequence: leave it for the next call */
        if (srcend - src < extra)
            break;

        while (extra-- > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80)
                goto fail;
            cp = (cp << 6) | (cont & 0x3F);
        }

        /* no chance for this in IsoLat1 */
        if (cp > 0xFF)
            goto fail;

        if (dst >= dstend)
            break;
        *dst++ = (unsigned char) cp;
        mark = src;
    }

    *outlen = dst - out;
    *inlen = mark - in;
    return(*outlen);

fail:
    *outlen = dst - out;
    *inlen = mark - in;
    return(-2);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so @inb needs no
 * particular alignment and the result does not depend on the byte order
 * of the host. An odd trailing byte and a high surrogate whose low
 * surrogate has not arrived yet are left unconsumed.
 * Conversion stops as soon as fewer than six bytes of room remain in @out.
 *
 * Returns the number of bytes written, -1 if an argument is NULL or
 * *@inlenb is negative, or -2 if the transcoding fails (if @inb is not a
 * valid UTF-16 string).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if (*inlenb < 0)
        return(-1);

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;
    while ((inend - in >= 2) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2) {
                /* second half not available yet: wait for more input */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /*
         * c is a single code point; the loop condition guarantees room for
         * the longest (4 byte) UTF-8 sequence.
         */
        if      (c <    0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c <   0x800) {
            *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }

        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the byte order
 * of the host. No byte order mark is produced. A sequence cut short by the
 * end of @in, or a character that no longer fits in @outb, is left
 * unconsumed. A NULL @in is the "initialization" call and writes nothing.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 * NULL, or -2 if the transcoding failed (malformed UTF-8 or a value above
 * 0x10FFFF).
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else goto invalid;                  /* no chance for this in UTF-16 */

        /* incomplete sequence: leave it for the next call */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the range UTF-16 can encode */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}

/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. A NULL @in is the "initialization" call: the
 * UTF-16LE byte order mark (0xFF 0xFE) is written if @outb has room for
 * it. Any other call is forwarded to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 * NULL, or -2 if the transcoding failed.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if (in == NULL) {
        if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
            return(-1);
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
#endif /* LIBXML_OUTPUT_ENABLED */

/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so @inb needs no
 * particular alignment and the result does not depend on the byte order
 * of the host.
 *
 * Only whole characters are converted: an odd trailing byte, a high
 * surrogate whose low surrogate has not arrived yet, and a character whose
 * UTF-8 form no longer fits in @out are all left unconsumed.
 *
 * Returns the number of bytes written, -1 if an argument is NULL or
 * *@inlenb is negative, or -2 if the transcoding fails (if @inb is not a
 * valid UTF-16 string).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits, needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);
    if (*inlenb < 0)
        return(-1);

    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;
    outend = out + *outlen;
    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - in < 2) {
                /* second half not available yet: wait for more input */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* never emit a truncated UTF-8 sequence */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;
        if (outend - out < needed)
            break;

        if (needed == 1) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (needed == 2) {
            *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (needed == 3) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }

        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}

#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the byte order
 * of the host. No byte order mark is produced. A sequence cut short by the
 * end of @in, or a character that no longer fits in @outb, is left
 * unconsumed. A NULL @in is the "initialization" call and writes nothing.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 * NULL, or -2 if the transcoding failed (malformed UTF-8 or a value above
 * 0x10FFFF).
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced,
 * on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else goto invalid;                  /* no chance for this in UTF-16 */

        /* incomplete sequence: leave it for the next call */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the range UTF-16 can encode */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* byte count, like the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
#endif /* LIBXML_OUTPUT_ENABLED */

/************************************************************************
 *                                                                      *
 *              Generic encoding handling routines                      *
 *                                                                      *
 ************************************************************************/

917 */ 918 xmlCharEncoding 919 xmlDetectCharEncoding(const unsigned char* in, int len) 920 { 921 if (in == NULL) 922 return(XML_CHAR_ENCODING_NONE); 923 if (len >= 4) { 924 if ((in[0] == 0x00) && (in[1] == 0x00) && 925 (in[2] == 0x00) && (in[3] == 0x3C)) 926 return(XML_CHAR_ENCODING_UCS4BE); 927 if ((in[0] == 0x3C) && (in[1] == 0x00) && 928 (in[2] == 0x00) && (in[3] == 0x00)) 929 return(XML_CHAR_ENCODING_UCS4LE); 930 if ((in[0] == 0x00) && (in[1] == 0x00) && 931 (in[2] == 0x3C) && (in[3] == 0x00)) 932 return(XML_CHAR_ENCODING_UCS4_2143); 933 if ((in[0] == 0x00) && (in[1] == 0x3C) && 934 (in[2] == 0x00) && (in[3] == 0x00)) 935 return(XML_CHAR_ENCODING_UCS4_3412); 936 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 937 (in[2] == 0xA7) && (in[3] == 0x94)) 938 return(XML_CHAR_ENCODING_EBCDIC); 939 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 940 (in[2] == 0x78) && (in[3] == 0x6D)) 941 return(XML_CHAR_ENCODING_UTF8); 942 /* 943 * Although not part of the recommendation, we also 944 * attempt an "auto-recognition" of UTF-16LE and 945 * UTF-16BE encodings. 
946 */ 947 if ((in[0] == 0x3C) && (in[1] == 0x00) && 948 (in[2] == 0x3F) && (in[3] == 0x00)) 949 return(XML_CHAR_ENCODING_UTF16LE); 950 if ((in[0] == 0x00) && (in[1] == 0x3C) && 951 (in[2] == 0x00) && (in[3] == 0x3F)) 952 return(XML_CHAR_ENCODING_UTF16BE); 953 } 954 if (len >= 3) { 955 /* 956 * Errata on XML-1.0 June 20 2001 957 * We now allow an UTF8 encoded BOM 958 */ 959 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 960 (in[2] == 0xBF)) 961 return(XML_CHAR_ENCODING_UTF8); 962 } 963 /* For UTF-16 we can recognize by the BOM */ 964 if (len >= 2) { 965 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 966 return(XML_CHAR_ENCODING_UTF16BE); 967 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 968 return(XML_CHAR_ENCODING_UTF16LE); 969 } 970 return(XML_CHAR_ENCODING_NONE); 971 } 972 973 /** 974 * xmlCleanupEncodingAliases: 975 * 976 * Unregisters all aliases 977 */ 978 void 979 xmlCleanupEncodingAliases(void) { 980 int i; 981 982 if (xmlCharEncodingAliases == NULL) 983 return; 984 985 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 986 if (xmlCharEncodingAliases[i].name != NULL) 987 xmlFree((char *) xmlCharEncodingAliases[i].name); 988 if (xmlCharEncodingAliases[i].alias != NULL) 989 xmlFree((char *) xmlCharEncodingAliases[i].alias); 990 } 991 xmlCharEncodingAliasesNb = 0; 992 xmlCharEncodingAliasesMax = 0; 993 xmlFree(xmlCharEncodingAliases); 994 xmlCharEncodingAliases = NULL; 995 } 996 997 /** 998 * xmlGetEncodingAlias: 999 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1000 * 1001 * Lookup an encoding name for the given alias. 
1002 * 1003 * Returns NULL if not found, otherwise the original name 1004 */ 1005 const char * 1006 xmlGetEncodingAlias(const char *alias) { 1007 int i; 1008 char upper[100]; 1009 1010 if (alias == NULL) 1011 return(NULL); 1012 1013 if (xmlCharEncodingAliases == NULL) 1014 return(NULL); 1015 1016 for (i = 0;i < 99;i++) { 1017 upper[i] = toupper(alias[i]); 1018 if (upper[i] == 0) break; 1019 } 1020 upper[i] = 0; 1021 1022 /* 1023 * Walk down the list looking for a definition of the alias 1024 */ 1025 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1026 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1027 return(xmlCharEncodingAliases[i].name); 1028 } 1029 } 1030 return(NULL); 1031 } 1032 1033 /** 1034 * xmlAddEncodingAlias: 1035 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1036 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1037 * 1038 * Registers an alias @alias for an encoding named @name. Existing alias 1039 * will be overwritten. 1040 * 1041 * Returns 0 in case of success, -1 in case of error 1042 */ 1043 int 1044 xmlAddEncodingAlias(const char *name, const char *alias) { 1045 int i; 1046 char upper[100]; 1047 1048 if ((name == NULL) || (alias == NULL)) 1049 return(-1); 1050 1051 for (i = 0;i < 99;i++) { 1052 upper[i] = toupper(alias[i]); 1053 if (upper[i] == 0) break; 1054 } 1055 upper[i] = 0; 1056 1057 if (xmlCharEncodingAliases == NULL) { 1058 xmlCharEncodingAliasesNb = 0; 1059 xmlCharEncodingAliasesMax = 20; 1060 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1061 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1062 if (xmlCharEncodingAliases == NULL) 1063 return(-1); 1064 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1065 xmlCharEncodingAliasesMax *= 2; 1066 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1067 xmlRealloc(xmlCharEncodingAliases, 1068 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1069 } 1070 /* 1071 * Walk down the list looking 
for a definition of the alias 1072 */ 1073 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1074 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1075 /* 1076 * Replace the definition. 1077 */ 1078 xmlFree((char *) xmlCharEncodingAliases[i].name); 1079 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1080 return(0); 1081 } 1082 } 1083 /* 1084 * Add the definition 1085 */ 1086 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1087 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1088 xmlCharEncodingAliasesNb++; 1089 return(0); 1090 } 1091 1092 /** 1093 * xmlDelEncodingAlias: 1094 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1095 * 1096 * Unregisters an encoding alias @alias 1097 * 1098 * Returns 0 in case of success, -1 in case of error 1099 */ 1100 int 1101 xmlDelEncodingAlias(const char *alias) { 1102 int i; 1103 1104 if (alias == NULL) 1105 return(-1); 1106 1107 if (xmlCharEncodingAliases == NULL) 1108 return(-1); 1109 /* 1110 * Walk down the list looking for a definition of the alias 1111 */ 1112 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1113 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1114 xmlFree((char *) xmlCharEncodingAliases[i].name); 1115 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1116 xmlCharEncodingAliasesNb--; 1117 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1118 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1119 return(0); 1120 } 1121 } 1122 return(-1); 1123 } 1124 1125 /** 1126 * xmlParseCharEncoding: 1127 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1128 * 1129 * Compare the string to the encoding schemes already known. Note 1130 * that the comparison is case insensitive accordingly to the section 1131 * [XML] 4.3.3 Character Encoding in Entities. 1132 * 1133 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1134 * if not recognized. 
1135 */ 1136 xmlCharEncoding 1137 xmlParseCharEncoding(const char* name) 1138 { 1139 const char *alias; 1140 char upper[500]; 1141 int i; 1142 1143 if (name == NULL) 1144 return(XML_CHAR_ENCODING_NONE); 1145 1146 /* 1147 * Do the alias resolution 1148 */ 1149 alias = xmlGetEncodingAlias(name); 1150 if (alias != NULL) 1151 name = alias; 1152 1153 for (i = 0;i < 499;i++) { 1154 upper[i] = toupper(name[i]); 1155 if (upper[i] == 0) break; 1156 } 1157 upper[i] = 0; 1158 1159 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1160 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1161 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1162 1163 /* 1164 * NOTE: if we were able to parse this, the endianness of UTF16 is 1165 * already found and in use 1166 */ 1167 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1168 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1169 1170 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1171 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1172 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1173 1174 /* 1175 * NOTE: if we were able to parse this, the endianness of UCS4 is 1176 * already found and in use 1177 */ 1178 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1179 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1180 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1181 1182 1183 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1184 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1185 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1186 1187 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1188 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1189 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1190 1191 if (!strcmp(upper, "ISO-8859-3")) 
return(XML_CHAR_ENCODING_8859_3); 1192 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1193 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1194 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1195 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1196 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1197 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1198 1199 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1200 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1201 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1202 1203 #ifdef DEBUG_ENCODING 1204 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1205 #endif 1206 return(XML_CHAR_ENCODING_ERROR); 1207 } 1208 1209 /** 1210 * xmlGetCharEncodingName: 1211 * @enc: the encoding 1212 * 1213 * The "canonical" name for XML encoding. 1214 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1215 * Section 4.3.3 Character Encoding in Entities 1216 * 1217 * Returns the canonical name for the given encoding 1218 */ 1219 1220 const char* 1221 xmlGetCharEncodingName(xmlCharEncoding enc) { 1222 switch (enc) { 1223 case XML_CHAR_ENCODING_ERROR: 1224 return(NULL); 1225 case XML_CHAR_ENCODING_NONE: 1226 return(NULL); 1227 case XML_CHAR_ENCODING_UTF8: 1228 return("UTF-8"); 1229 case XML_CHAR_ENCODING_UTF16LE: 1230 return("UTF-16"); 1231 case XML_CHAR_ENCODING_UTF16BE: 1232 return("UTF-16"); 1233 case XML_CHAR_ENCODING_EBCDIC: 1234 return("EBCDIC"); 1235 case XML_CHAR_ENCODING_UCS4LE: 1236 return("ISO-10646-UCS-4"); 1237 case XML_CHAR_ENCODING_UCS4BE: 1238 return("ISO-10646-UCS-4"); 1239 case XML_CHAR_ENCODING_UCS4_2143: 1240 return("ISO-10646-UCS-4"); 1241 case XML_CHAR_ENCODING_UCS4_3412: 1242 return("ISO-10646-UCS-4"); 1243 case XML_CHAR_ENCODING_UCS2: 1244 return("ISO-10646-UCS-2"); 1245 case XML_CHAR_ENCODING_8859_1: 1246 
return("ISO-8859-1"); 1247 case XML_CHAR_ENCODING_8859_2: 1248 return("ISO-8859-2"); 1249 case XML_CHAR_ENCODING_8859_3: 1250 return("ISO-8859-3"); 1251 case XML_CHAR_ENCODING_8859_4: 1252 return("ISO-8859-4"); 1253 case XML_CHAR_ENCODING_8859_5: 1254 return("ISO-8859-5"); 1255 case XML_CHAR_ENCODING_8859_6: 1256 return("ISO-8859-6"); 1257 case XML_CHAR_ENCODING_8859_7: 1258 return("ISO-8859-7"); 1259 case XML_CHAR_ENCODING_8859_8: 1260 return("ISO-8859-8"); 1261 case XML_CHAR_ENCODING_8859_9: 1262 return("ISO-8859-9"); 1263 case XML_CHAR_ENCODING_2022_JP: 1264 return("ISO-2022-JP"); 1265 case XML_CHAR_ENCODING_SHIFT_JIS: 1266 return("Shift-JIS"); 1267 case XML_CHAR_ENCODING_EUC_JP: 1268 return("EUC-JP"); 1269 case XML_CHAR_ENCODING_ASCII: 1270 return(NULL); 1271 } 1272 return(NULL); 1273 } 1274 1275 /************************************************************************ 1276 * * 1277 * Char encoding handlers * 1278 * * 1279 ************************************************************************/ 1280 1281 1282 /* the size should be growable, but it's not a big deal ... */ 1283 #define MAX_ENCODING_HANDLERS 50 1284 static xmlCharEncodingHandlerPtr *handlers = NULL; 1285 static int nbCharEncodingHandler = 0; 1286 1287 /* 1288 * The default is UTF-8 for XML, that's also the default used for the 1289 * parser internals, so the default encoding handler is NULL 1290 */ 1291 1292 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1293 1294 /** 1295 * xmlNewCharEncodingHandler: 1296 * @name: the encoding name, in UTF-8 format (ASCII actually) 1297 * @input: the xmlCharEncodingInputFunc to read that encoding 1298 * @output: the xmlCharEncodingOutputFunc to write that encoding 1299 * 1300 * Create and registers an xmlCharEncodingHandler. 1301 * 1302 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 
1303 */ 1304 xmlCharEncodingHandlerPtr 1305 xmlNewCharEncodingHandler(const char *name, 1306 xmlCharEncodingInputFunc input, 1307 xmlCharEncodingOutputFunc output) { 1308 xmlCharEncodingHandlerPtr handler; 1309 const char *alias; 1310 char upper[500]; 1311 int i; 1312 char *up = NULL; 1313 1314 /* 1315 * Do the alias resolution 1316 */ 1317 alias = xmlGetEncodingAlias(name); 1318 if (alias != NULL) 1319 name = alias; 1320 1321 /* 1322 * Keep only the uppercase version of the encoding. 1323 */ 1324 if (name == NULL) { 1325 xmlEncodingErr(XML_I18N_NO_NAME, 1326 "xmlNewCharEncodingHandler : no name !\n", NULL); 1327 return(NULL); 1328 } 1329 for (i = 0;i < 499;i++) { 1330 upper[i] = toupper(name[i]); 1331 if (upper[i] == 0) break; 1332 } 1333 upper[i] = 0; 1334 up = xmlMemStrdup(upper); 1335 if (up == NULL) { 1336 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1337 return(NULL); 1338 } 1339 1340 /* 1341 * allocate and fill-up an handler block. 1342 */ 1343 handler = (xmlCharEncodingHandlerPtr) 1344 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1345 if (handler == NULL) { 1346 xmlFree(up); 1347 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1348 return(NULL); 1349 } 1350 handler->input = input; 1351 handler->output = output; 1352 handler->name = up; 1353 1354 #ifdef LIBXML_ICONV_ENABLED 1355 handler->iconv_in = NULL; 1356 handler->iconv_out = NULL; 1357 #endif 1358 #ifdef LIBXML_ICU_ENABLED 1359 handler->uconv_in = NULL; 1360 handler->uconv_out = NULL; 1361 #endif 1362 1363 /* 1364 * registers and returns the handler. 1365 */ 1366 xmlRegisterCharEncodingHandler(handler); 1367 #ifdef DEBUG_ENCODING 1368 xmlGenericError(xmlGenericErrorContext, 1369 "Registered encoding handler for %s\n", name); 1370 #endif 1371 return(handler); 1372 } 1373 1374 /** 1375 * xmlInitCharEncodingHandlers: 1376 * 1377 * Initialize the char encoding support, it registers the default 1378 * encoding supported. 
1379 * NOTE: while public, this function usually doesn't need to be called 1380 * in normal processing. 1381 */ 1382 void 1383 xmlInitCharEncodingHandlers(void) { 1384 unsigned short int tst = 0x1234; 1385 unsigned char *ptr = (unsigned char *) &tst; 1386 1387 if (handlers != NULL) return; 1388 1389 handlers = (xmlCharEncodingHandlerPtr *) 1390 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1391 1392 if (*ptr == 0x12) xmlLittleEndian = 0; 1393 else if (*ptr == 0x34) xmlLittleEndian = 1; 1394 else { 1395 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1396 "Odd problem at endianness detection\n", NULL); 1397 } 1398 1399 if (handlers == NULL) { 1400 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1401 return; 1402 } 1403 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1404 #ifdef LIBXML_OUTPUT_ENABLED 1405 xmlUTF16LEHandler = 1406 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1407 xmlUTF16BEHandler = 1408 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1409 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1410 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1411 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1412 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1413 #ifdef LIBXML_HTML_ENABLED 1414 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1415 #endif 1416 #else 1417 xmlUTF16LEHandler = 1418 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1419 xmlUTF16BEHandler = 1420 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); 1421 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1422 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1423 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1424 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1425 #endif /* LIBXML_OUTPUT_ENABLED */ 1426 #if !defined(LIBXML_ICONV_ENABLED) && 
!defined(LIBXML_ICU_ENABLED) 1427 #ifdef LIBXML_ISO8859X_ENABLED 1428 xmlRegisterCharEncodingHandlersISO8859x (); 1429 #endif 1430 #endif 1431 1432 } 1433 1434 /** 1435 * xmlCleanupCharEncodingHandlers: 1436 * 1437 * Cleanup the memory allocated for the char encoding support, it 1438 * unregisters all the encoding handlers and the aliases. 1439 */ 1440 void 1441 xmlCleanupCharEncodingHandlers(void) { 1442 xmlCleanupEncodingAliases(); 1443 1444 if (handlers == NULL) return; 1445 1446 for (;nbCharEncodingHandler > 0;) { 1447 nbCharEncodingHandler--; 1448 if (handlers[nbCharEncodingHandler] != NULL) { 1449 if (handlers[nbCharEncodingHandler]->name != NULL) 1450 xmlFree(handlers[nbCharEncodingHandler]->name); 1451 xmlFree(handlers[nbCharEncodingHandler]); 1452 } 1453 } 1454 xmlFree(handlers); 1455 handlers = NULL; 1456 nbCharEncodingHandler = 0; 1457 xmlDefaultCharEncodingHandler = NULL; 1458 } 1459 1460 /** 1461 * xmlRegisterCharEncodingHandler: 1462 * @handler: the xmlCharEncodingHandlerPtr handler block 1463 * 1464 * Register the char encoding handler, surprising, isn't it ? 1465 */ 1466 void 1467 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1468 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1469 if ((handler == NULL) || (handlers == NULL)) { 1470 xmlEncodingErr(XML_I18N_NO_HANDLER, 1471 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1472 return; 1473 } 1474 1475 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1476 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1477 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1478 "MAX_ENCODING_HANDLERS"); 1479 return; 1480 } 1481 handlers[nbCharEncodingHandler++] = handler; 1482 } 1483 1484 /** 1485 * xmlGetCharEncodingHandler: 1486 * @enc: an xmlCharEncoding value. 1487 * 1488 * Search in the registered set the handler able to read/write that encoding. 
1489 * 1490 * Returns the handler or NULL if not found 1491 */ 1492 xmlCharEncodingHandlerPtr 1493 xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1494 xmlCharEncodingHandlerPtr handler; 1495 1496 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1497 switch (enc) { 1498 case XML_CHAR_ENCODING_ERROR: 1499 return(NULL); 1500 case XML_CHAR_ENCODING_NONE: 1501 return(NULL); 1502 case XML_CHAR_ENCODING_UTF8: 1503 return(NULL); 1504 case XML_CHAR_ENCODING_UTF16LE: 1505 return(xmlUTF16LEHandler); 1506 case XML_CHAR_ENCODING_UTF16BE: 1507 return(xmlUTF16BEHandler); 1508 case XML_CHAR_ENCODING_EBCDIC: 1509 handler = xmlFindCharEncodingHandler("EBCDIC"); 1510 if (handler != NULL) return(handler); 1511 handler = xmlFindCharEncodingHandler("ebcdic"); 1512 if (handler != NULL) return(handler); 1513 handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1514 if (handler != NULL) return(handler); 1515 break; 1516 case XML_CHAR_ENCODING_UCS4BE: 1517 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1518 if (handler != NULL) return(handler); 1519 handler = xmlFindCharEncodingHandler("UCS-4"); 1520 if (handler != NULL) return(handler); 1521 handler = xmlFindCharEncodingHandler("UCS4"); 1522 if (handler != NULL) return(handler); 1523 break; 1524 case XML_CHAR_ENCODING_UCS4LE: 1525 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1526 if (handler != NULL) return(handler); 1527 handler = xmlFindCharEncodingHandler("UCS-4"); 1528 if (handler != NULL) return(handler); 1529 handler = xmlFindCharEncodingHandler("UCS4"); 1530 if (handler != NULL) return(handler); 1531 break; 1532 case XML_CHAR_ENCODING_UCS4_2143: 1533 break; 1534 case XML_CHAR_ENCODING_UCS4_3412: 1535 break; 1536 case XML_CHAR_ENCODING_UCS2: 1537 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1538 if (handler != NULL) return(handler); 1539 handler = xmlFindCharEncodingHandler("UCS-2"); 1540 if (handler != NULL) return(handler); 1541 handler = xmlFindCharEncodingHandler("UCS2"); 1542 if (handler != 
NULL) return(handler); 1543 break; 1544 1545 /* 1546 * We used to keep ISO Latin encodings native in the 1547 * generated data. This led to so many problems that 1548 * this has been removed. One can still change this 1549 * back by registering no-ops encoders for those 1550 */ 1551 case XML_CHAR_ENCODING_8859_1: 1552 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1553 if (handler != NULL) return(handler); 1554 break; 1555 case XML_CHAR_ENCODING_8859_2: 1556 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1557 if (handler != NULL) return(handler); 1558 break; 1559 case XML_CHAR_ENCODING_8859_3: 1560 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1561 if (handler != NULL) return(handler); 1562 break; 1563 case XML_CHAR_ENCODING_8859_4: 1564 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1565 if (handler != NULL) return(handler); 1566 break; 1567 case XML_CHAR_ENCODING_8859_5: 1568 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1569 if (handler != NULL) return(handler); 1570 break; 1571 case XML_CHAR_ENCODING_8859_6: 1572 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1573 if (handler != NULL) return(handler); 1574 break; 1575 case XML_CHAR_ENCODING_8859_7: 1576 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1577 if (handler != NULL) return(handler); 1578 break; 1579 case XML_CHAR_ENCODING_8859_8: 1580 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1581 if (handler != NULL) return(handler); 1582 break; 1583 case XML_CHAR_ENCODING_8859_9: 1584 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1585 if (handler != NULL) return(handler); 1586 break; 1587 1588 1589 case XML_CHAR_ENCODING_2022_JP: 1590 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1591 if (handler != NULL) return(handler); 1592 break; 1593 case XML_CHAR_ENCODING_SHIFT_JIS: 1594 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1595 if (handler != NULL) return(handler); 1596 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1597 if (handler != NULL) 
return(handler); 1598 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1599 if (handler != NULL) return(handler); 1600 break; 1601 case XML_CHAR_ENCODING_EUC_JP: 1602 handler = xmlFindCharEncodingHandler("EUC-JP"); 1603 if (handler != NULL) return(handler); 1604 break; 1605 default: 1606 break; 1607 } 1608 1609 #ifdef DEBUG_ENCODING 1610 xmlGenericError(xmlGenericErrorContext, 1611 "No handler found for encoding %d\n", enc); 1612 #endif 1613 return(NULL); 1614 } 1615 1616 /** 1617 * xmlFindCharEncodingHandler: 1618 * @name: a string describing the char encoding. 1619 * 1620 * Search in the registered set the handler able to read/write that encoding. 1621 * 1622 * Returns the handler or NULL if not found 1623 */ 1624 xmlCharEncodingHandlerPtr 1625 xmlFindCharEncodingHandler(const char *name) { 1626 const char *nalias; 1627 const char *norig; 1628 xmlCharEncoding alias; 1629 #ifdef LIBXML_ICONV_ENABLED 1630 xmlCharEncodingHandlerPtr enc; 1631 iconv_t icv_in, icv_out; 1632 #endif /* LIBXML_ICONV_ENABLED */ 1633 #ifdef LIBXML_ICU_ENABLED 1634 xmlCharEncodingHandlerPtr enc; 1635 uconv_t *ucv_in, *ucv_out; 1636 #endif /* LIBXML_ICU_ENABLED */ 1637 char upper[100]; 1638 int i; 1639 1640 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1641 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1642 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1643 1644 /* 1645 * Do the alias resolution 1646 */ 1647 norig = name; 1648 nalias = xmlGetEncodingAlias(name); 1649 if (nalias != NULL) 1650 name = nalias; 1651 1652 /* 1653 * Check first for directly registered encoding names 1654 */ 1655 for (i = 0;i < 99;i++) { 1656 upper[i] = toupper(name[i]); 1657 if (upper[i] == 0) break; 1658 } 1659 upper[i] = 0; 1660 1661 if (handlers != NULL) { 1662 for (i = 0;i < nbCharEncodingHandler; i++) { 1663 if (!strcmp(upper, handlers[i]->name)) { 1664 #ifdef DEBUG_ENCODING 1665 xmlGenericError(xmlGenericErrorContext, 1666 "Found registered handler for encoding %s\n", name); 1667 
#endif 1668 return(handlers[i]); 1669 } 1670 } 1671 } 1672 1673 #ifdef LIBXML_ICONV_ENABLED 1674 /* check whether iconv can handle this */ 1675 icv_in = iconv_open("UTF-8", name); 1676 icv_out = iconv_open(name, "UTF-8"); 1677 if (icv_in == (iconv_t) -1) { 1678 icv_in = iconv_open("UTF-8", upper); 1679 } 1680 if (icv_out == (iconv_t) -1) { 1681 icv_out = iconv_open(upper, "UTF-8"); 1682 } 1683 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1684 enc = (xmlCharEncodingHandlerPtr) 1685 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1686 if (enc == NULL) { 1687 iconv_close(icv_in); 1688 iconv_close(icv_out); 1689 return(NULL); 1690 } 1691 enc->name = xmlMemStrdup(name); 1692 enc->input = NULL; 1693 enc->output = NULL; 1694 enc->iconv_in = icv_in; 1695 enc->iconv_out = icv_out; 1696 #ifdef DEBUG_ENCODING 1697 xmlGenericError(xmlGenericErrorContext, 1698 "Found iconv handler for encoding %s\n", name); 1699 #endif 1700 return enc; 1701 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1702 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1703 "iconv : problems with filters for '%s'\n", name); 1704 } 1705 #endif /* LIBXML_ICONV_ENABLED */ 1706 #ifdef LIBXML_ICU_ENABLED 1707 /* check whether icu can handle this */ 1708 ucv_in = openIcuConverter(name, 1); 1709 ucv_out = openIcuConverter(name, 0); 1710 if (ucv_in != NULL && ucv_out != NULL) { 1711 enc = (xmlCharEncodingHandlerPtr) 1712 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1713 if (enc == NULL) { 1714 closeIcuConverter(ucv_in); 1715 closeIcuConverter(ucv_out); 1716 return(NULL); 1717 } 1718 enc->name = xmlMemStrdup(name); 1719 enc->input = NULL; 1720 enc->output = NULL; 1721 enc->uconv_in = ucv_in; 1722 enc->uconv_out = ucv_out; 1723 #ifdef DEBUG_ENCODING 1724 xmlGenericError(xmlGenericErrorContext, 1725 "Found ICU converter handler for encoding %s\n", name); 1726 #endif 1727 return enc; 1728 } else if (ucv_in != NULL || ucv_out != NULL) { 1729 closeIcuConverter(ucv_in); 1730 closeIcuConverter(ucv_out); 
1731 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1732 "ICU converter : problems with filters for '%s'\n", name); 1733 } 1734 #endif /* LIBXML_ICU_ENABLED */ 1735 1736 #ifdef DEBUG_ENCODING 1737 xmlGenericError(xmlGenericErrorContext, 1738 "No handler found for encoding %s\n", name); 1739 #endif 1740 1741 /* 1742 * Fallback using the canonical names 1743 */ 1744 alias = xmlParseCharEncoding(norig); 1745 if (alias != XML_CHAR_ENCODING_ERROR) { 1746 const char* canon; 1747 canon = xmlGetCharEncodingName(alias); 1748 if ((canon != NULL) && (strcmp(name, canon))) { 1749 return(xmlFindCharEncodingHandler(canon)); 1750 } 1751 } 1752 1753 /* If "none of the above", give up */ 1754 return(NULL); 1755 } 1756 1757 /************************************************************************ 1758 * * 1759 * ICONV based generic conversion functions * 1760 * * 1761 ************************************************************************/ 1762 1763 #ifdef LIBXML_ICONV_ENABLED 1764 /** 1765 * xmlIconvWrapper: 1766 * @cd: iconv converter data structure 1767 * @out: a pointer to an array of bytes to store the result 1768 * @outlen: the length of @out 1769 * @in: a pointer to an array of ISO Latin 1 chars 1770 * @inlen: the length of @in 1771 * 1772 * Returns 0 if success, or 1773 * -1 by lack of space, or 1774 * -2 if the transcoding fails (for *in is not valid utf8 string or 1775 * the result of transformation can't fit into the encoding we want), or 1776 * -3 if there the last byte can't form a single output char. 1777 * 1778 * The value of @inlen after return is the number of octets consumed 1779 * as the return value is positive, else unpredictable. 1780 * The value of @outlen after return is the number of ocetes consumed. 
1781 */ 1782 static int 1783 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1784 const unsigned char *in, int *inlen) { 1785 size_t icv_inlen, icv_outlen; 1786 const char *icv_in = (const char *) in; 1787 char *icv_out = (char *) out; 1788 int ret; 1789 1790 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1791 if (outlen != NULL) *outlen = 0; 1792 return(-1); 1793 } 1794 icv_inlen = *inlen; 1795 icv_outlen = *outlen; 1796 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1797 *inlen -= icv_inlen; 1798 *outlen -= icv_outlen; 1799 if ((icv_inlen != 0) || (ret == -1)) { 1800 #ifdef EILSEQ 1801 if (errno == EILSEQ) { 1802 return -2; 1803 } else 1804 #endif 1805 #ifdef E2BIG 1806 if (errno == E2BIG) { 1807 return -1; 1808 } else 1809 #endif 1810 #ifdef EINVAL 1811 if (errno == EINVAL) { 1812 return -3; 1813 } else 1814 #endif 1815 { 1816 return -3; 1817 } 1818 } 1819 return 0; 1820 } 1821 #endif /* LIBXML_ICONV_ENABLED */ 1822 1823 /************************************************************************ 1824 * * 1825 * ICU based generic conversion functions * 1826 * * 1827 ************************************************************************/ 1828 1829 #ifdef LIBXML_ICU_ENABLED 1830 /** 1831 * xmlUconvWrapper: 1832 * @cd: ICU uconverter data structure 1833 * @toUnicode : non-zero if toUnicode. 0 otherwise. 1834 * @out: a pointer to an array of bytes to store the result 1835 * @outlen: the length of @out 1836 * @in: a pointer to an array of ISO Latin 1 chars 1837 * @inlen: the length of @in 1838 * 1839 * Returns 0 if success, or 1840 * -1 by lack of space, or 1841 * -2 if the transcoding fails (for *in is not valid utf8 string or 1842 * the result of transformation can't fit into the encoding we want), or 1843 * -3 if there the last byte can't form a single output char. 
1844 * 1845 * The value of @inlen after return is the number of octets consumed 1846 * as the return value is positive, else unpredictable. 1847 * The value of @outlen after return is the number of ocetes consumed. 1848 */ 1849 static int 1850 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1851 const unsigned char *in, int *inlen) { 1852 const char *ucv_in = (const char *) in; 1853 char *ucv_out = (char *) out; 1854 UErrorCode err = U_ZERO_ERROR; 1855 1856 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1857 if (outlen != NULL) *outlen = 0; 1858 return(-1); 1859 } 1860 1861 /* 1862 * TODO(jungshik) 1863 * 1. is ucnv_convert(To|From)Algorithmic better? 1864 * 2. had we better use an explicit pivot buffer? 1865 * 3. error returned comes from 'fromUnicode' only even 1866 * when toUnicode is true ! 1867 */ 1868 if (toUnicode) { 1869 /* encoding => UTF-16 => UTF-8 */ 1870 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1871 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1872 0, TRUE, &err); 1873 } else { 1874 /* UTF-8 => UTF-16 => encoding */ 1875 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1876 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1877 0, TRUE, &err); 1878 } 1879 *inlen = ucv_in - (const char*) in; 1880 *outlen = ucv_out - (char *) out; 1881 if (U_SUCCESS(err)) 1882 return 0; 1883 if (err == U_BUFFER_OVERFLOW_ERROR) 1884 return -1; 1885 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1886 return -2; 1887 /* if (err == U_TRUNCATED_CHAR_FOUND) */ 1888 return -3; 1889 } 1890 #endif /* LIBXML_ICU_ENABLED */ 1891 1892 /************************************************************************ 1893 * * 1894 * The real API used by libxml for on-the-fly conversion * 1895 * * 1896 ************************************************************************/ 1897 int 1898 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1899 
xmlBufferPtr in, int len); 1900 1901 /** 1902 * xmlCharEncFirstLineInt: 1903 * @handler: char enconding transformation data structure 1904 * @out: an xmlBuffer for the output. 1905 * @in: an xmlBuffer for the input 1906 * @len: number of bytes to convert for the first line, or -1 1907 * 1908 * Front-end for the encoding handler input function, but handle only 1909 * the very first line, i.e. limit itself to 45 chars. 1910 * 1911 * Returns the number of byte written if success, or 1912 * -1 general error 1913 * -2 if the transcoding fails (for *in is not valid utf8 string or 1914 * the result of transformation can't fit into the encoding we want), or 1915 */ 1916 int 1917 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1918 xmlBufferPtr in, int len) { 1919 int ret = -2; 1920 int written; 1921 int toconv; 1922 1923 if (handler == NULL) return(-1); 1924 if (out == NULL) return(-1); 1925 if (in == NULL) return(-1); 1926 1927 /* calculate space available */ 1928 written = out->size - out->use; 1929 toconv = in->use; 1930 /* 1931 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1932 * 45 chars should be sufficient to reach the end of the encoding 1933 * declaration without going too far inside the document content. 
1934 * on UTF-16 this means 90bytes, on UCS4 this means 180 1935 * The actual value depending on guessed encoding is passed as @len 1936 * if provided 1937 */ 1938 if (len >= 0) { 1939 if (toconv > len) 1940 toconv = len; 1941 } else { 1942 if (toconv > 180) 1943 toconv = 180; 1944 } 1945 if (toconv * 2 >= written) { 1946 xmlBufferGrow(out, toconv); 1947 written = out->size - out->use - 1; 1948 } 1949 1950 if (handler->input != NULL) { 1951 ret = handler->input(&out->content[out->use], &written, 1952 in->content, &toconv); 1953 xmlBufferShrink(in, toconv); 1954 out->use += written; 1955 out->content[out->use] = 0; 1956 } 1957 #ifdef LIBXML_ICONV_ENABLED 1958 else if (handler->iconv_in != NULL) { 1959 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1960 &written, in->content, &toconv); 1961 xmlBufferShrink(in, toconv); 1962 out->use += written; 1963 out->content[out->use] = 0; 1964 if (ret == -1) ret = -3; 1965 } 1966 #endif /* LIBXML_ICONV_ENABLED */ 1967 #ifdef LIBXML_ICU_ENABLED 1968 else if (handler->uconv_in != NULL) { 1969 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 1970 &written, in->content, &toconv); 1971 xmlBufferShrink(in, toconv); 1972 out->use += written; 1973 out->content[out->use] = 0; 1974 if (ret == -1) ret = -3; 1975 } 1976 #endif /* LIBXML_ICU_ENABLED */ 1977 #ifdef DEBUG_ENCODING 1978 switch (ret) { 1979 case 0: 1980 xmlGenericError(xmlGenericErrorContext, 1981 "converted %d bytes to %d bytes of input\n", 1982 toconv, written); 1983 break; 1984 case -1: 1985 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1986 toconv, written, in->use); 1987 break; 1988 case -2: 1989 xmlGenericError(xmlGenericErrorContext, 1990 "input conversion failed due to input error\n"); 1991 break; 1992 case -3: 1993 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1994 toconv, written, in->use); 1995 break; 1996 default: 1997 
xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 1998 } 1999 #endif /* DEBUG_ENCODING */ 2000 /* 2001 * Ignore when input buffer is not on a boundary 2002 */ 2003 if (ret == -3) ret = 0; 2004 if (ret == -1) ret = 0; 2005 return(ret); 2006 } 2007 2008 /** 2009 * xmlCharEncFirstLine: 2010 * @handler: char enconding transformation data structure 2011 * @out: an xmlBuffer for the output. 2012 * @in: an xmlBuffer for the input 2013 * 2014 * Front-end for the encoding handler input function, but handle only 2015 * the very first line, i.e. limit itself to 45 chars. 2016 * 2017 * Returns the number of byte written if success, or 2018 * -1 general error 2019 * -2 if the transcoding fails (for *in is not valid utf8 string or 2020 * the result of transformation can't fit into the encoding we want), or 2021 */ 2022 int 2023 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2024 xmlBufferPtr in) { 2025 return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2026 } 2027 2028 /** 2029 * xmlCharEncInFunc: 2030 * @handler: char encoding transformation data structure 2031 * @out: an xmlBuffer for the output. 
2032 * @in: an xmlBuffer for the input 2033 * 2034 * Generic front-end for the encoding handler input function 2035 * 2036 * Returns the number of byte written if success, or 2037 * -1 general error 2038 * -2 if the transcoding fails (for *in is not valid utf8 string or 2039 * the result of transformation can't fit into the encoding we want), or 2040 */ 2041 int 2042 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2043 xmlBufferPtr in) 2044 { 2045 int ret = -2; 2046 int written; 2047 int toconv; 2048 2049 if (handler == NULL) 2050 return (-1); 2051 if (out == NULL) 2052 return (-1); 2053 if (in == NULL) 2054 return (-1); 2055 2056 toconv = in->use; 2057 if (toconv == 0) 2058 return (0); 2059 written = out->size - out->use; 2060 if (toconv * 2 >= written) { 2061 xmlBufferGrow(out, out->size + toconv * 2); 2062 written = out->size - out->use - 1; 2063 } 2064 if (handler->input != NULL) { 2065 ret = handler->input(&out->content[out->use], &written, 2066 in->content, &toconv); 2067 xmlBufferShrink(in, toconv); 2068 out->use += written; 2069 out->content[out->use] = 0; 2070 } 2071 #ifdef LIBXML_ICONV_ENABLED 2072 else if (handler->iconv_in != NULL) { 2073 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 2074 &written, in->content, &toconv); 2075 xmlBufferShrink(in, toconv); 2076 out->use += written; 2077 out->content[out->use] = 0; 2078 if (ret == -1) 2079 ret = -3; 2080 } 2081 #endif /* LIBXML_ICONV_ENABLED */ 2082 #ifdef LIBXML_ICU_ENABLED 2083 else if (handler->uconv_in != NULL) { 2084 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 2085 &written, in->content, &toconv); 2086 xmlBufferShrink(in, toconv); 2087 out->use += written; 2088 out->content[out->use] = 0; 2089 if (ret == -1) 2090 ret = -3; 2091 } 2092 #endif /* LIBXML_ICU_ENABLED */ 2093 switch (ret) { 2094 case 0: 2095 #ifdef DEBUG_ENCODING 2096 xmlGenericError(xmlGenericErrorContext, 2097 "converted %d bytes to %d bytes of input\n", 2098 toconv, 
written); 2099 #endif 2100 break; 2101 case -1: 2102 #ifdef DEBUG_ENCODING 2103 xmlGenericError(xmlGenericErrorContext, 2104 "converted %d bytes to %d bytes of input, %d left\n", 2105 toconv, written, in->use); 2106 #endif 2107 break; 2108 case -3: 2109 #ifdef DEBUG_ENCODING 2110 xmlGenericError(xmlGenericErrorContext, 2111 "converted %d bytes to %d bytes of input, %d left\n", 2112 toconv, written, in->use); 2113 #endif 2114 break; 2115 case -2: { 2116 char buf[50]; 2117 2118 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2119 in->content[0], in->content[1], 2120 in->content[2], in->content[3]); 2121 buf[49] = 0; 2122 xmlEncodingErr(XML_I18N_CONV_FAILED, 2123 "input conversion failed due to input error, bytes %s\n", 2124 buf); 2125 } 2126 } 2127 /* 2128 * Ignore when input buffer is not on a boundary 2129 */ 2130 if (ret == -3) 2131 ret = 0; 2132 return (written? written : ret); 2133 } 2134 2135 /** 2136 * xmlCharEncOutFunc: 2137 * @handler: char enconding transformation data structure 2138 * @out: an xmlBuffer for the output. 2139 * @in: an xmlBuffer for the input 2140 * 2141 * Generic front-end for the encoding handler output function 2142 * a first call with @in == NULL has to be made firs to initiate the 2143 * output in case of non-stateless encoding needing to initiate their 2144 * state or the output (like the BOM in UTF16). 2145 * In case of UTF8 sequence conversion errors for the given encoder, 2146 * the content will be automatically remapped to a CharRef sequence. 
2147 * 2148 * Returns the number of byte written if success, or 2149 * -1 general error 2150 * -2 if the transcoding fails (for *in is not valid utf8 string or 2151 * the result of transformation can't fit into the encoding we want), or 2152 */ 2153 int 2154 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2155 xmlBufferPtr in) { 2156 int ret = -2; 2157 int written; 2158 int writtentot = 0; 2159 int toconv; 2160 int output = 0; 2161 2162 if (handler == NULL) return(-1); 2163 if (out == NULL) return(-1); 2164 2165 retry: 2166 2167 written = out->size - out->use; 2168 2169 if (written > 0) 2170 written--; /* Gennady: count '/0' */ 2171 2172 /* 2173 * First specific handling of in = NULL, i.e. the initialization call 2174 */ 2175 if (in == NULL) { 2176 toconv = 0; 2177 if (handler->output != NULL) { 2178 ret = handler->output(&out->content[out->use], &written, 2179 NULL, &toconv); 2180 if (ret >= 0) { /* Gennady: check return value */ 2181 out->use += written; 2182 out->content[out->use] = 0; 2183 } 2184 } 2185 #ifdef LIBXML_ICONV_ENABLED 2186 else if (handler->iconv_out != NULL) { 2187 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2188 &written, NULL, &toconv); 2189 out->use += written; 2190 out->content[out->use] = 0; 2191 } 2192 #endif /* LIBXML_ICONV_ENABLED */ 2193 #ifdef LIBXML_ICU_ENABLED 2194 else if (handler->uconv_out != NULL) { 2195 ret = xmlUconvWrapper(handler->uconv_out, 0, 2196 &out->content[out->use], 2197 &written, NULL, &toconv); 2198 out->use += written; 2199 out->content[out->use] = 0; 2200 } 2201 #endif /* LIBXML_ICU_ENABLED */ 2202 #ifdef DEBUG_ENCODING 2203 xmlGenericError(xmlGenericErrorContext, 2204 "initialized encoder\n"); 2205 #endif 2206 return(0); 2207 } 2208 2209 /* 2210 * Conversion itself. 
2211 */ 2212 toconv = in->use; 2213 if (toconv == 0) 2214 return(0); 2215 if (toconv * 4 >= written) { 2216 xmlBufferGrow(out, toconv * 4); 2217 written = out->size - out->use - 1; 2218 } 2219 if (handler->output != NULL) { 2220 ret = handler->output(&out->content[out->use], &written, 2221 in->content, &toconv); 2222 if (written > 0) { 2223 xmlBufferShrink(in, toconv); 2224 out->use += written; 2225 writtentot += written; 2226 } 2227 out->content[out->use] = 0; 2228 } 2229 #ifdef LIBXML_ICONV_ENABLED 2230 else if (handler->iconv_out != NULL) { 2231 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2232 &written, in->content, &toconv); 2233 xmlBufferShrink(in, toconv); 2234 out->use += written; 2235 writtentot += written; 2236 out->content[out->use] = 0; 2237 if (ret == -1) { 2238 if (written > 0) { 2239 /* 2240 * Can be a limitation of iconv 2241 */ 2242 goto retry; 2243 } 2244 ret = -3; 2245 } 2246 } 2247 #endif /* LIBXML_ICONV_ENABLED */ 2248 #ifdef LIBXML_ICU_ENABLED 2249 else if (handler->uconv_out != NULL) { 2250 ret = xmlUconvWrapper(handler->uconv_out, 0, 2251 &out->content[out->use], 2252 &written, in->content, &toconv); 2253 xmlBufferShrink(in, toconv); 2254 out->use += written; 2255 writtentot += written; 2256 out->content[out->use] = 0; 2257 if (ret == -1) { 2258 if (written > 0) { 2259 /* 2260 * Can be a limitation of iconv 2261 */ 2262 goto retry; 2263 } 2264 ret = -3; 2265 } 2266 } 2267 #endif /* LIBXML_ICU_ENABLED */ 2268 else { 2269 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2270 "xmlCharEncOutFunc: no output function !\n", NULL); 2271 return(-1); 2272 } 2273 2274 if (ret >= 0) output += ret; 2275 2276 /* 2277 * Attempt to handle error cases 2278 */ 2279 switch (ret) { 2280 case 0: 2281 #ifdef DEBUG_ENCODING 2282 xmlGenericError(xmlGenericErrorContext, 2283 "converted %d bytes to %d bytes of output\n", 2284 toconv, written); 2285 #endif 2286 break; 2287 case -1: 2288 #ifdef DEBUG_ENCODING 2289 xmlGenericError(xmlGenericErrorContext, 2290 
"output conversion failed by lack of space\n"); 2291 #endif 2292 break; 2293 case -3: 2294 #ifdef DEBUG_ENCODING 2295 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2296 toconv, written, in->use); 2297 #endif 2298 break; 2299 case -2: { 2300 int len = in->use; 2301 const xmlChar *utf = (const xmlChar *) in->content; 2302 int cur; 2303 2304 cur = xmlGetUTF8Char(utf, &len); 2305 if (cur > 0) { 2306 xmlChar charref[20]; 2307 2308 #ifdef DEBUG_ENCODING 2309 xmlGenericError(xmlGenericErrorContext, 2310 "handling output conversion error\n"); 2311 xmlGenericError(xmlGenericErrorContext, 2312 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2313 in->content[0], in->content[1], 2314 in->content[2], in->content[3]); 2315 #endif 2316 /* 2317 * Removes the UTF8 sequence, and replace it by a charref 2318 * and continue the transcoding phase, hoping the error 2319 * did not mangle the encoder state. 2320 */ 2321 snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur); 2322 xmlBufferShrink(in, len); 2323 xmlBufferAddHead(in, charref, -1); 2324 2325 goto retry; 2326 } else { 2327 char buf[50]; 2328 2329 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2330 in->content[0], in->content[1], 2331 in->content[2], in->content[3]); 2332 buf[49] = 0; 2333 xmlEncodingErr(XML_I18N_CONV_FAILED, 2334 "output conversion failed due to conv error, bytes %s\n", 2335 buf); 2336 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2337 in->content[0] = ' '; 2338 } 2339 break; 2340 } 2341 } 2342 return(ret); 2343 } 2344 2345 /** 2346 * xmlCharEncCloseFunc: 2347 * @handler: char enconding transformation data structure 2348 * 2349 * Generic front-end for encoding handler close function 2350 * 2351 * Returns 0 if success, or -1 in case of error 2352 */ 2353 int 2354 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2355 int ret = 0; 2356 if (handler == NULL) return(-1); 2357 if (handler->name == NULL) return(-1); 2358 #ifdef LIBXML_ICONV_ENABLED 2359 /* 2360 
* Iconv handlers can be used only once, free the whole block. 2361 * and the associated icon resources. 2362 */ 2363 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 2364 if (handler->name != NULL) 2365 xmlFree(handler->name); 2366 handler->name = NULL; 2367 if (handler->iconv_out != NULL) { 2368 if (iconv_close(handler->iconv_out)) 2369 ret = -1; 2370 handler->iconv_out = NULL; 2371 } 2372 if (handler->iconv_in != NULL) { 2373 if (iconv_close(handler->iconv_in)) 2374 ret = -1; 2375 handler->iconv_in = NULL; 2376 } 2377 xmlFree(handler); 2378 } 2379 #endif /* LIBXML_ICONV_ENABLED */ 2380 #ifdef LIBXML_ICU_ENABLED 2381 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { 2382 if (handler->name != NULL) 2383 xmlFree(handler->name); 2384 handler->name = NULL; 2385 if (handler->uconv_out != NULL) { 2386 closeIcuConverter(handler->uconv_out); 2387 handler->uconv_out = NULL; 2388 } 2389 if (handler->uconv_in != NULL) { 2390 closeIcuConverter(handler->uconv_in); 2391 handler->uconv_in = NULL; 2392 } 2393 xmlFree(handler); 2394 } 2395 #endif 2396 #ifdef DEBUG_ENCODING 2397 if (ret) 2398 xmlGenericError(xmlGenericErrorContext, 2399 "failed to close the encoding handler\n"); 2400 else 2401 xmlGenericError(xmlGenericErrorContext, 2402 "closed the encoding handler\n"); 2403 #endif 2404 2405 return(ret); 2406 } 2407 2408 /** 2409 * xmlByteConsumed: 2410 * @ctxt: an XML parser context 2411 * 2412 * This function provides the current index of the parser relative 2413 * to the start of the current entity. This function is computed in 2414 * bytes from the beginning starting at zero and finishing at the 2415 * size in byte of the file if parsing a file. The function is 2416 * of constant cost if the input is UTF-8 but can be costly if run 2417 * on non-UTF-8 input. 2418 * 2419 * Returns the index in bytes from the beginning of the entity or -1 2420 * in case the index could not be computed. 
2421 */ 2422 long 2423 xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2424 xmlParserInputPtr in; 2425 2426 if (ctxt == NULL) return(-1); 2427 in = ctxt->input; 2428 if (in == NULL) return(-1); 2429 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2430 unsigned int unused = 0; 2431 xmlCharEncodingHandler * handler = in->buf->encoder; 2432 /* 2433 * Encoding conversion, compute the number of unused original 2434 * bytes from the input not consumed and substract that from 2435 * the raw consumed value, this is not a cheap operation 2436 */ 2437 if (in->end - in->cur > 0) { 2438 unsigned char convbuf[32000]; 2439 const unsigned char *cur = (const unsigned char *)in->cur; 2440 int toconv = in->end - in->cur, written = 32000; 2441 2442 int ret; 2443 2444 if (handler->output != NULL) { 2445 do { 2446 toconv = in->end - cur; 2447 written = 32000; 2448 ret = handler->output(&convbuf[0], &written, 2449 cur, &toconv); 2450 if (ret == -1) return(-1); 2451 unused += written; 2452 cur += toconv; 2453 } while (ret == -2); 2454 #ifdef LIBXML_ICONV_ENABLED 2455 } else if (handler->iconv_out != NULL) { 2456 do { 2457 toconv = in->end - cur; 2458 written = 32000; 2459 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2460 &written, cur, &toconv); 2461 if (ret < 0) { 2462 if (written > 0) 2463 ret = -2; 2464 else 2465 return(-1); 2466 } 2467 unused += written; 2468 cur += toconv; 2469 } while (ret == -2); 2470 #endif 2471 #ifdef LIBXML_ICU_ENABLED 2472 } else if (handler->uconv_out != NULL) { 2473 do { 2474 toconv = in->end - cur; 2475 written = 32000; 2476 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], 2477 &written, cur, &toconv); 2478 if (ret < 0) { 2479 if (written > 0) 2480 ret = -2; 2481 else 2482 return(-1); 2483 } 2484 unused += written; 2485 cur += toconv; 2486 } while (ret == -2); 2487 } else { 2488 /* could not find a converter */ 2489 return(-1); 2490 } 2491 } 2492 if (in->buf->rawconsumed < unused) 2493 return(-1); 2494 return(in->buf->rawconsumed - 
unused); 2495 } 2496 return(in->consumed + (in->cur - in->base)); 2497 } 2498 #endif 2499 2500 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 2501 #ifdef LIBXML_ISO8859X_ENABLED 2502 2503 /** 2504 * UTF8ToISO8859x: 2505 * @out: a pointer to an array of bytes to store the result 2506 * @outlen: the length of @out 2507 * @in: a pointer to an array of UTF-8 chars 2508 * @inlen: the length of @in 2509 * @xlattable: the 2-level transcoding table 2510 * 2511 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* 2512 * block of chars out. 2513 * 2514 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 2515 * The value of @inlen after return is the number of octets consumed 2516 * as the return value is positive, else unpredictable. 2517 * The value of @outlen after return is the number of ocetes consumed. 2518 */ 2519 static int 2520 UTF8ToISO8859x(unsigned char* out, int *outlen, 2521 const unsigned char* in, int *inlen, 2522 unsigned char const *xlattable) { 2523 const unsigned char* outstart = out; 2524 const unsigned char* inend; 2525 const unsigned char* instart = in; 2526 2527 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 2528 (xlattable == NULL)) 2529 return(-1); 2530 if (in == NULL) { 2531 /* 2532 * initialization nothing to do 2533 */ 2534 *outlen = 0; 2535 *inlen = 0; 2536 return(0); 2537 } 2538 inend = in + (*inlen); 2539 while (in < inend) { 2540 unsigned char d = *in++; 2541 if (d < 0x80) { 2542 *out++ = d; 2543 } else if (d < 0xC0) { 2544 /* trailing byte in leading position */ 2545 *outlen = out - outstart; 2546 *inlen = in - instart - 1; 2547 return(-2); 2548 } else if (d < 0xE0) { 2549 unsigned char c; 2550 if (!(in < inend)) { 2551 /* trailing byte not in input buffer */ 2552 *outlen = out - outstart; 2553 *inlen = in - instart - 1; 2554 return(-2); 2555 } 2556 c = *in++; 2557 if ((c & 0xC0) != 0x80) { 2558 /* not a trailing byte */ 2559 *outlen = out - outstart; 2560 *inlen = in - 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops when @in is exhausted, when @out
 * is full, or at the first sequence which is malformed, truncated or has
 * no equivalent in the target character set.
 *
 * Returns the number of bytes written to @out if success (0 for the
 *     initialization call where @in is NULL), -2 if the transcoding
 *     fails, or -1 if a mandatory argument is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     on a -2 return it stops just before the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* end of the last sequence which was completely transcoded */
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* every sequence yields exactly one byte, so one free slot is enough */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            /* ASCII is copied verbatim */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto failed;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto failed;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64 entries block, second the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                goto failed;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto failed;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto failed;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                           xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
        } else {
            /* cannot transcode >= U+010000 */
            goto failed;
        }
        *out++ = d;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report progress up to, but excluding, the offending sequence */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
/**
 * ISO8859xToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the 128 entries table giving the Unicode code point of
 *                 each byte in the 0x80-0xFF range, 0 meaning undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops when @in is exhausted or when the
 * next character would not fit in the remaining space of @out.
 *
 * Returns the number of bytes written to @out if success, or -1 if an
 *     argument is NULL or an undefined code point is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;

    /* @in is only dereferenced while in < inend: no read past the input */
    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            /* ASCII is copied verbatim */
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else {
            /* a non ASCII char needs at least two bytes of output */
            if (outend - out < 2)
                break;
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                status = -1;
                break;
            }
            if (c < 0x800) {
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                /* three bytes sequence, make sure all of it fits */
                if (outend - out < 3)
                    break;
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return((status < 0) ? status : *outlen);
}
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 2698 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 2699 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 2700 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 2701 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 2702 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 2703 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 2704 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 2705 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 2706 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 2707 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 2708 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 2709 }; 2710 2711 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 2712 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2714 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2715 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2717 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2718 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2719 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2720 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2721 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2722 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2723 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 2724 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 2725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 2727 
"\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2728 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 2729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2731 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 2732 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 2733 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 2734 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 2735 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2736 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 2737 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 2738 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 2739 }; 2740 2741 static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 2742 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2743 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2744 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2745 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2746 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 2747 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 2748 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 2749 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 2750 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 2751 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 2752 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 2753 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 2754 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 2755 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 2756 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 2757 0x011d, 0x00f9, 
0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 2758 }; 2759 2760 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 2761 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 2762 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2763 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2764 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2765 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2766 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2767 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2768 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2769 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2770 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 2771 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 2772 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 2773 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 2774 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 2775 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2776 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2777 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 2778 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2780 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2781 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2782 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2784 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2785 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 2786 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 2787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 2788 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2789 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 2790 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2791 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 2792 }; 2793 2794 static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 2795 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2796 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2797 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2798 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2799 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 2800 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 2801 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 2802 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 2803 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 2804 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 2805 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 2806 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 2807 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 2808 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 2809 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 2810 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 2811 }; 2812 2813 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 2814 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 2815 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2817 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2818 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2819 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2820 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2821 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2822 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2823 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 2824 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 2825 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 2826 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 2827 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 2828 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 2829 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 2830 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 2831 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 2832 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 2833 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 2834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 2835 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2837 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 2838 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 2839 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 2840 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 2841 }; 2842 2843 static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 2844 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2845 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2846 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2847 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 
0x009e, 0x009f, 2848 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 2849 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 2850 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 2851 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 2852 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 2853 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 2854 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 2855 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 2856 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 2857 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 2858 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 2859 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 2860 }; 2861 2862 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 2863 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2864 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2865 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2866 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2867 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2868 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2869 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2870 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2871 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2872 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 2873 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2874 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 2875 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 2876 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2877 
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 2878 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2879 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2881 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2882 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2883 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2884 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2885 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2886 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2887 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2888 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2890 }; 2891 2892 static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 2893 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2894 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2895 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2896 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2897 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 2898 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 2899 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2900 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 2901 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 2902 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 2903 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 2904 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2905 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 2906 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 2907 0x0650, 0x0651, 
0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2908 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 2909 }; 2910 2911 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 2912 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2913 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 2914 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2915 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2918 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2919 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2920 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2921 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 2922 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2923 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2924 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2925 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2926 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2927 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 2928 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 2929 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2930 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 2931 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2932 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2933 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2934 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2935 }; 2936 2937 static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 2938 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 
0x0086, 0x0087, 2939 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2940 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2941 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2942 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 2943 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 2944 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 2945 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 2946 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 2947 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 2948 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 2949 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 2950 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 2951 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 2952 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 2953 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 2954 }; 2955 2956 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 2957 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2959 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2960 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2962 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2963 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2964 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 2965 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 2966 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 2967 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 2968 
"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2969 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2970 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2971 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2972 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2973 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 2974 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2975 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2976 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2977 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2978 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2979 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2980 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 2981 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 2982 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 2983 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 2984 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 2985 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2987 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 2988 }; 2989 2990 static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 2991 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 2992 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 2993 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 2994 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 2995 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 2996 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 2997 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 
2998 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 2999 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3000 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3001 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3002 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3003 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3004 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3005 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3006 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3007 }; 3008 3009 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3010 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3011 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3012 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3013 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3014 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3015 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3016 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3017 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3018 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3019 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3020 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3021 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3022 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3023 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3024 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3025 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3026 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3027 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3028 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3029 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3030 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3031 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3032 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3033 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3034 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3035 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3036 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3037 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3038 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3039 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3040 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3041 }; 3042 3043 static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3044 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3045 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3046 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3047 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3048 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3049 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3050 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3051 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3052 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3053 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3054 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3055 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3056 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3057 0x00e8, 
0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3058 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3059 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3060 }; 3061 3062 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3063 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3064 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3065 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3066 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3067 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3068 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3069 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3070 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3071 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3072 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3073 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3074 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3075 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3076 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3077 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3078 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3079 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3080 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3081 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3082 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3083 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3084 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3085 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3086 }; 3087 3088 static unsigned short const 
xmlunicodetable_ISO8859_10 [128] = { 3089 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3090 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3091 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3092 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3093 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3094 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3095 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3096 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3097 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3098 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3099 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3100 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3101 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3102 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3103 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3104 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 3105 }; 3106 3107 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3108 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3110 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3114 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3115 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3116 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3117 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3118 
"\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3119 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3120 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3121 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3122 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3123 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3125 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3126 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3127 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3132 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3135 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3136 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3137 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3138 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3139 }; 3140 3141 static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3142 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3143 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3144 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3145 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3146 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 3147 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 
0x0e0f, 3148 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3149 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3150 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3151 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3152 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3153 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3154 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3155 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3156 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3157 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3158 }; 3159 3160 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3161 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3163 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3168 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3169 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3170 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3172 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3173 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3174 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3175 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3176 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3177 
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3178 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3179 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3180 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3181 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3182 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3183 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3184 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3185 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3186 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3187 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3188 }; 3189 3190 static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3191 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3192 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3193 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3194 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3195 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3196 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3197 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3198 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3199 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3200 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3201 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3202 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3203 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3204 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3205 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3206 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3207 }; 3208 3209 static 
unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3210 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3211 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3212 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3217 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3218 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3219 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3220 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3221 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3223 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3224 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3226 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3229 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3230 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3231 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3232 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3233 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3234 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3235 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3236 
"\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3237 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3238 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3239 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3240 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3241 }; 3242 3243 static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3244 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3245 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3246 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3247 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3248 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3249 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3250 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3251 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3252 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3253 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3254 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3255 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3256 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3257 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3258 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3259 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3260 }; 3261 3262 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3263 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3265 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3266 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3267 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3268 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3270 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3271 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3272 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3273 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3277 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3278 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3279 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3280 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3281 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3282 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3283 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3284 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3285 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3286 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3292 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3293 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3294 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3295 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3297 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3298 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3299 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3300 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3301 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3302 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3303 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3304 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3305 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3306 }; 3307 3308 static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3309 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3310 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3311 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3312 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3313 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3314 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3315 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3316 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3317 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3318 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3319 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3320 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3321 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3322 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3323 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3324 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3325 }; 3326 3327 static 
unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3328 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3330 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3335 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3336 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3337 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3338 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3339 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3340 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3341 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3342 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3343 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3344 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3345 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3346 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3347 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3348 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3349 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3350 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3351 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3352 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3353 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3354 
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3355 }; 3356 3357 static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3358 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3359 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3360 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3361 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3362 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3363 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3364 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3365 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3366 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3367 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3368 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3369 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3370 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3371 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3372 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3373 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3374 }; 3375 3376 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3377 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3379 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3384 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3385 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3386 
"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3387 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3388 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3389 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3390 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3391 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3392 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3393 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3394 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3395 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3396 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3397 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3398 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3399 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3400 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3401 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3402 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3403 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3404 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3405 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3407 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3408 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3409 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3410 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3412 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3413 
"\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3414 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3415 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3416 }; 3417 3418 3419 /* 3420 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3421 */ 3422 3423 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3424 const unsigned char* in, int *inlen) { 3425 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3426 } 3427 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3428 const unsigned char* in, int *inlen) { 3429 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3430 } 3431 3432 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3433 const unsigned char* in, int *inlen) { 3434 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3435 } 3436 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3437 const unsigned char* in, int *inlen) { 3438 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3439 } 3440 3441 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3442 const unsigned char* in, int *inlen) { 3443 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3444 } 3445 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3446 const unsigned char* in, int *inlen) { 3447 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3448 } 3449 3450 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3451 const unsigned char* in, int *inlen) { 3452 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3453 } 3454 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3455 const unsigned char* in, int *inlen) { 3456 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3457 } 3458 3459 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3460 const 
unsigned char* in, int *inlen) { 3461 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3462 } 3463 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3464 const unsigned char* in, int *inlen) { 3465 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3466 } 3467 3468 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3469 const unsigned char* in, int *inlen) { 3470 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3471 } 3472 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3473 const unsigned char* in, int *inlen) { 3474 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3475 } 3476 3477 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3478 const unsigned char* in, int *inlen) { 3479 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3480 } 3481 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3482 const unsigned char* in, int *inlen) { 3483 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3484 } 3485 3486 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3487 const unsigned char* in, int *inlen) { 3488 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3489 } 3490 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3491 const unsigned char* in, int *inlen) { 3492 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3493 } 3494 3495 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3496 const unsigned char* in, int *inlen) { 3497 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 3498 } 3499 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 3500 const unsigned char* in, int *inlen) { 3501 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 3502 } 3503 3504 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 
3505 const unsigned char* in, int *inlen) { 3506 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 3507 } 3508 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 3509 const unsigned char* in, int *inlen) { 3510 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 3511 } 3512 3513 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 3514 const unsigned char* in, int *inlen) { 3515 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 3516 } 3517 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 3518 const unsigned char* in, int *inlen) { 3519 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 3520 } 3521 3522 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 3523 const unsigned char* in, int *inlen) { 3524 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 3525 } 3526 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 3527 const unsigned char* in, int *inlen) { 3528 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 3529 } 3530 3531 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 3532 const unsigned char* in, int *inlen) { 3533 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 3534 } 3535 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 3536 const unsigned char* in, int *inlen) { 3537 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 3538 } 3539 3540 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 3541 const unsigned char* in, int *inlen) { 3542 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 3543 } 3544 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 3545 const unsigned char* in, int *inlen) { 3546 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 3547 } 3548 3549 static void 3550 
xmlRegisterCharEncodingHandlersISO8859x (void) { 3551 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 3552 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 3553 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 3554 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 3555 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 3556 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 3557 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 3558 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 3559 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 3560 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 3561 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 3562 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 3563 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 3564 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 3565 } 3566 3567 #endif 3568 #endif 3569 3570 #define bottom_encoding 3571 #include "elfgcchack.h" 3572 3573