1 /* 2 * encoding.c : implements the encoding conversion functions needed for XML 3 * 4 * Related specs: 5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 8 * [ISO-8859-1] ISO Latin-1 characters codes. 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10 * Worldwide Character Encoding -- Version 1.0", Addison- 11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12 * described in Unicode Technical Report #4. 13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14 * Information Interchange, ANSI X3.4-1986. 15 * 16 * See Copyright for the status of this software. 17 * 18 * daniel (at) veillard.com 19 * 20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst (at) w3.org> 21 */ 22 23 #define IN_LIBXML 24 #include "libxml.h" 25 26 #include <string.h> 27 #include <limits.h> 28 29 #ifdef HAVE_CTYPE_H 30 #include <ctype.h> 31 #endif 32 #ifdef HAVE_STDLIB_H 33 #include <stdlib.h> 34 #endif 35 #ifdef LIBXML_ICONV_ENABLED 36 #ifdef HAVE_ERRNO_H 37 #include <errno.h> 38 #endif 39 #endif 40 #include <libxml/encoding.h> 41 #include <libxml/xmlmemory.h> 42 #ifdef LIBXML_HTML_ENABLED 43 #include <libxml/HTMLparser.h> 44 #endif 45 #include <libxml/globals.h> 46 #include <libxml/xmlerror.h> 47 48 #include "buf.h" 49 #include "enc.h" 50 51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; 53 54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; 55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; 56 struct _xmlCharEncodingAlias { 57 const char *name; 58 const char *alias; 59 }; 60 61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; 62 static int xmlCharEncodingAliasesNb = 0; 63 static int xmlCharEncodingAliasesMax = 0; 64 65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) 66 #if 0 67 #define 
DEBUG_ENCODING /* Define this to get encoding traces */ 68 #endif 69 #else 70 #ifdef LIBXML_ISO8859X_ENABLED 71 static void xmlRegisterCharEncodingHandlersISO8859x (void); 72 #endif 73 #endif 74 75 static int xmlLittleEndian = 1; 76 77 /** 78 * xmlEncodingErrMemory: 79 * @extra: extra informations 80 * 81 * Handle an out of memory condition 82 */ 83 static void 84 xmlEncodingErrMemory(const char *extra) 85 { 86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra); 87 } 88 89 /** 90 * xmlErrEncoding: 91 * @error: the error number 92 * @msg: the error message 93 * 94 * n encoding error 95 */ 96 static void 97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) 98 { 99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, 100 XML_FROM_I18N, error, XML_ERR_FATAL, 101 NULL, 0, val, NULL, NULL, 0, 0, msg, val); 102 } 103 104 #ifdef LIBXML_ICU_ENABLED 105 static uconv_t* 106 openIcuConverter(const char* name, int toUnicode) 107 { 108 UErrorCode status = U_ZERO_ERROR; 109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); 110 if (conv == NULL) 111 return NULL; 112 113 conv->uconv = ucnv_open(name, &status); 114 if (U_FAILURE(status)) 115 goto error; 116 117 status = U_ZERO_ERROR; 118 if (toUnicode) { 119 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 120 NULL, NULL, NULL, &status); 121 } 122 else { 123 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 124 NULL, NULL, NULL, &status); 125 } 126 if (U_FAILURE(status)) 127 goto error; 128 129 status = U_ZERO_ERROR; 130 conv->utf8 = ucnv_open("UTF-8", &status); 131 if (U_SUCCESS(status)) 132 return conv; 133 134 error: 135 if (conv->uconv) 136 ucnv_close(conv->uconv); 137 xmlFree(conv); 138 return NULL; 139 } 140 141 static void 142 closeIcuConverter(uconv_t *conv) 143 { 144 if (conv != NULL) { 145 ucnv_close(conv->uconv); 146 ucnv_close(conv->utf8); 147 xmlFree(conv); 148 } 149 } 150 #endif /* LIBXML_ICU_ENABLED */ 151 152 
/************************************************************************ 153 * * 154 * Conversions To/From UTF8 encoding * 155 * * 156 ************************************************************************/ 157 158 /** 159 * asciiToUTF8: 160 * @out: a pointer to an array of bytes to store the result 161 * @outlen: the length of @out 162 * @in: a pointer to an array of ASCII chars 163 * @inlen: the length of @in 164 * 165 * Take a block of ASCII chars in and try to convert it to an UTF-8 166 * block of chars out. 167 * Returns 0 if success, or -1 otherwise 168 * The value of @inlen after return is the number of octets consumed 169 * if the return value is positive, else unpredictable. 170 * The value of @outlen after return is the number of octets consumed. 171 */ 172 static int 173 asciiToUTF8(unsigned char* out, int *outlen, 174 const unsigned char* in, int *inlen) { 175 unsigned char* outstart = out; 176 const unsigned char* base = in; 177 const unsigned char* processed = in; 178 unsigned char* outend = out + *outlen; 179 const unsigned char* inend; 180 unsigned int c; 181 182 inend = in + (*inlen); 183 while ((in < inend) && (out - outstart + 5 < *outlen)) { 184 c= *in++; 185 186 if (out >= outend) 187 break; 188 if (c < 0x80) { 189 *out++ = c; 190 } else { 191 *outlen = out - outstart; 192 *inlen = processed - base; 193 return(-1); 194 } 195 196 processed = (const unsigned char*) in; 197 } 198 *outlen = out - outstart; 199 *inlen = processed - base; 200 return(*outlen); 201 } 202 203 #ifdef LIBXML_OUTPUT_ENABLED 204 /** 205 * UTF8Toascii: 206 * @out: a pointer to an array of bytes to store the result 207 * @outlen: the length of @out 208 * @in: a pointer to an array of UTF-8 chars 209 * @inlen: the length of @in 210 * 211 * Take a block of UTF-8 chars in and try to convert it to an ASCII 212 * block of chars out. 
213 * 214 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 215 * The value of @inlen after return is the number of octets consumed 216 * if the return value is positive, else unpredictable. 217 * The value of @outlen after return is the number of octets consumed. 218 */ 219 static int 220 UTF8Toascii(unsigned char* out, int *outlen, 221 const unsigned char* in, int *inlen) { 222 const unsigned char* processed = in; 223 const unsigned char* outend; 224 const unsigned char* outstart = out; 225 const unsigned char* instart = in; 226 const unsigned char* inend; 227 unsigned int c, d; 228 int trailing; 229 230 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 231 if (in == NULL) { 232 /* 233 * initialization nothing to do 234 */ 235 *outlen = 0; 236 *inlen = 0; 237 return(0); 238 } 239 inend = in + (*inlen); 240 outend = out + (*outlen); 241 while (in < inend) { 242 d = *in++; 243 if (d < 0x80) { c= d; trailing= 0; } 244 else if (d < 0xC0) { 245 /* trailing byte in leading position */ 246 *outlen = out - outstart; 247 *inlen = processed - instart; 248 return(-2); 249 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 250 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 251 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 252 else { 253 /* no chance for this in Ascii */ 254 *outlen = out - outstart; 255 *inlen = processed - instart; 256 return(-2); 257 } 258 259 if (inend - in < trailing) { 260 break; 261 } 262 263 for ( ; trailing; trailing--) { 264 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 265 break; 266 c <<= 6; 267 c |= d & 0x3F; 268 } 269 270 /* assertion: c is a single UTF-4 value */ 271 if (c < 0x80) { 272 if (out >= outend) 273 break; 274 *out++ = c; 275 } else { 276 /* no chance for this in Ascii */ 277 *outlen = out - outstart; 278 *inlen = processed - instart; 279 return(-2); 280 } 281 processed = in; 282 } 283 *outlen = out - outstart; 284 *inlen = processed - instart; 285 return(*outlen); 286 } 287 #endif /* 
LIBXML_OUTPUT_ENABLED */

/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, the other ones are
 * expanded to a two byte sequence.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    const unsigned char *src = in;
    unsigned char *dstEnd;
    const unsigned char *srcEnd;
    const unsigned char *runEnd;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstEnd = out + *outlen;
    srcEnd = in + (*inlen);
    runEnd = srcEnd;

    /* a non-ASCII char needs two bytes of room */
    while ((src < srcEnd) && ((dstEnd - dst) > 1)) {
        if (*src >= 0x80) {
            unsigned char ch = *src++;

            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        /* never copy more ASCII bytes than the output can hold */
        if ((runEnd - src) > (dstEnd - dst))
            runEnd = src + (dstEnd - dst);
        while ((src < runEnd) && (*src < 0x80))
            *dst++ = *src++;
    }
    /* one byte of room may be left: enough for a single ASCII char */
    if ((src < srcEnd) && (dst < dstEnd) && (*src < 0x80))
        *dst++ = *src++;

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}

/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @in in UTF-8 chars
 *
 * No op copy operation for UTF8 handling.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL
 *     or a length is negative.
348 * The value of *inlen after return is the number of octets consumed 349 * if the return value is positive, else unpredictable. 350 */ 351 static int 352 UTF8ToUTF8(unsigned char* out, int *outlen, 353 const unsigned char* inb, int *inlenb) 354 { 355 int len; 356 357 if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL)) 358 return(-1); 359 if (*outlen > *inlenb) { 360 len = *inlenb; 361 } else { 362 len = *outlen; 363 } 364 if (len < 0) 365 return(-1); 366 367 memcpy(out, inb, len); 368 369 *outlen = len; 370 *inlenb = len; 371 return(*outlen); 372 } 373 374 375 #ifdef LIBXML_OUTPUT_ENABLED 376 /** 377 * UTF8Toisolat1: 378 * @out: a pointer to an array of bytes to store the result 379 * @outlen: the length of @out 380 * @in: a pointer to an array of UTF-8 chars 381 * @inlen: the length of @in 382 * 383 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 384 * block of chars out. 385 * 386 * Returns the number of bytes written if success, -2 if the transcoding fails, 387 or -1 otherwise 388 * The value of @inlen after return is the number of octets consumed 389 * if the return value is positive, else unpredictable. 390 * The value of @outlen after return is the number of octets consumed. 
391 */ 392 int 393 UTF8Toisolat1(unsigned char* out, int *outlen, 394 const unsigned char* in, int *inlen) { 395 const unsigned char* processed = in; 396 const unsigned char* outend; 397 const unsigned char* outstart = out; 398 const unsigned char* instart = in; 399 const unsigned char* inend; 400 unsigned int c, d; 401 int trailing; 402 403 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 404 if (in == NULL) { 405 /* 406 * initialization nothing to do 407 */ 408 *outlen = 0; 409 *inlen = 0; 410 return(0); 411 } 412 inend = in + (*inlen); 413 outend = out + (*outlen); 414 while (in < inend) { 415 d = *in++; 416 if (d < 0x80) { c= d; trailing= 0; } 417 else if (d < 0xC0) { 418 /* trailing byte in leading position */ 419 *outlen = out - outstart; 420 *inlen = processed - instart; 421 return(-2); 422 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 423 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 424 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 425 else { 426 /* no chance for this in IsoLat1 */ 427 *outlen = out - outstart; 428 *inlen = processed - instart; 429 return(-2); 430 } 431 432 if (inend - in < trailing) { 433 break; 434 } 435 436 for ( ; trailing; trailing--) { 437 if (in >= inend) 438 break; 439 if (((d= *in++) & 0xC0) != 0x80) { 440 *outlen = out - outstart; 441 *inlen = processed - instart; 442 return(-2); 443 } 444 c <<= 6; 445 c |= d & 0x3F; 446 } 447 448 /* assertion: c is a single UTF-4 value */ 449 if (c <= 0xFF) { 450 if (out >= outend) 451 break; 452 *out++ = c; 453 } else { 454 /* no chance for this in IsoLat1 */ 455 *outlen = out - outstart; 456 *inlen = processed - instart; 457 return(-2); 458 } 459 processed = in; 460 } 461 *outlen = out - outstart; 462 *inlen = processed - instart; 463 return(*outlen); 464 } 465 #endif /* LIBXML_OUTPUT_ENABLED */ 466 467 /** 468 * UTF16LEToUTF8: 469 * @out: a pointer to an array of bytes to store the result 470 * @outlen: the length of @out 471 * @inb: a pointer to an array of 
UTF-16LE passwd as a byte array 472 * @inlenb: the length of @in in UTF-16LE chars 473 * 474 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 475 * block of chars out. This function assumes the endian property 476 * is the same between the native type of this machine and the 477 * inputed one. 478 * 479 * Returns the number of bytes written, or -1 if lack of space, or -2 480 * if the transcoding fails (if *in is not a valid utf16 string) 481 * The value of *inlen after return is the number of octets consumed 482 * if the return value is positive, else unpredictable. 483 */ 484 static int 485 UTF16LEToUTF8(unsigned char* out, int *outlen, 486 const unsigned char* inb, int *inlenb) 487 { 488 unsigned char* outstart = out; 489 const unsigned char* processed = inb; 490 unsigned char* outend = out + *outlen; 491 unsigned short* in = (unsigned short*) inb; 492 unsigned short* inend; 493 unsigned int c, d, inlen; 494 unsigned char *tmp; 495 int bits; 496 497 if ((*inlenb % 2) == 1) 498 (*inlenb)--; 499 inlen = *inlenb / 2; 500 inend = in + inlen; 501 while ((in < inend) && (out - outstart + 5 < *outlen)) { 502 if (xmlLittleEndian) { 503 c= *in++; 504 } else { 505 tmp = (unsigned char *) in; 506 c = *tmp++; 507 c = c | (((unsigned int)*tmp) << 8); 508 in++; 509 } 510 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 511 if (in >= inend) { /* (in > inend) shouldn't happens */ 512 break; 513 } 514 if (xmlLittleEndian) { 515 d = *in++; 516 } else { 517 tmp = (unsigned char *) in; 518 d = *tmp++; 519 d = d | (((unsigned int)*tmp) << 8); 520 in++; 521 } 522 if ((d & 0xFC00) == 0xDC00) { 523 c &= 0x03FF; 524 c <<= 10; 525 c |= d & 0x03FF; 526 c += 0x10000; 527 } 528 else { 529 *outlen = out - outstart; 530 *inlenb = processed - inb; 531 return(-2); 532 } 533 } 534 535 /* assertion: c is a single UTF-4 value */ 536 if (out >= outend) 537 break; 538 if (c < 0x80) { *out++= c; bits= -6; } 539 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 540 
else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 541 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 542 543 for ( ; bits >= 0; bits-= 6) { 544 if (out >= outend) 545 break; 546 *out++= ((c >> bits) & 0x3F) | 0x80; 547 } 548 processed = (const unsigned char*) in; 549 } 550 *outlen = out - outstart; 551 *inlenb = processed - inb; 552 return(*outlen); 553 } 554 555 #ifdef LIBXML_OUTPUT_ENABLED 556 /** 557 * UTF8ToUTF16LE: 558 * @outb: a pointer to an array of bytes to store the result 559 * @outlen: the length of @outb 560 * @in: a pointer to an array of UTF-8 chars 561 * @inlen: the length of @in 562 * 563 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE 564 * block of chars out. 565 * 566 * Returns the number of bytes written, or -1 if lack of space, or -2 567 * if the transcoding failed. 568 */ 569 static int 570 UTF8ToUTF16LE(unsigned char* outb, int *outlen, 571 const unsigned char* in, int *inlen) 572 { 573 unsigned short* out = (unsigned short*) outb; 574 const unsigned char* processed = in; 575 const unsigned char *const instart = in; 576 unsigned short* outstart= out; 577 unsigned short* outend; 578 const unsigned char* inend; 579 unsigned int c, d; 580 int trailing; 581 unsigned char *tmp; 582 unsigned short tmp1, tmp2; 583 584 /* UTF16LE encoding has no BOM */ 585 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 586 if (in == NULL) { 587 *outlen = 0; 588 *inlen = 0; 589 return(0); 590 } 591 inend= in + *inlen; 592 outend = out + (*outlen / 2); 593 while (in < inend) { 594 d= *in++; 595 if (d < 0x80) { c= d; trailing= 0; } 596 else if (d < 0xC0) { 597 /* trailing byte in leading position */ 598 *outlen = (out - outstart) * 2; 599 *inlen = processed - instart; 600 return(-2); 601 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 602 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 603 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 604 else { 605 /* no chance for this in UTF-16 */ 606 
*outlen = (out - outstart) * 2; 607 *inlen = processed - instart; 608 return(-2); 609 } 610 611 if (inend - in < trailing) { 612 break; 613 } 614 615 for ( ; trailing; trailing--) { 616 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 617 break; 618 c <<= 6; 619 c |= d & 0x3F; 620 } 621 622 /* assertion: c is a single UTF-4 value */ 623 if (c < 0x10000) { 624 if (out >= outend) 625 break; 626 if (xmlLittleEndian) { 627 *out++ = c; 628 } else { 629 tmp = (unsigned char *) out; 630 *tmp = c ; 631 *(tmp + 1) = c >> 8 ; 632 out++; 633 } 634 } 635 else if (c < 0x110000) { 636 if (out+1 >= outend) 637 break; 638 c -= 0x10000; 639 if (xmlLittleEndian) { 640 *out++ = 0xD800 | (c >> 10); 641 *out++ = 0xDC00 | (c & 0x03FF); 642 } else { 643 tmp1 = 0xD800 | (c >> 10); 644 tmp = (unsigned char *) out; 645 *tmp = (unsigned char) tmp1; 646 *(tmp + 1) = tmp1 >> 8; 647 out++; 648 649 tmp2 = 0xDC00 | (c & 0x03FF); 650 tmp = (unsigned char *) out; 651 *tmp = (unsigned char) tmp2; 652 *(tmp + 1) = tmp2 >> 8; 653 out++; 654 } 655 } 656 else 657 break; 658 processed = in; 659 } 660 *outlen = (out - outstart) * 2; 661 *inlen = processed - instart; 662 return(*outlen); 663 } 664 665 /** 666 * UTF8ToUTF16: 667 * @outb: a pointer to an array of bytes to store the result 668 * @outlen: the length of @outb 669 * @in: a pointer to an array of UTF-8 chars 670 * @inlen: the length of @in 671 * 672 * Take a block of UTF-8 chars in and try to convert it to an UTF-16 673 * block of chars out. 674 * 675 * Returns the number of bytes written, or -1 if lack of space, or -2 676 * if the transcoding failed. 
677 */ 678 static int 679 UTF8ToUTF16(unsigned char* outb, int *outlen, 680 const unsigned char* in, int *inlen) 681 { 682 if (in == NULL) { 683 /* 684 * initialization, add the Byte Order Mark for UTF-16LE 685 */ 686 if (*outlen >= 2) { 687 outb[0] = 0xFF; 688 outb[1] = 0xFE; 689 *outlen = 2; 690 *inlen = 0; 691 #ifdef DEBUG_ENCODING 692 xmlGenericError(xmlGenericErrorContext, 693 "Added FFFE Byte Order Mark\n"); 694 #endif 695 return(2); 696 } 697 *outlen = 0; 698 *inlen = 0; 699 return(0); 700 } 701 return (UTF8ToUTF16LE(outb, outlen, in, inlen)); 702 } 703 #endif /* LIBXML_OUTPUT_ENABLED */ 704 705 /** 706 * UTF16BEToUTF8: 707 * @out: a pointer to an array of bytes to store the result 708 * @outlen: the length of @out 709 * @inb: a pointer to an array of UTF-16 passed as a byte array 710 * @inlenb: the length of @in in UTF-16 chars 711 * 712 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 713 * block of chars out. This function assumes the endian property 714 * is the same between the native type of this machine and the 715 * inputed one. 716 * 717 * Returns the number of bytes written, or -1 if lack of space, or -2 718 * if the transcoding fails (if *in is not a valid utf16 string) 719 * The value of *inlen after return is the number of octets consumed 720 * if the return value is positive, else unpredictable. 
721 */ 722 static int 723 UTF16BEToUTF8(unsigned char* out, int *outlen, 724 const unsigned char* inb, int *inlenb) 725 { 726 unsigned char* outstart = out; 727 const unsigned char* processed = inb; 728 unsigned char* outend = out + *outlen; 729 unsigned short* in = (unsigned short*) inb; 730 unsigned short* inend; 731 unsigned int c, d, inlen; 732 unsigned char *tmp; 733 int bits; 734 735 if ((*inlenb % 2) == 1) 736 (*inlenb)--; 737 inlen = *inlenb / 2; 738 inend= in + inlen; 739 while (in < inend) { 740 if (xmlLittleEndian) { 741 tmp = (unsigned char *) in; 742 c = *tmp++; 743 c = c << 8; 744 c = c | (unsigned int) *tmp; 745 in++; 746 } else { 747 c= *in++; 748 } 749 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 750 if (in >= inend) { /* (in > inend) shouldn't happens */ 751 *outlen = out - outstart; 752 *inlenb = processed - inb; 753 return(-2); 754 } 755 if (xmlLittleEndian) { 756 tmp = (unsigned char *) in; 757 d = *tmp++; 758 d = d << 8; 759 d = d | (unsigned int) *tmp; 760 in++; 761 } else { 762 d= *in++; 763 } 764 if ((d & 0xFC00) == 0xDC00) { 765 c &= 0x03FF; 766 c <<= 10; 767 c |= d & 0x03FF; 768 c += 0x10000; 769 } 770 else { 771 *outlen = out - outstart; 772 *inlenb = processed - inb; 773 return(-2); 774 } 775 } 776 777 /* assertion: c is a single UTF-4 value */ 778 if (out >= outend) 779 break; 780 if (c < 0x80) { *out++= c; bits= -6; } 781 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 782 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 783 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 784 785 for ( ; bits >= 0; bits-= 6) { 786 if (out >= outend) 787 break; 788 *out++= ((c >> bits) & 0x3F) | 0x80; 789 } 790 processed = (const unsigned char*) in; 791 } 792 *outlen = out - outstart; 793 *inlenb = processed - inb; 794 return(*outlen); 795 } 796 797 #ifdef LIBXML_OUTPUT_ENABLED 798 /** 799 * UTF8ToUTF16BE: 800 * @outb: a pointer to an array of bytes to store the result 801 * @outlen: the length of @outb 
802 * @in: a pointer to an array of UTF-8 chars 803 * @inlen: the length of @in 804 * 805 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE 806 * block of chars out. 807 * 808 * Returns the number of byte written, or -1 by lack of space, or -2 809 * if the transcoding failed. 810 */ 811 static int 812 UTF8ToUTF16BE(unsigned char* outb, int *outlen, 813 const unsigned char* in, int *inlen) 814 { 815 unsigned short* out = (unsigned short*) outb; 816 const unsigned char* processed = in; 817 const unsigned char *const instart = in; 818 unsigned short* outstart= out; 819 unsigned short* outend; 820 const unsigned char* inend; 821 unsigned int c, d; 822 int trailing; 823 unsigned char *tmp; 824 unsigned short tmp1, tmp2; 825 826 /* UTF-16BE has no BOM */ 827 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 828 if (in == NULL) { 829 *outlen = 0; 830 *inlen = 0; 831 return(0); 832 } 833 inend= in + *inlen; 834 outend = out + (*outlen / 2); 835 while (in < inend) { 836 d= *in++; 837 if (d < 0x80) { c= d; trailing= 0; } 838 else if (d < 0xC0) { 839 /* trailing byte in leading position */ 840 *outlen = out - outstart; 841 *inlen = processed - instart; 842 return(-2); 843 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 844 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 845 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 846 else { 847 /* no chance for this in UTF-16 */ 848 *outlen = out - outstart; 849 *inlen = processed - instart; 850 return(-2); 851 } 852 853 if (inend - in < trailing) { 854 break; 855 } 856 857 for ( ; trailing; trailing--) { 858 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; 859 c <<= 6; 860 c |= d & 0x3F; 861 } 862 863 /* assertion: c is a single UTF-4 value */ 864 if (c < 0x10000) { 865 if (out >= outend) break; 866 if (xmlLittleEndian) { 867 tmp = (unsigned char *) out; 868 *tmp = c >> 8; 869 *(tmp + 1) = c; 870 out++; 871 } else { 872 *out++ = c; 873 } 874 } 875 else if (c < 0x110000) { 876 if 
(out+1 >= outend) break; 877 c -= 0x10000; 878 if (xmlLittleEndian) { 879 tmp1 = 0xD800 | (c >> 10); 880 tmp = (unsigned char *) out; 881 *tmp = tmp1 >> 8; 882 *(tmp + 1) = (unsigned char) tmp1; 883 out++; 884 885 tmp2 = 0xDC00 | (c & 0x03FF); 886 tmp = (unsigned char *) out; 887 *tmp = tmp2 >> 8; 888 *(tmp + 1) = (unsigned char) tmp2; 889 out++; 890 } else { 891 *out++ = 0xD800 | (c >> 10); 892 *out++ = 0xDC00 | (c & 0x03FF); 893 } 894 } 895 else 896 break; 897 processed = in; 898 } 899 *outlen = (out - outstart) * 2; 900 *inlen = processed - instart; 901 return(*outlen); 902 } 903 #endif /* LIBXML_OUTPUT_ENABLED */ 904 905 /************************************************************************ 906 * * 907 * Generic encoding handling routines * 908 * * 909 ************************************************************************/ 910 911 /** 912 * xmlDetectCharEncoding: 913 * @in: a pointer to the first bytes of the XML entity, must be at least 914 * 2 bytes long (at least 4 if encoding is UTF4 variant). 915 * @len: pointer to the length of the buffer 916 * 917 * Guess the encoding of the entity using the first bytes of the entity content 918 * according to the non-normative appendix F of the XML-1.0 recommendation. 919 * 920 * Returns one of the XML_CHAR_ENCODING_... values. 
921 */ 922 xmlCharEncoding 923 xmlDetectCharEncoding(const unsigned char* in, int len) 924 { 925 if (in == NULL) 926 return(XML_CHAR_ENCODING_NONE); 927 if (len >= 4) { 928 if ((in[0] == 0x00) && (in[1] == 0x00) && 929 (in[2] == 0x00) && (in[3] == 0x3C)) 930 return(XML_CHAR_ENCODING_UCS4BE); 931 if ((in[0] == 0x3C) && (in[1] == 0x00) && 932 (in[2] == 0x00) && (in[3] == 0x00)) 933 return(XML_CHAR_ENCODING_UCS4LE); 934 if ((in[0] == 0x00) && (in[1] == 0x00) && 935 (in[2] == 0x3C) && (in[3] == 0x00)) 936 return(XML_CHAR_ENCODING_UCS4_2143); 937 if ((in[0] == 0x00) && (in[1] == 0x3C) && 938 (in[2] == 0x00) && (in[3] == 0x00)) 939 return(XML_CHAR_ENCODING_UCS4_3412); 940 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 941 (in[2] == 0xA7) && (in[3] == 0x94)) 942 return(XML_CHAR_ENCODING_EBCDIC); 943 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 944 (in[2] == 0x78) && (in[3] == 0x6D)) 945 return(XML_CHAR_ENCODING_UTF8); 946 /* 947 * Although not part of the recommendation, we also 948 * attempt an "auto-recognition" of UTF-16LE and 949 * UTF-16BE encodings. 
950 */ 951 if ((in[0] == 0x3C) && (in[1] == 0x00) && 952 (in[2] == 0x3F) && (in[3] == 0x00)) 953 return(XML_CHAR_ENCODING_UTF16LE); 954 if ((in[0] == 0x00) && (in[1] == 0x3C) && 955 (in[2] == 0x00) && (in[3] == 0x3F)) 956 return(XML_CHAR_ENCODING_UTF16BE); 957 } 958 if (len >= 3) { 959 /* 960 * Errata on XML-1.0 June 20 2001 961 * We now allow an UTF8 encoded BOM 962 */ 963 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 964 (in[2] == 0xBF)) 965 return(XML_CHAR_ENCODING_UTF8); 966 } 967 /* For UTF-16 we can recognize by the BOM */ 968 if (len >= 2) { 969 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 970 return(XML_CHAR_ENCODING_UTF16BE); 971 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 972 return(XML_CHAR_ENCODING_UTF16LE); 973 } 974 return(XML_CHAR_ENCODING_NONE); 975 } 976 977 /** 978 * xmlCleanupEncodingAliases: 979 * 980 * Unregisters all aliases 981 */ 982 void 983 xmlCleanupEncodingAliases(void) { 984 int i; 985 986 if (xmlCharEncodingAliases == NULL) 987 return; 988 989 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 990 if (xmlCharEncodingAliases[i].name != NULL) 991 xmlFree((char *) xmlCharEncodingAliases[i].name); 992 if (xmlCharEncodingAliases[i].alias != NULL) 993 xmlFree((char *) xmlCharEncodingAliases[i].alias); 994 } 995 xmlCharEncodingAliasesNb = 0; 996 xmlCharEncodingAliasesMax = 0; 997 xmlFree(xmlCharEncodingAliases); 998 xmlCharEncodingAliases = NULL; 999 } 1000 1001 /** 1002 * xmlGetEncodingAlias: 1003 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1004 * 1005 * Lookup an encoding name for the given alias. 
1006 * 1007 * Returns NULL if not found, otherwise the original name 1008 */ 1009 const char * 1010 xmlGetEncodingAlias(const char *alias) { 1011 int i; 1012 char upper[100]; 1013 1014 if (alias == NULL) 1015 return(NULL); 1016 1017 if (xmlCharEncodingAliases == NULL) 1018 return(NULL); 1019 1020 for (i = 0;i < 99;i++) { 1021 upper[i] = toupper(alias[i]); 1022 if (upper[i] == 0) break; 1023 } 1024 upper[i] = 0; 1025 1026 /* 1027 * Walk down the list looking for a definition of the alias 1028 */ 1029 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1030 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1031 return(xmlCharEncodingAliases[i].name); 1032 } 1033 } 1034 return(NULL); 1035 } 1036 1037 /** 1038 * xmlAddEncodingAlias: 1039 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1040 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1041 * 1042 * Registers an alias @alias for an encoding named @name. Existing alias 1043 * will be overwritten. 1044 * 1045 * Returns 0 in case of success, -1 in case of error 1046 */ 1047 int 1048 xmlAddEncodingAlias(const char *name, const char *alias) { 1049 int i; 1050 char upper[100]; 1051 1052 if ((name == NULL) || (alias == NULL)) 1053 return(-1); 1054 1055 for (i = 0;i < 99;i++) { 1056 upper[i] = toupper(alias[i]); 1057 if (upper[i] == 0) break; 1058 } 1059 upper[i] = 0; 1060 1061 if (xmlCharEncodingAliases == NULL) { 1062 xmlCharEncodingAliasesNb = 0; 1063 xmlCharEncodingAliasesMax = 20; 1064 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1065 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1066 if (xmlCharEncodingAliases == NULL) 1067 return(-1); 1068 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1069 xmlCharEncodingAliasesMax *= 2; 1070 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1071 xmlRealloc(xmlCharEncodingAliases, 1072 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1073 } 1074 /* 1075 * Walk down the list looking 
for a definition of the alias 1076 */ 1077 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1078 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1079 /* 1080 * Replace the definition. 1081 */ 1082 xmlFree((char *) xmlCharEncodingAliases[i].name); 1083 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1084 return(0); 1085 } 1086 } 1087 /* 1088 * Add the definition 1089 */ 1090 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1091 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1092 xmlCharEncodingAliasesNb++; 1093 return(0); 1094 } 1095 1096 /** 1097 * xmlDelEncodingAlias: 1098 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1099 * 1100 * Unregisters an encoding alias @alias 1101 * 1102 * Returns 0 in case of success, -1 in case of error 1103 */ 1104 int 1105 xmlDelEncodingAlias(const char *alias) { 1106 int i; 1107 1108 if (alias == NULL) 1109 return(-1); 1110 1111 if (xmlCharEncodingAliases == NULL) 1112 return(-1); 1113 /* 1114 * Walk down the list looking for a definition of the alias 1115 */ 1116 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1117 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1118 xmlFree((char *) xmlCharEncodingAliases[i].name); 1119 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1120 xmlCharEncodingAliasesNb--; 1121 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1122 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1123 return(0); 1124 } 1125 } 1126 return(-1); 1127 } 1128 1129 /** 1130 * xmlParseCharEncoding: 1131 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1132 * 1133 * Compare the string to the encoding schemes already known. Note 1134 * that the comparison is case insensitive accordingly to the section 1135 * [XML] 4.3.3 Character Encoding in Entities. 1136 * 1137 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1138 * if not recognized. 
1139 */ 1140 xmlCharEncoding 1141 xmlParseCharEncoding(const char* name) 1142 { 1143 const char *alias; 1144 char upper[500]; 1145 int i; 1146 1147 if (name == NULL) 1148 return(XML_CHAR_ENCODING_NONE); 1149 1150 /* 1151 * Do the alias resolution 1152 */ 1153 alias = xmlGetEncodingAlias(name); 1154 if (alias != NULL) 1155 name = alias; 1156 1157 for (i = 0;i < 499;i++) { 1158 upper[i] = toupper(name[i]); 1159 if (upper[i] == 0) break; 1160 } 1161 upper[i] = 0; 1162 1163 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1164 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1165 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1166 1167 /* 1168 * NOTE: if we were able to parse this, the endianness of UTF16 is 1169 * already found and in use 1170 */ 1171 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1172 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1173 1174 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1175 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1176 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1177 1178 /* 1179 * NOTE: if we were able to parse this, the endianness of UCS4 is 1180 * already found and in use 1181 */ 1182 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1183 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1184 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1185 1186 1187 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1188 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1189 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1190 1191 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1192 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1193 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1194 1195 if (!strcmp(upper, "ISO-8859-3")) 
return(XML_CHAR_ENCODING_8859_3); 1196 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1197 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1198 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1199 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1200 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1201 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1202 1203 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1204 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1205 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1206 1207 #ifdef DEBUG_ENCODING 1208 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1209 #endif 1210 return(XML_CHAR_ENCODING_ERROR); 1211 } 1212 1213 /** 1214 * xmlGetCharEncodingName: 1215 * @enc: the encoding 1216 * 1217 * The "canonical" name for XML encoding. 1218 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1219 * Section 4.3.3 Character Encoding in Entities 1220 * 1221 * Returns the canonical name for the given encoding 1222 */ 1223 1224 const char* 1225 xmlGetCharEncodingName(xmlCharEncoding enc) { 1226 switch (enc) { 1227 case XML_CHAR_ENCODING_ERROR: 1228 return(NULL); 1229 case XML_CHAR_ENCODING_NONE: 1230 return(NULL); 1231 case XML_CHAR_ENCODING_UTF8: 1232 return("UTF-8"); 1233 case XML_CHAR_ENCODING_UTF16LE: 1234 return("UTF-16"); 1235 case XML_CHAR_ENCODING_UTF16BE: 1236 return("UTF-16"); 1237 case XML_CHAR_ENCODING_EBCDIC: 1238 return("EBCDIC"); 1239 case XML_CHAR_ENCODING_UCS4LE: 1240 return("ISO-10646-UCS-4"); 1241 case XML_CHAR_ENCODING_UCS4BE: 1242 return("ISO-10646-UCS-4"); 1243 case XML_CHAR_ENCODING_UCS4_2143: 1244 return("ISO-10646-UCS-4"); 1245 case XML_CHAR_ENCODING_UCS4_3412: 1246 return("ISO-10646-UCS-4"); 1247 case XML_CHAR_ENCODING_UCS2: 1248 return("ISO-10646-UCS-2"); 1249 case XML_CHAR_ENCODING_8859_1: 1250 
return("ISO-8859-1"); 1251 case XML_CHAR_ENCODING_8859_2: 1252 return("ISO-8859-2"); 1253 case XML_CHAR_ENCODING_8859_3: 1254 return("ISO-8859-3"); 1255 case XML_CHAR_ENCODING_8859_4: 1256 return("ISO-8859-4"); 1257 case XML_CHAR_ENCODING_8859_5: 1258 return("ISO-8859-5"); 1259 case XML_CHAR_ENCODING_8859_6: 1260 return("ISO-8859-6"); 1261 case XML_CHAR_ENCODING_8859_7: 1262 return("ISO-8859-7"); 1263 case XML_CHAR_ENCODING_8859_8: 1264 return("ISO-8859-8"); 1265 case XML_CHAR_ENCODING_8859_9: 1266 return("ISO-8859-9"); 1267 case XML_CHAR_ENCODING_2022_JP: 1268 return("ISO-2022-JP"); 1269 case XML_CHAR_ENCODING_SHIFT_JIS: 1270 return("Shift-JIS"); 1271 case XML_CHAR_ENCODING_EUC_JP: 1272 return("EUC-JP"); 1273 case XML_CHAR_ENCODING_ASCII: 1274 return(NULL); 1275 } 1276 return(NULL); 1277 } 1278 1279 /************************************************************************ 1280 * * 1281 * Char encoding handlers * 1282 * * 1283 ************************************************************************/ 1284 1285 1286 /* the size should be growable, but it's not a big deal ... */ 1287 #define MAX_ENCODING_HANDLERS 50 1288 static xmlCharEncodingHandlerPtr *handlers = NULL; 1289 static int nbCharEncodingHandler = 0; 1290 1291 /* 1292 * The default is UTF-8 for XML, that's also the default used for the 1293 * parser internals, so the default encoding handler is NULL 1294 */ 1295 1296 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1297 1298 /** 1299 * xmlNewCharEncodingHandler: 1300 * @name: the encoding name, in UTF-8 format (ASCII actually) 1301 * @input: the xmlCharEncodingInputFunc to read that encoding 1302 * @output: the xmlCharEncodingOutputFunc to write that encoding 1303 * 1304 * Create and registers an xmlCharEncodingHandler. 1305 * 1306 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 
1307 */ 1308 xmlCharEncodingHandlerPtr 1309 xmlNewCharEncodingHandler(const char *name, 1310 xmlCharEncodingInputFunc input, 1311 xmlCharEncodingOutputFunc output) { 1312 xmlCharEncodingHandlerPtr handler; 1313 const char *alias; 1314 char upper[500]; 1315 int i; 1316 char *up = NULL; 1317 1318 /* 1319 * Do the alias resolution 1320 */ 1321 alias = xmlGetEncodingAlias(name); 1322 if (alias != NULL) 1323 name = alias; 1324 1325 /* 1326 * Keep only the uppercase version of the encoding. 1327 */ 1328 if (name == NULL) { 1329 xmlEncodingErr(XML_I18N_NO_NAME, 1330 "xmlNewCharEncodingHandler : no name !\n", NULL); 1331 return(NULL); 1332 } 1333 for (i = 0;i < 499;i++) { 1334 upper[i] = toupper(name[i]); 1335 if (upper[i] == 0) break; 1336 } 1337 upper[i] = 0; 1338 up = xmlMemStrdup(upper); 1339 if (up == NULL) { 1340 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1341 return(NULL); 1342 } 1343 1344 /* 1345 * allocate and fill-up an handler block. 1346 */ 1347 handler = (xmlCharEncodingHandlerPtr) 1348 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1349 if (handler == NULL) { 1350 xmlFree(up); 1351 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1352 return(NULL); 1353 } 1354 memset(handler, 0, sizeof(xmlCharEncodingHandler)); 1355 handler->input = input; 1356 handler->output = output; 1357 handler->name = up; 1358 1359 #ifdef LIBXML_ICONV_ENABLED 1360 handler->iconv_in = NULL; 1361 handler->iconv_out = NULL; 1362 #endif 1363 #ifdef LIBXML_ICU_ENABLED 1364 handler->uconv_in = NULL; 1365 handler->uconv_out = NULL; 1366 #endif 1367 1368 /* 1369 * registers and returns the handler. 
1370 */ 1371 xmlRegisterCharEncodingHandler(handler); 1372 #ifdef DEBUG_ENCODING 1373 xmlGenericError(xmlGenericErrorContext, 1374 "Registered encoding handler for %s\n", name); 1375 #endif 1376 return(handler); 1377 } 1378 1379 /** 1380 * xmlInitCharEncodingHandlers: 1381 * 1382 * Initialize the char encoding support, it registers the default 1383 * encoding supported. 1384 * NOTE: while public, this function usually doesn't need to be called 1385 * in normal processing. 1386 */ 1387 void 1388 xmlInitCharEncodingHandlers(void) { 1389 unsigned short int tst = 0x1234; 1390 unsigned char *ptr = (unsigned char *) &tst; 1391 1392 if (handlers != NULL) return; 1393 1394 handlers = (xmlCharEncodingHandlerPtr *) 1395 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1396 1397 if (*ptr == 0x12) xmlLittleEndian = 0; 1398 else if (*ptr == 0x34) xmlLittleEndian = 1; 1399 else { 1400 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1401 "Odd problem at endianness detection\n", NULL); 1402 } 1403 1404 if (handlers == NULL) { 1405 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1406 return; 1407 } 1408 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1409 #ifdef LIBXML_OUTPUT_ENABLED 1410 xmlUTF16LEHandler = 1411 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1412 xmlUTF16BEHandler = 1413 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1414 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1415 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1416 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1417 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1418 #ifdef LIBXML_HTML_ENABLED 1419 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1420 #endif 1421 #else 1422 xmlUTF16LEHandler = 1423 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1424 xmlUTF16BEHandler = 1425 xmlNewCharEncodingHandler("UTF-16BE", 
UTF16BEToUTF8, NULL); 1426 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1427 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1428 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1429 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1430 #endif /* LIBXML_OUTPUT_ENABLED */ 1431 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 1432 #ifdef LIBXML_ISO8859X_ENABLED 1433 xmlRegisterCharEncodingHandlersISO8859x (); 1434 #endif 1435 #endif 1436 1437 } 1438 1439 /** 1440 * xmlCleanupCharEncodingHandlers: 1441 * 1442 * Cleanup the memory allocated for the char encoding support, it 1443 * unregisters all the encoding handlers and the aliases. 1444 */ 1445 void 1446 xmlCleanupCharEncodingHandlers(void) { 1447 xmlCleanupEncodingAliases(); 1448 1449 if (handlers == NULL) return; 1450 1451 for (;nbCharEncodingHandler > 0;) { 1452 nbCharEncodingHandler--; 1453 if (handlers[nbCharEncodingHandler] != NULL) { 1454 if (handlers[nbCharEncodingHandler]->name != NULL) 1455 xmlFree(handlers[nbCharEncodingHandler]->name); 1456 xmlFree(handlers[nbCharEncodingHandler]); 1457 } 1458 } 1459 xmlFree(handlers); 1460 handlers = NULL; 1461 nbCharEncodingHandler = 0; 1462 xmlDefaultCharEncodingHandler = NULL; 1463 } 1464 1465 /** 1466 * xmlRegisterCharEncodingHandler: 1467 * @handler: the xmlCharEncodingHandlerPtr handler block 1468 * 1469 * Register the char encoding handler, surprising, isn't it ? 
1470 */ 1471 void 1472 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1473 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1474 if ((handler == NULL) || (handlers == NULL)) { 1475 xmlEncodingErr(XML_I18N_NO_HANDLER, 1476 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1477 return; 1478 } 1479 1480 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1481 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1482 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1483 "MAX_ENCODING_HANDLERS"); 1484 return; 1485 } 1486 handlers[nbCharEncodingHandler++] = handler; 1487 } 1488 1489 /** 1490 * xmlGetCharEncodingHandler: 1491 * @enc: an xmlCharEncoding value. 1492 * 1493 * Search in the registered set the handler able to read/write that encoding. 1494 * 1495 * Returns the handler or NULL if not found 1496 */ 1497 xmlCharEncodingHandlerPtr 1498 xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1499 xmlCharEncodingHandlerPtr handler; 1500 1501 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1502 switch (enc) { 1503 case XML_CHAR_ENCODING_ERROR: 1504 return(NULL); 1505 case XML_CHAR_ENCODING_NONE: 1506 return(NULL); 1507 case XML_CHAR_ENCODING_UTF8: 1508 return(NULL); 1509 case XML_CHAR_ENCODING_UTF16LE: 1510 return(xmlUTF16LEHandler); 1511 case XML_CHAR_ENCODING_UTF16BE: 1512 return(xmlUTF16BEHandler); 1513 case XML_CHAR_ENCODING_EBCDIC: 1514 handler = xmlFindCharEncodingHandler("EBCDIC"); 1515 if (handler != NULL) return(handler); 1516 handler = xmlFindCharEncodingHandler("ebcdic"); 1517 if (handler != NULL) return(handler); 1518 handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1519 if (handler != NULL) return(handler); 1520 handler = xmlFindCharEncodingHandler("IBM-037"); 1521 if (handler != NULL) return(handler); 1522 break; 1523 case XML_CHAR_ENCODING_UCS4BE: 1524 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1525 if (handler != NULL) return(handler); 1526 handler = xmlFindCharEncodingHandler("UCS-4"); 1527 
if (handler != NULL) return(handler); 1528 handler = xmlFindCharEncodingHandler("UCS4"); 1529 if (handler != NULL) return(handler); 1530 break; 1531 case XML_CHAR_ENCODING_UCS4LE: 1532 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1533 if (handler != NULL) return(handler); 1534 handler = xmlFindCharEncodingHandler("UCS-4"); 1535 if (handler != NULL) return(handler); 1536 handler = xmlFindCharEncodingHandler("UCS4"); 1537 if (handler != NULL) return(handler); 1538 break; 1539 case XML_CHAR_ENCODING_UCS4_2143: 1540 break; 1541 case XML_CHAR_ENCODING_UCS4_3412: 1542 break; 1543 case XML_CHAR_ENCODING_UCS2: 1544 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1545 if (handler != NULL) return(handler); 1546 handler = xmlFindCharEncodingHandler("UCS-2"); 1547 if (handler != NULL) return(handler); 1548 handler = xmlFindCharEncodingHandler("UCS2"); 1549 if (handler != NULL) return(handler); 1550 break; 1551 1552 /* 1553 * We used to keep ISO Latin encodings native in the 1554 * generated data. This led to so many problems that 1555 * this has been removed. 
One can still change this 1556 * back by registering no-ops encoders for those 1557 */ 1558 case XML_CHAR_ENCODING_8859_1: 1559 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1560 if (handler != NULL) return(handler); 1561 break; 1562 case XML_CHAR_ENCODING_8859_2: 1563 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1564 if (handler != NULL) return(handler); 1565 break; 1566 case XML_CHAR_ENCODING_8859_3: 1567 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1568 if (handler != NULL) return(handler); 1569 break; 1570 case XML_CHAR_ENCODING_8859_4: 1571 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1572 if (handler != NULL) return(handler); 1573 break; 1574 case XML_CHAR_ENCODING_8859_5: 1575 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1576 if (handler != NULL) return(handler); 1577 break; 1578 case XML_CHAR_ENCODING_8859_6: 1579 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1580 if (handler != NULL) return(handler); 1581 break; 1582 case XML_CHAR_ENCODING_8859_7: 1583 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1584 if (handler != NULL) return(handler); 1585 break; 1586 case XML_CHAR_ENCODING_8859_8: 1587 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1588 if (handler != NULL) return(handler); 1589 break; 1590 case XML_CHAR_ENCODING_8859_9: 1591 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1592 if (handler != NULL) return(handler); 1593 break; 1594 1595 1596 case XML_CHAR_ENCODING_2022_JP: 1597 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1598 if (handler != NULL) return(handler); 1599 break; 1600 case XML_CHAR_ENCODING_SHIFT_JIS: 1601 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1602 if (handler != NULL) return(handler); 1603 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1604 if (handler != NULL) return(handler); 1605 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1606 if (handler != NULL) return(handler); 1607 break; 1608 case XML_CHAR_ENCODING_EUC_JP: 1609 handler = 
xmlFindCharEncodingHandler("EUC-JP"); 1610 if (handler != NULL) return(handler); 1611 break; 1612 default: 1613 break; 1614 } 1615 1616 #ifdef DEBUG_ENCODING 1617 xmlGenericError(xmlGenericErrorContext, 1618 "No handler found for encoding %d\n", enc); 1619 #endif 1620 return(NULL); 1621 } 1622 1623 /** 1624 * xmlFindCharEncodingHandler: 1625 * @name: a string describing the char encoding. 1626 * 1627 * Search in the registered set the handler able to read/write that encoding. 1628 * 1629 * Returns the handler or NULL if not found 1630 */ 1631 xmlCharEncodingHandlerPtr 1632 xmlFindCharEncodingHandler(const char *name) { 1633 const char *nalias; 1634 const char *norig; 1635 xmlCharEncoding alias; 1636 #ifdef LIBXML_ICONV_ENABLED 1637 xmlCharEncodingHandlerPtr enc; 1638 iconv_t icv_in, icv_out; 1639 #endif /* LIBXML_ICONV_ENABLED */ 1640 #ifdef LIBXML_ICU_ENABLED 1641 xmlCharEncodingHandlerPtr encu; 1642 uconv_t *ucv_in, *ucv_out; 1643 #endif /* LIBXML_ICU_ENABLED */ 1644 char upper[100]; 1645 int i; 1646 1647 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1648 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1649 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1650 1651 /* 1652 * Do the alias resolution 1653 */ 1654 norig = name; 1655 nalias = xmlGetEncodingAlias(name); 1656 if (nalias != NULL) 1657 name = nalias; 1658 1659 /* 1660 * Check first for directly registered encoding names 1661 */ 1662 for (i = 0;i < 99;i++) { 1663 upper[i] = toupper(name[i]); 1664 if (upper[i] == 0) break; 1665 } 1666 upper[i] = 0; 1667 1668 if (handlers != NULL) { 1669 for (i = 0;i < nbCharEncodingHandler; i++) { 1670 if (!strcmp(upper, handlers[i]->name)) { 1671 #ifdef DEBUG_ENCODING 1672 xmlGenericError(xmlGenericErrorContext, 1673 "Found registered handler for encoding %s\n", name); 1674 #endif 1675 return(handlers[i]); 1676 } 1677 } 1678 } 1679 1680 #ifdef LIBXML_ICONV_ENABLED 1681 /* check whether iconv can handle this */ 1682 icv_in = iconv_open("UTF-8", 
name); 1683 icv_out = iconv_open(name, "UTF-8"); 1684 if (icv_in == (iconv_t) -1) { 1685 icv_in = iconv_open("UTF-8", upper); 1686 } 1687 if (icv_out == (iconv_t) -1) { 1688 icv_out = iconv_open(upper, "UTF-8"); 1689 } 1690 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1691 enc = (xmlCharEncodingHandlerPtr) 1692 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1693 if (enc == NULL) { 1694 iconv_close(icv_in); 1695 iconv_close(icv_out); 1696 return(NULL); 1697 } 1698 memset(enc, 0, sizeof(xmlCharEncodingHandler)); 1699 enc->name = xmlMemStrdup(name); 1700 enc->input = NULL; 1701 enc->output = NULL; 1702 enc->iconv_in = icv_in; 1703 enc->iconv_out = icv_out; 1704 #ifdef DEBUG_ENCODING 1705 xmlGenericError(xmlGenericErrorContext, 1706 "Found iconv handler for encoding %s\n", name); 1707 #endif 1708 return enc; 1709 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1710 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1711 "iconv : problems with filters for '%s'\n", name); 1712 } 1713 #endif /* LIBXML_ICONV_ENABLED */ 1714 #ifdef LIBXML_ICU_ENABLED 1715 /* check whether icu can handle this */ 1716 ucv_in = openIcuConverter(name, 1); 1717 ucv_out = openIcuConverter(name, 0); 1718 if (ucv_in != NULL && ucv_out != NULL) { 1719 encu = (xmlCharEncodingHandlerPtr) 1720 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1721 if (encu == NULL) { 1722 closeIcuConverter(ucv_in); 1723 closeIcuConverter(ucv_out); 1724 return(NULL); 1725 } 1726 memset(encu, 0, sizeof(xmlCharEncodingHandler)); 1727 encu->name = xmlMemStrdup(name); 1728 encu->input = NULL; 1729 encu->output = NULL; 1730 encu->uconv_in = ucv_in; 1731 encu->uconv_out = ucv_out; 1732 #ifdef DEBUG_ENCODING 1733 xmlGenericError(xmlGenericErrorContext, 1734 "Found ICU converter handler for encoding %s\n", name); 1735 #endif 1736 return encu; 1737 } else if (ucv_in != NULL || ucv_out != NULL) { 1738 closeIcuConverter(ucv_in); 1739 closeIcuConverter(ucv_out); 1740 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1741 "ICU 
converter : problems with filters for '%s'\n", name); 1742 } 1743 #endif /* LIBXML_ICU_ENABLED */ 1744 1745 #ifdef DEBUG_ENCODING 1746 xmlGenericError(xmlGenericErrorContext, 1747 "No handler found for encoding %s\n", name); 1748 #endif 1749 1750 /* 1751 * Fallback using the canonical names 1752 */ 1753 alias = xmlParseCharEncoding(norig); 1754 if (alias != XML_CHAR_ENCODING_ERROR) { 1755 const char* canon; 1756 canon = xmlGetCharEncodingName(alias); 1757 if ((canon != NULL) && (strcmp(name, canon))) { 1758 return(xmlFindCharEncodingHandler(canon)); 1759 } 1760 } 1761 1762 /* If "none of the above", give up */ 1763 return(NULL); 1764 } 1765 1766 /************************************************************************ 1767 * * 1768 * ICONV based generic conversion functions * 1769 * * 1770 ************************************************************************/ 1771 1772 #ifdef LIBXML_ICONV_ENABLED 1773 /** 1774 * xmlIconvWrapper: 1775 * @cd: iconv converter data structure 1776 * @out: a pointer to an array of bytes to store the result 1777 * @outlen: the length of @out 1778 * @in: a pointer to an array of ISO Latin 1 chars 1779 * @inlen: the length of @in 1780 * 1781 * Returns 0 if success, or 1782 * -1 by lack of space, or 1783 * -2 if the transcoding fails (for *in is not valid utf8 string or 1784 * the result of transformation can't fit into the encoding we want), or 1785 * -3 if there the last byte can't form a single output char. 1786 * 1787 * The value of @inlen after return is the number of octets consumed 1788 * as the return value is positive, else unpredictable. 1789 * The value of @outlen after return is the number of ocetes consumed. 
1790 */ 1791 static int 1792 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1793 const unsigned char *in, int *inlen) { 1794 size_t icv_inlen, icv_outlen; 1795 const char *icv_in = (const char *) in; 1796 char *icv_out = (char *) out; 1797 int ret; 1798 1799 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1800 if (outlen != NULL) *outlen = 0; 1801 return(-1); 1802 } 1803 icv_inlen = *inlen; 1804 icv_outlen = *outlen; 1805 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1806 *inlen -= icv_inlen; 1807 *outlen -= icv_outlen; 1808 if ((icv_inlen != 0) || (ret == -1)) { 1809 #ifdef EILSEQ 1810 if (errno == EILSEQ) { 1811 return -2; 1812 } else 1813 #endif 1814 #ifdef E2BIG 1815 if (errno == E2BIG) { 1816 return -1; 1817 } else 1818 #endif 1819 #ifdef EINVAL 1820 if (errno == EINVAL) { 1821 return -3; 1822 } else 1823 #endif 1824 { 1825 return -3; 1826 } 1827 } 1828 return 0; 1829 } 1830 #endif /* LIBXML_ICONV_ENABLED */ 1831 1832 /************************************************************************ 1833 * * 1834 * ICU based generic conversion functions * 1835 * * 1836 ************************************************************************/ 1837 1838 #ifdef LIBXML_ICU_ENABLED 1839 /** 1840 * xmlUconvWrapper: 1841 * @cd: ICU uconverter data structure 1842 * @toUnicode : non-zero if toUnicode. 0 otherwise. 1843 * @out: a pointer to an array of bytes to store the result 1844 * @outlen: the length of @out 1845 * @in: a pointer to an array of ISO Latin 1 chars 1846 * @inlen: the length of @in 1847 * 1848 * Returns 0 if success, or 1849 * -1 by lack of space, or 1850 * -2 if the transcoding fails (for *in is not valid utf8 string or 1851 * the result of transformation can't fit into the encoding we want), or 1852 * -3 if there the last byte can't form a single output char. 
1853 * 1854 * The value of @inlen after return is the number of octets consumed 1855 * as the return value is positive, else unpredictable. 1856 * The value of @outlen after return is the number of ocetes consumed. 1857 */ 1858 static int 1859 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1860 const unsigned char *in, int *inlen) { 1861 const char *ucv_in = (const char *) in; 1862 char *ucv_out = (char *) out; 1863 UErrorCode err = U_ZERO_ERROR; 1864 1865 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1866 if (outlen != NULL) *outlen = 0; 1867 return(-1); 1868 } 1869 1870 /* 1871 * TODO(jungshik) 1872 * 1. is ucnv_convert(To|From)Algorithmic better? 1873 * 2. had we better use an explicit pivot buffer? 1874 * 3. error returned comes from 'fromUnicode' only even 1875 * when toUnicode is true ! 1876 */ 1877 if (toUnicode) { 1878 /* encoding => UTF-16 => UTF-8 */ 1879 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1880 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1881 0, TRUE, &err); 1882 } else { 1883 /* UTF-8 => UTF-16 => encoding */ 1884 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1885 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1886 0, TRUE, &err); 1887 } 1888 *inlen = ucv_in - (const char*) in; 1889 *outlen = ucv_out - (char *) out; 1890 if (U_SUCCESS(err)) 1891 return 0; 1892 if (err == U_BUFFER_OVERFLOW_ERROR) 1893 return -1; 1894 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1895 return -2; 1896 /* if (err == U_TRUNCATED_CHAR_FOUND) */ 1897 return -3; 1898 } 1899 #endif /* LIBXML_ICU_ENABLED */ 1900 1901 /************************************************************************ 1902 * * 1903 * The real API used by libxml for on-the-fly conversion * 1904 * * 1905 ************************************************************************/ 1906 1907 /** 1908 * xmlCharEncFirstLineInt: 1909 * @handler: char enconding transformation data 
structure 1910 * @out: an xmlBuffer for the output. 1911 * @in: an xmlBuffer for the input 1912 * @len: number of bytes to convert for the first line, or -1 1913 * 1914 * Front-end for the encoding handler input function, but handle only 1915 * the very first line, i.e. limit itself to 45 chars. 1916 * 1917 * Returns the number of byte written if success, or 1918 * -1 general error 1919 * -2 if the transcoding fails (for *in is not valid utf8 string or 1920 * the result of transformation can't fit into the encoding we want), or 1921 */ 1922 int 1923 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1924 xmlBufferPtr in, int len) { 1925 int ret = -2; 1926 int written; 1927 int toconv; 1928 1929 if (handler == NULL) return(-1); 1930 if (out == NULL) return(-1); 1931 if (in == NULL) return(-1); 1932 1933 /* calculate space available */ 1934 written = out->size - out->use - 1; /* count '\0' */ 1935 toconv = in->use; 1936 /* 1937 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1938 * 45 chars should be sufficient to reach the end of the encoding 1939 * declaration without going too far inside the document content. 
1940 * on UTF-16 this means 90bytes, on UCS4 this means 180 1941 * The actual value depending on guessed encoding is passed as @len 1942 * if provided 1943 */ 1944 if (len >= 0) { 1945 if (toconv > len) 1946 toconv = len; 1947 } else { 1948 if (toconv > 180) 1949 toconv = 180; 1950 } 1951 if (toconv * 2 >= written) { 1952 xmlBufferGrow(out, toconv * 2); 1953 written = out->size - out->use - 1; 1954 } 1955 1956 if (handler->input != NULL) { 1957 ret = handler->input(&out->content[out->use], &written, 1958 in->content, &toconv); 1959 xmlBufferShrink(in, toconv); 1960 out->use += written; 1961 out->content[out->use] = 0; 1962 } 1963 #ifdef LIBXML_ICONV_ENABLED 1964 else if (handler->iconv_in != NULL) { 1965 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1966 &written, in->content, &toconv); 1967 xmlBufferShrink(in, toconv); 1968 out->use += written; 1969 out->content[out->use] = 0; 1970 if (ret == -1) ret = -3; 1971 } 1972 #endif /* LIBXML_ICONV_ENABLED */ 1973 #ifdef LIBXML_ICU_ENABLED 1974 else if (handler->uconv_in != NULL) { 1975 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 1976 &written, in->content, &toconv); 1977 xmlBufferShrink(in, toconv); 1978 out->use += written; 1979 out->content[out->use] = 0; 1980 if (ret == -1) ret = -3; 1981 } 1982 #endif /* LIBXML_ICU_ENABLED */ 1983 #ifdef DEBUG_ENCODING 1984 switch (ret) { 1985 case 0: 1986 xmlGenericError(xmlGenericErrorContext, 1987 "converted %d bytes to %d bytes of input\n", 1988 toconv, written); 1989 break; 1990 case -1: 1991 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 1992 toconv, written, in->use); 1993 break; 1994 case -2: 1995 xmlGenericError(xmlGenericErrorContext, 1996 "input conversion failed due to input error\n"); 1997 break; 1998 case -3: 1999 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2000 toconv, written, in->use); 2001 break; 2002 default: 2003 
xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 2004 } 2005 #endif /* DEBUG_ENCODING */ 2006 /* 2007 * Ignore when input buffer is not on a boundary 2008 */ 2009 if (ret == -3) ret = 0; 2010 if (ret == -1) ret = 0; 2011 return(ret); 2012 } 2013 2014 /** 2015 * xmlCharEncFirstLine: 2016 * @handler: char enconding transformation data structure 2017 * @out: an xmlBuffer for the output. 2018 * @in: an xmlBuffer for the input 2019 * 2020 * Front-end for the encoding handler input function, but handle only 2021 * the very first line, i.e. limit itself to 45 chars. 2022 * 2023 * Returns the number of byte written if success, or 2024 * -1 general error 2025 * -2 if the transcoding fails (for *in is not valid utf8 string or 2026 * the result of transformation can't fit into the encoding we want), or 2027 */ 2028 int 2029 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2030 xmlBufferPtr in) { 2031 return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2032 } 2033 2034 /** 2035 * xmlCharEncFirstLineInput: 2036 * @input: a parser input buffer 2037 * @len: number of bytes to convert for the first line, or -1 2038 * 2039 * Front-end for the encoding handler input function, but handle only 2040 * the very first line. Point is that this is based on autodetection 2041 * of the encoding and once that first line is converted we may find 2042 * out that a different decoder is needed to process the input. 
2043 * 2044 * Returns the number of byte written if success, or 2045 * -1 general error 2046 * -2 if the transcoding fails (for *in is not valid utf8 string or 2047 * the result of transformation can't fit into the encoding we want), or 2048 */ 2049 int 2050 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) 2051 { 2052 int ret = -2; 2053 size_t written; 2054 size_t toconv; 2055 int c_in; 2056 int c_out; 2057 xmlBufPtr in; 2058 xmlBufPtr out; 2059 2060 if ((input == NULL) || (input->encoder == NULL) || 2061 (input->buffer == NULL) || (input->raw == NULL)) 2062 return (-1); 2063 out = input->buffer; 2064 in = input->raw; 2065 2066 toconv = xmlBufUse(in); 2067 if (toconv == 0) 2068 return (0); 2069 written = xmlBufAvail(out) - 1; /* count '\0' */ 2070 /* 2071 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2072 * 45 chars should be sufficient to reach the end of the encoding 2073 * declaration without going too far inside the document content. 2074 * on UTF-16 this means 90bytes, on UCS4 this means 180 2075 * The actual value depending on guessed encoding is passed as @len 2076 * if provided 2077 */ 2078 if (len >= 0) { 2079 if (toconv > (unsigned int) len) 2080 toconv = len; 2081 } else { 2082 if (toconv > 180) 2083 toconv = 180; 2084 } 2085 if (toconv * 2 >= written) { 2086 xmlBufGrow(out, toconv * 2); 2087 written = xmlBufAvail(out) - 1; 2088 } 2089 if (written > 360) 2090 written = 360; 2091 2092 c_in = toconv; 2093 c_out = written; 2094 if (input->encoder->input != NULL) { 2095 ret = input->encoder->input(xmlBufEnd(out), &c_out, 2096 xmlBufContent(in), &c_in); 2097 xmlBufShrink(in, c_in); 2098 xmlBufAddLen(out, c_out); 2099 } 2100 #ifdef LIBXML_ICONV_ENABLED 2101 else if (input->encoder->iconv_in != NULL) { 2102 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2103 &c_out, xmlBufContent(in), &c_in); 2104 xmlBufShrink(in, c_in); 2105 xmlBufAddLen(out, c_out); 2106 if (ret == -1) 2107 ret = -3; 2108 } 2109 #endif /* 
LIBXML_ICONV_ENABLED */ 2110 #ifdef LIBXML_ICU_ENABLED 2111 else if (input->encoder->uconv_in != NULL) { 2112 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2113 &c_out, xmlBufContent(in), &c_in); 2114 xmlBufShrink(in, c_in); 2115 xmlBufAddLen(out, c_out); 2116 if (ret == -1) 2117 ret = -3; 2118 } 2119 #endif /* LIBXML_ICU_ENABLED */ 2120 switch (ret) { 2121 case 0: 2122 #ifdef DEBUG_ENCODING 2123 xmlGenericError(xmlGenericErrorContext, 2124 "converted %d bytes to %d bytes of input\n", 2125 c_in, c_out); 2126 #endif 2127 break; 2128 case -1: 2129 #ifdef DEBUG_ENCODING 2130 xmlGenericError(xmlGenericErrorContext, 2131 "converted %d bytes to %d bytes of input, %d left\n", 2132 c_in, c_out, (int)xmlBufUse(in)); 2133 #endif 2134 break; 2135 case -3: 2136 #ifdef DEBUG_ENCODING 2137 xmlGenericError(xmlGenericErrorContext, 2138 "converted %d bytes to %d bytes of input, %d left\n", 2139 c_in, c_out, (int)xmlBufUse(in)); 2140 #endif 2141 break; 2142 case -2: { 2143 char buf[50]; 2144 const xmlChar *content = xmlBufContent(in); 2145 2146 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2147 content[0], content[1], 2148 content[2], content[3]); 2149 buf[49] = 0; 2150 xmlEncodingErr(XML_I18N_CONV_FAILED, 2151 "input conversion failed due to input error, bytes %s\n", 2152 buf); 2153 } 2154 } 2155 /* 2156 * Ignore when input buffer is not on a boundary 2157 */ 2158 if (ret == -3) ret = 0; 2159 if (ret == -1) ret = 0; 2160 return(ret); 2161 } 2162 2163 /** 2164 * xmlCharEncInput: 2165 * @input: a parser input buffer 2166 * @flush: try to flush all the raw buffer 2167 * 2168 * Generic front-end for the encoding handler on parser input 2169 * 2170 * Returns the number of byte written if success, or 2171 * -1 general error 2172 * -2 if the transcoding fails (for *in is not valid utf8 string or 2173 * the result of transformation can't fit into the encoding we want), or 2174 */ 2175 int 2176 xmlCharEncInput(xmlParserInputBufferPtr input, int flush) 2177 { 2178 
int ret = -2; 2179 size_t written; 2180 size_t toconv; 2181 int c_in; 2182 int c_out; 2183 xmlBufPtr in; 2184 xmlBufPtr out; 2185 2186 if ((input == NULL) || (input->encoder == NULL) || 2187 (input->buffer == NULL) || (input->raw == NULL)) 2188 return (-1); 2189 out = input->buffer; 2190 in = input->raw; 2191 2192 toconv = xmlBufUse(in); 2193 if (toconv == 0) 2194 return (0); 2195 if ((toconv > 64 * 1024) && (flush == 0)) 2196 toconv = 64 * 1024; 2197 written = xmlBufAvail(out); 2198 if (written > 0) 2199 written--; /* count '\0' */ 2200 if (toconv * 2 >= written) { 2201 xmlBufGrow(out, toconv * 2); 2202 written = xmlBufAvail(out); 2203 if (written > 0) 2204 written--; /* count '\0' */ 2205 } 2206 if ((written > 128 * 1024) && (flush == 0)) 2207 written = 128 * 1024; 2208 2209 c_in = toconv; 2210 c_out = written; 2211 if (input->encoder->input != NULL) { 2212 ret = input->encoder->input(xmlBufEnd(out), &c_out, 2213 xmlBufContent(in), &c_in); 2214 xmlBufShrink(in, c_in); 2215 xmlBufAddLen(out, c_out); 2216 } 2217 #ifdef LIBXML_ICONV_ENABLED 2218 else if (input->encoder->iconv_in != NULL) { 2219 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2220 &c_out, xmlBufContent(in), &c_in); 2221 xmlBufShrink(in, c_in); 2222 xmlBufAddLen(out, c_out); 2223 if (ret == -1) 2224 ret = -3; 2225 } 2226 #endif /* LIBXML_ICONV_ENABLED */ 2227 #ifdef LIBXML_ICU_ENABLED 2228 else if (input->encoder->uconv_in != NULL) { 2229 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2230 &c_out, xmlBufContent(in), &c_in); 2231 xmlBufShrink(in, c_in); 2232 xmlBufAddLen(out, c_out); 2233 if (ret == -1) 2234 ret = -3; 2235 } 2236 #endif /* LIBXML_ICU_ENABLED */ 2237 switch (ret) { 2238 case 0: 2239 #ifdef DEBUG_ENCODING 2240 xmlGenericError(xmlGenericErrorContext, 2241 "converted %d bytes to %d bytes of input\n", 2242 c_in, c_out); 2243 #endif 2244 break; 2245 case -1: 2246 #ifdef DEBUG_ENCODING 2247 xmlGenericError(xmlGenericErrorContext, 2248 "converted %d bytes to 
%d bytes of input, %d left\n", 2249 c_in, c_out, (int)xmlBufUse(in)); 2250 #endif 2251 break; 2252 case -3: 2253 #ifdef DEBUG_ENCODING 2254 xmlGenericError(xmlGenericErrorContext, 2255 "converted %d bytes to %d bytes of input, %d left\n", 2256 c_in, c_out, (int)xmlBufUse(in)); 2257 #endif 2258 break; 2259 case -2: { 2260 char buf[50]; 2261 const xmlChar *content = xmlBufContent(in); 2262 2263 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2264 content[0], content[1], 2265 content[2], content[3]); 2266 buf[49] = 0; 2267 xmlEncodingErr(XML_I18N_CONV_FAILED, 2268 "input conversion failed due to input error, bytes %s\n", 2269 buf); 2270 } 2271 } 2272 /* 2273 * Ignore when input buffer is not on a boundary 2274 */ 2275 if (ret == -3) 2276 ret = 0; 2277 return (c_out? c_out : ret); 2278 } 2279 2280 /** 2281 * xmlCharEncInFunc: 2282 * @handler: char encoding transformation data structure 2283 * @out: an xmlBuffer for the output. 2284 * @in: an xmlBuffer for the input 2285 * 2286 * Generic front-end for the encoding handler input function 2287 * 2288 * Returns the number of byte written if success, or 2289 * -1 general error 2290 * -2 if the transcoding fails (for *in is not valid utf8 string or 2291 * the result of transformation can't fit into the encoding we want), or 2292 */ 2293 int 2294 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2295 xmlBufferPtr in) 2296 { 2297 int ret = -2; 2298 int written; 2299 int toconv; 2300 2301 if (handler == NULL) 2302 return (-1); 2303 if (out == NULL) 2304 return (-1); 2305 if (in == NULL) 2306 return (-1); 2307 2308 toconv = in->use; 2309 if (toconv == 0) 2310 return (0); 2311 written = out->size - out->use -1; /* count '\0' */ 2312 if (toconv * 2 >= written) { 2313 xmlBufferGrow(out, out->size + toconv * 2); 2314 written = out->size - out->use - 1; 2315 } 2316 if (handler->input != NULL) { 2317 ret = handler->input(&out->content[out->use], &written, 2318 in->content, &toconv); 2319 xmlBufferShrink(in, 
toconv); 2320 out->use += written; 2321 out->content[out->use] = 0; 2322 } 2323 #ifdef LIBXML_ICONV_ENABLED 2324 else if (handler->iconv_in != NULL) { 2325 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 2326 &written, in->content, &toconv); 2327 xmlBufferShrink(in, toconv); 2328 out->use += written; 2329 out->content[out->use] = 0; 2330 if (ret == -1) 2331 ret = -3; 2332 } 2333 #endif /* LIBXML_ICONV_ENABLED */ 2334 #ifdef LIBXML_ICU_ENABLED 2335 else if (handler->uconv_in != NULL) { 2336 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 2337 &written, in->content, &toconv); 2338 xmlBufferShrink(in, toconv); 2339 out->use += written; 2340 out->content[out->use] = 0; 2341 if (ret == -1) 2342 ret = -3; 2343 } 2344 #endif /* LIBXML_ICU_ENABLED */ 2345 switch (ret) { 2346 case 0: 2347 #ifdef DEBUG_ENCODING 2348 xmlGenericError(xmlGenericErrorContext, 2349 "converted %d bytes to %d bytes of input\n", 2350 toconv, written); 2351 #endif 2352 break; 2353 case -1: 2354 #ifdef DEBUG_ENCODING 2355 xmlGenericError(xmlGenericErrorContext, 2356 "converted %d bytes to %d bytes of input, %d left\n", 2357 toconv, written, in->use); 2358 #endif 2359 break; 2360 case -3: 2361 #ifdef DEBUG_ENCODING 2362 xmlGenericError(xmlGenericErrorContext, 2363 "converted %d bytes to %d bytes of input, %d left\n", 2364 toconv, written, in->use); 2365 #endif 2366 break; 2367 case -2: { 2368 char buf[50]; 2369 2370 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2371 in->content[0], in->content[1], 2372 in->content[2], in->content[3]); 2373 buf[49] = 0; 2374 xmlEncodingErr(XML_I18N_CONV_FAILED, 2375 "input conversion failed due to input error, bytes %s\n", 2376 buf); 2377 } 2378 } 2379 /* 2380 * Ignore when input buffer is not on a boundary 2381 */ 2382 if (ret == -3) 2383 ret = 0; 2384 return (written? 
written : ret); 2385 } 2386 2387 /** 2388 * xmlCharEncOutput: 2389 * @output: a parser output buffer 2390 * @init: is this an initialization call without data 2391 * 2392 * Generic front-end for the encoding handler on parser output 2393 * a first call with @init == 1 has to be made first to initiate the 2394 * output in case of non-stateless encoding needing to initiate their 2395 * state or the output (like the BOM in UTF16). 2396 * In case of UTF8 sequence conversion errors for the given encoder, 2397 * the content will be automatically remapped to a CharRef sequence. 2398 * 2399 * Returns the number of byte written if success, or 2400 * -1 general error 2401 * -2 if the transcoding fails (for *in is not valid utf8 string or 2402 * the result of transformation can't fit into the encoding we want), or 2403 */ 2404 int 2405 xmlCharEncOutput(xmlOutputBufferPtr output, int init) 2406 { 2407 int ret = -2; 2408 size_t written; 2409 size_t writtentot = 0; 2410 size_t toconv; 2411 int c_in; 2412 int c_out; 2413 xmlBufPtr in; 2414 xmlBufPtr out; 2415 int charref_len = 0; 2416 2417 if ((output == NULL) || (output->encoder == NULL) || 2418 (output->buffer == NULL) || (output->conv == NULL)) 2419 return (-1); 2420 out = output->conv; 2421 in = output->buffer; 2422 2423 retry: 2424 2425 written = xmlBufAvail(out); 2426 if (written > 0) 2427 written--; /* count '\0' */ 2428 2429 /* 2430 * First specific handling of the initialization call 2431 */ 2432 if (init) { 2433 c_in = 0; 2434 c_out = written; 2435 if (output->encoder->output != NULL) { 2436 ret = output->encoder->output(xmlBufEnd(out), &c_out, 2437 NULL, &c_in); 2438 if (ret > 0) /* Gennady: check return value */ 2439 xmlBufAddLen(out, c_out); 2440 } 2441 #ifdef LIBXML_ICONV_ENABLED 2442 else if (output->encoder->iconv_out != NULL) { 2443 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2444 &c_out, NULL, &c_in); 2445 xmlBufAddLen(out, c_out); 2446 } 2447 #endif /* LIBXML_ICONV_ENABLED */ 2448 #ifdef 
LIBXML_ICU_ENABLED 2449 else if (output->encoder->uconv_out != NULL) { 2450 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2451 &c_out, NULL, &c_in); 2452 xmlBufAddLen(out, c_out); 2453 } 2454 #endif /* LIBXML_ICU_ENABLED */ 2455 #ifdef DEBUG_ENCODING 2456 xmlGenericError(xmlGenericErrorContext, 2457 "initialized encoder\n"); 2458 #endif 2459 return(0); 2460 } 2461 2462 /* 2463 * Conversion itself. 2464 */ 2465 toconv = xmlBufUse(in); 2466 if (toconv == 0) 2467 return (0); 2468 if (toconv > 64 * 1024) 2469 toconv = 64 * 1024; 2470 if (toconv * 4 >= written) { 2471 xmlBufGrow(out, toconv * 4); 2472 written = xmlBufAvail(out) - 1; 2473 } 2474 if (written > 256 * 1024) 2475 written = 256 * 1024; 2476 2477 c_in = toconv; 2478 c_out = written; 2479 if (output->encoder->output != NULL) { 2480 ret = output->encoder->output(xmlBufEnd(out), &c_out, 2481 xmlBufContent(in), &c_in); 2482 if (c_out > 0) { 2483 xmlBufShrink(in, c_in); 2484 xmlBufAddLen(out, c_out); 2485 writtentot += c_out; 2486 } 2487 } 2488 #ifdef LIBXML_ICONV_ENABLED 2489 else if (output->encoder->iconv_out != NULL) { 2490 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2491 &c_out, xmlBufContent(in), &c_in); 2492 xmlBufShrink(in, c_in); 2493 xmlBufAddLen(out, c_out); 2494 writtentot += c_out; 2495 if (ret == -1) { 2496 if (c_out > 0) { 2497 /* 2498 * Can be a limitation of iconv 2499 */ 2500 charref_len = 0; 2501 goto retry; 2502 } 2503 ret = -3; 2504 } 2505 } 2506 #endif /* LIBXML_ICONV_ENABLED */ 2507 #ifdef LIBXML_ICU_ENABLED 2508 else if (output->encoder->uconv_out != NULL) { 2509 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2510 &c_out, xmlBufContent(in), &c_in); 2511 xmlBufShrink(in, c_in); 2512 xmlBufAddLen(out, c_out); 2513 writtentot += c_out; 2514 if (ret == -1) { 2515 if (c_out > 0) { 2516 /* 2517 * Can be a limitation of uconv 2518 */ 2519 charref_len = 0; 2520 goto retry; 2521 } 2522 ret = -3; 2523 } 2524 } 2525 #endif /* 
LIBXML_ICU_ENABLED */ 2526 else { 2527 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2528 "xmlCharEncOutFunc: no output function !\n", NULL); 2529 return(-1); 2530 } 2531 2532 if (ret >= 0) output += ret; 2533 2534 /* 2535 * Attempt to handle error cases 2536 */ 2537 switch (ret) { 2538 case 0: 2539 #ifdef DEBUG_ENCODING 2540 xmlGenericError(xmlGenericErrorContext, 2541 "converted %d bytes to %d bytes of output\n", 2542 c_in, c_out); 2543 #endif 2544 break; 2545 case -1: 2546 #ifdef DEBUG_ENCODING 2547 xmlGenericError(xmlGenericErrorContext, 2548 "output conversion failed by lack of space\n"); 2549 #endif 2550 break; 2551 case -3: 2552 #ifdef DEBUG_ENCODING 2553 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2554 c_in, c_out, (int) xmlBufUse(in)); 2555 #endif 2556 break; 2557 case -2: { 2558 int len = (int) xmlBufUse(in); 2559 xmlChar *content = xmlBufContent(in); 2560 int cur; 2561 2562 cur = xmlGetUTF8Char(content, &len); 2563 if ((charref_len != 0) && (c_out < charref_len)) { 2564 /* 2565 * We attempted to insert a character reference and failed. 2566 * Undo what was written and skip the remaining charref. 2567 */ 2568 xmlBufErase(out, c_out); 2569 writtentot -= c_out; 2570 xmlBufShrink(in, charref_len - c_out); 2571 charref_len = 0; 2572 2573 ret = -1; 2574 break; 2575 } else if (cur > 0) { 2576 xmlChar charref[20]; 2577 2578 #ifdef DEBUG_ENCODING 2579 xmlGenericError(xmlGenericErrorContext, 2580 "handling output conversion error\n"); 2581 xmlGenericError(xmlGenericErrorContext, 2582 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2583 content[0], content[1], 2584 content[2], content[3]); 2585 #endif 2586 /* 2587 * Removes the UTF8 sequence, and replace it by a charref 2588 * and continue the transcoding phase, hoping the error 2589 * did not mangle the encoder state. 
2590 */ 2591 charref_len = snprintf((char *) &charref[0], sizeof(charref), 2592 "&#%d;", cur); 2593 xmlBufShrink(in, len); 2594 xmlBufAddHead(in, charref, -1); 2595 2596 goto retry; 2597 } else { 2598 char buf[50]; 2599 2600 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2601 content[0], content[1], 2602 content[2], content[3]); 2603 buf[49] = 0; 2604 xmlEncodingErr(XML_I18N_CONV_FAILED, 2605 "output conversion failed due to conv error, bytes %s\n", 2606 buf); 2607 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) 2608 content[0] = ' '; 2609 } 2610 break; 2611 } 2612 } 2613 return(ret); 2614 } 2615 2616 /** 2617 * xmlCharEncOutFunc: 2618 * @handler: char enconding transformation data structure 2619 * @out: an xmlBuffer for the output. 2620 * @in: an xmlBuffer for the input 2621 * 2622 * Generic front-end for the encoding handler output function 2623 * a first call with @in == NULL has to be made firs to initiate the 2624 * output in case of non-stateless encoding needing to initiate their 2625 * state or the output (like the BOM in UTF16). 2626 * In case of UTF8 sequence conversion errors for the given encoder, 2627 * the content will be automatically remapped to a CharRef sequence. 2628 * 2629 * Returns the number of byte written if success, or 2630 * -1 general error 2631 * -2 if the transcoding fails (for *in is not valid utf8 string or 2632 * the result of transformation can't fit into the encoding we want), or 2633 */ 2634 int 2635 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2636 xmlBufferPtr in) { 2637 int ret = -2; 2638 int written; 2639 int writtentot = 0; 2640 int toconv; 2641 int output = 0; 2642 int charref_len = 0; 2643 2644 if (handler == NULL) return(-1); 2645 if (out == NULL) return(-1); 2646 2647 retry: 2648 2649 written = out->size - out->use; 2650 2651 if (written > 0) 2652 written--; /* Gennady: count '/0' */ 2653 2654 /* 2655 * First specific handling of in = NULL, i.e. 
the initialization call 2656 */ 2657 if (in == NULL) { 2658 toconv = 0; 2659 if (handler->output != NULL) { 2660 ret = handler->output(&out->content[out->use], &written, 2661 NULL, &toconv); 2662 if (ret >= 0) { /* Gennady: check return value */ 2663 out->use += written; 2664 out->content[out->use] = 0; 2665 } 2666 } 2667 #ifdef LIBXML_ICONV_ENABLED 2668 else if (handler->iconv_out != NULL) { 2669 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2670 &written, NULL, &toconv); 2671 out->use += written; 2672 out->content[out->use] = 0; 2673 } 2674 #endif /* LIBXML_ICONV_ENABLED */ 2675 #ifdef LIBXML_ICU_ENABLED 2676 else if (handler->uconv_out != NULL) { 2677 ret = xmlUconvWrapper(handler->uconv_out, 0, 2678 &out->content[out->use], 2679 &written, NULL, &toconv); 2680 out->use += written; 2681 out->content[out->use] = 0; 2682 } 2683 #endif /* LIBXML_ICU_ENABLED */ 2684 #ifdef DEBUG_ENCODING 2685 xmlGenericError(xmlGenericErrorContext, 2686 "initialized encoder\n"); 2687 #endif 2688 return(0); 2689 } 2690 2691 /* 2692 * Conversion itself. 
2693 */ 2694 toconv = in->use; 2695 if (toconv == 0) 2696 return(0); 2697 if (toconv * 4 >= written) { 2698 xmlBufferGrow(out, toconv * 4); 2699 written = out->size - out->use - 1; 2700 } 2701 if (handler->output != NULL) { 2702 ret = handler->output(&out->content[out->use], &written, 2703 in->content, &toconv); 2704 if (written > 0) { 2705 xmlBufferShrink(in, toconv); 2706 out->use += written; 2707 writtentot += written; 2708 } 2709 out->content[out->use] = 0; 2710 } 2711 #ifdef LIBXML_ICONV_ENABLED 2712 else if (handler->iconv_out != NULL) { 2713 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2714 &written, in->content, &toconv); 2715 xmlBufferShrink(in, toconv); 2716 out->use += written; 2717 writtentot += written; 2718 out->content[out->use] = 0; 2719 if (ret == -1) { 2720 if (written > 0) { 2721 /* 2722 * Can be a limitation of iconv 2723 */ 2724 charref_len = 0; 2725 goto retry; 2726 } 2727 ret = -3; 2728 } 2729 } 2730 #endif /* LIBXML_ICONV_ENABLED */ 2731 #ifdef LIBXML_ICU_ENABLED 2732 else if (handler->uconv_out != NULL) { 2733 ret = xmlUconvWrapper(handler->uconv_out, 0, 2734 &out->content[out->use], 2735 &written, in->content, &toconv); 2736 xmlBufferShrink(in, toconv); 2737 out->use += written; 2738 writtentot += written; 2739 out->content[out->use] = 0; 2740 if (ret == -1) { 2741 if (written > 0) { 2742 /* 2743 * Can be a limitation of iconv 2744 */ 2745 charref_len = 0; 2746 goto retry; 2747 } 2748 ret = -3; 2749 } 2750 } 2751 #endif /* LIBXML_ICU_ENABLED */ 2752 else { 2753 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2754 "xmlCharEncOutFunc: no output function !\n", NULL); 2755 return(-1); 2756 } 2757 2758 if (ret >= 0) output += ret; 2759 2760 /* 2761 * Attempt to handle error cases 2762 */ 2763 switch (ret) { 2764 case 0: 2765 #ifdef DEBUG_ENCODING 2766 xmlGenericError(xmlGenericErrorContext, 2767 "converted %d bytes to %d bytes of output\n", 2768 toconv, written); 2769 #endif 2770 break; 2771 case -1: 2772 #ifdef DEBUG_ENCODING 2773 
xmlGenericError(xmlGenericErrorContext, 2774 "output conversion failed by lack of space\n"); 2775 #endif 2776 break; 2777 case -3: 2778 #ifdef DEBUG_ENCODING 2779 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2780 toconv, written, in->use); 2781 #endif 2782 break; 2783 case -2: { 2784 int len = in->use; 2785 const xmlChar *utf = (const xmlChar *) in->content; 2786 int cur; 2787 2788 cur = xmlGetUTF8Char(utf, &len); 2789 if ((charref_len != 0) && (written < charref_len)) { 2790 /* 2791 * We attempted to insert a character reference and failed. 2792 * Undo what was written and skip the remaining charref. 2793 */ 2794 out->use -= written; 2795 writtentot -= written; 2796 xmlBufferShrink(in, charref_len - written); 2797 charref_len = 0; 2798 2799 ret = -1; 2800 break; 2801 } else if (cur > 0) { 2802 xmlChar charref[20]; 2803 2804 #ifdef DEBUG_ENCODING 2805 xmlGenericError(xmlGenericErrorContext, 2806 "handling output conversion error\n"); 2807 xmlGenericError(xmlGenericErrorContext, 2808 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2809 in->content[0], in->content[1], 2810 in->content[2], in->content[3]); 2811 #endif 2812 /* 2813 * Removes the UTF8 sequence, and replace it by a charref 2814 * and continue the transcoding phase, hoping the error 2815 * did not mangle the encoder state. 
2816 */ 2817 charref_len = snprintf((char *) &charref[0], sizeof(charref), 2818 "&#%d;", cur); 2819 xmlBufferShrink(in, len); 2820 xmlBufferAddHead(in, charref, -1); 2821 2822 goto retry; 2823 } else { 2824 char buf[50]; 2825 2826 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2827 in->content[0], in->content[1], 2828 in->content[2], in->content[3]); 2829 buf[49] = 0; 2830 xmlEncodingErr(XML_I18N_CONV_FAILED, 2831 "output conversion failed due to conv error, bytes %s\n", 2832 buf); 2833 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2834 in->content[0] = ' '; 2835 } 2836 break; 2837 } 2838 } 2839 return(ret); 2840 } 2841 2842 /** 2843 * xmlCharEncCloseFunc: 2844 * @handler: char enconding transformation data structure 2845 * 2846 * Generic front-end for encoding handler close function 2847 * 2848 * Returns 0 if success, or -1 in case of error 2849 */ 2850 int 2851 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2852 int ret = 0; 2853 int tofree = 0; 2854 if (handler == NULL) return(-1); 2855 if (handler->name == NULL) return(-1); 2856 #ifdef LIBXML_ICONV_ENABLED 2857 /* 2858 * Iconv handlers can be used only once, free the whole block. 2859 * and the associated icon resources. 
2860 */ 2861 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) { 2862 tofree = 1; 2863 if (handler->iconv_out != NULL) { 2864 if (iconv_close(handler->iconv_out)) 2865 ret = -1; 2866 handler->iconv_out = NULL; 2867 } 2868 if (handler->iconv_in != NULL) { 2869 if (iconv_close(handler->iconv_in)) 2870 ret = -1; 2871 handler->iconv_in = NULL; 2872 } 2873 } 2874 #endif /* LIBXML_ICONV_ENABLED */ 2875 #ifdef LIBXML_ICU_ENABLED 2876 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) { 2877 tofree = 1; 2878 if (handler->uconv_out != NULL) { 2879 closeIcuConverter(handler->uconv_out); 2880 handler->uconv_out = NULL; 2881 } 2882 if (handler->uconv_in != NULL) { 2883 closeIcuConverter(handler->uconv_in); 2884 handler->uconv_in = NULL; 2885 } 2886 } 2887 #endif 2888 if (tofree) { 2889 /* free up only dynamic handlers iconv/uconv */ 2890 if (handler->name != NULL) 2891 xmlFree(handler->name); 2892 handler->name = NULL; 2893 xmlFree(handler); 2894 } 2895 #ifdef DEBUG_ENCODING 2896 if (ret) 2897 xmlGenericError(xmlGenericErrorContext, 2898 "failed to close the encoding handler\n"); 2899 else 2900 xmlGenericError(xmlGenericErrorContext, 2901 "closed the encoding handler\n"); 2902 #endif 2903 2904 return(ret); 2905 } 2906 2907 /** 2908 * xmlByteConsumed: 2909 * @ctxt: an XML parser context 2910 * 2911 * This function provides the current index of the parser relative 2912 * to the start of the current entity. This function is computed in 2913 * bytes from the beginning starting at zero and finishing at the 2914 * size in byte of the file if parsing a file. The function is 2915 * of constant cost if the input is UTF-8 but can be costly if run 2916 * on non-UTF-8 input. 2917 * 2918 * Returns the index in bytes from the beginning of the entity or -1 2919 * in case the index could not be computed. 
2920 */ 2921 long 2922 xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2923 xmlParserInputPtr in; 2924 2925 if (ctxt == NULL) return(-1); 2926 in = ctxt->input; 2927 if (in == NULL) return(-1); 2928 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2929 unsigned int unused = 0; 2930 xmlCharEncodingHandler * handler = in->buf->encoder; 2931 /* 2932 * Encoding conversion, compute the number of unused original 2933 * bytes from the input not consumed and substract that from 2934 * the raw consumed value, this is not a cheap operation 2935 */ 2936 if (in->end - in->cur > 0) { 2937 unsigned char convbuf[32000]; 2938 const unsigned char *cur = (const unsigned char *)in->cur; 2939 int toconv = in->end - in->cur, written = 32000; 2940 2941 int ret; 2942 2943 if (handler->output != NULL) { 2944 do { 2945 toconv = in->end - cur; 2946 written = 32000; 2947 ret = handler->output(&convbuf[0], &written, 2948 cur, &toconv); 2949 if (ret == -1) return(-1); 2950 unused += written; 2951 cur += toconv; 2952 } while (ret == -2); 2953 #ifdef LIBXML_ICONV_ENABLED 2954 } else if (handler->iconv_out != NULL) { 2955 do { 2956 toconv = in->end - cur; 2957 written = 32000; 2958 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2959 &written, cur, &toconv); 2960 if (ret < 0) { 2961 if (written > 0) 2962 ret = -2; 2963 else 2964 return(-1); 2965 } 2966 unused += written; 2967 cur += toconv; 2968 } while (ret == -2); 2969 #endif 2970 #ifdef LIBXML_ICU_ENABLED 2971 } else if (handler->uconv_out != NULL) { 2972 do { 2973 toconv = in->end - cur; 2974 written = 32000; 2975 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], 2976 &written, cur, &toconv); 2977 if (ret < 0) { 2978 if (written > 0) 2979 ret = -2; 2980 else 2981 return(-1); 2982 } 2983 unused += written; 2984 cur += toconv; 2985 } while (ret == -2); 2986 #endif 2987 } else { 2988 /* could not find a converter */ 2989 return(-1); 2990 } 2991 } 2992 if (in->buf->rawconsumed < unused) 2993 return(-1); 2994 
return(in->buf->rawconsumed - unused); 2995 } 2996 return(in->consumed + (in->cur - in->base)); 2997 } 2998 2999 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 3000 #ifdef LIBXML_ISO8859X_ENABLED 3001 3002 /** 3003 * UTF8ToISO8859x: 3004 * @out: a pointer to an array of bytes to store the result 3005 * @outlen: the length of @out 3006 * @in: a pointer to an array of UTF-8 chars 3007 * @inlen: the length of @in 3008 * @xlattable: the 2-level transcoding table 3009 * 3010 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* 3011 * block of chars out. 3012 * 3013 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 3014 * The value of @inlen after return is the number of octets consumed 3015 * as the return value is positive, else unpredictable. 3016 * The value of @outlen after return is the number of ocetes consumed. 3017 */ 3018 static int 3019 UTF8ToISO8859x(unsigned char* out, int *outlen, 3020 const unsigned char* in, int *inlen, 3021 unsigned char const *xlattable) { 3022 const unsigned char* outstart = out; 3023 const unsigned char* inend; 3024 const unsigned char* instart = in; 3025 const unsigned char* processed = in; 3026 3027 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 3028 (xlattable == NULL)) 3029 return(-1); 3030 if (in == NULL) { 3031 /* 3032 * initialization nothing to do 3033 */ 3034 *outlen = 0; 3035 *inlen = 0; 3036 return(0); 3037 } 3038 inend = in + (*inlen); 3039 while (in < inend) { 3040 unsigned char d = *in++; 3041 if (d < 0x80) { 3042 *out++ = d; 3043 } else if (d < 0xC0) { 3044 /* trailing byte in leading position */ 3045 *outlen = out - outstart; 3046 *inlen = processed - instart; 3047 return(-2); 3048 } else if (d < 0xE0) { 3049 unsigned char c; 3050 if (!(in < inend)) { 3051 /* trailing byte not in input buffer */ 3052 *outlen = out - outstart; 3053 *inlen = processed - instart; 3054 return(-3); 3055 } 3056 c = *in++; 3057 if ((c & 0xC0) != 0x80) { 3058 /* not a 
trailing byte */ 3059 *outlen = out - outstart; 3060 *inlen = processed - instart; 3061 return(-2); 3062 } 3063 c = c & 0x3F; 3064 d = d & 0x1F; 3065 d = xlattable [48 + c + xlattable [d] * 64]; 3066 if (d == 0) { 3067 /* not in character set */ 3068 *outlen = out - outstart; 3069 *inlen = processed - instart; 3070 return(-2); 3071 } 3072 *out++ = d; 3073 } else if (d < 0xF0) { 3074 unsigned char c1; 3075 unsigned char c2; 3076 if (!(in < inend - 1)) { 3077 /* trailing bytes not in input buffer */ 3078 *outlen = out - outstart; 3079 *inlen = processed - instart; 3080 return(-3); 3081 } 3082 c1 = *in++; 3083 if ((c1 & 0xC0) != 0x80) { 3084 /* not a trailing byte (c1) */ 3085 *outlen = out - outstart; 3086 *inlen = processed - instart; 3087 return(-2); 3088 } 3089 c2 = *in++; 3090 if ((c2 & 0xC0) != 0x80) { 3091 /* not a trailing byte (c2) */ 3092 *outlen = out - outstart; 3093 *inlen = processed - instart; 3094 return(-2); 3095 } 3096 c1 = c1 & 0x3F; 3097 c2 = c2 & 0x3F; 3098 d = d & 0x0F; 3099 d = xlattable [48 + c2 + xlattable [48 + c1 + 3100 xlattable [32 + d] * 64] * 64]; 3101 if (d == 0) { 3102 /* not in character set */ 3103 *outlen = out - outstart; 3104 *inlen = processed - instart; 3105 return(-2); 3106 } 3107 *out++ = d; 3108 } else { 3109 /* cannot transcode >= U+010000 */ 3110 *outlen = out - outstart; 3111 *inlen = processed - instart; 3112 return(-2); 3113 } 3114 processed = in; 3115 } 3116 *outlen = out - outstart; 3117 *inlen = processed - instart; 3118 return(*outlen); 3119 } 3120 3121 /** 3122 * ISO8859xToUTF8 3123 * @out: a pointer to an array of bytes to store the result 3124 * @outlen: the length of @out 3125 * @in: a pointer to an array of ISO Latin 1 chars 3126 * @inlen: the length of @in 3127 * 3128 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8 3129 * block of chars out. 
3130 * Returns 0 if success, or -1 otherwise 3131 * The value of @inlen after return is the number of octets consumed 3132 * The value of @outlen after return is the number of ocetes produced. 3133 */ 3134 static int 3135 ISO8859xToUTF8(unsigned char* out, int *outlen, 3136 const unsigned char* in, int *inlen, 3137 unsigned short const *unicodetable) { 3138 unsigned char* outstart = out; 3139 unsigned char* outend; 3140 const unsigned char* instart = in; 3141 const unsigned char* inend; 3142 const unsigned char* instop; 3143 unsigned int c; 3144 3145 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 3146 (in == NULL) || (unicodetable == NULL)) 3147 return(-1); 3148 outend = out + *outlen; 3149 inend = in + *inlen; 3150 instop = inend; 3151 3152 while ((in < inend) && (out < outend - 2)) { 3153 if (*in >= 0x80) { 3154 c = unicodetable [*in - 0x80]; 3155 if (c == 0) { 3156 /* undefined code point */ 3157 *outlen = out - outstart; 3158 *inlen = in - instart; 3159 return (-1); 3160 } 3161 if (c < 0x800) { 3162 *out++ = ((c >> 6) & 0x1F) | 0xC0; 3163 *out++ = (c & 0x3F) | 0x80; 3164 } else { 3165 *out++ = ((c >> 12) & 0x0F) | 0xE0; 3166 *out++ = ((c >> 6) & 0x3F) | 0x80; 3167 *out++ = (c & 0x3F) | 0x80; 3168 } 3169 ++in; 3170 } 3171 if (instop - in > outend - out) instop = in + (outend - out); 3172 while ((*in < 0x80) && (in < instop)) { 3173 *out++ = *in++; 3174 } 3175 } 3176 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3177 *out++ = *in++; 3178 } 3179 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3180 *out++ = *in++; 3181 } 3182 *outlen = out - outstart; 3183 *inlen = in - instart; 3184 return (*outlen); 3185 } 3186 3187 3188 /************************************************************************ 3189 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding * 3190 ************************************************************************/ 3191 3192 static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 3193 0x0080, 0x0081, 0x0082, 
0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3194 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3195 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3196 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3197 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 3198 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 3199 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 3200 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 3201 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 3202 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 3203 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 3204 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 3205 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 3206 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 3207 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 3208 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 3209 }; 3210 3211 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 3212 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3217 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3218 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3219 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3220 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3221 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3222 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3223 
"\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 3224 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3226 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 3227 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3228 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3231 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 3232 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 3233 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 3234 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 3235 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3236 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 3237 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 3238 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 3239 }; 3240 3241 static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 3242 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3243 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3244 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3245 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3246 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 3247 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 3248 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 3249 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 3250 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 3251 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3252 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 3253 
0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 3254 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 3255 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3256 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 3257 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 3258 }; 3259 3260 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 3261 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3263 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3265 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3266 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3267 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3268 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3269 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3270 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3271 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 3272 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 3274 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 3275 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3277 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 3278 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3279 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3280 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3281 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3282 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3283 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3284 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3285 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 3286 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 3288 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3289 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3290 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3291 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 3292 }; 3293 3294 static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 3295 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3296 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3297 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3298 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3299 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 3300 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 3301 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 3302 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 3303 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3304 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 3305 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3306 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 3307 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3308 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 3309 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3310 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 3311 }; 3312 3313 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 3314 
"\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 3315 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3320 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3321 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3322 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3323 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 3324 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3325 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3326 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3327 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 3328 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 3329 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 3330 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 3331 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 3332 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 3333 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 3335 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3336 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3337 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3338 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 3339 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 3340 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 3341 }; 3342 3343 static unsigned short const 
xmlunicodetable_ISO8859_5 [128] = { 3344 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3345 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3346 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3347 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3348 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 3349 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 3350 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 3351 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 3352 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 3353 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 3354 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 3355 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 3356 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 3357 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 3358 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 3359 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 3360 }; 3361 3362 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 3363 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3364 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3365 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3368 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3370 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3371 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3372 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 3373 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3374 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 3375 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3376 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3377 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3378 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3379 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3382 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3385 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3386 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3387 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3388 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3389 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3390 }; 3391 3392 static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 3393 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3394 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3395 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3396 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3397 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 3398 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 3399 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3400 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 3401 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 3402 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 3403 
0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 3404 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3405 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 3406 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 3407 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3408 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3409 }; 3410 3411 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 3412 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3417 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3418 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3419 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3420 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3421 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3423 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3424 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 3429 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3430 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 3431 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3432 
"\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3433 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3434 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3435 }; 3436 3437 static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 3438 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3439 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3440 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3441 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3442 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 3443 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 3444 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 3445 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 3446 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 3447 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 3448 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 3449 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 3450 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 3451 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 3452 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 3453 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 3454 }; 3455 3456 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 3457 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3459 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3461 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3462 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3464 
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3465 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3466 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 3467 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 3468 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3469 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3470 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3473 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3476 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3479 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3480 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 3481 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3482 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3483 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3484 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 3485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3486 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3487 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3488 }; 3489 3490 static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 3491 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3492 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3493 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 
0x0096, 0x0097, 3494 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3495 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3496 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3497 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3498 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 3499 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3500 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3501 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3502 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3503 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3504 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3505 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3506 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3507 }; 3508 3509 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3510 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3511 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3512 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3515 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3516 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3517 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3518 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3519 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3520 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3521 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3522 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3523 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3524 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3525 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3526 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3527 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3528 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3529 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3532 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3534 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3535 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3538 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3539 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3541 }; 3542 3543 static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3544 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3545 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3546 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3547 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3548 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3549 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3550 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3551 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3552 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3553 
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3554 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3555 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3556 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3557 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3558 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3559 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3560 }; 3561 3562 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3563 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3564 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3565 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3566 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3568 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3570 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3571 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3572 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3573 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3574 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3575 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3576 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3577 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3581 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3582 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3583 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3584 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3585 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3586 }; 3587 3588 static unsigned short const xmlunicodetable_ISO8859_10 [128] = { 3589 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3590 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3591 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3592 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3593 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3594 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3595 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3596 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3597 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3598 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3599 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3600 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3601 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3602 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3603 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3604 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 3605 }; 3606 3607 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3608 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3610 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3614 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3615 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3616 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3617 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3618 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3619 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3620 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3621 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3622 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3623 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3624 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3625 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3626 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3627 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3632 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3633 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3634 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3635 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3636 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3637 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3638 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3639 }; 3640 3641 static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3642 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3643 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 
0x008e, 0x008f, 3644 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3645 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3646 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 3647 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 3648 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3649 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3650 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3651 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3652 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3653 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3654 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3655 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3656 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3657 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3658 }; 3659 3660 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3661 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3662 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3663 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3664 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3665 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3666 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3667 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3668 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3669 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3670 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3672 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3673 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3674 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3675 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3676 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3677 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3678 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3679 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3680 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3683 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3684 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3685 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3688 }; 3689 3690 static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3691 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3692 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3693 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3694 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3695 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3696 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3697 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3698 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3699 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3700 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3701 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3702 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3703 0x0105, 
0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3704 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3705 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3706 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3707 }; 3708 3709 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3710 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3711 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3712 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3714 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3715 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3716 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3717 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3718 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3719 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3720 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3721 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3722 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3723 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3724 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3725 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3729 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3730 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3731 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3732 
"\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3733 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3734 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3735 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3736 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3737 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3738 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3739 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3740 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3741 }; 3742 3743 static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3744 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3745 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3746 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3747 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3748 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3749 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3750 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3751 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3752 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3753 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3754 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3755 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3756 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3757 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3758 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3759 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3760 }; 3761 3762 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3763 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3764 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3765 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3766 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3767 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3768 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3769 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3770 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3771 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3772 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3773 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3774 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3775 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3776 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3777 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3778 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3781 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3782 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3784 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3785 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3786 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3788 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3790 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3791 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3792 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3793 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3797 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3798 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3799 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3800 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3802 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3803 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3804 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3805 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3806 }; 3807 3808 static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3809 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3810 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3811 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3812 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3813 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3814 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3815 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3816 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3817 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3818 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3819 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3820 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3821 0x00e0, 
0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3822 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3823 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3824 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3825 }; 3826 3827 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3828 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3829 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3830 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3832 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3833 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3834 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3835 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3836 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3837 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3838 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3839 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3840 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3841 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3842 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3843 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3844 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3845 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3846 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3847 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3848 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3849 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3850 
"\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3851 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3852 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3853 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3854 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3855 }; 3856 3857 static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3858 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3859 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3860 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3861 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3862 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3863 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3864 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3865 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3866 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3867 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3868 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3869 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3870 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3871 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3872 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3873 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3874 }; 3875 3876 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3877 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3878 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3879 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3881 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3882 
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3883 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3884 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3885 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3886 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3887 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3888 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3889 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3892 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3893 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3894 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3895 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3896 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3899 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3900 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3901 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3902 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3903 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3905 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3909 
"\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3910 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3911 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3912 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3913 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3914 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3915 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3916 }; 3917 3918 3919 /* 3920 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3921 */ 3922 3923 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3924 const unsigned char* in, int *inlen) { 3925 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3926 } 3927 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3928 const unsigned char* in, int *inlen) { 3929 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3930 } 3931 3932 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3933 const unsigned char* in, int *inlen) { 3934 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3935 } 3936 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3937 const unsigned char* in, int *inlen) { 3938 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3939 } 3940 3941 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3942 const unsigned char* in, int *inlen) { 3943 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3944 } 3945 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3946 const unsigned char* in, int *inlen) { 3947 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3948 } 3949 3950 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3951 const unsigned char* in, int *inlen) { 3952 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3953 } 
3954 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3955 const unsigned char* in, int *inlen) { 3956 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3957 } 3958 3959 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3960 const unsigned char* in, int *inlen) { 3961 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3962 } 3963 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3964 const unsigned char* in, int *inlen) { 3965 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3966 } 3967 3968 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3969 const unsigned char* in, int *inlen) { 3970 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3971 } 3972 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3973 const unsigned char* in, int *inlen) { 3974 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 3975 } 3976 3977 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 3978 const unsigned char* in, int *inlen) { 3979 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 3980 } 3981 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 3982 const unsigned char* in, int *inlen) { 3983 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 3984 } 3985 3986 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 3987 const unsigned char* in, int *inlen) { 3988 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 3989 } 3990 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 3991 const unsigned char* in, int *inlen) { 3992 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 3993 } 3994 3995 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 3996 const unsigned char* in, int *inlen) { 3997 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 
3998 } 3999 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 4000 const unsigned char* in, int *inlen) { 4001 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 4002 } 4003 4004 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 4005 const unsigned char* in, int *inlen) { 4006 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 4007 } 4008 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 4009 const unsigned char* in, int *inlen) { 4010 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 4011 } 4012 4013 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 4014 const unsigned char* in, int *inlen) { 4015 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 4016 } 4017 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 4018 const unsigned char* in, int *inlen) { 4019 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 4020 } 4021 4022 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 4023 const unsigned char* in, int *inlen) { 4024 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 4025 } 4026 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 4027 const unsigned char* in, int *inlen) { 4028 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 4029 } 4030 4031 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 4032 const unsigned char* in, int *inlen) { 4033 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 4034 } 4035 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 4036 const unsigned char* in, int *inlen) { 4037 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 4038 } 4039 4040 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 4041 const unsigned char* in, int *inlen) { 4042 return ISO8859xToUTF8 (out, outlen, in, inlen, 
xmlunicodetable_ISO8859_16); 4043 } 4044 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 4045 const unsigned char* in, int *inlen) { 4046 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 4047 } 4048 4049 static void 4050 xmlRegisterCharEncodingHandlersISO8859x (void) { 4051 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 4052 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 4053 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 4054 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 4055 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 4056 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 4057 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 4058 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 4059 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 4060 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 4061 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 4062 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 4063 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 4064 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 4065 } 4066 4067 #endif 4068 #endif 4069 4070 #define bottom_encoding 4071 #include "elfgcchack.h" 4072