Home | History | Annotate | Download | only in libxml2
      1 /*
      2  * encoding.c : implements the encoding conversion functions needed for XML
      3  *
      4  * Related specs:
      5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
      6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
      7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
      8  * [ISO-8859-1]   ISO Latin-1 characters codes.
      9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
     10  *                Worldwide Character Encoding -- Version 1.0", Addison-
     11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
     12  *                described in Unicode Technical Report #4.
     13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
     14  *                Information Interchange, ANSI X3.4-1986.
     15  *
     16  * See Copyright for the status of this software.
     17  *
     18  * daniel (at) veillard.com
     19  *
     20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst (at) w3.org>
     21  */
     22 
     23 #define IN_LIBXML
     24 #include "libxml.h"
     25 
     26 #include <string.h>
     27 #include <limits.h>
     28 
     29 #ifdef HAVE_CTYPE_H
     30 #include <ctype.h>
     31 #endif
     32 #ifdef HAVE_STDLIB_H
     33 #include <stdlib.h>
     34 #endif
     35 #ifdef LIBXML_ICONV_ENABLED
     36 #ifdef HAVE_ERRNO_H
     37 #include <errno.h>
     38 #endif
     39 #endif
     40 #include <libxml/encoding.h>
     41 #include <libxml/xmlmemory.h>
     42 #ifdef LIBXML_HTML_ENABLED
     43 #include <libxml/HTMLparser.h>
     44 #endif
     45 #include <libxml/globals.h>
     46 #include <libxml/xmlerror.h>
     47 
     48 #include "buf.h"
     49 #include "enc.h"
     50 
     51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
     52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
     53 
     54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
     55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
     56 struct _xmlCharEncodingAlias {
     57     const char *name;
     58     const char *alias;
     59 };
     60 
     61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
     62 static int xmlCharEncodingAliasesNb = 0;
     63 static int xmlCharEncodingAliasesMax = 0;
     64 
     65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
     66 #if 0
     67 #define DEBUG_ENCODING  /* Define this to get encoding traces */
     68 #endif
     69 #else
     70 #ifdef LIBXML_ISO8859X_ENABLED
     71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
     72 #endif
     73 #endif
     74 
     75 static int xmlLittleEndian = 1;
     76 
     77 /**
     78  * xmlEncodingErrMemory:
     79  * @extra:  extra informations
     80  *
     81  * Handle an out of memory condition
     82  */
     83 static void
     84 xmlEncodingErrMemory(const char *extra)
     85 {
     86     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
     87 }
     88 
     89 /**
     90  * xmlErrEncoding:
     91  * @error:  the error number
     92  * @msg:  the error message
     93  *
     94  * n encoding error
     95  */
     96 static void
     97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
     98 {
     99     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
    100                     XML_FROM_I18N, error, XML_ERR_FATAL,
    101                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
    102 }
    103 
    104 #ifdef LIBXML_ICU_ENABLED
    105 static uconv_t*
    106 openIcuConverter(const char* name, int toUnicode)
    107 {
    108   UErrorCode status = U_ZERO_ERROR;
    109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
    110   if (conv == NULL)
    111     return NULL;
    112 
    113   conv->uconv = ucnv_open(name, &status);
    114   if (U_FAILURE(status))
    115     goto error;
    116 
    117   status = U_ZERO_ERROR;
    118   if (toUnicode) {
    119     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
    120                         NULL, NULL, NULL, &status);
    121   }
    122   else {
    123     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
    124                         NULL, NULL, NULL, &status);
    125   }
    126   if (U_FAILURE(status))
    127     goto error;
    128 
    129   status = U_ZERO_ERROR;
    130   conv->utf8 = ucnv_open("UTF-8", &status);
    131   if (U_SUCCESS(status))
    132     return conv;
    133 
    134 error:
    135   if (conv->uconv)
    136     ucnv_close(conv->uconv);
    137   xmlFree(conv);
    138   return NULL;
    139 }
    140 
    141 static void
    142 closeIcuConverter(uconv_t *conv)
    143 {
    144   if (conv != NULL) {
    145     ucnv_close(conv->uconv);
    146     ucnv_close(conv->utf8);
    147     xmlFree(conv);
    148   }
    149 }
    150 #endif /* LIBXML_ICU_ENABLED */
    151 
    152 /************************************************************************
    153  *									*
    154  *		Conversions To/From UTF8 encoding			*
    155  *									*
    156  ************************************************************************/
    157 
    158 /**
    159  * asciiToUTF8:
    160  * @out:  a pointer to an array of bytes to store the result
    161  * @outlen:  the length of @out
    162  * @in:  a pointer to an array of ASCII chars
    163  * @inlen:  the length of @in
    164  *
    165  * Take a block of ASCII chars in and try to convert it to an UTF-8
    166  * block of chars out.
    167  * Returns 0 if success, or -1 otherwise
    168  * The value of @inlen after return is the number of octets consumed
    169  *     if the return value is positive, else unpredictable.
    170  * The value of @outlen after return is the number of octets consumed.
    171  */
    172 static int
    173 asciiToUTF8(unsigned char* out, int *outlen,
    174               const unsigned char* in, int *inlen) {
    175     unsigned char* outstart = out;
    176     const unsigned char* base = in;
    177     const unsigned char* processed = in;
    178     unsigned char* outend = out + *outlen;
    179     const unsigned char* inend;
    180     unsigned int c;
    181 
    182     inend = in + (*inlen);
    183     while ((in < inend) && (out - outstart + 5 < *outlen)) {
    184 	c= *in++;
    185 
    186         if (out >= outend)
    187 	    break;
    188         if (c < 0x80) {
    189 	    *out++ = c;
    190 	} else {
    191 	    *outlen = out - outstart;
    192 	    *inlen = processed - base;
    193 	    return(-1);
    194 	}
    195 
    196 	processed = (const unsigned char*) in;
    197     }
    198     *outlen = out - outstart;
    199     *inlen = processed - base;
    200     return(*outlen);
    201 }
    202 
    203 #ifdef LIBXML_OUTPUT_ENABLED
    204 /**
    205  * UTF8Toascii:
    206  * @out:  a pointer to an array of bytes to store the result
    207  * @outlen:  the length of @out
    208  * @in:  a pointer to an array of UTF-8 chars
    209  * @inlen:  the length of @in
    210  *
    211  * Take a block of UTF-8 chars in and try to convert it to an ASCII
    212  * block of chars out.
    213  *
    214  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
    215  * The value of @inlen after return is the number of octets consumed
    216  *     if the return value is positive, else unpredictable.
    217  * The value of @outlen after return is the number of octets consumed.
    218  */
    219 static int
    220 UTF8Toascii(unsigned char* out, int *outlen,
    221               const unsigned char* in, int *inlen) {
    222     const unsigned char* processed = in;
    223     const unsigned char* outend;
    224     const unsigned char* outstart = out;
    225     const unsigned char* instart = in;
    226     const unsigned char* inend;
    227     unsigned int c, d;
    228     int trailing;
    229 
    230     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    231     if (in == NULL) {
    232         /*
    233 	 * initialization nothing to do
    234 	 */
    235 	*outlen = 0;
    236 	*inlen = 0;
    237 	return(0);
    238     }
    239     inend = in + (*inlen);
    240     outend = out + (*outlen);
    241     while (in < inend) {
    242 	d = *in++;
    243 	if      (d < 0x80)  { c= d; trailing= 0; }
    244 	else if (d < 0xC0) {
    245 	    /* trailing byte in leading position */
    246 	    *outlen = out - outstart;
    247 	    *inlen = processed - instart;
    248 	    return(-2);
    249         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    250         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    251         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    252 	else {
    253 	    /* no chance for this in Ascii */
    254 	    *outlen = out - outstart;
    255 	    *inlen = processed - instart;
    256 	    return(-2);
    257 	}
    258 
    259 	if (inend - in < trailing) {
    260 	    break;
    261 	}
    262 
    263 	for ( ; trailing; trailing--) {
    264 	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
    265 		break;
    266 	    c <<= 6;
    267 	    c |= d & 0x3F;
    268 	}
    269 
    270 	/* assertion: c is a single UTF-4 value */
    271 	if (c < 0x80) {
    272 	    if (out >= outend)
    273 		break;
    274 	    *out++ = c;
    275 	} else {
    276 	    /* no chance for this in Ascii */
    277 	    *outlen = out - outstart;
    278 	    *inlen = processed - instart;
    279 	    return(-2);
    280 	}
    281 	processed = in;
    282     }
    283     *outlen = out - outstart;
    284     *inlen = processed - instart;
    285     return(*outlen);
    286 }
    287 #endif /* LIBXML_OUTPUT_ENABLED */
    288 
    289 /**
    290  * isolat1ToUTF8:
    291  * @out:  a pointer to an array of bytes to store the result
    292  * @outlen:  the length of @out
    293  * @in:  a pointer to an array of ISO Latin 1 chars
    294  * @inlen:  the length of @in
    295  *
    296  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
    297  * block of chars out.
    298  * Returns the number of bytes written if success, or -1 otherwise
    299  * The value of @inlen after return is the number of octets consumed
    300  *     if the return value is positive, else unpredictable.
    301  * The value of @outlen after return is the number of octets consumed.
    302  */
    303 int
    304 isolat1ToUTF8(unsigned char* out, int *outlen,
    305               const unsigned char* in, int *inlen) {
    306     unsigned char* outstart = out;
    307     const unsigned char* base = in;
    308     unsigned char* outend;
    309     const unsigned char* inend;
    310     const unsigned char* instop;
    311 
    312     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
    313 	return(-1);
    314 
    315     outend = out + *outlen;
    316     inend = in + (*inlen);
    317     instop = inend;
    318 
    319     while ((in < inend) && (out < outend - 1)) {
    320 	if (*in >= 0x80) {
    321 	    *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
    322             *out++ = ((*in) & 0x3F) | 0x80;
    323 	    ++in;
    324 	}
    325 	if ((instop - in) > (outend - out)) instop = in + (outend - out);
    326 	while ((in < instop) && (*in < 0x80)) {
    327 	    *out++ = *in++;
    328 	}
    329     }
    330     if ((in < inend) && (out < outend) && (*in < 0x80)) {
    331         *out++ = *in++;
    332     }
    333     *outlen = out - outstart;
    334     *inlen = in - base;
    335     return(*outlen);
    336 }
    337 
    338 /**
    339  * UTF8ToUTF8:
    340  * @out:  a pointer to an array of bytes to store the result
    341  * @outlen:  the length of @out
    342  * @inb:  a pointer to an array of UTF-8 chars
    343  * @inlenb:  the length of @in in UTF-8 chars
    344  *
    345  * No op copy operation for UTF8 handling.
    346  *
    347  * Returns the number of bytes written, or -1 if lack of space.
    348  *     The value of *inlen after return is the number of octets consumed
    349  *     if the return value is positive, else unpredictable.
    350  */
    351 static int
    352 UTF8ToUTF8(unsigned char* out, int *outlen,
    353            const unsigned char* inb, int *inlenb)
    354 {
    355     int len;
    356 
    357     if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
    358 	return(-1);
    359     if (*outlen > *inlenb) {
    360 	len = *inlenb;
    361     } else {
    362 	len = *outlen;
    363     }
    364     if (len < 0)
    365 	return(-1);
    366 
    367     memcpy(out, inb, len);
    368 
    369     *outlen = len;
    370     *inlenb = len;
    371     return(*outlen);
    372 }
    373 
    374 
    375 #ifdef LIBXML_OUTPUT_ENABLED
    376 /**
    377  * UTF8Toisolat1:
    378  * @out:  a pointer to an array of bytes to store the result
    379  * @outlen:  the length of @out
    380  * @in:  a pointer to an array of UTF-8 chars
    381  * @inlen:  the length of @in
    382  *
    383  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
    384  * block of chars out.
    385  *
    386  * Returns the number of bytes written if success, -2 if the transcoding fails,
    387            or -1 otherwise
    388  * The value of @inlen after return is the number of octets consumed
    389  *     if the return value is positive, else unpredictable.
    390  * The value of @outlen after return is the number of octets consumed.
    391  */
    392 int
    393 UTF8Toisolat1(unsigned char* out, int *outlen,
    394               const unsigned char* in, int *inlen) {
    395     const unsigned char* processed = in;
    396     const unsigned char* outend;
    397     const unsigned char* outstart = out;
    398     const unsigned char* instart = in;
    399     const unsigned char* inend;
    400     unsigned int c, d;
    401     int trailing;
    402 
    403     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    404     if (in == NULL) {
    405         /*
    406 	 * initialization nothing to do
    407 	 */
    408 	*outlen = 0;
    409 	*inlen = 0;
    410 	return(0);
    411     }
    412     inend = in + (*inlen);
    413     outend = out + (*outlen);
    414     while (in < inend) {
    415 	d = *in++;
    416 	if      (d < 0x80)  { c= d; trailing= 0; }
    417 	else if (d < 0xC0) {
    418 	    /* trailing byte in leading position */
    419 	    *outlen = out - outstart;
    420 	    *inlen = processed - instart;
    421 	    return(-2);
    422         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    423         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    424         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    425 	else {
    426 	    /* no chance for this in IsoLat1 */
    427 	    *outlen = out - outstart;
    428 	    *inlen = processed - instart;
    429 	    return(-2);
    430 	}
    431 
    432 	if (inend - in < trailing) {
    433 	    break;
    434 	}
    435 
    436 	for ( ; trailing; trailing--) {
    437 	    if (in >= inend)
    438 		break;
    439 	    if (((d= *in++) & 0xC0) != 0x80) {
    440 		*outlen = out - outstart;
    441 		*inlen = processed - instart;
    442 		return(-2);
    443 	    }
    444 	    c <<= 6;
    445 	    c |= d & 0x3F;
    446 	}
    447 
    448 	/* assertion: c is a single UTF-4 value */
    449 	if (c <= 0xFF) {
    450 	    if (out >= outend)
    451 		break;
    452 	    *out++ = c;
    453 	} else {
    454 	    /* no chance for this in IsoLat1 */
    455 	    *outlen = out - outstart;
    456 	    *inlen = processed - instart;
    457 	    return(-2);
    458 	}
    459 	processed = in;
    460     }
    461     *outlen = out - outstart;
    462     *inlen = processed - instart;
    463     return(*outlen);
    464 }
    465 #endif /* LIBXML_OUTPUT_ENABLED */
    466 
    467 /**
    468  * UTF16LEToUTF8:
    469  * @out:  a pointer to an array of bytes to store the result
    470  * @outlen:  the length of @out
    471  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
    472  * @inlenb:  the length of @in in UTF-16LE chars
    473  *
    474  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
    475  * block of chars out. This function assumes the endian property
    476  * is the same between the native type of this machine and the
    477  * inputed one.
    478  *
    479  * Returns the number of bytes written, or -1 if lack of space, or -2
    480  *     if the transcoding fails (if *in is not a valid utf16 string)
    481  *     The value of *inlen after return is the number of octets consumed
    482  *     if the return value is positive, else unpredictable.
    483  */
    484 static int
    485 UTF16LEToUTF8(unsigned char* out, int *outlen,
    486             const unsigned char* inb, int *inlenb)
    487 {
    488     unsigned char* outstart = out;
    489     const unsigned char* processed = inb;
    490     unsigned char* outend = out + *outlen;
    491     unsigned short* in = (unsigned short*) inb;
    492     unsigned short* inend;
    493     unsigned int c, d, inlen;
    494     unsigned char *tmp;
    495     int bits;
    496 
    497     if ((*inlenb % 2) == 1)
    498         (*inlenb)--;
    499     inlen = *inlenb / 2;
    500     inend = in + inlen;
    501     while ((in < inend) && (out - outstart + 5 < *outlen)) {
    502         if (xmlLittleEndian) {
    503 	    c= *in++;
    504 	} else {
    505 	    tmp = (unsigned char *) in;
    506 	    c = *tmp++;
    507 	    c = c | (((unsigned int)*tmp) << 8);
    508 	    in++;
    509 	}
    510         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
    511 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
    512 		break;
    513 	    }
    514 	    if (xmlLittleEndian) {
    515 		d = *in++;
    516 	    } else {
    517 		tmp = (unsigned char *) in;
    518 		d = *tmp++;
    519 		d = d | (((unsigned int)*tmp) << 8);
    520 		in++;
    521 	    }
    522             if ((d & 0xFC00) == 0xDC00) {
    523                 c &= 0x03FF;
    524                 c <<= 10;
    525                 c |= d & 0x03FF;
    526                 c += 0x10000;
    527             }
    528             else {
    529 		*outlen = out - outstart;
    530 		*inlenb = processed - inb;
    531 	        return(-2);
    532 	    }
    533         }
    534 
    535 	/* assertion: c is a single UTF-4 value */
    536         if (out >= outend)
    537 	    break;
    538         if      (c <    0x80) {  *out++=  c;                bits= -6; }
    539         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
    540         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
    541         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
    542 
    543         for ( ; bits >= 0; bits-= 6) {
    544             if (out >= outend)
    545 	        break;
    546             *out++= ((c >> bits) & 0x3F) | 0x80;
    547         }
    548 	processed = (const unsigned char*) in;
    549     }
    550     *outlen = out - outstart;
    551     *inlenb = processed - inb;
    552     return(*outlen);
    553 }
    554 
    555 #ifdef LIBXML_OUTPUT_ENABLED
    556 /**
    557  * UTF8ToUTF16LE:
    558  * @outb:  a pointer to an array of bytes to store the result
    559  * @outlen:  the length of @outb
    560  * @in:  a pointer to an array of UTF-8 chars
    561  * @inlen:  the length of @in
    562  *
    563  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
    564  * block of chars out.
    565  *
    566  * Returns the number of bytes written, or -1 if lack of space, or -2
    567  *     if the transcoding failed.
    568  */
    569 static int
    570 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
    571             const unsigned char* in, int *inlen)
    572 {
    573     unsigned short* out = (unsigned short*) outb;
    574     const unsigned char* processed = in;
    575     const unsigned char *const instart = in;
    576     unsigned short* outstart= out;
    577     unsigned short* outend;
    578     const unsigned char* inend;
    579     unsigned int c, d;
    580     int trailing;
    581     unsigned char *tmp;
    582     unsigned short tmp1, tmp2;
    583 
    584     /* UTF16LE encoding has no BOM */
    585     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    586     if (in == NULL) {
    587 	*outlen = 0;
    588 	*inlen = 0;
    589 	return(0);
    590     }
    591     inend= in + *inlen;
    592     outend = out + (*outlen / 2);
    593     while (in < inend) {
    594       d= *in++;
    595       if      (d < 0x80)  { c= d; trailing= 0; }
    596       else if (d < 0xC0) {
    597           /* trailing byte in leading position */
    598 	  *outlen = (out - outstart) * 2;
    599 	  *inlen = processed - instart;
    600 	  return(-2);
    601       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    602       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    603       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    604       else {
    605 	/* no chance for this in UTF-16 */
    606 	*outlen = (out - outstart) * 2;
    607 	*inlen = processed - instart;
    608 	return(-2);
    609       }
    610 
    611       if (inend - in < trailing) {
    612           break;
    613       }
    614 
    615       for ( ; trailing; trailing--) {
    616           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
    617 	      break;
    618           c <<= 6;
    619           c |= d & 0x3F;
    620       }
    621 
    622       /* assertion: c is a single UTF-4 value */
    623         if (c < 0x10000) {
    624             if (out >= outend)
    625 	        break;
    626 	    if (xmlLittleEndian) {
    627 		*out++ = c;
    628 	    } else {
    629 		tmp = (unsigned char *) out;
    630 		*tmp = c ;
    631 		*(tmp + 1) = c >> 8 ;
    632 		out++;
    633 	    }
    634         }
    635         else if (c < 0x110000) {
    636             if (out+1 >= outend)
    637 	        break;
    638             c -= 0x10000;
    639 	    if (xmlLittleEndian) {
    640 		*out++ = 0xD800 | (c >> 10);
    641 		*out++ = 0xDC00 | (c & 0x03FF);
    642 	    } else {
    643 		tmp1 = 0xD800 | (c >> 10);
    644 		tmp = (unsigned char *) out;
    645 		*tmp = (unsigned char) tmp1;
    646 		*(tmp + 1) = tmp1 >> 8;
    647 		out++;
    648 
    649 		tmp2 = 0xDC00 | (c & 0x03FF);
    650 		tmp = (unsigned char *) out;
    651 		*tmp  = (unsigned char) tmp2;
    652 		*(tmp + 1) = tmp2 >> 8;
    653 		out++;
    654 	    }
    655         }
    656         else
    657 	    break;
    658 	processed = in;
    659     }
    660     *outlen = (out - outstart) * 2;
    661     *inlen = processed - instart;
    662     return(*outlen);
    663 }
    664 
    665 /**
    666  * UTF8ToUTF16:
    667  * @outb:  a pointer to an array of bytes to store the result
    668  * @outlen:  the length of @outb
    669  * @in:  a pointer to an array of UTF-8 chars
    670  * @inlen:  the length of @in
    671  *
    672  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
    673  * block of chars out.
    674  *
    675  * Returns the number of bytes written, or -1 if lack of space, or -2
    676  *     if the transcoding failed.
    677  */
    678 static int
    679 UTF8ToUTF16(unsigned char* outb, int *outlen,
    680             const unsigned char* in, int *inlen)
    681 {
    682     if (in == NULL) {
    683 	/*
    684 	 * initialization, add the Byte Order Mark for UTF-16LE
    685 	 */
    686         if (*outlen >= 2) {
    687 	    outb[0] = 0xFF;
    688 	    outb[1] = 0xFE;
    689 	    *outlen = 2;
    690 	    *inlen = 0;
    691 #ifdef DEBUG_ENCODING
    692             xmlGenericError(xmlGenericErrorContext,
    693 		    "Added FFFE Byte Order Mark\n");
    694 #endif
    695 	    return(2);
    696 	}
    697 	*outlen = 0;
    698 	*inlen = 0;
    699 	return(0);
    700     }
    701     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
    702 }
    703 #endif /* LIBXML_OUTPUT_ENABLED */
    704 
    705 /**
    706  * UTF16BEToUTF8:
    707  * @out:  a pointer to an array of bytes to store the result
    708  * @outlen:  the length of @out
    709  * @inb:  a pointer to an array of UTF-16 passed as a byte array
    710  * @inlenb:  the length of @in in UTF-16 chars
    711  *
    712  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
    713  * block of chars out. This function assumes the endian property
    714  * is the same between the native type of this machine and the
    715  * inputed one.
    716  *
    717  * Returns the number of bytes written, or -1 if lack of space, or -2
    718  *     if the transcoding fails (if *in is not a valid utf16 string)
    719  * The value of *inlen after return is the number of octets consumed
    720  *     if the return value is positive, else unpredictable.
    721  */
    722 static int
    723 UTF16BEToUTF8(unsigned char* out, int *outlen,
    724             const unsigned char* inb, int *inlenb)
    725 {
    726     unsigned char* outstart = out;
    727     const unsigned char* processed = inb;
    728     unsigned char* outend = out + *outlen;
    729     unsigned short* in = (unsigned short*) inb;
    730     unsigned short* inend;
    731     unsigned int c, d, inlen;
    732     unsigned char *tmp;
    733     int bits;
    734 
    735     if ((*inlenb % 2) == 1)
    736         (*inlenb)--;
    737     inlen = *inlenb / 2;
    738     inend= in + inlen;
    739     while (in < inend) {
    740 	if (xmlLittleEndian) {
    741 	    tmp = (unsigned char *) in;
    742 	    c = *tmp++;
    743 	    c = c << 8;
    744 	    c = c | (unsigned int) *tmp;
    745 	    in++;
    746 	} else {
    747 	    c= *in++;
    748 	}
    749         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
    750 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
    751 		*outlen = out - outstart;
    752 		*inlenb = processed - inb;
    753 	        return(-2);
    754 	    }
    755 	    if (xmlLittleEndian) {
    756 		tmp = (unsigned char *) in;
    757 		d = *tmp++;
    758 		d = d << 8;
    759 		d = d | (unsigned int) *tmp;
    760 		in++;
    761 	    } else {
    762 		d= *in++;
    763 	    }
    764             if ((d & 0xFC00) == 0xDC00) {
    765                 c &= 0x03FF;
    766                 c <<= 10;
    767                 c |= d & 0x03FF;
    768                 c += 0x10000;
    769             }
    770             else {
    771 		*outlen = out - outstart;
    772 		*inlenb = processed - inb;
    773 	        return(-2);
    774 	    }
    775         }
    776 
    777 	/* assertion: c is a single UTF-4 value */
    778         if (out >= outend)
    779 	    break;
    780         if      (c <    0x80) {  *out++=  c;                bits= -6; }
    781         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
    782         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
    783         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
    784 
    785         for ( ; bits >= 0; bits-= 6) {
    786             if (out >= outend)
    787 	        break;
    788             *out++= ((c >> bits) & 0x3F) | 0x80;
    789         }
    790 	processed = (const unsigned char*) in;
    791     }
    792     *outlen = out - outstart;
    793     *inlenb = processed - inb;
    794     return(*outlen);
    795 }
    796 
    797 #ifdef LIBXML_OUTPUT_ENABLED
    798 /**
    799  * UTF8ToUTF16BE:
    800  * @outb:  a pointer to an array of bytes to store the result
    801  * @outlen:  the length of @outb
    802  * @in:  a pointer to an array of UTF-8 chars
    803  * @inlen:  the length of @in
    804  *
    805  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
    806  * block of chars out.
    807  *
    808  * Returns the number of byte written, or -1 by lack of space, or -2
    809  *     if the transcoding failed.
    810  */
    811 static int
    812 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
    813             const unsigned char* in, int *inlen)
    814 {
    815     unsigned short* out = (unsigned short*) outb;
    816     const unsigned char* processed = in;
    817     const unsigned char *const instart = in;
    818     unsigned short* outstart= out;
    819     unsigned short* outend;
    820     const unsigned char* inend;
    821     unsigned int c, d;
    822     int trailing;
    823     unsigned char *tmp;
    824     unsigned short tmp1, tmp2;
    825 
    826     /* UTF-16BE has no BOM */
    827     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    828     if (in == NULL) {
    829 	*outlen = 0;
    830 	*inlen = 0;
    831 	return(0);
    832     }
    833     inend= in + *inlen;
    834     outend = out + (*outlen / 2);
    835     while (in < inend) {
    836       d= *in++;
    837       if      (d < 0x80)  { c= d; trailing= 0; }
    838       else if (d < 0xC0)  {
    839           /* trailing byte in leading position */
    840 	  *outlen = out - outstart;
    841 	  *inlen = processed - instart;
    842 	  return(-2);
    843       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    844       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    845       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    846       else {
    847           /* no chance for this in UTF-16 */
    848 	  *outlen = out - outstart;
    849 	  *inlen = processed - instart;
    850 	  return(-2);
    851       }
    852 
    853       if (inend - in < trailing) {
    854           break;
    855       }
    856 
    857       for ( ; trailing; trailing--) {
    858           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
    859           c <<= 6;
    860           c |= d & 0x3F;
    861       }
    862 
    863       /* assertion: c is a single UTF-4 value */
    864         if (c < 0x10000) {
    865             if (out >= outend)  break;
    866 	    if (xmlLittleEndian) {
    867 		tmp = (unsigned char *) out;
    868 		*tmp = c >> 8;
    869 		*(tmp + 1) = c;
    870 		out++;
    871 	    } else {
    872 		*out++ = c;
    873 	    }
    874         }
    875         else if (c < 0x110000) {
    876             if (out+1 >= outend)  break;
    877             c -= 0x10000;
    878 	    if (xmlLittleEndian) {
    879 		tmp1 = 0xD800 | (c >> 10);
    880 		tmp = (unsigned char *) out;
    881 		*tmp = tmp1 >> 8;
    882 		*(tmp + 1) = (unsigned char) tmp1;
    883 		out++;
    884 
    885 		tmp2 = 0xDC00 | (c & 0x03FF);
    886 		tmp = (unsigned char *) out;
    887 		*tmp = tmp2 >> 8;
    888 		*(tmp + 1) = (unsigned char) tmp2;
    889 		out++;
    890 	    } else {
    891 		*out++ = 0xD800 | (c >> 10);
    892 		*out++ = 0xDC00 | (c & 0x03FF);
    893 	    }
    894         }
    895         else
    896 	    break;
    897 	processed = in;
    898     }
    899     *outlen = (out - outstart) * 2;
    900     *inlen = processed - instart;
    901     return(*outlen);
    902 }
    903 #endif /* LIBXML_OUTPUT_ENABLED */
    904 
    905 /************************************************************************
    906  *									*
    907  *		Generic encoding handling routines			*
    908  *									*
    909  ************************************************************************/
    910 
    911 /**
    912  * xmlDetectCharEncoding:
    913  * @in:  a pointer to the first bytes of the XML entity, must be at least
    914  *       2 bytes long (at least 4 if encoding is UTF4 variant).
    915  * @len:  pointer to the length of the buffer
    916  *
    917  * Guess the encoding of the entity using the first bytes of the entity content
    918  * according to the non-normative appendix F of the XML-1.0 recommendation.
    919  *
    920  * Returns one of the XML_CHAR_ENCODING_... values.
    921  */
    922 xmlCharEncoding
    923 xmlDetectCharEncoding(const unsigned char* in, int len)
    924 {
    925     if (in == NULL)
    926         return(XML_CHAR_ENCODING_NONE);
    927     if (len >= 4) {
    928 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
    929 	    (in[2] == 0x00) && (in[3] == 0x3C))
    930 	    return(XML_CHAR_ENCODING_UCS4BE);
    931 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
    932 	    (in[2] == 0x00) && (in[3] == 0x00))
    933 	    return(XML_CHAR_ENCODING_UCS4LE);
    934 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
    935 	    (in[2] == 0x3C) && (in[3] == 0x00))
    936 	    return(XML_CHAR_ENCODING_UCS4_2143);
    937 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
    938 	    (in[2] == 0x00) && (in[3] == 0x00))
    939 	    return(XML_CHAR_ENCODING_UCS4_3412);
    940 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
    941 	    (in[2] == 0xA7) && (in[3] == 0x94))
    942 	    return(XML_CHAR_ENCODING_EBCDIC);
    943 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
    944 	    (in[2] == 0x78) && (in[3] == 0x6D))
    945 	    return(XML_CHAR_ENCODING_UTF8);
    946 	/*
    947 	 * Although not part of the recommendation, we also
    948 	 * attempt an "auto-recognition" of UTF-16LE and
    949 	 * UTF-16BE encodings.
    950 	 */
    951 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
    952 	    (in[2] == 0x3F) && (in[3] == 0x00))
    953 	    return(XML_CHAR_ENCODING_UTF16LE);
    954 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
    955 	    (in[2] == 0x00) && (in[3] == 0x3F))
    956 	    return(XML_CHAR_ENCODING_UTF16BE);
    957     }
    958     if (len >= 3) {
    959 	/*
    960 	 * Errata on XML-1.0 June 20 2001
    961 	 * We now allow an UTF8 encoded BOM
    962 	 */
    963 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
    964 	    (in[2] == 0xBF))
    965 	    return(XML_CHAR_ENCODING_UTF8);
    966     }
    967     /* For UTF-16 we can recognize by the BOM */
    968     if (len >= 2) {
    969 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
    970 	    return(XML_CHAR_ENCODING_UTF16BE);
    971 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
    972 	    return(XML_CHAR_ENCODING_UTF16LE);
    973     }
    974     return(XML_CHAR_ENCODING_NONE);
    975 }
    976 
    977 /**
    978  * xmlCleanupEncodingAliases:
    979  *
    980  * Unregisters all aliases
    981  */
    982 void
    983 xmlCleanupEncodingAliases(void) {
    984     int i;
    985 
    986     if (xmlCharEncodingAliases == NULL)
    987 	return;
    988 
    989     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
    990 	if (xmlCharEncodingAliases[i].name != NULL)
    991 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
    992 	if (xmlCharEncodingAliases[i].alias != NULL)
    993 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
    994     }
    995     xmlCharEncodingAliasesNb = 0;
    996     xmlCharEncodingAliasesMax = 0;
    997     xmlFree(xmlCharEncodingAliases);
    998     xmlCharEncodingAliases = NULL;
    999 }
   1000 
   1001 /**
   1002  * xmlGetEncodingAlias:
   1003  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
   1004  *
   1005  * Lookup an encoding name for the given alias.
   1006  *
   1007  * Returns NULL if not found, otherwise the original name
   1008  */
   1009 const char *
   1010 xmlGetEncodingAlias(const char *alias) {
   1011     int i;
   1012     char upper[100];
   1013 
   1014     if (alias == NULL)
   1015 	return(NULL);
   1016 
   1017     if (xmlCharEncodingAliases == NULL)
   1018 	return(NULL);
   1019 
   1020     for (i = 0;i < 99;i++) {
   1021         upper[i] = toupper(alias[i]);
   1022 	if (upper[i] == 0) break;
   1023     }
   1024     upper[i] = 0;
   1025 
   1026     /*
   1027      * Walk down the list looking for a definition of the alias
   1028      */
   1029     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
   1030 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
   1031 	    return(xmlCharEncodingAliases[i].name);
   1032 	}
   1033     }
   1034     return(NULL);
   1035 }
   1036 
   1037 /**
   1038  * xmlAddEncodingAlias:
   1039  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
   1040  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
   1041  *
   1042  * Registers an alias @alias for an encoding named @name. Existing alias
   1043  * will be overwritten.
   1044  *
   1045  * Returns 0 in case of success, -1 in case of error
   1046  */
   1047 int
   1048 xmlAddEncodingAlias(const char *name, const char *alias) {
   1049     int i;
   1050     char upper[100];
   1051 
   1052     if ((name == NULL) || (alias == NULL))
   1053 	return(-1);
   1054 
   1055     for (i = 0;i < 99;i++) {
   1056         upper[i] = toupper(alias[i]);
   1057 	if (upper[i] == 0) break;
   1058     }
   1059     upper[i] = 0;
   1060 
   1061     if (xmlCharEncodingAliases == NULL) {
   1062 	xmlCharEncodingAliasesNb = 0;
   1063 	xmlCharEncodingAliasesMax = 20;
   1064 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
   1065 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
   1066 	if (xmlCharEncodingAliases == NULL)
   1067 	    return(-1);
   1068     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
   1069 	xmlCharEncodingAliasesMax *= 2;
   1070 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
   1071 	      xmlRealloc(xmlCharEncodingAliases,
   1072 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
   1073     }
   1074     /*
   1075      * Walk down the list looking for a definition of the alias
   1076      */
   1077     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
   1078 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
   1079 	    /*
   1080 	     * Replace the definition.
   1081 	     */
   1082 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
   1083 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
   1084 	    return(0);
   1085 	}
   1086     }
   1087     /*
   1088      * Add the definition
   1089      */
   1090     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
   1091     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
   1092     xmlCharEncodingAliasesNb++;
   1093     return(0);
   1094 }
   1095 
   1096 /**
   1097  * xmlDelEncodingAlias:
   1098  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
   1099  *
   1100  * Unregisters an encoding alias @alias
   1101  *
   1102  * Returns 0 in case of success, -1 in case of error
   1103  */
   1104 int
   1105 xmlDelEncodingAlias(const char *alias) {
   1106     int i;
   1107 
   1108     if (alias == NULL)
   1109 	return(-1);
   1110 
   1111     if (xmlCharEncodingAliases == NULL)
   1112 	return(-1);
   1113     /*
   1114      * Walk down the list looking for a definition of the alias
   1115      */
   1116     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
   1117 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
   1118 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
   1119 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
   1120 	    xmlCharEncodingAliasesNb--;
   1121 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
   1122 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
   1123 	    return(0);
   1124 	}
   1125     }
   1126     return(-1);
   1127 }
   1128 
   1129 /**
   1130  * xmlParseCharEncoding:
   1131  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
   1132  *
   1133  * Compare the string to the encoding schemes already known. Note
   1134  * that the comparison is case insensitive accordingly to the section
   1135  * [XML] 4.3.3 Character Encoding in Entities.
   1136  *
   1137  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
   1138  * if not recognized.
   1139  */
   1140 xmlCharEncoding
   1141 xmlParseCharEncoding(const char* name)
   1142 {
   1143     const char *alias;
   1144     char upper[500];
   1145     int i;
   1146 
   1147     if (name == NULL)
   1148 	return(XML_CHAR_ENCODING_NONE);
   1149 
   1150     /*
   1151      * Do the alias resolution
   1152      */
   1153     alias = xmlGetEncodingAlias(name);
   1154     if (alias != NULL)
   1155 	name = alias;
   1156 
   1157     for (i = 0;i < 499;i++) {
   1158         upper[i] = toupper(name[i]);
   1159 	if (upper[i] == 0) break;
   1160     }
   1161     upper[i] = 0;
   1162 
   1163     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
   1164     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
   1165     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
   1166 
   1167     /*
   1168      * NOTE: if we were able to parse this, the endianness of UTF16 is
   1169      *       already found and in use
   1170      */
   1171     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
   1172     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
   1173 
   1174     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
   1175     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
   1176     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
   1177 
   1178     /*
   1179      * NOTE: if we were able to parse this, the endianness of UCS4 is
   1180      *       already found and in use
   1181      */
   1182     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
   1183     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
   1184     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
   1185 
   1186 
   1187     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
   1188     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
   1189     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
   1190 
   1191     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
   1192     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
   1193     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
   1194 
   1195     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
   1196     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
   1197     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
   1198     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
   1199     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
   1200     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
   1201     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
   1202 
   1203     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
   1204     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
   1205     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
   1206 
   1207 #ifdef DEBUG_ENCODING
   1208     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
   1209 #endif
   1210     return(XML_CHAR_ENCODING_ERROR);
   1211 }
   1212 
   1213 /**
   1214  * xmlGetCharEncodingName:
   1215  * @enc:  the encoding
   1216  *
   1217  * The "canonical" name for XML encoding.
   1218  * C.f. http://www.w3.org/TR/REC-xml#charencoding
   1219  * Section 4.3.3  Character Encoding in Entities
   1220  *
   1221  * Returns the canonical name for the given encoding
   1222  */
   1223 
   1224 const char*
   1225 xmlGetCharEncodingName(xmlCharEncoding enc) {
   1226     switch (enc) {
   1227         case XML_CHAR_ENCODING_ERROR:
   1228 	    return(NULL);
   1229         case XML_CHAR_ENCODING_NONE:
   1230 	    return(NULL);
   1231         case XML_CHAR_ENCODING_UTF8:
   1232 	    return("UTF-8");
   1233         case XML_CHAR_ENCODING_UTF16LE:
   1234 	    return("UTF-16");
   1235         case XML_CHAR_ENCODING_UTF16BE:
   1236 	    return("UTF-16");
   1237         case XML_CHAR_ENCODING_EBCDIC:
   1238             return("EBCDIC");
   1239         case XML_CHAR_ENCODING_UCS4LE:
   1240             return("ISO-10646-UCS-4");
   1241         case XML_CHAR_ENCODING_UCS4BE:
   1242             return("ISO-10646-UCS-4");
   1243         case XML_CHAR_ENCODING_UCS4_2143:
   1244             return("ISO-10646-UCS-4");
   1245         case XML_CHAR_ENCODING_UCS4_3412:
   1246             return("ISO-10646-UCS-4");
   1247         case XML_CHAR_ENCODING_UCS2:
   1248             return("ISO-10646-UCS-2");
   1249         case XML_CHAR_ENCODING_8859_1:
   1250 	    return("ISO-8859-1");
   1251         case XML_CHAR_ENCODING_8859_2:
   1252 	    return("ISO-8859-2");
   1253         case XML_CHAR_ENCODING_8859_3:
   1254 	    return("ISO-8859-3");
   1255         case XML_CHAR_ENCODING_8859_4:
   1256 	    return("ISO-8859-4");
   1257         case XML_CHAR_ENCODING_8859_5:
   1258 	    return("ISO-8859-5");
   1259         case XML_CHAR_ENCODING_8859_6:
   1260 	    return("ISO-8859-6");
   1261         case XML_CHAR_ENCODING_8859_7:
   1262 	    return("ISO-8859-7");
   1263         case XML_CHAR_ENCODING_8859_8:
   1264 	    return("ISO-8859-8");
   1265         case XML_CHAR_ENCODING_8859_9:
   1266 	    return("ISO-8859-9");
   1267         case XML_CHAR_ENCODING_2022_JP:
   1268             return("ISO-2022-JP");
   1269         case XML_CHAR_ENCODING_SHIFT_JIS:
   1270             return("Shift-JIS");
   1271         case XML_CHAR_ENCODING_EUC_JP:
   1272             return("EUC-JP");
   1273 	case XML_CHAR_ENCODING_ASCII:
   1274 	    return(NULL);
   1275     }
   1276     return(NULL);
   1277 }
   1278 
   1279 /************************************************************************
   1280  *									*
   1281  *			Char encoding handlers				*
   1282  *									*
   1283  ************************************************************************/
   1284 
   1285 
   1286 /* the size should be growable, but it's not a big deal ... */
   1287 #define MAX_ENCODING_HANDLERS 50
   1288 static xmlCharEncodingHandlerPtr *handlers = NULL;
   1289 static int nbCharEncodingHandler = 0;
   1290 
   1291 /*
   1292  * The default is UTF-8 for XML, that's also the default used for the
   1293  * parser internals, so the default encoding handler is NULL
   1294  */
   1295 
   1296 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
   1297 
   1298 /**
   1299  * xmlNewCharEncodingHandler:
   1300  * @name:  the encoding name, in UTF-8 format (ASCII actually)
   1301  * @input:  the xmlCharEncodingInputFunc to read that encoding
   1302  * @output:  the xmlCharEncodingOutputFunc to write that encoding
   1303  *
   1304  * Create and registers an xmlCharEncodingHandler.
   1305  *
   1306  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
   1307  */
   1308 xmlCharEncodingHandlerPtr
   1309 xmlNewCharEncodingHandler(const char *name,
   1310                           xmlCharEncodingInputFunc input,
   1311                           xmlCharEncodingOutputFunc output) {
   1312     xmlCharEncodingHandlerPtr handler;
   1313     const char *alias;
   1314     char upper[500];
   1315     int i;
   1316     char *up = NULL;
   1317 
   1318     /*
   1319      * Do the alias resolution
   1320      */
   1321     alias = xmlGetEncodingAlias(name);
   1322     if (alias != NULL)
   1323 	name = alias;
   1324 
   1325     /*
   1326      * Keep only the uppercase version of the encoding.
   1327      */
   1328     if (name == NULL) {
   1329         xmlEncodingErr(XML_I18N_NO_NAME,
   1330 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
   1331 	return(NULL);
   1332     }
   1333     for (i = 0;i < 499;i++) {
   1334         upper[i] = toupper(name[i]);
   1335 	if (upper[i] == 0) break;
   1336     }
   1337     upper[i] = 0;
   1338     up = xmlMemStrdup(upper);
   1339     if (up == NULL) {
   1340         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
   1341 	return(NULL);
   1342     }
   1343 
   1344     /*
   1345      * allocate and fill-up an handler block.
   1346      */
   1347     handler = (xmlCharEncodingHandlerPtr)
   1348               xmlMalloc(sizeof(xmlCharEncodingHandler));
   1349     if (handler == NULL) {
   1350         xmlFree(up);
   1351         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
   1352 	return(NULL);
   1353     }
   1354     memset(handler, 0, sizeof(xmlCharEncodingHandler));
   1355     handler->input = input;
   1356     handler->output = output;
   1357     handler->name = up;
   1358 
   1359 #ifdef LIBXML_ICONV_ENABLED
   1360     handler->iconv_in = NULL;
   1361     handler->iconv_out = NULL;
   1362 #endif
   1363 #ifdef LIBXML_ICU_ENABLED
   1364     handler->uconv_in = NULL;
   1365     handler->uconv_out = NULL;
   1366 #endif
   1367 
   1368     /*
   1369      * registers and returns the handler.
   1370      */
   1371     xmlRegisterCharEncodingHandler(handler);
   1372 #ifdef DEBUG_ENCODING
   1373     xmlGenericError(xmlGenericErrorContext,
   1374 	    "Registered encoding handler for %s\n", name);
   1375 #endif
   1376     return(handler);
   1377 }
   1378 
   1379 /**
   1380  * xmlInitCharEncodingHandlers:
   1381  *
   1382  * Initialize the char encoding support, it registers the default
   1383  * encoding supported.
   1384  * NOTE: while public, this function usually doesn't need to be called
   1385  *       in normal processing.
   1386  */
   1387 void
   1388 xmlInitCharEncodingHandlers(void) {
   1389     unsigned short int tst = 0x1234;
   1390     unsigned char *ptr = (unsigned char *) &tst;
   1391 
   1392     if (handlers != NULL) return;
   1393 
   1394     handlers = (xmlCharEncodingHandlerPtr *)
   1395         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
   1396 
   1397     if (*ptr == 0x12) xmlLittleEndian = 0;
   1398     else if (*ptr == 0x34) xmlLittleEndian = 1;
   1399     else {
   1400         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
   1401 	               "Odd problem at endianness detection\n", NULL);
   1402     }
   1403 
   1404     if (handlers == NULL) {
   1405         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
   1406 	return;
   1407     }
   1408     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
   1409 #ifdef LIBXML_OUTPUT_ENABLED
   1410     xmlUTF16LEHandler =
   1411           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
   1412     xmlUTF16BEHandler =
   1413           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
   1414     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
   1415     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
   1416     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
   1417     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
   1418 #ifdef LIBXML_HTML_ENABLED
   1419     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
   1420 #endif
   1421 #else
   1422     xmlUTF16LEHandler =
   1423           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
   1424     xmlUTF16BEHandler =
   1425           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
   1426     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
   1427     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
   1428     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
   1429     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
   1430 #endif /* LIBXML_OUTPUT_ENABLED */
   1431 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
   1432 #ifdef LIBXML_ISO8859X_ENABLED
   1433     xmlRegisterCharEncodingHandlersISO8859x ();
   1434 #endif
   1435 #endif
   1436 
   1437 }
   1438 
   1439 /**
   1440  * xmlCleanupCharEncodingHandlers:
   1441  *
   1442  * Cleanup the memory allocated for the char encoding support, it
   1443  * unregisters all the encoding handlers and the aliases.
   1444  */
   1445 void
   1446 xmlCleanupCharEncodingHandlers(void) {
   1447     xmlCleanupEncodingAliases();
   1448 
   1449     if (handlers == NULL) return;
   1450 
   1451     for (;nbCharEncodingHandler > 0;) {
   1452         nbCharEncodingHandler--;
   1453 	if (handlers[nbCharEncodingHandler] != NULL) {
   1454 	    if (handlers[nbCharEncodingHandler]->name != NULL)
   1455 		xmlFree(handlers[nbCharEncodingHandler]->name);
   1456 	    xmlFree(handlers[nbCharEncodingHandler]);
   1457 	}
   1458     }
   1459     xmlFree(handlers);
   1460     handlers = NULL;
   1461     nbCharEncodingHandler = 0;
   1462     xmlDefaultCharEncodingHandler = NULL;
   1463 }
   1464 
   1465 /**
   1466  * xmlRegisterCharEncodingHandler:
   1467  * @handler:  the xmlCharEncodingHandlerPtr handler block
   1468  *
   1469  * Register the char encoding handler, surprising, isn't it ?
   1470  */
   1471 void
   1472 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
   1473     if (handlers == NULL) xmlInitCharEncodingHandlers();
   1474     if ((handler == NULL) || (handlers == NULL)) {
   1475         xmlEncodingErr(XML_I18N_NO_HANDLER,
   1476 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
   1477 	return;
   1478     }
   1479 
   1480     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
   1481         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
   1482 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
   1483 	               "MAX_ENCODING_HANDLERS");
   1484 	return;
   1485     }
   1486     handlers[nbCharEncodingHandler++] = handler;
   1487 }
   1488 
   1489 /**
   1490  * xmlGetCharEncodingHandler:
   1491  * @enc:  an xmlCharEncoding value.
   1492  *
   1493  * Search in the registered set the handler able to read/write that encoding.
   1494  *
   1495  * Returns the handler or NULL if not found
   1496  */
   1497 xmlCharEncodingHandlerPtr
   1498 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
   1499     xmlCharEncodingHandlerPtr handler;
   1500 
   1501     if (handlers == NULL) xmlInitCharEncodingHandlers();
   1502     switch (enc) {
   1503         case XML_CHAR_ENCODING_ERROR:
   1504 	    return(NULL);
   1505         case XML_CHAR_ENCODING_NONE:
   1506 	    return(NULL);
   1507         case XML_CHAR_ENCODING_UTF8:
   1508 	    return(NULL);
   1509         case XML_CHAR_ENCODING_UTF16LE:
   1510 	    return(xmlUTF16LEHandler);
   1511         case XML_CHAR_ENCODING_UTF16BE:
   1512 	    return(xmlUTF16BEHandler);
   1513         case XML_CHAR_ENCODING_EBCDIC:
   1514             handler = xmlFindCharEncodingHandler("EBCDIC");
   1515             if (handler != NULL) return(handler);
   1516             handler = xmlFindCharEncodingHandler("ebcdic");
   1517             if (handler != NULL) return(handler);
   1518             handler = xmlFindCharEncodingHandler("EBCDIC-US");
   1519             if (handler != NULL) return(handler);
   1520             handler = xmlFindCharEncodingHandler("IBM-037");
   1521             if (handler != NULL) return(handler);
   1522 	    break;
   1523         case XML_CHAR_ENCODING_UCS4BE:
   1524             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
   1525             if (handler != NULL) return(handler);
   1526             handler = xmlFindCharEncodingHandler("UCS-4");
   1527             if (handler != NULL) return(handler);
   1528             handler = xmlFindCharEncodingHandler("UCS4");
   1529             if (handler != NULL) return(handler);
   1530 	    break;
   1531         case XML_CHAR_ENCODING_UCS4LE:
   1532             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
   1533             if (handler != NULL) return(handler);
   1534             handler = xmlFindCharEncodingHandler("UCS-4");
   1535             if (handler != NULL) return(handler);
   1536             handler = xmlFindCharEncodingHandler("UCS4");
   1537             if (handler != NULL) return(handler);
   1538 	    break;
   1539         case XML_CHAR_ENCODING_UCS4_2143:
   1540 	    break;
   1541         case XML_CHAR_ENCODING_UCS4_3412:
   1542 	    break;
   1543         case XML_CHAR_ENCODING_UCS2:
   1544             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
   1545             if (handler != NULL) return(handler);
   1546             handler = xmlFindCharEncodingHandler("UCS-2");
   1547             if (handler != NULL) return(handler);
   1548             handler = xmlFindCharEncodingHandler("UCS2");
   1549             if (handler != NULL) return(handler);
   1550 	    break;
   1551 
   1552 	    /*
   1553 	     * We used to keep ISO Latin encodings native in the
   1554 	     * generated data. This led to so many problems that
   1555 	     * this has been removed. One can still change this
   1556 	     * back by registering no-ops encoders for those
   1557 	     */
   1558         case XML_CHAR_ENCODING_8859_1:
   1559 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
   1560 	    if (handler != NULL) return(handler);
   1561 	    break;
   1562         case XML_CHAR_ENCODING_8859_2:
   1563 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
   1564 	    if (handler != NULL) return(handler);
   1565 	    break;
   1566         case XML_CHAR_ENCODING_8859_3:
   1567 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
   1568 	    if (handler != NULL) return(handler);
   1569 	    break;
   1570         case XML_CHAR_ENCODING_8859_4:
   1571 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
   1572 	    if (handler != NULL) return(handler);
   1573 	    break;
   1574         case XML_CHAR_ENCODING_8859_5:
   1575 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
   1576 	    if (handler != NULL) return(handler);
   1577 	    break;
   1578         case XML_CHAR_ENCODING_8859_6:
   1579 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
   1580 	    if (handler != NULL) return(handler);
   1581 	    break;
   1582         case XML_CHAR_ENCODING_8859_7:
   1583 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
   1584 	    if (handler != NULL) return(handler);
   1585 	    break;
   1586         case XML_CHAR_ENCODING_8859_8:
   1587 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
   1588 	    if (handler != NULL) return(handler);
   1589 	    break;
   1590         case XML_CHAR_ENCODING_8859_9:
   1591 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
   1592 	    if (handler != NULL) return(handler);
   1593 	    break;
   1594 
   1595 
   1596         case XML_CHAR_ENCODING_2022_JP:
   1597             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
   1598             if (handler != NULL) return(handler);
   1599 	    break;
   1600         case XML_CHAR_ENCODING_SHIFT_JIS:
   1601             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
   1602             if (handler != NULL) return(handler);
   1603             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
   1604             if (handler != NULL) return(handler);
   1605             handler = xmlFindCharEncodingHandler("Shift_JIS");
   1606             if (handler != NULL) return(handler);
   1607 	    break;
   1608         case XML_CHAR_ENCODING_EUC_JP:
   1609             handler = xmlFindCharEncodingHandler("EUC-JP");
   1610             if (handler != NULL) return(handler);
   1611 	    break;
   1612 	default:
   1613 	    break;
   1614     }
   1615 
   1616 #ifdef DEBUG_ENCODING
   1617     xmlGenericError(xmlGenericErrorContext,
   1618 	    "No handler found for encoding %d\n", enc);
   1619 #endif
   1620     return(NULL);
   1621 }
   1622 
   1623 /**
   1624  * xmlFindCharEncodingHandler:
   1625  * @name:  a string describing the char encoding.
   1626  *
   1627  * Search in the registered set the handler able to read/write that encoding.
   1628  *
   1629  * Returns the handler or NULL if not found
   1630  */
   1631 xmlCharEncodingHandlerPtr
   1632 xmlFindCharEncodingHandler(const char *name) {
   1633     const char *nalias;
   1634     const char *norig;
   1635     xmlCharEncoding alias;
   1636 #ifdef LIBXML_ICONV_ENABLED
   1637     xmlCharEncodingHandlerPtr enc;
   1638     iconv_t icv_in, icv_out;
   1639 #endif /* LIBXML_ICONV_ENABLED */
   1640 #ifdef LIBXML_ICU_ENABLED
   1641     xmlCharEncodingHandlerPtr encu;
   1642     uconv_t *ucv_in, *ucv_out;
   1643 #endif /* LIBXML_ICU_ENABLED */
   1644     char upper[100];
   1645     int i;
   1646 
   1647     if (handlers == NULL) xmlInitCharEncodingHandlers();
   1648     if (name == NULL) return(xmlDefaultCharEncodingHandler);
   1649     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
   1650 
   1651     /*
   1652      * Do the alias resolution
   1653      */
   1654     norig = name;
   1655     nalias = xmlGetEncodingAlias(name);
   1656     if (nalias != NULL)
   1657 	name = nalias;
   1658 
   1659     /*
   1660      * Check first for directly registered encoding names
   1661      */
   1662     for (i = 0;i < 99;i++) {
   1663         upper[i] = toupper(name[i]);
   1664 	if (upper[i] == 0) break;
   1665     }
   1666     upper[i] = 0;
   1667 
   1668     if (handlers != NULL) {
   1669         for (i = 0;i < nbCharEncodingHandler; i++) {
   1670             if (!strcmp(upper, handlers[i]->name)) {
   1671 #ifdef DEBUG_ENCODING
   1672                 xmlGenericError(xmlGenericErrorContext,
   1673                         "Found registered handler for encoding %s\n", name);
   1674 #endif
   1675                 return(handlers[i]);
   1676             }
   1677         }
   1678     }
   1679 
   1680 #ifdef LIBXML_ICONV_ENABLED
   1681     /* check whether iconv can handle this */
   1682     icv_in = iconv_open("UTF-8", name);
   1683     icv_out = iconv_open(name, "UTF-8");
   1684     if (icv_in == (iconv_t) -1) {
   1685         icv_in = iconv_open("UTF-8", upper);
   1686     }
   1687     if (icv_out == (iconv_t) -1) {
   1688 	icv_out = iconv_open(upper, "UTF-8");
   1689     }
   1690     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
   1691 	    enc = (xmlCharEncodingHandlerPtr)
   1692 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
   1693 	    if (enc == NULL) {
   1694 	        iconv_close(icv_in);
   1695 	        iconv_close(icv_out);
   1696 		return(NULL);
   1697 	    }
   1698             memset(enc, 0, sizeof(xmlCharEncodingHandler));
   1699 	    enc->name = xmlMemStrdup(name);
   1700 	    enc->input = NULL;
   1701 	    enc->output = NULL;
   1702 	    enc->iconv_in = icv_in;
   1703 	    enc->iconv_out = icv_out;
   1704 #ifdef DEBUG_ENCODING
   1705             xmlGenericError(xmlGenericErrorContext,
   1706 		    "Found iconv handler for encoding %s\n", name);
   1707 #endif
   1708 	    return enc;
   1709     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
   1710 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
   1711 		    "iconv : problems with filters for '%s'\n", name);
   1712     }
   1713 #endif /* LIBXML_ICONV_ENABLED */
   1714 #ifdef LIBXML_ICU_ENABLED
   1715     /* check whether icu can handle this */
   1716     ucv_in = openIcuConverter(name, 1);
   1717     ucv_out = openIcuConverter(name, 0);
   1718     if (ucv_in != NULL && ucv_out != NULL) {
   1719 	    encu = (xmlCharEncodingHandlerPtr)
   1720 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
   1721 	    if (encu == NULL) {
   1722                 closeIcuConverter(ucv_in);
   1723                 closeIcuConverter(ucv_out);
   1724 		return(NULL);
   1725 	    }
   1726             memset(encu, 0, sizeof(xmlCharEncodingHandler));
   1727 	    encu->name = xmlMemStrdup(name);
   1728 	    encu->input = NULL;
   1729 	    encu->output = NULL;
   1730 	    encu->uconv_in = ucv_in;
   1731 	    encu->uconv_out = ucv_out;
   1732 #ifdef DEBUG_ENCODING
   1733             xmlGenericError(xmlGenericErrorContext,
   1734 		    "Found ICU converter handler for encoding %s\n", name);
   1735 #endif
   1736 	    return encu;
   1737     } else if (ucv_in != NULL || ucv_out != NULL) {
   1738             closeIcuConverter(ucv_in);
   1739             closeIcuConverter(ucv_out);
   1740 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
   1741 		    "ICU converter : problems with filters for '%s'\n", name);
   1742     }
   1743 #endif /* LIBXML_ICU_ENABLED */
   1744 
   1745 #ifdef DEBUG_ENCODING
   1746     xmlGenericError(xmlGenericErrorContext,
   1747 	    "No handler found for encoding %s\n", name);
   1748 #endif
   1749 
   1750     /*
   1751      * Fallback using the canonical names
   1752      */
   1753     alias = xmlParseCharEncoding(norig);
   1754     if (alias != XML_CHAR_ENCODING_ERROR) {
   1755         const char* canon;
   1756         canon = xmlGetCharEncodingName(alias);
   1757         if ((canon != NULL) && (strcmp(name, canon))) {
   1758 	    return(xmlFindCharEncodingHandler(canon));
   1759         }
   1760     }
   1761 
   1762     /* If "none of the above", give up */
   1763     return(NULL);
   1764 }
   1765 
   1766 /************************************************************************
   1767  *									*
   1768  *		ICONV based generic conversion functions		*
   1769  *									*
   1770  ************************************************************************/
   1771 
   1772 #ifdef LIBXML_ICONV_ENABLED
   1773 /**
   1774  * xmlIconvWrapper:
   1775  * @cd:		iconv converter data structure
   1776  * @out:  a pointer to an array of bytes to store the result
   1777  * @outlen:  the length of @out
   1778  * @in:  a pointer to an array of ISO Latin 1 chars
   1779  * @inlen:  the length of @in
   1780  *
   1781  * Returns 0 if success, or
   1782  *     -1 by lack of space, or
   1783  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   1784  *        the result of transformation can't fit into the encoding we want), or
   1785  *     -3 if there the last byte can't form a single output char.
   1786  *
   1787  * The value of @inlen after return is the number of octets consumed
   1788  *     as the return value is positive, else unpredictable.
   1789  * The value of @outlen after return is the number of ocetes consumed.
   1790  */
   1791 static int
   1792 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
   1793                 const unsigned char *in, int *inlen) {
   1794     size_t icv_inlen, icv_outlen;
   1795     const char *icv_in = (const char *) in;
   1796     char *icv_out = (char *) out;
   1797     int ret;
   1798 
   1799     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
   1800         if (outlen != NULL) *outlen = 0;
   1801         return(-1);
   1802     }
   1803     icv_inlen = *inlen;
   1804     icv_outlen = *outlen;
   1805     ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
   1806     *inlen -= icv_inlen;
   1807     *outlen -= icv_outlen;
   1808     if ((icv_inlen != 0) || (ret == -1)) {
   1809 #ifdef EILSEQ
   1810         if (errno == EILSEQ) {
   1811             return -2;
   1812         } else
   1813 #endif
   1814 #ifdef E2BIG
   1815         if (errno == E2BIG) {
   1816             return -1;
   1817         } else
   1818 #endif
   1819 #ifdef EINVAL
   1820         if (errno == EINVAL) {
   1821             return -3;
   1822         } else
   1823 #endif
   1824         {
   1825             return -3;
   1826         }
   1827     }
   1828     return 0;
   1829 }
   1830 #endif /* LIBXML_ICONV_ENABLED */
   1831 
   1832 /************************************************************************
   1833  *									*
   1834  *		ICU based generic conversion functions		*
   1835  *									*
   1836  ************************************************************************/
   1837 
   1838 #ifdef LIBXML_ICU_ENABLED
   1839 /**
   1840  * xmlUconvWrapper:
   1841  * @cd: ICU uconverter data structure
   1842  * @toUnicode : non-zero if toUnicode. 0 otherwise.
   1843  * @out:  a pointer to an array of bytes to store the result
   1844  * @outlen:  the length of @out
   1845  * @in:  a pointer to an array of ISO Latin 1 chars
   1846  * @inlen:  the length of @in
   1847  *
   1848  * Returns 0 if success, or
   1849  *     -1 by lack of space, or
   1850  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   1851  *        the result of transformation can't fit into the encoding we want), or
   1852  *     -3 if there the last byte can't form a single output char.
   1853  *
   1854  * The value of @inlen after return is the number of octets consumed
   1855  *     as the return value is positive, else unpredictable.
   1856  * The value of @outlen after return is the number of ocetes consumed.
   1857  */
   1858 static int
   1859 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
   1860                 const unsigned char *in, int *inlen) {
   1861     const char *ucv_in = (const char *) in;
   1862     char *ucv_out = (char *) out;
   1863     UErrorCode err = U_ZERO_ERROR;
   1864 
   1865     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
   1866         if (outlen != NULL) *outlen = 0;
   1867         return(-1);
   1868     }
   1869 
   1870     /*
   1871      * TODO(jungshik)
   1872      * 1. is ucnv_convert(To|From)Algorithmic better?
   1873      * 2. had we better use an explicit pivot buffer?
   1874      * 3. error returned comes from 'fromUnicode' only even
   1875      *    when toUnicode is true !
   1876      */
   1877     if (toUnicode) {
   1878         /* encoding => UTF-16 => UTF-8 */
   1879         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
   1880                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
   1881                        0, TRUE, &err);
   1882     } else {
   1883         /* UTF-8 => UTF-16 => encoding */
   1884         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
   1885                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
   1886                        0, TRUE, &err);
   1887     }
   1888     *inlen = ucv_in - (const char*) in;
   1889     *outlen = ucv_out - (char *) out;
   1890     if (U_SUCCESS(err))
   1891         return 0;
   1892     if (err == U_BUFFER_OVERFLOW_ERROR)
   1893         return -1;
   1894     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
   1895         return -2;
   1896     /* if (err == U_TRUNCATED_CHAR_FOUND) */
   1897     return -3;
   1898 }
   1899 #endif /* LIBXML_ICU_ENABLED */
   1900 
   1901 /************************************************************************
   1902  *									*
   1903  *		The real API used by libxml for on-the-fly conversion	*
   1904  *									*
   1905  ************************************************************************/
   1906 
   1907 /**
   1908  * xmlCharEncFirstLineInt:
   1909  * @handler:	char enconding transformation data structure
   1910  * @out:  an xmlBuffer for the output.
   1911  * @in:  an xmlBuffer for the input
   1912  * @len:  number of bytes to convert for the first line, or -1
   1913  *
   1914  * Front-end for the encoding handler input function, but handle only
   1915  * the very first line, i.e. limit itself to 45 chars.
   1916  *
   1917  * Returns the number of byte written if success, or
   1918  *     -1 general error
   1919  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   1920  *        the result of transformation can't fit into the encoding we want), or
   1921  */
   1922 int
   1923 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
   1924                        xmlBufferPtr in, int len) {
   1925     int ret = -2;
   1926     int written;
   1927     int toconv;
   1928 
   1929     if (handler == NULL) return(-1);
   1930     if (out == NULL) return(-1);
   1931     if (in == NULL) return(-1);
   1932 
   1933     /* calculate space available */
   1934     written = out->size - out->use - 1; /* count '\0' */
   1935     toconv = in->use;
   1936     /*
   1937      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
   1938      * 45 chars should be sufficient to reach the end of the encoding
   1939      * declaration without going too far inside the document content.
   1940      * on UTF-16 this means 90bytes, on UCS4 this means 180
   1941      * The actual value depending on guessed encoding is passed as @len
   1942      * if provided
   1943      */
   1944     if (len >= 0) {
   1945         if (toconv > len)
   1946             toconv = len;
   1947     } else {
   1948         if (toconv > 180)
   1949             toconv = 180;
   1950     }
   1951     if (toconv * 2 >= written) {
   1952         xmlBufferGrow(out, toconv * 2);
   1953 	written = out->size - out->use - 1;
   1954     }
   1955 
   1956     if (handler->input != NULL) {
   1957 	ret = handler->input(&out->content[out->use], &written,
   1958 	                     in->content, &toconv);
   1959 	xmlBufferShrink(in, toconv);
   1960 	out->use += written;
   1961 	out->content[out->use] = 0;
   1962     }
   1963 #ifdef LIBXML_ICONV_ENABLED
   1964     else if (handler->iconv_in != NULL) {
   1965 	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
   1966 	                      &written, in->content, &toconv);
   1967 	xmlBufferShrink(in, toconv);
   1968 	out->use += written;
   1969 	out->content[out->use] = 0;
   1970 	if (ret == -1) ret = -3;
   1971     }
   1972 #endif /* LIBXML_ICONV_ENABLED */
   1973 #ifdef LIBXML_ICU_ENABLED
   1974     else if (handler->uconv_in != NULL) {
   1975 	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
   1976 	                      &written, in->content, &toconv);
   1977 	xmlBufferShrink(in, toconv);
   1978 	out->use += written;
   1979 	out->content[out->use] = 0;
   1980 	if (ret == -1) ret = -3;
   1981     }
   1982 #endif /* LIBXML_ICU_ENABLED */
   1983 #ifdef DEBUG_ENCODING
   1984     switch (ret) {
   1985         case 0:
   1986 	    xmlGenericError(xmlGenericErrorContext,
   1987 		    "converted %d bytes to %d bytes of input\n",
   1988 	            toconv, written);
   1989 	    break;
   1990         case -1:
   1991 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
   1992 	            toconv, written, in->use);
   1993 	    break;
   1994         case -2:
   1995 	    xmlGenericError(xmlGenericErrorContext,
   1996 		    "input conversion failed due to input error\n");
   1997 	    break;
   1998         case -3:
   1999 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
   2000 	            toconv, written, in->use);
   2001 	    break;
   2002 	default:
   2003 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
   2004     }
   2005 #endif /* DEBUG_ENCODING */
   2006     /*
   2007      * Ignore when input buffer is not on a boundary
   2008      */
   2009     if (ret == -3) ret = 0;
   2010     if (ret == -1) ret = 0;
   2011     return(ret);
   2012 }
   2013 
   2014 /**
   2015  * xmlCharEncFirstLine:
   2016  * @handler:	char enconding transformation data structure
   2017  * @out:  an xmlBuffer for the output.
   2018  * @in:  an xmlBuffer for the input
   2019  *
   2020  * Front-end for the encoding handler input function, but handle only
   2021  * the very first line, i.e. limit itself to 45 chars.
   2022  *
   2023  * Returns the number of byte written if success, or
   2024  *     -1 general error
   2025  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2026  *        the result of transformation can't fit into the encoding we want), or
   2027  */
   2028 int
   2029 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
   2030                  xmlBufferPtr in) {
   2031     return(xmlCharEncFirstLineInt(handler, out, in, -1));
   2032 }
   2033 
   2034 /**
   2035  * xmlCharEncFirstLineInput:
   2036  * @input: a parser input buffer
   2037  * @len:  number of bytes to convert for the first line, or -1
   2038  *
   2039  * Front-end for the encoding handler input function, but handle only
   2040  * the very first line. Point is that this is based on autodetection
   2041  * of the encoding and once that first line is converted we may find
   2042  * out that a different decoder is needed to process the input.
   2043  *
   2044  * Returns the number of byte written if success, or
   2045  *     -1 general error
   2046  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2047  *        the result of transformation can't fit into the encoding we want), or
   2048  */
   2049 int
   2050 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
   2051 {
   2052     int ret = -2;
   2053     size_t written;
   2054     size_t toconv;
   2055     int c_in;
   2056     int c_out;
   2057     xmlBufPtr in;
   2058     xmlBufPtr out;
   2059 
   2060     if ((input == NULL) || (input->encoder == NULL) ||
   2061         (input->buffer == NULL) || (input->raw == NULL))
   2062         return (-1);
   2063     out = input->buffer;
   2064     in = input->raw;
   2065 
   2066     toconv = xmlBufUse(in);
   2067     if (toconv == 0)
   2068         return (0);
   2069     written = xmlBufAvail(out) - 1; /* count '\0' */
   2070     /*
   2071      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
   2072      * 45 chars should be sufficient to reach the end of the encoding
   2073      * declaration without going too far inside the document content.
   2074      * on UTF-16 this means 90bytes, on UCS4 this means 180
   2075      * The actual value depending on guessed encoding is passed as @len
   2076      * if provided
   2077      */
   2078     if (len >= 0) {
   2079         if (toconv > (unsigned int) len)
   2080             toconv = len;
   2081     } else {
   2082         if (toconv > 180)
   2083             toconv = 180;
   2084     }
   2085     if (toconv * 2 >= written) {
   2086         xmlBufGrow(out, toconv * 2);
   2087         written = xmlBufAvail(out) - 1;
   2088     }
   2089     if (written > 360)
   2090         written = 360;
   2091 
   2092     c_in = toconv;
   2093     c_out = written;
   2094     if (input->encoder->input != NULL) {
   2095         ret = input->encoder->input(xmlBufEnd(out), &c_out,
   2096                                     xmlBufContent(in), &c_in);
   2097         xmlBufShrink(in, c_in);
   2098         xmlBufAddLen(out, c_out);
   2099     }
   2100 #ifdef LIBXML_ICONV_ENABLED
   2101     else if (input->encoder->iconv_in != NULL) {
   2102         ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
   2103                               &c_out, xmlBufContent(in), &c_in);
   2104         xmlBufShrink(in, c_in);
   2105         xmlBufAddLen(out, c_out);
   2106         if (ret == -1)
   2107             ret = -3;
   2108     }
   2109 #endif /* LIBXML_ICONV_ENABLED */
   2110 #ifdef LIBXML_ICU_ENABLED
   2111     else if (input->encoder->uconv_in != NULL) {
   2112         ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
   2113                               &c_out, xmlBufContent(in), &c_in);
   2114         xmlBufShrink(in, c_in);
   2115         xmlBufAddLen(out, c_out);
   2116         if (ret == -1)
   2117             ret = -3;
   2118     }
   2119 #endif /* LIBXML_ICU_ENABLED */
   2120     switch (ret) {
   2121         case 0:
   2122 #ifdef DEBUG_ENCODING
   2123             xmlGenericError(xmlGenericErrorContext,
   2124                             "converted %d bytes to %d bytes of input\n",
   2125                             c_in, c_out);
   2126 #endif
   2127             break;
   2128         case -1:
   2129 #ifdef DEBUG_ENCODING
   2130             xmlGenericError(xmlGenericErrorContext,
   2131                          "converted %d bytes to %d bytes of input, %d left\n",
   2132                             c_in, c_out, (int)xmlBufUse(in));
   2133 #endif
   2134             break;
   2135         case -3:
   2136 #ifdef DEBUG_ENCODING
   2137             xmlGenericError(xmlGenericErrorContext,
   2138                         "converted %d bytes to %d bytes of input, %d left\n",
   2139                             c_in, c_out, (int)xmlBufUse(in));
   2140 #endif
   2141             break;
   2142         case -2: {
   2143             char buf[50];
   2144             const xmlChar *content = xmlBufContent(in);
   2145 
   2146 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
   2147 		     content[0], content[1],
   2148 		     content[2], content[3]);
   2149 	    buf[49] = 0;
   2150 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
   2151 		    "input conversion failed due to input error, bytes %s\n",
   2152 		           buf);
   2153         }
   2154     }
   2155     /*
   2156      * Ignore when input buffer is not on a boundary
   2157      */
   2158     if (ret == -3) ret = 0;
   2159     if (ret == -1) ret = 0;
   2160     return(ret);
   2161 }
   2162 
   2163 /**
   2164  * xmlCharEncInput:
   2165  * @input: a parser input buffer
   2166  * @flush: try to flush all the raw buffer
   2167  *
   2168  * Generic front-end for the encoding handler on parser input
   2169  *
   2170  * Returns the number of byte written if success, or
   2171  *     -1 general error
   2172  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2173  *        the result of transformation can't fit into the encoding we want), or
   2174  */
   2175 int
   2176 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
   2177 {
   2178     int ret = -2;
   2179     size_t written;
   2180     size_t toconv;
   2181     int c_in;
   2182     int c_out;
   2183     xmlBufPtr in;
   2184     xmlBufPtr out;
   2185 
   2186     if ((input == NULL) || (input->encoder == NULL) ||
   2187         (input->buffer == NULL) || (input->raw == NULL))
   2188         return (-1);
   2189     out = input->buffer;
   2190     in = input->raw;
   2191 
   2192     toconv = xmlBufUse(in);
   2193     if (toconv == 0)
   2194         return (0);
   2195     if ((toconv > 64 * 1024) && (flush == 0))
   2196         toconv = 64 * 1024;
   2197     written = xmlBufAvail(out);
   2198     if (written > 0)
   2199         written--; /* count '\0' */
   2200     if (toconv * 2 >= written) {
   2201         xmlBufGrow(out, toconv * 2);
   2202         written = xmlBufAvail(out);
   2203         if (written > 0)
   2204             written--; /* count '\0' */
   2205     }
   2206     if ((written > 128 * 1024) && (flush == 0))
   2207         written = 128 * 1024;
   2208 
   2209     c_in = toconv;
   2210     c_out = written;
   2211     if (input->encoder->input != NULL) {
   2212         ret = input->encoder->input(xmlBufEnd(out), &c_out,
   2213                                     xmlBufContent(in), &c_in);
   2214         xmlBufShrink(in, c_in);
   2215         xmlBufAddLen(out, c_out);
   2216     }
   2217 #ifdef LIBXML_ICONV_ENABLED
   2218     else if (input->encoder->iconv_in != NULL) {
   2219         ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out),
   2220                               &c_out, xmlBufContent(in), &c_in);
   2221         xmlBufShrink(in, c_in);
   2222         xmlBufAddLen(out, c_out);
   2223         if (ret == -1)
   2224             ret = -3;
   2225     }
   2226 #endif /* LIBXML_ICONV_ENABLED */
   2227 #ifdef LIBXML_ICU_ENABLED
   2228     else if (input->encoder->uconv_in != NULL) {
   2229         ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out),
   2230                               &c_out, xmlBufContent(in), &c_in);
   2231         xmlBufShrink(in, c_in);
   2232         xmlBufAddLen(out, c_out);
   2233         if (ret == -1)
   2234             ret = -3;
   2235     }
   2236 #endif /* LIBXML_ICU_ENABLED */
   2237     switch (ret) {
   2238         case 0:
   2239 #ifdef DEBUG_ENCODING
   2240             xmlGenericError(xmlGenericErrorContext,
   2241                             "converted %d bytes to %d bytes of input\n",
   2242                             c_in, c_out);
   2243 #endif
   2244             break;
   2245         case -1:
   2246 #ifdef DEBUG_ENCODING
   2247             xmlGenericError(xmlGenericErrorContext,
   2248                          "converted %d bytes to %d bytes of input, %d left\n",
   2249                             c_in, c_out, (int)xmlBufUse(in));
   2250 #endif
   2251             break;
   2252         case -3:
   2253 #ifdef DEBUG_ENCODING
   2254             xmlGenericError(xmlGenericErrorContext,
   2255                         "converted %d bytes to %d bytes of input, %d left\n",
   2256                             c_in, c_out, (int)xmlBufUse(in));
   2257 #endif
   2258             break;
   2259         case -2: {
   2260             char buf[50];
   2261             const xmlChar *content = xmlBufContent(in);
   2262 
   2263 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
   2264 		     content[0], content[1],
   2265 		     content[2], content[3]);
   2266 	    buf[49] = 0;
   2267 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
   2268 		    "input conversion failed due to input error, bytes %s\n",
   2269 		           buf);
   2270         }
   2271     }
   2272     /*
   2273      * Ignore when input buffer is not on a boundary
   2274      */
   2275     if (ret == -3)
   2276         ret = 0;
   2277     return (c_out? c_out : ret);
   2278 }
   2279 
   2280 /**
   2281  * xmlCharEncInFunc:
   2282  * @handler:	char encoding transformation data structure
   2283  * @out:  an xmlBuffer for the output.
   2284  * @in:  an xmlBuffer for the input
   2285  *
   2286  * Generic front-end for the encoding handler input function
   2287  *
   2288  * Returns the number of byte written if success, or
   2289  *     -1 general error
   2290  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2291  *        the result of transformation can't fit into the encoding we want), or
   2292  */
   2293 int
   2294 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
   2295                  xmlBufferPtr in)
   2296 {
   2297     int ret = -2;
   2298     int written;
   2299     int toconv;
   2300 
   2301     if (handler == NULL)
   2302         return (-1);
   2303     if (out == NULL)
   2304         return (-1);
   2305     if (in == NULL)
   2306         return (-1);
   2307 
   2308     toconv = in->use;
   2309     if (toconv == 0)
   2310         return (0);
   2311     written = out->size - out->use -1; /* count '\0' */
   2312     if (toconv * 2 >= written) {
   2313         xmlBufferGrow(out, out->size + toconv * 2);
   2314         written = out->size - out->use - 1;
   2315     }
   2316     if (handler->input != NULL) {
   2317         ret = handler->input(&out->content[out->use], &written,
   2318                              in->content, &toconv);
   2319         xmlBufferShrink(in, toconv);
   2320         out->use += written;
   2321         out->content[out->use] = 0;
   2322     }
   2323 #ifdef LIBXML_ICONV_ENABLED
   2324     else if (handler->iconv_in != NULL) {
   2325         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
   2326                               &written, in->content, &toconv);
   2327         xmlBufferShrink(in, toconv);
   2328         out->use += written;
   2329         out->content[out->use] = 0;
   2330         if (ret == -1)
   2331             ret = -3;
   2332     }
   2333 #endif /* LIBXML_ICONV_ENABLED */
   2334 #ifdef LIBXML_ICU_ENABLED
   2335     else if (handler->uconv_in != NULL) {
   2336         ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
   2337                               &written, in->content, &toconv);
   2338         xmlBufferShrink(in, toconv);
   2339         out->use += written;
   2340         out->content[out->use] = 0;
   2341         if (ret == -1)
   2342             ret = -3;
   2343     }
   2344 #endif /* LIBXML_ICU_ENABLED */
   2345     switch (ret) {
   2346         case 0:
   2347 #ifdef DEBUG_ENCODING
   2348             xmlGenericError(xmlGenericErrorContext,
   2349                             "converted %d bytes to %d bytes of input\n",
   2350                             toconv, written);
   2351 #endif
   2352             break;
   2353         case -1:
   2354 #ifdef DEBUG_ENCODING
   2355             xmlGenericError(xmlGenericErrorContext,
   2356                          "converted %d bytes to %d bytes of input, %d left\n",
   2357                             toconv, written, in->use);
   2358 #endif
   2359             break;
   2360         case -3:
   2361 #ifdef DEBUG_ENCODING
   2362             xmlGenericError(xmlGenericErrorContext,
   2363                         "converted %d bytes to %d bytes of input, %d left\n",
   2364                             toconv, written, in->use);
   2365 #endif
   2366             break;
   2367         case -2: {
   2368             char buf[50];
   2369 
   2370 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
   2371 		     in->content[0], in->content[1],
   2372 		     in->content[2], in->content[3]);
   2373 	    buf[49] = 0;
   2374 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
   2375 		    "input conversion failed due to input error, bytes %s\n",
   2376 		           buf);
   2377         }
   2378     }
   2379     /*
   2380      * Ignore when input buffer is not on a boundary
   2381      */
   2382     if (ret == -3)
   2383         ret = 0;
   2384     return (written? written : ret);
   2385 }
   2386 
   2387 /**
   2388  * xmlCharEncOutput:
   2389  * @output: a parser output buffer
   2390  * @init: is this an initialization call without data
   2391  *
   2392  * Generic front-end for the encoding handler on parser output
   2393  * a first call with @init == 1 has to be made first to initiate the
   2394  * output in case of non-stateless encoding needing to initiate their
   2395  * state or the output (like the BOM in UTF16).
   2396  * In case of UTF8 sequence conversion errors for the given encoder,
   2397  * the content will be automatically remapped to a CharRef sequence.
   2398  *
   2399  * Returns the number of byte written if success, or
   2400  *     -1 general error
   2401  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2402  *        the result of transformation can't fit into the encoding we want), or
   2403  */
   2404 int
   2405 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
   2406 {
   2407     int ret = -2;
   2408     size_t written;
   2409     size_t writtentot = 0;
   2410     size_t toconv;
   2411     int c_in;
   2412     int c_out;
   2413     xmlBufPtr in;
   2414     xmlBufPtr out;
   2415     int charref_len = 0;
   2416 
   2417     if ((output == NULL) || (output->encoder == NULL) ||
   2418         (output->buffer == NULL) || (output->conv == NULL))
   2419         return (-1);
   2420     out = output->conv;
   2421     in = output->buffer;
   2422 
   2423 retry:
   2424 
   2425     written = xmlBufAvail(out);
   2426     if (written > 0)
   2427         written--; /* count '\0' */
   2428 
   2429     /*
   2430      * First specific handling of the initialization call
   2431      */
   2432     if (init) {
   2433         c_in = 0;
   2434         c_out = written;
   2435         if (output->encoder->output != NULL) {
   2436             ret = output->encoder->output(xmlBufEnd(out), &c_out,
   2437                                           NULL, &c_in);
   2438             if (ret > 0) /* Gennady: check return value */
   2439                 xmlBufAddLen(out, c_out);
   2440         }
   2441 #ifdef LIBXML_ICONV_ENABLED
   2442         else if (output->encoder->iconv_out != NULL) {
   2443             ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
   2444                                   &c_out, NULL, &c_in);
   2445             xmlBufAddLen(out, c_out);
   2446         }
   2447 #endif /* LIBXML_ICONV_ENABLED */
   2448 #ifdef LIBXML_ICU_ENABLED
   2449         else if (output->encoder->uconv_out != NULL) {
   2450             ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
   2451                                   &c_out, NULL, &c_in);
   2452             xmlBufAddLen(out, c_out);
   2453         }
   2454 #endif /* LIBXML_ICU_ENABLED */
   2455 #ifdef DEBUG_ENCODING
   2456 	xmlGenericError(xmlGenericErrorContext,
   2457 		"initialized encoder\n");
   2458 #endif
   2459         return(0);
   2460     }
   2461 
   2462     /*
   2463      * Conversion itself.
   2464      */
   2465     toconv = xmlBufUse(in);
   2466     if (toconv == 0)
   2467         return (0);
   2468     if (toconv > 64 * 1024)
   2469         toconv = 64 * 1024;
   2470     if (toconv * 4 >= written) {
   2471         xmlBufGrow(out, toconv * 4);
   2472         written = xmlBufAvail(out) - 1;
   2473     }
   2474     if (written > 256 * 1024)
   2475         written = 256 * 1024;
   2476 
   2477     c_in = toconv;
   2478     c_out = written;
   2479     if (output->encoder->output != NULL) {
   2480         ret = output->encoder->output(xmlBufEnd(out), &c_out,
   2481                                       xmlBufContent(in), &c_in);
   2482         if (c_out > 0) {
   2483             xmlBufShrink(in, c_in);
   2484             xmlBufAddLen(out, c_out);
   2485             writtentot += c_out;
   2486         }
   2487     }
   2488 #ifdef LIBXML_ICONV_ENABLED
   2489     else if (output->encoder->iconv_out != NULL) {
   2490         ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out),
   2491                               &c_out, xmlBufContent(in), &c_in);
   2492         xmlBufShrink(in, c_in);
   2493         xmlBufAddLen(out, c_out);
   2494         writtentot += c_out;
   2495         if (ret == -1) {
   2496             if (c_out > 0) {
   2497                 /*
   2498                  * Can be a limitation of iconv
   2499                  */
   2500                 charref_len = 0;
   2501                 goto retry;
   2502             }
   2503             ret = -3;
   2504         }
   2505     }
   2506 #endif /* LIBXML_ICONV_ENABLED */
   2507 #ifdef LIBXML_ICU_ENABLED
   2508     else if (output->encoder->uconv_out != NULL) {
   2509         ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out),
   2510                               &c_out, xmlBufContent(in), &c_in);
   2511         xmlBufShrink(in, c_in);
   2512         xmlBufAddLen(out, c_out);
   2513         writtentot += c_out;
   2514         if (ret == -1) {
   2515             if (c_out > 0) {
   2516                 /*
   2517                  * Can be a limitation of uconv
   2518                  */
   2519                 charref_len = 0;
   2520                 goto retry;
   2521             }
   2522             ret = -3;
   2523         }
   2524     }
   2525 #endif /* LIBXML_ICU_ENABLED */
   2526     else {
   2527         xmlEncodingErr(XML_I18N_NO_OUTPUT,
   2528                        "xmlCharEncOutFunc: no output function !\n", NULL);
   2529         return(-1);
   2530     }
   2531 
   2532     if (ret >= 0) output += ret;
   2533 
   2534     /*
   2535      * Attempt to handle error cases
   2536      */
   2537     switch (ret) {
   2538         case 0:
   2539 #ifdef DEBUG_ENCODING
   2540 	    xmlGenericError(xmlGenericErrorContext,
   2541 		    "converted %d bytes to %d bytes of output\n",
   2542 	            c_in, c_out);
   2543 #endif
   2544 	    break;
   2545         case -1:
   2546 #ifdef DEBUG_ENCODING
   2547 	    xmlGenericError(xmlGenericErrorContext,
   2548 		    "output conversion failed by lack of space\n");
   2549 #endif
   2550 	    break;
   2551         case -3:
   2552 #ifdef DEBUG_ENCODING
   2553 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
   2554 	            c_in, c_out, (int) xmlBufUse(in));
   2555 #endif
   2556 	    break;
   2557         case -2: {
   2558 	    int len = (int) xmlBufUse(in);
   2559             xmlChar *content = xmlBufContent(in);
   2560 	    int cur;
   2561 
   2562 	    cur = xmlGetUTF8Char(content, &len);
   2563 	    if ((charref_len != 0) && (c_out < charref_len)) {
   2564 		/*
   2565 		 * We attempted to insert a character reference and failed.
   2566 		 * Undo what was written and skip the remaining charref.
   2567 		 */
   2568                 xmlBufErase(out, c_out);
   2569 		writtentot -= c_out;
   2570 		xmlBufShrink(in, charref_len - c_out);
   2571 		charref_len = 0;
   2572 
   2573 		ret = -1;
   2574                 break;
   2575 	    } else if (cur > 0) {
   2576 		xmlChar charref[20];
   2577 
   2578 #ifdef DEBUG_ENCODING
   2579 		xmlGenericError(xmlGenericErrorContext,
   2580 			"handling output conversion error\n");
   2581 		xmlGenericError(xmlGenericErrorContext,
   2582 			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
   2583 			content[0], content[1],
   2584 			content[2], content[3]);
   2585 #endif
   2586 		/*
   2587 		 * Removes the UTF8 sequence, and replace it by a charref
   2588 		 * and continue the transcoding phase, hoping the error
   2589 		 * did not mangle the encoder state.
   2590 		 */
   2591 		charref_len = snprintf((char *) &charref[0], sizeof(charref),
   2592 				 "&#%d;", cur);
   2593 		xmlBufShrink(in, len);
   2594 		xmlBufAddHead(in, charref, -1);
   2595 
   2596 		goto retry;
   2597 	    } else {
   2598 		char buf[50];
   2599 
   2600 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
   2601 			 content[0], content[1],
   2602 			 content[2], content[3]);
   2603 		buf[49] = 0;
   2604 		xmlEncodingErr(XML_I18N_CONV_FAILED,
   2605 		    "output conversion failed due to conv error, bytes %s\n",
   2606 			       buf);
   2607 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
   2608 		    content[0] = ' ';
   2609 	    }
   2610 	    break;
   2611 	}
   2612     }
   2613     return(ret);
   2614 }
   2615 
   2616 /**
   2617  * xmlCharEncOutFunc:
   2618  * @handler:	char enconding transformation data structure
   2619  * @out:  an xmlBuffer for the output.
   2620  * @in:  an xmlBuffer for the input
   2621  *
   2622  * Generic front-end for the encoding handler output function
   2623  * a first call with @in == NULL has to be made firs to initiate the
   2624  * output in case of non-stateless encoding needing to initiate their
   2625  * state or the output (like the BOM in UTF16).
   2626  * In case of UTF8 sequence conversion errors for the given encoder,
   2627  * the content will be automatically remapped to a CharRef sequence.
   2628  *
   2629  * Returns the number of byte written if success, or
   2630  *     -1 general error
   2631  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2632  *        the result of transformation can't fit into the encoding we want), or
   2633  */
   2634 int
   2635 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
   2636                   xmlBufferPtr in) {
   2637     int ret = -2;
   2638     int written;
   2639     int writtentot = 0;
   2640     int toconv;
   2641     int output = 0;
   2642     int charref_len = 0;
   2643 
   2644     if (handler == NULL) return(-1);
   2645     if (out == NULL) return(-1);
   2646 
   2647 retry:
   2648 
   2649     written = out->size - out->use;
   2650 
   2651     if (written > 0)
   2652 	written--; /* Gennady: count '/0' */
   2653 
   2654     /*
   2655      * First specific handling of in = NULL, i.e. the initialization call
   2656      */
   2657     if (in == NULL) {
   2658         toconv = 0;
   2659 	if (handler->output != NULL) {
   2660 	    ret = handler->output(&out->content[out->use], &written,
   2661 				  NULL, &toconv);
   2662 	    if (ret >= 0) { /* Gennady: check return value */
   2663 		out->use += written;
   2664 		out->content[out->use] = 0;
   2665 	    }
   2666 	}
   2667 #ifdef LIBXML_ICONV_ENABLED
   2668 	else if (handler->iconv_out != NULL) {
   2669 	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
   2670 				  &written, NULL, &toconv);
   2671 	    out->use += written;
   2672 	    out->content[out->use] = 0;
   2673 	}
   2674 #endif /* LIBXML_ICONV_ENABLED */
   2675 #ifdef LIBXML_ICU_ENABLED
   2676 	else if (handler->uconv_out != NULL) {
   2677 	    ret = xmlUconvWrapper(handler->uconv_out, 0,
   2678                               &out->content[out->use],
   2679 				              &written, NULL, &toconv);
   2680 	    out->use += written;
   2681 	    out->content[out->use] = 0;
   2682 	}
   2683 #endif /* LIBXML_ICU_ENABLED */
   2684 #ifdef DEBUG_ENCODING
   2685 	xmlGenericError(xmlGenericErrorContext,
   2686 		"initialized encoder\n");
   2687 #endif
   2688         return(0);
   2689     }
   2690 
   2691     /*
   2692      * Conversion itself.
   2693      */
   2694     toconv = in->use;
   2695     if (toconv == 0)
   2696 	return(0);
   2697     if (toconv * 4 >= written) {
   2698         xmlBufferGrow(out, toconv * 4);
   2699 	written = out->size - out->use - 1;
   2700     }
   2701     if (handler->output != NULL) {
   2702 	ret = handler->output(&out->content[out->use], &written,
   2703 	                      in->content, &toconv);
   2704 	if (written > 0) {
   2705 	    xmlBufferShrink(in, toconv);
   2706 	    out->use += written;
   2707 	    writtentot += written;
   2708 	}
   2709 	out->content[out->use] = 0;
   2710     }
   2711 #ifdef LIBXML_ICONV_ENABLED
   2712     else if (handler->iconv_out != NULL) {
   2713 	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
   2714 	                      &written, in->content, &toconv);
   2715 	xmlBufferShrink(in, toconv);
   2716 	out->use += written;
   2717 	writtentot += written;
   2718 	out->content[out->use] = 0;
   2719 	if (ret == -1) {
   2720 	    if (written > 0) {
   2721 		/*
   2722 		 * Can be a limitation of iconv
   2723 		 */
   2724                 charref_len = 0;
   2725 		goto retry;
   2726 	    }
   2727 	    ret = -3;
   2728 	}
   2729     }
   2730 #endif /* LIBXML_ICONV_ENABLED */
   2731 #ifdef LIBXML_ICU_ENABLED
   2732     else if (handler->uconv_out != NULL) {
   2733 	ret = xmlUconvWrapper(handler->uconv_out, 0,
   2734                               &out->content[out->use],
   2735 	                      &written, in->content, &toconv);
   2736 	xmlBufferShrink(in, toconv);
   2737 	out->use += written;
   2738 	writtentot += written;
   2739 	out->content[out->use] = 0;
   2740 	if (ret == -1) {
   2741 	    if (written > 0) {
   2742 		/*
   2743 		 * Can be a limitation of iconv
   2744 		 */
   2745                 charref_len = 0;
   2746 		goto retry;
   2747 	    }
   2748 	    ret = -3;
   2749 	}
   2750     }
   2751 #endif /* LIBXML_ICU_ENABLED */
   2752     else {
   2753 	xmlEncodingErr(XML_I18N_NO_OUTPUT,
   2754 		       "xmlCharEncOutFunc: no output function !\n", NULL);
   2755 	return(-1);
   2756     }
   2757 
   2758     if (ret >= 0) output += ret;
   2759 
   2760     /*
   2761      * Attempt to handle error cases
   2762      */
   2763     switch (ret) {
   2764         case 0:
   2765 #ifdef DEBUG_ENCODING
   2766 	    xmlGenericError(xmlGenericErrorContext,
   2767 		    "converted %d bytes to %d bytes of output\n",
   2768 	            toconv, written);
   2769 #endif
   2770 	    break;
   2771         case -1:
   2772 #ifdef DEBUG_ENCODING
   2773 	    xmlGenericError(xmlGenericErrorContext,
   2774 		    "output conversion failed by lack of space\n");
   2775 #endif
   2776 	    break;
   2777         case -3:
   2778 #ifdef DEBUG_ENCODING
   2779 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
   2780 	            toconv, written, in->use);
   2781 #endif
   2782 	    break;
   2783         case -2: {
   2784 	    int len = in->use;
   2785 	    const xmlChar *utf = (const xmlChar *) in->content;
   2786 	    int cur;
   2787 
   2788 	    cur = xmlGetUTF8Char(utf, &len);
   2789 	    if ((charref_len != 0) && (written < charref_len)) {
   2790 		/*
   2791 		 * We attempted to insert a character reference and failed.
   2792 		 * Undo what was written and skip the remaining charref.
   2793 		 */
   2794 		out->use -= written;
   2795 		writtentot -= written;
   2796 		xmlBufferShrink(in, charref_len - written);
   2797 		charref_len = 0;
   2798 
   2799 		ret = -1;
   2800                 break;
   2801 	    } else if (cur > 0) {
   2802 		xmlChar charref[20];
   2803 
   2804 #ifdef DEBUG_ENCODING
   2805 		xmlGenericError(xmlGenericErrorContext,
   2806 			"handling output conversion error\n");
   2807 		xmlGenericError(xmlGenericErrorContext,
   2808 			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
   2809 			in->content[0], in->content[1],
   2810 			in->content[2], in->content[3]);
   2811 #endif
   2812 		/*
   2813 		 * Removes the UTF8 sequence, and replace it by a charref
   2814 		 * and continue the transcoding phase, hoping the error
   2815 		 * did not mangle the encoder state.
   2816 		 */
   2817 		charref_len = snprintf((char *) &charref[0], sizeof(charref),
   2818 				 "&#%d;", cur);
   2819 		xmlBufferShrink(in, len);
   2820 		xmlBufferAddHead(in, charref, -1);
   2821 
   2822 		goto retry;
   2823 	    } else {
   2824 		char buf[50];
   2825 
   2826 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
   2827 			 in->content[0], in->content[1],
   2828 			 in->content[2], in->content[3]);
   2829 		buf[49] = 0;
   2830 		xmlEncodingErr(XML_I18N_CONV_FAILED,
   2831 		    "output conversion failed due to conv error, bytes %s\n",
   2832 			       buf);
   2833 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
   2834 		    in->content[0] = ' ';
   2835 	    }
   2836 	    break;
   2837 	}
   2838     }
   2839     return(ret);
   2840 }
   2841 
   2842 /**
   2843  * xmlCharEncCloseFunc:
   2844  * @handler:	char enconding transformation data structure
   2845  *
   2846  * Generic front-end for encoding handler close function
   2847  *
   2848  * Returns 0 if success, or -1 in case of error
   2849  */
   2850 int
   2851 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
   2852     int ret = 0;
   2853     int tofree = 0;
   2854     if (handler == NULL) return(-1);
   2855     if (handler->name == NULL) return(-1);
   2856 #ifdef LIBXML_ICONV_ENABLED
   2857     /*
   2858      * Iconv handlers can be used only once, free the whole block.
   2859      * and the associated icon resources.
   2860      */
   2861     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
   2862         tofree = 1;
   2863 	if (handler->iconv_out != NULL) {
   2864 	    if (iconv_close(handler->iconv_out))
   2865 		ret = -1;
   2866 	    handler->iconv_out = NULL;
   2867 	}
   2868 	if (handler->iconv_in != NULL) {
   2869 	    if (iconv_close(handler->iconv_in))
   2870 		ret = -1;
   2871 	    handler->iconv_in = NULL;
   2872 	}
   2873     }
   2874 #endif /* LIBXML_ICONV_ENABLED */
   2875 #ifdef LIBXML_ICU_ENABLED
   2876     if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
   2877         tofree = 1;
   2878 	if (handler->uconv_out != NULL) {
   2879 	    closeIcuConverter(handler->uconv_out);
   2880 	    handler->uconv_out = NULL;
   2881 	}
   2882 	if (handler->uconv_in != NULL) {
   2883 	    closeIcuConverter(handler->uconv_in);
   2884 	    handler->uconv_in = NULL;
   2885 	}
   2886     }
   2887 #endif
   2888     if (tofree) {
   2889         /* free up only dynamic handlers iconv/uconv */
   2890         if (handler->name != NULL)
   2891             xmlFree(handler->name);
   2892         handler->name = NULL;
   2893         xmlFree(handler);
   2894     }
   2895 #ifdef DEBUG_ENCODING
   2896     if (ret)
   2897         xmlGenericError(xmlGenericErrorContext,
   2898 		"failed to close the encoding handler\n");
   2899     else
   2900         xmlGenericError(xmlGenericErrorContext,
   2901 		"closed the encoding handler\n");
   2902 #endif
   2903 
   2904     return(ret);
   2905 }
   2906 
   2907 /**
   2908  * xmlByteConsumed:
   2909  * @ctxt: an XML parser context
   2910  *
   2911  * This function provides the current index of the parser relative
   2912  * to the start of the current entity. This function is computed in
   2913  * bytes from the beginning starting at zero and finishing at the
   2914  * size in byte of the file if parsing a file. The function is
   2915  * of constant cost if the input is UTF-8 but can be costly if run
   2916  * on non-UTF-8 input.
   2917  *
   2918  * Returns the index in bytes from the beginning of the entity or -1
   2919  *         in case the index could not be computed.
   2920  */
   2921 long
   2922 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
   2923     xmlParserInputPtr in;
   2924 
   2925     if (ctxt == NULL) return(-1);
   2926     in = ctxt->input;
   2927     if (in == NULL)  return(-1);
   2928     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
   2929         unsigned int unused = 0;
   2930 	xmlCharEncodingHandler * handler = in->buf->encoder;
   2931         /*
   2932 	 * Encoding conversion, compute the number of unused original
   2933 	 * bytes from the input not consumed and substract that from
   2934 	 * the raw consumed value, this is not a cheap operation
   2935 	 */
   2936         if (in->end - in->cur > 0) {
   2937 	    unsigned char convbuf[32000];
   2938 	    const unsigned char *cur = (const unsigned char *)in->cur;
   2939 	    int toconv = in->end - in->cur, written = 32000;
   2940 
   2941 	    int ret;
   2942 
   2943 	    if (handler->output != NULL) {
   2944 	        do {
   2945 		    toconv = in->end - cur;
   2946 		    written = 32000;
   2947 		    ret = handler->output(&convbuf[0], &written,
   2948 				      cur, &toconv);
   2949 		    if (ret == -1) return(-1);
   2950 		    unused += written;
   2951 		    cur += toconv;
   2952 		} while (ret == -2);
   2953 #ifdef LIBXML_ICONV_ENABLED
   2954 	    } else if (handler->iconv_out != NULL) {
   2955 	        do {
   2956 		    toconv = in->end - cur;
   2957 		    written = 32000;
   2958 		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
   2959 	                      &written, cur, &toconv);
   2960 		    if (ret < 0) {
   2961 		        if (written > 0)
   2962 			    ret = -2;
   2963 			else
   2964 			    return(-1);
   2965 		    }
   2966 		    unused += written;
   2967 		    cur += toconv;
   2968 		} while (ret == -2);
   2969 #endif
   2970 #ifdef LIBXML_ICU_ENABLED
   2971 	    } else if (handler->uconv_out != NULL) {
   2972 	        do {
   2973 		    toconv = in->end - cur;
   2974 		    written = 32000;
   2975 		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
   2976 	                      &written, cur, &toconv);
   2977 		    if (ret < 0) {
   2978 		        if (written > 0)
   2979 			    ret = -2;
   2980 			else
   2981 			    return(-1);
   2982 		    }
   2983 		    unused += written;
   2984 		    cur += toconv;
   2985 		} while (ret == -2);
   2986 #endif
   2987             } else {
   2988 	        /* could not find a converter */
   2989 	        return(-1);
   2990 	    }
   2991 	}
   2992 	if (in->buf->rawconsumed < unused)
   2993 	    return(-1);
   2994 	return(in->buf->rawconsumed - unused);
   2995     }
   2996     return(in->consumed + (in->cur - in->base));
   2997 }
   2998 
   2999 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
   3000 #ifdef LIBXML_ISO8859X_ENABLED
   3001 
   3002 /**
   3003  * UTF8ToISO8859x:
   3004  * @out:  a pointer to an array of bytes to store the result
   3005  * @outlen:  the length of @out
   3006  * @in:  a pointer to an array of UTF-8 chars
   3007  * @inlen:  the length of @in
   3008  * @xlattable: the 2-level transcoding table
   3009  *
   3010  * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
   3011  * block of chars out.
   3012  *
   3013  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
   3014  * The value of @inlen after return is the number of octets consumed
   3015  *     as the return value is positive, else unpredictable.
   3016  * The value of @outlen after return is the number of ocetes consumed.
   3017  */
   3018 static int
   3019 UTF8ToISO8859x(unsigned char* out, int *outlen,
   3020               const unsigned char* in, int *inlen,
   3021               unsigned char const *xlattable) {
   3022     const unsigned char* outstart = out;
   3023     const unsigned char* inend;
   3024     const unsigned char* instart = in;
   3025     const unsigned char* processed = in;
   3026 
   3027     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
   3028         (xlattable == NULL))
   3029 	return(-1);
   3030     if (in == NULL) {
   3031         /*
   3032         * initialization nothing to do
   3033         */
   3034         *outlen = 0;
   3035         *inlen = 0;
   3036         return(0);
   3037     }
   3038     inend = in + (*inlen);
   3039     while (in < inend) {
   3040         unsigned char d = *in++;
   3041         if  (d < 0x80)  {
   3042             *out++ = d;
   3043         } else if (d < 0xC0) {
   3044             /* trailing byte in leading position */
   3045             *outlen = out - outstart;
   3046             *inlen = processed - instart;
   3047             return(-2);
   3048         } else if (d < 0xE0) {
   3049             unsigned char c;
   3050             if (!(in < inend)) {
   3051                 /* trailing byte not in input buffer */
   3052                 *outlen = out - outstart;
   3053                 *inlen = processed - instart;
   3054                 return(-3);
   3055             }
   3056             c = *in++;
   3057             if ((c & 0xC0) != 0x80) {
   3058                 /* not a trailing byte */
   3059                 *outlen = out - outstart;
   3060                 *inlen = processed - instart;
   3061                 return(-2);
   3062             }
   3063             c = c & 0x3F;
   3064             d = d & 0x1F;
   3065             d = xlattable [48 + c + xlattable [d] * 64];
   3066             if (d == 0) {
   3067                 /* not in character set */
   3068                 *outlen = out - outstart;
   3069                 *inlen = processed - instart;
   3070                 return(-2);
   3071             }
   3072             *out++ = d;
   3073         } else if (d < 0xF0) {
   3074             unsigned char c1;
   3075             unsigned char c2;
   3076             if (!(in < inend - 1)) {
   3077                 /* trailing bytes not in input buffer */
   3078                 *outlen = out - outstart;
   3079                 *inlen = processed - instart;
   3080                 return(-3);
   3081             }
   3082             c1 = *in++;
   3083             if ((c1 & 0xC0) != 0x80) {
   3084                 /* not a trailing byte (c1) */
   3085                 *outlen = out - outstart;
   3086                 *inlen = processed - instart;
   3087                 return(-2);
   3088             }
   3089             c2 = *in++;
   3090             if ((c2 & 0xC0) != 0x80) {
   3091                 /* not a trailing byte (c2) */
   3092                 *outlen = out - outstart;
   3093                 *inlen = processed - instart;
   3094                 return(-2);
   3095             }
   3096             c1 = c1 & 0x3F;
   3097             c2 = c2 & 0x3F;
   3098 	    d = d & 0x0F;
   3099 	    d = xlattable [48 + c2 + xlattable [48 + c1 +
   3100 			xlattable [32 + d] * 64] * 64];
   3101             if (d == 0) {
   3102                 /* not in character set */
   3103                 *outlen = out - outstart;
   3104                 *inlen = processed - instart;
   3105                 return(-2);
   3106             }
   3107             *out++ = d;
   3108         } else {
   3109             /* cannot transcode >= U+010000 */
   3110             *outlen = out - outstart;
   3111             *inlen = processed - instart;
   3112             return(-2);
   3113         }
   3114         processed = in;
   3115     }
   3116     *outlen = out - outstart;
   3117     *inlen = processed - instart;
   3118     return(*outlen);
   3119 }
   3120 
   3121 /**
   3122  * ISO8859xToUTF8
   3123  * @out:  a pointer to an array of bytes to store the result
   3124  * @outlen:  the length of @out
   3125  * @in:  a pointer to an array of ISO Latin 1 chars
   3126  * @inlen:  the length of @in
   3127  *
   3128  * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
   3129  * block of chars out.
   3130  * Returns 0 if success, or -1 otherwise
   3131  * The value of @inlen after return is the number of octets consumed
   3132  * The value of @outlen after return is the number of ocetes produced.
   3133  */
   3134 static int
   3135 ISO8859xToUTF8(unsigned char* out, int *outlen,
   3136               const unsigned char* in, int *inlen,
   3137               unsigned short const *unicodetable) {
   3138     unsigned char* outstart = out;
   3139     unsigned char* outend;
   3140     const unsigned char* instart = in;
   3141     const unsigned char* inend;
   3142     const unsigned char* instop;
   3143     unsigned int c;
   3144 
   3145     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
   3146         (in == NULL) || (unicodetable == NULL))
   3147 	return(-1);
   3148     outend = out + *outlen;
   3149     inend = in + *inlen;
   3150     instop = inend;
   3151 
   3152     while ((in < inend) && (out < outend - 2)) {
   3153         if (*in >= 0x80) {
   3154             c = unicodetable [*in - 0x80];
   3155             if (c == 0) {
   3156                 /* undefined code point */
   3157                 *outlen = out - outstart;
   3158                 *inlen = in - instart;
   3159                 return (-1);
   3160             }
   3161             if (c < 0x800) {
   3162                 *out++ = ((c >>  6) & 0x1F) | 0xC0;
   3163                 *out++ = (c & 0x3F) | 0x80;
   3164             } else {
   3165                 *out++ = ((c >>  12) & 0x0F) | 0xE0;
   3166                 *out++ = ((c >>  6) & 0x3F) | 0x80;
   3167                 *out++ = (c & 0x3F) | 0x80;
   3168             }
   3169             ++in;
   3170         }
   3171         if (instop - in > outend - out) instop = in + (outend - out);
   3172         while ((*in < 0x80) && (in < instop)) {
   3173             *out++ = *in++;
   3174         }
   3175     }
   3176     if ((in < inend) && (out < outend) && (*in < 0x80)) {
   3177         *out++ =  *in++;
   3178     }
   3179     if ((in < inend) && (out < outend) && (*in < 0x80)) {
   3180         *out++ =  *in++;
   3181     }
   3182     *outlen = out - outstart;
   3183     *inlen = in - instart;
   3184     return (*outlen);
   3185 }
   3186 
   3187 
   3188 /************************************************************************
   3189  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
   3190  ************************************************************************/
   3191 
   3192 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
   3193     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3194     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3195     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3196     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3197     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
   3198     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
   3199     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
   3200     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
   3201     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
   3202     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
   3203     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
   3204     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
   3205     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
   3206     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
   3207     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
   3208     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
   3209 };
   3210 
   3211 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
   3212     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
   3213     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3214     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3215     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3216     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3217     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3218     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3219     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3220     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3221     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
   3222     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
   3223     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
   3224     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
   3225     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3226     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
   3227     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
   3228     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
   3229     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3230     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3231     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
   3232     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
   3233     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
   3234     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
   3235     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
   3236     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
   3237     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
   3238     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
   3239 };
   3240 
   3241 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
   3242     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3243     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3244     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3245     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3246     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
   3247     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
   3248     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
   3249     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
   3250     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
   3251     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3252     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
   3253     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
   3254     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
   3255     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3256     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
   3257     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
   3258 };
   3259 
   3260 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
   3261     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
   3262     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3263     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3264     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3265     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3266     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3267     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3268     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3269     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3270     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
   3271     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
   3272     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
   3273     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
   3274     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
   3275     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3276     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3277     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
   3278     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3279     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3280     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3281     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3282     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3283     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3284     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3285     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
   3286     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
   3287     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
   3288     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3289     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
   3290     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3291     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
   3292 };
   3293 
   3294 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-4 byte 0x80 + i -- confirm against the code that
             * indexes this table.
             */
   3295     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3296     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3297     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3298     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3299     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
   3300     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
   3301     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
   3302     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
   3303     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
   3304     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
   3305     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
   3306     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
   3307     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
   3308     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
   3309     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
   3310     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
   3311 };
   3312 
   3313 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
            /*
             * 48 + 6 * 64 = 432 bytes, written as 27 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by six 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-4) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3314     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
   3315     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3316     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3317     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3318     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3319     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3320     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3321     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3322     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3323     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
   3324     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
   3325     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
   3326     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
   3327     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
   3328     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
   3329     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
   3330     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
   3331     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
   3332     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
   3333     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
   3334     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
   3335     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3336     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3337     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
   3338     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
   3339     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
   3340     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
   3341 };
   3342 
   3343 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-5 byte 0x80 + i -- confirm against the code that
             * indexes this table.
             */
   3344     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3345     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3346     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3347     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3348     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
   3349     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
   3350     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
   3351     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
   3352     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
   3353     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
   3354     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
   3355     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
   3356     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
   3357     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
   3358     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
   3359     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
   3360 };
   3361 
   3362 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
            /*
             * 48 + 6 * 64 = 432 bytes, written as 27 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by six 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-5) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3363     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3364     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3365     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3366     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3367     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3368     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3369     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3370     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3371     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3372     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
   3373     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3374     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
   3375     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
   3376     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3377     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
   3378     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3379     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
   3380     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3381     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3382     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3383     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3384     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3385     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3386     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3387     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3388     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3389     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3390 };
   3391 
   3392 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-6 byte 0x80 + i, and 0x0000 marks a byte with no
             * assigned character -- confirm against the code that indexes
             * this table.
             */
   3393     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3394     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3395     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3396     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3397     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
   3398     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
   3399     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3400     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
   3401     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
   3402     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
   3403     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
   3404     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3405     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
   3406     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
   3407     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3408     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3409 };
   3410 
   3411 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
            /*
             * 48 + 5 * 64 = 368 bytes, written as 23 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by five 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-6) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3412     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3413     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
   3414     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3415     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3416     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3417     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3418     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3419     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3420     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3421     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
   3422     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3423     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3424     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3425     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3426     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3427     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
   3428     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
   3429     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3430     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
   3431     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3432     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3433     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3434     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3435 };
   3436 
   3437 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-7 byte 0x80 + i, and 0x0000 marks a byte with no
             * assigned character -- confirm against the code that indexes
             * this table.
             */
   3438     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3439     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3440     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3441     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3442     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
   3443     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
   3444     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
   3445     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
   3446     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
   3447     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
   3448     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
   3449     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
   3450     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
   3451     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
   3452     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
   3453     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
   3454 };
   3455 
   3456 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
            /*
             * 48 + 7 * 64 = 496 bytes, written as 31 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by seven 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-7) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3457     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
   3458     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3459     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3460     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3461     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3462     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3463     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3464     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3465     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3466     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
   3467     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
   3468     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3469     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3470     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3471     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3472     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3473     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
   3474     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3475     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3476     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3477     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3478     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3479     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3480     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
   3481     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3482     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
   3483     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3484     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
   3485     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3486     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3487     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3488 };
   3489 
   3490 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-8 byte 0x80 + i, and 0x0000 marks a byte with no
             * assigned character -- confirm against the code that indexes
             * this table.
             */
   3491     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3492     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3493     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3494     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3495     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
   3496     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
   3497     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
   3498     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
   3499     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3500     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3501     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3502     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
   3503     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
   3504     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
   3505     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
   3506     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
   3507 };
   3508 
   3509 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
            /*
             * 48 + 7 * 64 = 496 bytes, written as 31 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by seven 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-8) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3510     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3511     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
   3512     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3513     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3514     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3515     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3516     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3517     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3518     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3519     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
   3520     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
   3521     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3522     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3523     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3524     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3525     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3526     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
   3527     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3528     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
   3529     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3530     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3531     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3532     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3533     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
   3534     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
   3535     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3536     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3537     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3538     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3539     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
   3540     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3541 };
   3542 
   3543 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-9 byte 0x80 + i -- confirm against the code that
             * indexes this table.
             */
   3544     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3545     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3546     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3547     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3548     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
   3549     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
   3550     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
   3551     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
   3552     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
   3553     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3554     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
   3555     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
   3556     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
   3557     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3558     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
   3559     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
   3560 };
   3561 
   3562 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
            /*
             * 48 + 5 * 64 = 368 bytes, written as 23 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by five 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-9) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3563     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3564     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3565     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3566     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3567     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3568     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3569     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3570     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3571     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3572     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
   3573     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
   3574     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3575     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
   3576     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3577     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
   3578     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3579     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
   3580     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3581     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3582     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3583     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
   3584     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3585     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3586 };
   3587 
   3588 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-10 byte 0x80 + i -- confirm against the code that
             * indexes this table.
             */
   3589     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3590     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3591     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3592     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3593     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
   3594     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
   3595     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
   3596     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
   3597     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
   3598     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
   3599     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
   3600     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
   3601     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
   3602     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
   3603     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
   3604     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
   3605 };
   3606 
   3607 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
            /*
             * 48 + 7 * 64 = 496 bytes, written as 31 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by seven 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-10) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3608     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3609     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3610     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3611     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3612     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3613     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3614     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3615     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3616     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3617     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
   3618     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
   3619     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
   3620     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
   3621     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
   3622     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
   3623     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
   3624     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3625     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
   3626     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
   3627     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3628     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3629     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3630     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3631     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3632     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3633     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3634     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3635     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
   3636     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
   3637     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
   3638     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
   3639 };
   3640 
   3641 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-11 byte 0x80 + i, and 0x0000 marks a byte with no
             * assigned character -- confirm against the code that indexes
             * this table.
             */
   3642     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3643     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3644     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3645     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3646     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
   3647     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
   3648     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
   3649     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
   3650     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
   3651     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
   3652     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
   3653     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
   3654     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
   3655     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
   3656     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
   3657     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
   3658 };
   3659 
   3660 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
            /*
             * 48 + 6 * 64 = 432 bytes, written as 27 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by six 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-11) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3661     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3662     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3663     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3664     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3665     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3666     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3667     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3668     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3669     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3670     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3671     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3672     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3673     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3674     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3675     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
   3676     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
   3677     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
   3678     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3679     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
   3680     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3681     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3682     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3683     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3684     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3685     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
   3686     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3687     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3688 };
   3689 
   3690 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
            /*
             * 128 sixteen-bit values, eight per row.  Entries 0..31 are
             * 0x0080..0x009f in ascending order.
             * NOTE(review): presumably entry i is the Unicode code point for
             * ISO-8859-13 byte 0x80 + i -- confirm against the code that
             * indexes this table.
             */
   3691     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3692     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3693     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3694     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3695     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
   3696     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
   3697     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
   3698     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
   3699     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
   3700     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
   3701     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
   3702     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
   3703     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
   3704     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
   3705     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
   3706     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
   3707 };
   3708 
   3709 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
            /*
             * 48 + 7 * 64 = 496 bytes, written as 31 string fragments of
             * 16 bytes each.  The fragments fill the array exactly, so the
             * string literal's terminating NUL is not stored.
             * NOTE(review): the declared size suggests a 48-byte index
             * followed by seven 64-byte blocks used for the reverse
             * (Unicode -> ISO-8859-13) lookup, with 0x00 meaning "no
             * mapping" -- confirm against the routine that reads this table.
             */
   3710     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3711     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3712     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3713     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3714     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3715     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3716     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3717     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3718     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3719     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
   3720     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
   3721     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3722     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3723     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3724     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3725     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3726     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
   3727     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3728     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3729     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
   3730     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
   3731     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
   3732     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
   3733     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
   3734     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
   3735     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
   3736     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
   3737     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
   3738     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
   3739     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
   3740     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
   3741 };
   3742 
   /*
    * xmlunicodetable_ISO8859_14:
    *
    * 128-entry table of 16-bit values that ISO8859_14ToUTF8 hands to
    * ISO8859xToUTF8. The first 32 entries are 0x0080..0x009f in order.
    * NOTE(review): presumably entry i is the Unicode code point for
    * ISO-8859-14 byte 0x80 + i; the indexing itself happens inside
    * ISO8859xToUTF8, so confirm there.
    */
   3743 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
   3744     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3745     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3746     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3747     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3748     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
   3749     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
   3750     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
   3751     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
   3752     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
   3753     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3754     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
   3755     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
   3756     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
   3757     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3758     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
   3759     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
   3760 };
   3761 
   /*
    * xmltranscodetable_ISO8859_14:
    *
    * Byte table that UTF8ToISO8859_14 hands to UTF8ToISO8859x. The
    * initializer is a single string literal of 43 rows x 16 bytes = 688
    * bytes, exactly the declared 48 + 10 * 64, so the array is filled
    * completely and the literal's terminating NUL is not stored. Keep the
    * row count and the size expression in step if the data is regenerated.
    * NOTE(review): the size expression suggests a 48-byte index followed
    * by ten 64-byte blocks; the lookup scheme lives in UTF8ToISO8859x,
    * which is not shown here -- confirm there.
    */
   3762 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
   3763     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3764     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3765     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3766     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3767     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3768     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3769     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3770     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3771     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3772     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
   3773     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3774     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3775     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3776     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3777     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
   3778     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
   3779     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
   3780     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3781     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3782     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
   3783     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3784     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3785     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3786     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3787     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3788     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3789     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3790     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3791     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3792     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3793     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3794     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3795     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3796     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3797     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
   3798     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3799     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
   3800     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
   3801     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3802     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3803     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
   3804     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3805     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
   3806 };
   3807 
   /*
    * xmlunicodetable_ISO8859_15:
    *
    * 128-entry table of 16-bit values that ISO8859_15ToUTF8 hands to
    * ISO8859xToUTF8. Every entry equals 0x0080 + index except eight:
    * index 0x24 (0x20ac), 0x26 (0x0160), 0x28 (0x0161), 0x34 (0x017d),
    * 0x38 (0x017e), 0x3c (0x0152), 0x3d (0x0153) and 0x3e (0x0178).
    * NOTE(review): presumably entry i is the Unicode code point for
    * ISO-8859-15 byte 0x80 + i; the indexing itself happens inside
    * ISO8859xToUTF8, so confirm there.
    */
   3808 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
   3809     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3810     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3811     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3812     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3813     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
   3814     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
   3815     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
   3816     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
   3817     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
   3818     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3819     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
   3820     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
   3821     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
   3822     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3823     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
   3824     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
   3825 };
   3826 
   /*
    * xmltranscodetable_ISO8859_15:
    *
    * Byte table that UTF8ToISO8859_15 hands to UTF8ToISO8859x. The
    * initializer is a single string literal of 27 rows x 16 bytes = 432
    * bytes, exactly the declared 48 + 6 * 64, so the array is filled
    * completely and the literal's terminating NUL is not stored. Keep the
    * row count and the size expression in step if the data is regenerated.
    * NOTE(review): the size expression suggests a 48-byte index followed
    * by six 64-byte blocks; the lookup scheme lives in UTF8ToISO8859x,
    * which is not shown here -- confirm there.
    */
   3827 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
   3828     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3829     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3830     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3831     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3832     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3833     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3834     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3835     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3836     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3837     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
   3838     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
   3839     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3840     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3841     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3842     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3843     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3844     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3845     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
   3846     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3847     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3848     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3849     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3850     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
   3851     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3852     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
   3853     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3854     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
   3855 };
   3856 
   /*
    * xmlunicodetable_ISO8859_16:
    *
    * 128-entry table of 16-bit values that ISO8859_16ToUTF8 hands to
    * ISO8859xToUTF8. The first 32 entries are 0x0080..0x009f in order.
    * NOTE(review): presumably entry i is the Unicode code point for
    * ISO-8859-16 byte 0x80 + i; the indexing itself happens inside
    * ISO8859xToUTF8, so confirm there.
    */
   3857 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
   3858     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3859     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3860     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3861     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3862     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
   3863     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
   3864     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
   3865     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
   3866     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
   3867     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3868     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
   3869     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
   3870     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
   3871     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3872     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
   3873     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
   3874 };
   3875 
   /*
    * xmltranscodetable_ISO8859_16:
    *
    * Byte table that UTF8ToISO8859_16 hands to UTF8ToISO8859x. The
    * initializer is a single string literal of 39 rows x 16 bytes = 624
    * bytes, exactly the declared 48 + 9 * 64, so the array is filled
    * completely and the literal's terminating NUL is not stored. Keep the
    * row count and the size expression in step if the data is regenerated.
    * NOTE(review): the size expression suggests a 48-byte index followed
    * by nine 64-byte blocks; the lookup scheme lives in UTF8ToISO8859x,
    * which is not shown here -- confirm there.
    */
   3876 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
   3877     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
   3878     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3879     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3880     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3881     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3882     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3883     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3884     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3885     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3886     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
   3887     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
   3888     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
   3889     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
   3890     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3891     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3892     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3893     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
   3894     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3895     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
   3896     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3897     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3898     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3899     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3900     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3901     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3902     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
   3903     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3904     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3905     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
   3906     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3907     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3908     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3909     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
   3910     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3911     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3912     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3913     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
   3914     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3915     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
   3916 };
   3917 
   3918 
   3919 /*
   3920  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
   3921  */
   3922 
   3923 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
   3924     const unsigned char* in, int *inlen) {
   3925     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
   3926 }
   3927 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
   3928     const unsigned char* in, int *inlen) {
   3929     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
   3930 }
   3931 
   3932 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
   3933     const unsigned char* in, int *inlen) {
   3934     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
   3935 }
   3936 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
   3937     const unsigned char* in, int *inlen) {
   3938     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
   3939 }
   3940 
   3941 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
   3942     const unsigned char* in, int *inlen) {
   3943     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
   3944 }
   3945 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
   3946     const unsigned char* in, int *inlen) {
   3947     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
   3948 }
   3949 
   3950 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
   3951     const unsigned char* in, int *inlen) {
   3952     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
   3953 }
   3954 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
   3955     const unsigned char* in, int *inlen) {
   3956     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
   3957 }
   3958 
   3959 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
   3960     const unsigned char* in, int *inlen) {
   3961     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
   3962 }
   3963 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
   3964     const unsigned char* in, int *inlen) {
   3965     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
   3966 }
   3967 
   3968 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
   3969     const unsigned char* in, int *inlen) {
   3970     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
   3971 }
   3972 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
   3973     const unsigned char* in, int *inlen) {
   3974     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
   3975 }
   3976 
   3977 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
   3978     const unsigned char* in, int *inlen) {
   3979     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
   3980 }
   3981 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
   3982     const unsigned char* in, int *inlen) {
   3983     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
   3984 }
   3985 
   3986 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
   3987     const unsigned char* in, int *inlen) {
   3988     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
   3989 }
   3990 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
   3991     const unsigned char* in, int *inlen) {
   3992     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
   3993 }
   3994 
   3995 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
   3996     const unsigned char* in, int *inlen) {
   3997     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
   3998 }
   3999 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
   4000     const unsigned char* in, int *inlen) {
   4001     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
   4002 }
   4003 
   4004 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
   4005     const unsigned char* in, int *inlen) {
   4006     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
   4007 }
   4008 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
   4009     const unsigned char* in, int *inlen) {
   4010     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
   4011 }
   4012 
   4013 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
   4014     const unsigned char* in, int *inlen) {
   4015     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
   4016 }
   4017 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
   4018     const unsigned char* in, int *inlen) {
   4019     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
   4020 }
   4021 
   4022 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
   4023     const unsigned char* in, int *inlen) {
   4024     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
   4025 }
   4026 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
   4027     const unsigned char* in, int *inlen) {
   4028     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
   4029 }
   4030 
   4031 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
   4032     const unsigned char* in, int *inlen) {
   4033     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
   4034 }
   4035 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
   4036     const unsigned char* in, int *inlen) {
   4037     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
   4038 }
   4039 
   4040 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
   4041     const unsigned char* in, int *inlen) {
   4042     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
   4043 }
   4044 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
   4045     const unsigned char* in, int *inlen) {
   4046     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
   4047 }
   4048 
   4049 static void
   4050 xmlRegisterCharEncodingHandlersISO8859x (void) {
   4051     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
   4052     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
   4053     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
   4054     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
   4055     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
   4056     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
   4057     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
   4058     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
   4059     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
   4060     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
   4061     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
   4062     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
   4063     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
   4064     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
   4065 }
   4066 
   4067 #endif
   4068 #endif
   4069 
   4070 #define bottom_encoding
   4071 #include "elfgcchack.h"
   4072