Home | History | Annotate | Download | only in libxml2
      1 /*
      2  * encoding.c : implements the encoding conversion functions needed for XML
      3  *
      4  * Related specs:
      5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
      6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
      7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
      8  * [ISO-8859-1]   ISO Latin-1 characters codes.
      9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
     10  *                Worldwide Character Encoding -- Version 1.0", Addison-
     11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
     12  *                described in Unicode Technical Report #4.
     13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
     14  *                Information Interchange, ANSI X3.4-1986.
     15  *
     16  * See Copyright for the status of this software.
     17  *
     18  * daniel (at) veillard.com
     19  *
     20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst (at) w3.org>
     21  */
     22 
     23 #define IN_LIBXML
     24 #include "libxml.h"
     25 
     26 #include <string.h>
     27 
     28 #ifdef HAVE_CTYPE_H
     29 #include <ctype.h>
     30 #endif
     31 #ifdef HAVE_STDLIB_H
     32 #include <stdlib.h>
     33 #endif
     34 #ifdef LIBXML_ICONV_ENABLED
     35 #ifdef HAVE_ERRNO_H
     36 #include <errno.h>
     37 #endif
     38 #endif
     39 #include <libxml/encoding.h>
     40 #include <libxml/xmlmemory.h>
     41 #ifdef LIBXML_HTML_ENABLED
     42 #include <libxml/HTMLparser.h>
     43 #endif
     44 #include <libxml/globals.h>
     45 #include <libxml/xmlerror.h>
     46 
/*
 * Module-level slots for the UTF-16LE and UTF-16BE encoding handlers.
 * Both start out as NULL; the code that fills them in is not in this
 * part of the file.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
     49 
/*
 * One entry of the encoding alias table: a pair of strings associating
 * an alias with an encoding name.
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;   /* presumably the real encoding name -- confirm against the alias API */
    const char *alias;  /* presumably the alternate spelling mapped to @name -- confirm */
};

/*
 * Storage for the alias table; empty (NULL, 0, 0) until something adds
 * an entry.  NOTE(review): Nb looks like the number of entries in use
 * and Max like the allocated capacity -- confirm against the code that
 * manages the table, which is outside this part of the file.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
     60 
/*
 * When iconv or ICU support is compiled in, DEBUG_ENCODING can be
 * switched on here (turn the "#if 0" into "#if 1") to get encoding
 * traces.  When neither library is available and
 * LIBXML_ISO8859X_ENABLED is set, the registration routine for the
 * ISO-8859-x handlers is forward-declared instead.
 */
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif
     70 
/*
 * Host byte-order flag, initialised to 1 (little-endian).
 * NOTE(review): presumably corrected at start-up on big-endian hosts;
 * the code doing so is not in this part of the file -- confirm.
 */
static int xmlLittleEndian = 1;
     72 
/**
 * xmlEncodingErrMemory:
 * @extra:  extra information
 *
 * Handle an out of memory condition: report XML_ERR_NO_MEMORY for the
 * XML_FROM_I18N domain through __xmlSimpleError(), forwarding @extra.
 */
static void
xmlEncodingErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
     84 
/**
 * xmlEncodingErr:
 * @error:  the error number
 * @msg:  the error message, handed to __xmlRaiseError() as the message
 *        argument
 * @val:  a string value, forwarded both as a string argument of the
 *        error and as the argument following @msg
 *
 * Raise an encoding error in the XML_FROM_I18N domain with level
 * XML_ERR_FATAL.  No context, node or file information is supplied.
 */
static void
xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
{
    __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
                    XML_FROM_I18N, error, XML_ERR_FATAL,
                    NULL, 0, val, NULL, NULL, 0, 0, msg, val);
}
     99 
    100 #ifdef LIBXML_ICU_ENABLED
    101 static uconv_t*
    102 openIcuConverter(const char* name, int toUnicode)
    103 {
    104   UErrorCode status = U_ZERO_ERROR;
    105   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
    106   if (conv == NULL)
    107     return NULL;
    108 
    109   conv->uconv = ucnv_open(name, &status);
    110   if (U_FAILURE(status))
    111     goto error;
    112 
    113   status = U_ZERO_ERROR;
    114   if (toUnicode) {
    115     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
    116                         NULL, NULL, NULL, &status);
    117   }
    118   else {
    119     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
    120                         NULL, NULL, NULL, &status);
    121   }
    122   if (U_FAILURE(status))
    123     goto error;
    124 
    125   status = U_ZERO_ERROR;
    126   conv->utf8 = ucnv_open("UTF-8", &status);
    127   if (U_SUCCESS(status))
    128     return conv;
    129 
    130 error:
    131   if (conv->uconv)
    132     ucnv_close(conv->uconv);
    133   xmlFree(conv);
    134   return NULL;
    135 }
    136 
    137 static void
    138 closeIcuConverter(uconv_t *conv)
    139 {
    140   if (conv != NULL) {
    141     ucnv_close(conv->uconv);
    142     ucnv_close(conv->utf8);
    143     xmlFree(conv);
    144   }
    145 }
    146 #endif /* LIBXML_ICU_ENABLED */
    147 
    148 /************************************************************************
    149  *									*
    150  *		Conversions To/From UTF8 encoding			*
    151  *									*
    152  ************************************************************************/
    153 
    154 /**
    155  * asciiToUTF8:
    156  * @out:  a pointer to an array of bytes to store the result
    157  * @outlen:  the length of @out
    158  * @in:  a pointer to an array of ASCII chars
    159  * @inlen:  the length of @in
    160  *
    161  * Take a block of ASCII chars in and try to convert it to an UTF-8
    162  * block of chars out.
    163  * Returns 0 if success, or -1 otherwise
    164  * The value of @inlen after return is the number of octets consumed
    165  *     if the return value is positive, else unpredictable.
    166  * The value of @outlen after return is the number of octets consumed.
    167  */
    168 static int
    169 asciiToUTF8(unsigned char* out, int *outlen,
    170               const unsigned char* in, int *inlen) {
    171     unsigned char* outstart = out;
    172     const unsigned char* base = in;
    173     const unsigned char* processed = in;
    174     unsigned char* outend = out + *outlen;
    175     const unsigned char* inend;
    176     unsigned int c;
    177 
    178     inend = in + (*inlen);
    179     while ((in < inend) && (out - outstart + 5 < *outlen)) {
    180 	c= *in++;
    181 
    182         if (out >= outend)
    183 	    break;
    184         if (c < 0x80) {
    185 	    *out++ = c;
    186 	} else {
    187 	    *outlen = out - outstart;
    188 	    *inlen = processed - base;
    189 	    return(-1);
    190 	}
    191 
    192 	processed = (const unsigned char*) in;
    193     }
    194     *outlen = out - outstart;
    195     *inlen = processed - base;
    196     return(*outlen);
    197 }
    198 
    199 #ifdef LIBXML_OUTPUT_ENABLED
    200 /**
    201  * UTF8Toascii:
    202  * @out:  a pointer to an array of bytes to store the result
    203  * @outlen:  the length of @out
    204  * @in:  a pointer to an array of UTF-8 chars
    205  * @inlen:  the length of @in
    206  *
    207  * Take a block of UTF-8 chars in and try to convert it to an ASCII
    208  * block of chars out.
    209  *
    210  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
    211  * The value of @inlen after return is the number of octets consumed
    212  *     if the return value is positive, else unpredictable.
    213  * The value of @outlen after return is the number of octets consumed.
    214  */
    215 static int
    216 UTF8Toascii(unsigned char* out, int *outlen,
    217               const unsigned char* in, int *inlen) {
    218     const unsigned char* processed = in;
    219     const unsigned char* outend;
    220     const unsigned char* outstart = out;
    221     const unsigned char* instart = in;
    222     const unsigned char* inend;
    223     unsigned int c, d;
    224     int trailing;
    225 
    226     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    227     if (in == NULL) {
    228         /*
    229 	 * initialization nothing to do
    230 	 */
    231 	*outlen = 0;
    232 	*inlen = 0;
    233 	return(0);
    234     }
    235     inend = in + (*inlen);
    236     outend = out + (*outlen);
    237     while (in < inend) {
    238 	d = *in++;
    239 	if      (d < 0x80)  { c= d; trailing= 0; }
    240 	else if (d < 0xC0) {
    241 	    /* trailing byte in leading position */
    242 	    *outlen = out - outstart;
    243 	    *inlen = processed - instart;
    244 	    return(-2);
    245         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    246         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    247         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    248 	else {
    249 	    /* no chance for this in Ascii */
    250 	    *outlen = out - outstart;
    251 	    *inlen = processed - instart;
    252 	    return(-2);
    253 	}
    254 
    255 	if (inend - in < trailing) {
    256 	    break;
    257 	}
    258 
    259 	for ( ; trailing; trailing--) {
    260 	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
    261 		break;
    262 	    c <<= 6;
    263 	    c |= d & 0x3F;
    264 	}
    265 
    266 	/* assertion: c is a single UTF-4 value */
    267 	if (c < 0x80) {
    268 	    if (out >= outend)
    269 		break;
    270 	    *out++ = c;
    271 	} else {
    272 	    /* no chance for this in Ascii */
    273 	    *outlen = out - outstart;
    274 	    *inlen = processed - instart;
    275 	    return(-2);
    276 	}
    277 	processed = in;
    278     }
    279     *outlen = out - outstart;
    280     *inlen = processed - instart;
    281     return(*outlen);
    282 }
    283 #endif /* LIBXML_OUTPUT_ENABLED */
    284 
    285 /**
    286  * isolat1ToUTF8:
    287  * @out:  a pointer to an array of bytes to store the result
    288  * @outlen:  the length of @out
    289  * @in:  a pointer to an array of ISO Latin 1 chars
    290  * @inlen:  the length of @in
    291  *
    292  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
    293  * block of chars out.
    294  * Returns the number of bytes written if success, or -1 otherwise
    295  * The value of @inlen after return is the number of octets consumed
    296  *     if the return value is positive, else unpredictable.
    297  * The value of @outlen after return is the number of octets consumed.
    298  */
    299 int
    300 isolat1ToUTF8(unsigned char* out, int *outlen,
    301               const unsigned char* in, int *inlen) {
    302     unsigned char* outstart = out;
    303     const unsigned char* base = in;
    304     unsigned char* outend;
    305     const unsigned char* inend;
    306     const unsigned char* instop;
    307 
    308     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
    309 	return(-1);
    310 
    311     outend = out + *outlen;
    312     inend = in + (*inlen);
    313     instop = inend;
    314 
    315     while ((in < inend) && (out < outend - 1)) {
    316 	if (*in >= 0x80) {
    317 	    *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
    318             *out++ = ((*in) & 0x3F) | 0x80;
    319 	    ++in;
    320 	}
    321 	if ((instop - in) > (outend - out)) instop = in + (outend - out);
    322 	while ((in < instop) && (*in < 0x80)) {
    323 	    *out++ = *in++;
    324 	}
    325     }
    326     if ((in < inend) && (out < outend) && (*in < 0x80)) {
    327         *out++ = *in++;
    328     }
    329     *outlen = out - outstart;
    330     *inlen = in - base;
    331     return(*outlen);
    332 }
    333 
    334 /**
    335  * UTF8ToUTF8:
    336  * @out:  a pointer to an array of bytes to store the result
    337  * @outlen:  the length of @out
    338  * @inb:  a pointer to an array of UTF-8 chars
    339  * @inlenb:  the length of @in in UTF-8 chars
    340  *
    341  * No op copy operation for UTF8 handling.
    342  *
    343  * Returns the number of bytes written, or -1 if lack of space.
    344  *     The value of *inlen after return is the number of octets consumed
    345  *     if the return value is positive, else unpredictable.
    346  */
    347 static int
    348 UTF8ToUTF8(unsigned char* out, int *outlen,
    349            const unsigned char* inb, int *inlenb)
    350 {
    351     int len;
    352 
    353     if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
    354 	return(-1);
    355     if (*outlen > *inlenb) {
    356 	len = *inlenb;
    357     } else {
    358 	len = *outlen;
    359     }
    360     if (len < 0)
    361 	return(-1);
    362 
    363     memcpy(out, inb, len);
    364 
    365     *outlen = len;
    366     *inlenb = len;
    367     return(*outlen);
    368 }
    369 
    370 
    371 #ifdef LIBXML_OUTPUT_ENABLED
    372 /**
    373  * UTF8Toisolat1:
    374  * @out:  a pointer to an array of bytes to store the result
    375  * @outlen:  the length of @out
    376  * @in:  a pointer to an array of UTF-8 chars
    377  * @inlen:  the length of @in
    378  *
    379  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
    380  * block of chars out.
    381  *
    382  * Returns the number of bytes written if success, -2 if the transcoding fails,
    383            or -1 otherwise
    384  * The value of @inlen after return is the number of octets consumed
    385  *     if the return value is positive, else unpredictable.
    386  * The value of @outlen after return is the number of octets consumed.
    387  */
    388 int
    389 UTF8Toisolat1(unsigned char* out, int *outlen,
    390               const unsigned char* in, int *inlen) {
    391     const unsigned char* processed = in;
    392     const unsigned char* outend;
    393     const unsigned char* outstart = out;
    394     const unsigned char* instart = in;
    395     const unsigned char* inend;
    396     unsigned int c, d;
    397     int trailing;
    398 
    399     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    400     if (in == NULL) {
    401         /*
    402 	 * initialization nothing to do
    403 	 */
    404 	*outlen = 0;
    405 	*inlen = 0;
    406 	return(0);
    407     }
    408     inend = in + (*inlen);
    409     outend = out + (*outlen);
    410     while (in < inend) {
    411 	d = *in++;
    412 	if      (d < 0x80)  { c= d; trailing= 0; }
    413 	else if (d < 0xC0) {
    414 	    /* trailing byte in leading position */
    415 	    *outlen = out - outstart;
    416 	    *inlen = processed - instart;
    417 	    return(-2);
    418         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    419         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    420         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    421 	else {
    422 	    /* no chance for this in IsoLat1 */
    423 	    *outlen = out - outstart;
    424 	    *inlen = processed - instart;
    425 	    return(-2);
    426 	}
    427 
    428 	if (inend - in < trailing) {
    429 	    break;
    430 	}
    431 
    432 	for ( ; trailing; trailing--) {
    433 	    if (in >= inend)
    434 		break;
    435 	    if (((d= *in++) & 0xC0) != 0x80) {
    436 		*outlen = out - outstart;
    437 		*inlen = processed - instart;
    438 		return(-2);
    439 	    }
    440 	    c <<= 6;
    441 	    c |= d & 0x3F;
    442 	}
    443 
    444 	/* assertion: c is a single UTF-4 value */
    445 	if (c <= 0xFF) {
    446 	    if (out >= outend)
    447 		break;
    448 	    *out++ = c;
    449 	} else {
    450 	    /* no chance for this in IsoLat1 */
    451 	    *outlen = out - outstart;
    452 	    *inlen = processed - instart;
    453 	    return(-2);
    454 	}
    455 	processed = in;
    456     }
    457     *outlen = out - outstart;
    458     *inlen = processed - instart;
    459     return(*outlen);
    460 }
    461 #endif /* LIBXML_OUTPUT_ENABLED */
    462 
    463 /**
    464  * UTF16LEToUTF8:
    465  * @out:  a pointer to an array of bytes to store the result
    466  * @outlen:  the length of @out
    467  * @inb:  a pointer to an array of UTF-16LE passwd as a byte array
    468  * @inlenb:  the length of @in in UTF-16LE chars
    469  *
    470  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
    471  * block of chars out. This function assumes the endian property
    472  * is the same between the native type of this machine and the
    473  * inputed one.
    474  *
    475  * Returns the number of bytes written, or -1 if lack of space, or -2
    476  *     if the transcoding fails (if *in is not a valid utf16 string)
    477  *     The value of *inlen after return is the number of octets consumed
    478  *     if the return value is positive, else unpredictable.
    479  */
    480 static int
    481 UTF16LEToUTF8(unsigned char* out, int *outlen,
    482             const unsigned char* inb, int *inlenb)
    483 {
    484     unsigned char* outstart = out;
    485     const unsigned char* processed = inb;
    486     unsigned char* outend = out + *outlen;
    487     unsigned short* in = (unsigned short*) inb;
    488     unsigned short* inend;
    489     unsigned int c, d, inlen;
    490     unsigned char *tmp;
    491     int bits;
    492 
    493     if ((*inlenb % 2) == 1)
    494         (*inlenb)--;
    495     inlen = *inlenb / 2;
    496     inend = in + inlen;
    497     while ((in < inend) && (out - outstart + 5 < *outlen)) {
    498         if (xmlLittleEndian) {
    499 	    c= *in++;
    500 	} else {
    501 	    tmp = (unsigned char *) in;
    502 	    c = *tmp++;
    503 	    c = c | (((unsigned int)*tmp) << 8);
    504 	    in++;
    505 	}
    506         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
    507 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
    508 		break;
    509 	    }
    510 	    if (xmlLittleEndian) {
    511 		d = *in++;
    512 	    } else {
    513 		tmp = (unsigned char *) in;
    514 		d = *tmp++;
    515 		d = d | (((unsigned int)*tmp) << 8);
    516 		in++;
    517 	    }
    518             if ((d & 0xFC00) == 0xDC00) {
    519                 c &= 0x03FF;
    520                 c <<= 10;
    521                 c |= d & 0x03FF;
    522                 c += 0x10000;
    523             }
    524             else {
    525 		*outlen = out - outstart;
    526 		*inlenb = processed - inb;
    527 	        return(-2);
    528 	    }
    529         }
    530 
    531 	/* assertion: c is a single UTF-4 value */
    532         if (out >= outend)
    533 	    break;
    534         if      (c <    0x80) {  *out++=  c;                bits= -6; }
    535         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
    536         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
    537         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
    538 
    539         for ( ; bits >= 0; bits-= 6) {
    540             if (out >= outend)
    541 	        break;
    542             *out++= ((c >> bits) & 0x3F) | 0x80;
    543         }
    544 	processed = (const unsigned char*) in;
    545     }
    546     *outlen = out - outstart;
    547     *inlenb = processed - inb;
    548     return(*outlen);
    549 }
    550 
    551 #ifdef LIBXML_OUTPUT_ENABLED
    552 /**
    553  * UTF8ToUTF16LE:
    554  * @outb:  a pointer to an array of bytes to store the result
    555  * @outlen:  the length of @outb
    556  * @in:  a pointer to an array of UTF-8 chars
    557  * @inlen:  the length of @in
    558  *
    559  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
    560  * block of chars out.
    561  *
    562  * Returns the number of bytes written, or -1 if lack of space, or -2
    563  *     if the transcoding failed.
    564  */
    565 static int
    566 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
    567             const unsigned char* in, int *inlen)
    568 {
    569     unsigned short* out = (unsigned short*) outb;
    570     const unsigned char* processed = in;
    571     const unsigned char *const instart = in;
    572     unsigned short* outstart= out;
    573     unsigned short* outend;
    574     const unsigned char* inend;
    575     unsigned int c, d;
    576     int trailing;
    577     unsigned char *tmp;
    578     unsigned short tmp1, tmp2;
    579 
    580     /* UTF16LE encoding has no BOM */
    581     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    582     if (in == NULL) {
    583 	*outlen = 0;
    584 	*inlen = 0;
    585 	return(0);
    586     }
    587     inend= in + *inlen;
    588     outend = out + (*outlen / 2);
    589     while (in < inend) {
    590       d= *in++;
    591       if      (d < 0x80)  { c= d; trailing= 0; }
    592       else if (d < 0xC0) {
    593           /* trailing byte in leading position */
    594 	  *outlen = (out - outstart) * 2;
    595 	  *inlen = processed - instart;
    596 	  return(-2);
    597       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    598       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    599       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    600       else {
    601 	/* no chance for this in UTF-16 */
    602 	*outlen = (out - outstart) * 2;
    603 	*inlen = processed - instart;
    604 	return(-2);
    605       }
    606 
    607       if (inend - in < trailing) {
    608           break;
    609       }
    610 
    611       for ( ; trailing; trailing--) {
    612           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
    613 	      break;
    614           c <<= 6;
    615           c |= d & 0x3F;
    616       }
    617 
    618       /* assertion: c is a single UTF-4 value */
    619         if (c < 0x10000) {
    620             if (out >= outend)
    621 	        break;
    622 	    if (xmlLittleEndian) {
    623 		*out++ = c;
    624 	    } else {
    625 		tmp = (unsigned char *) out;
    626 		*tmp = c ;
    627 		*(tmp + 1) = c >> 8 ;
    628 		out++;
    629 	    }
    630         }
    631         else if (c < 0x110000) {
    632             if (out+1 >= outend)
    633 	        break;
    634             c -= 0x10000;
    635 	    if (xmlLittleEndian) {
    636 		*out++ = 0xD800 | (c >> 10);
    637 		*out++ = 0xDC00 | (c & 0x03FF);
    638 	    } else {
    639 		tmp1 = 0xD800 | (c >> 10);
    640 		tmp = (unsigned char *) out;
    641 		*tmp = (unsigned char) tmp1;
    642 		*(tmp + 1) = tmp1 >> 8;
    643 		out++;
    644 
    645 		tmp2 = 0xDC00 | (c & 0x03FF);
    646 		tmp = (unsigned char *) out;
    647 		*tmp  = (unsigned char) tmp2;
    648 		*(tmp + 1) = tmp2 >> 8;
    649 		out++;
    650 	    }
    651         }
    652         else
    653 	    break;
    654 	processed = in;
    655     }
    656     *outlen = (out - outstart) * 2;
    657     *inlen = processed - instart;
    658     return(*outlen);
    659 }
    660 
    661 /**
    662  * UTF8ToUTF16:
    663  * @outb:  a pointer to an array of bytes to store the result
    664  * @outlen:  the length of @outb
    665  * @in:  a pointer to an array of UTF-8 chars
    666  * @inlen:  the length of @in
    667  *
    668  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
    669  * block of chars out.
    670  *
    671  * Returns the number of bytes written, or -1 if lack of space, or -2
    672  *     if the transcoding failed.
    673  */
    674 static int
    675 UTF8ToUTF16(unsigned char* outb, int *outlen,
    676             const unsigned char* in, int *inlen)
    677 {
    678     if (in == NULL) {
    679 	/*
    680 	 * initialization, add the Byte Order Mark for UTF-16LE
    681 	 */
    682         if (*outlen >= 2) {
    683 	    outb[0] = 0xFF;
    684 	    outb[1] = 0xFE;
    685 	    *outlen = 2;
    686 	    *inlen = 0;
    687 #ifdef DEBUG_ENCODING
    688             xmlGenericError(xmlGenericErrorContext,
    689 		    "Added FFFE Byte Order Mark\n");
    690 #endif
    691 	    return(2);
    692 	}
    693 	*outlen = 0;
    694 	*inlen = 0;
    695 	return(0);
    696     }
    697     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
    698 }
    699 #endif /* LIBXML_OUTPUT_ENABLED */
    700 
    701 /**
    702  * UTF16BEToUTF8:
    703  * @out:  a pointer to an array of bytes to store the result
    704  * @outlen:  the length of @out
    705  * @inb:  a pointer to an array of UTF-16 passed as a byte array
    706  * @inlenb:  the length of @in in UTF-16 chars
    707  *
    708  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
    709  * block of chars out. This function assumes the endian property
    710  * is the same between the native type of this machine and the
    711  * inputed one.
    712  *
    713  * Returns the number of bytes written, or -1 if lack of space, or -2
    714  *     if the transcoding fails (if *in is not a valid utf16 string)
    715  * The value of *inlen after return is the number of octets consumed
    716  *     if the return value is positive, else unpredictable.
    717  */
    718 static int
    719 UTF16BEToUTF8(unsigned char* out, int *outlen,
    720             const unsigned char* inb, int *inlenb)
    721 {
    722     unsigned char* outstart = out;
    723     const unsigned char* processed = inb;
    724     unsigned char* outend = out + *outlen;
    725     unsigned short* in = (unsigned short*) inb;
    726     unsigned short* inend;
    727     unsigned int c, d, inlen;
    728     unsigned char *tmp;
    729     int bits;
    730 
    731     if ((*inlenb % 2) == 1)
    732         (*inlenb)--;
    733     inlen = *inlenb / 2;
    734     inend= in + inlen;
    735     while (in < inend) {
    736 	if (xmlLittleEndian) {
    737 	    tmp = (unsigned char *) in;
    738 	    c = *tmp++;
    739 	    c = c << 8;
    740 	    c = c | (unsigned int) *tmp;
    741 	    in++;
    742 	} else {
    743 	    c= *in++;
    744 	}
    745         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
    746 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
    747 		*outlen = out - outstart;
    748 		*inlenb = processed - inb;
    749 	        return(-2);
    750 	    }
    751 	    if (xmlLittleEndian) {
    752 		tmp = (unsigned char *) in;
    753 		d = *tmp++;
    754 		d = d << 8;
    755 		d = d | (unsigned int) *tmp;
    756 		in++;
    757 	    } else {
    758 		d= *in++;
    759 	    }
    760             if ((d & 0xFC00) == 0xDC00) {
    761                 c &= 0x03FF;
    762                 c <<= 10;
    763                 c |= d & 0x03FF;
    764                 c += 0x10000;
    765             }
    766             else {
    767 		*outlen = out - outstart;
    768 		*inlenb = processed - inb;
    769 	        return(-2);
    770 	    }
    771         }
    772 
    773 	/* assertion: c is a single UTF-4 value */
    774         if (out >= outend)
    775 	    break;
    776         if      (c <    0x80) {  *out++=  c;                bits= -6; }
    777         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
    778         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
    779         else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
    780 
    781         for ( ; bits >= 0; bits-= 6) {
    782             if (out >= outend)
    783 	        break;
    784             *out++= ((c >> bits) & 0x3F) | 0x80;
    785         }
    786 	processed = (const unsigned char*) in;
    787     }
    788     *outlen = out - outstart;
    789     *inlenb = processed - inb;
    790     return(*outlen);
    791 }
    792 
    793 #ifdef LIBXML_OUTPUT_ENABLED
    794 /**
    795  * UTF8ToUTF16BE:
    796  * @outb:  a pointer to an array of bytes to store the result
    797  * @outlen:  the length of @outb
    798  * @in:  a pointer to an array of UTF-8 chars
    799  * @inlen:  the length of @in
    800  *
    801  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
    802  * block of chars out.
    803  *
    804  * Returns the number of byte written, or -1 by lack of space, or -2
    805  *     if the transcoding failed.
    806  */
    807 static int
    808 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
    809             const unsigned char* in, int *inlen)
    810 {
    811     unsigned short* out = (unsigned short*) outb;
    812     const unsigned char* processed = in;
    813     const unsigned char *const instart = in;
    814     unsigned short* outstart= out;
    815     unsigned short* outend;
    816     const unsigned char* inend;
    817     unsigned int c, d;
    818     int trailing;
    819     unsigned char *tmp;
    820     unsigned short tmp1, tmp2;
    821 
    822     /* UTF-16BE has no BOM */
    823     if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    824     if (in == NULL) {
    825 	*outlen = 0;
    826 	*inlen = 0;
    827 	return(0);
    828     }
    829     inend= in + *inlen;
    830     outend = out + (*outlen / 2);
    831     while (in < inend) {
    832       d= *in++;
    833       if      (d < 0x80)  { c= d; trailing= 0; }
    834       else if (d < 0xC0)  {
    835           /* trailing byte in leading position */
    836 	  *outlen = out - outstart;
    837 	  *inlen = processed - instart;
    838 	  return(-2);
    839       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
    840       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
    841       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
    842       else {
    843           /* no chance for this in UTF-16 */
    844 	  *outlen = out - outstart;
    845 	  *inlen = processed - instart;
    846 	  return(-2);
    847       }
    848 
    849       if (inend - in < trailing) {
    850           break;
    851       }
    852 
    853       for ( ; trailing; trailing--) {
    854           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
    855           c <<= 6;
    856           c |= d & 0x3F;
    857       }
    858 
    859       /* assertion: c is a single UTF-4 value */
    860         if (c < 0x10000) {
    861             if (out >= outend)  break;
    862 	    if (xmlLittleEndian) {
    863 		tmp = (unsigned char *) out;
    864 		*tmp = c >> 8;
    865 		*(tmp + 1) = c;
    866 		out++;
    867 	    } else {
    868 		*out++ = c;
    869 	    }
    870         }
    871         else if (c < 0x110000) {
    872             if (out+1 >= outend)  break;
    873             c -= 0x10000;
    874 	    if (xmlLittleEndian) {
    875 		tmp1 = 0xD800 | (c >> 10);
    876 		tmp = (unsigned char *) out;
    877 		*tmp = tmp1 >> 8;
    878 		*(tmp + 1) = (unsigned char) tmp1;
    879 		out++;
    880 
    881 		tmp2 = 0xDC00 | (c & 0x03FF);
    882 		tmp = (unsigned char *) out;
    883 		*tmp = tmp2 >> 8;
    884 		*(tmp + 1) = (unsigned char) tmp2;
    885 		out++;
    886 	    } else {
    887 		*out++ = 0xD800 | (c >> 10);
    888 		*out++ = 0xDC00 | (c & 0x03FF);
    889 	    }
    890         }
    891         else
    892 	    break;
    893 	processed = in;
    894     }
    895     *outlen = (out - outstart) * 2;
    896     *inlen = processed - instart;
    897     return(*outlen);
    898 }
    899 #endif /* LIBXML_OUTPUT_ENABLED */
    900 
    901 /************************************************************************
    902  *									*
    903  *		Generic encoding handling routines			*
    904  *									*
    905  ************************************************************************/
    906 
    907 /**
    908  * xmlDetectCharEncoding:
 * @in:  a pointer to the first bytes of the XML entity, must be at least
 *       2 bytes long (at least 4 if the encoding is a UCS-4 variant).
 * @len:  the length of the buffer in bytes
    912  *
    913  * Guess the encoding of the entity using the first bytes of the entity content
    914  * according to the non-normative appendix F of the XML-1.0 recommendation.
    915  *
    916  * Returns one of the XML_CHAR_ENCODING_... values.
    917  */
    918 xmlCharEncoding
    919 xmlDetectCharEncoding(const unsigned char* in, int len)
    920 {
    921     if (in == NULL)
    922         return(XML_CHAR_ENCODING_NONE);
    923     if (len >= 4) {
    924 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
    925 	    (in[2] == 0x00) && (in[3] == 0x3C))
    926 	    return(XML_CHAR_ENCODING_UCS4BE);
    927 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
    928 	    (in[2] == 0x00) && (in[3] == 0x00))
    929 	    return(XML_CHAR_ENCODING_UCS4LE);
    930 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
    931 	    (in[2] == 0x3C) && (in[3] == 0x00))
    932 	    return(XML_CHAR_ENCODING_UCS4_2143);
    933 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
    934 	    (in[2] == 0x00) && (in[3] == 0x00))
    935 	    return(XML_CHAR_ENCODING_UCS4_3412);
    936 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
    937 	    (in[2] == 0xA7) && (in[3] == 0x94))
    938 	    return(XML_CHAR_ENCODING_EBCDIC);
    939 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
    940 	    (in[2] == 0x78) && (in[3] == 0x6D))
    941 	    return(XML_CHAR_ENCODING_UTF8);
    942 	/*
    943 	 * Although not part of the recommendation, we also
    944 	 * attempt an "auto-recognition" of UTF-16LE and
    945 	 * UTF-16BE encodings.
    946 	 */
    947 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
    948 	    (in[2] == 0x3F) && (in[3] == 0x00))
    949 	    return(XML_CHAR_ENCODING_UTF16LE);
    950 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
    951 	    (in[2] == 0x00) && (in[3] == 0x3F))
    952 	    return(XML_CHAR_ENCODING_UTF16BE);
    953     }
    954     if (len >= 3) {
    955 	/*
    956 	 * Errata on XML-1.0 June 20 2001
    957 	 * We now allow an UTF8 encoded BOM
    958 	 */
    959 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
    960 	    (in[2] == 0xBF))
    961 	    return(XML_CHAR_ENCODING_UTF8);
    962     }
    963     /* For UTF-16 we can recognize by the BOM */
    964     if (len >= 2) {
    965 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
    966 	    return(XML_CHAR_ENCODING_UTF16BE);
    967 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
    968 	    return(XML_CHAR_ENCODING_UTF16LE);
    969     }
    970     return(XML_CHAR_ENCODING_NONE);
    971 }
    972 
    973 /**
    974  * xmlCleanupEncodingAliases:
    975  *
    976  * Unregisters all aliases
    977  */
    978 void
    979 xmlCleanupEncodingAliases(void) {
    980     int i;
    981 
    982     if (xmlCharEncodingAliases == NULL)
    983 	return;
    984 
    985     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
    986 	if (xmlCharEncodingAliases[i].name != NULL)
    987 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
    988 	if (xmlCharEncodingAliases[i].alias != NULL)
    989 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
    990     }
    991     xmlCharEncodingAliasesNb = 0;
    992     xmlCharEncodingAliasesMax = 0;
    993     xmlFree(xmlCharEncodingAliases);
    994     xmlCharEncodingAliases = NULL;
    995 }
    996 
    997 /**
    998  * xmlGetEncodingAlias:
    999  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
   1000  *
   1001  * Lookup an encoding name for the given alias.
   1002  *
   1003  * Returns NULL if not found, otherwise the original name
   1004  */
   1005 const char *
   1006 xmlGetEncodingAlias(const char *alias) {
   1007     int i;
   1008     char upper[100];
   1009 
   1010     if (alias == NULL)
   1011 	return(NULL);
   1012 
   1013     if (xmlCharEncodingAliases == NULL)
   1014 	return(NULL);
   1015 
   1016     for (i = 0;i < 99;i++) {
   1017         upper[i] = toupper(alias[i]);
   1018 	if (upper[i] == 0) break;
   1019     }
   1020     upper[i] = 0;
   1021 
   1022     /*
   1023      * Walk down the list looking for a definition of the alias
   1024      */
   1025     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
   1026 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
   1027 	    return(xmlCharEncodingAliases[i].name);
   1028 	}
   1029     }
   1030     return(NULL);
   1031 }
   1032 
   1033 /**
   1034  * xmlAddEncodingAlias:
   1035  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
   1036  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
   1037  *
   1038  * Registers an alias @alias for an encoding named @name. Existing alias
   1039  * will be overwritten.
   1040  *
   1041  * Returns 0 in case of success, -1 in case of error
   1042  */
   1043 int
   1044 xmlAddEncodingAlias(const char *name, const char *alias) {
   1045     int i;
   1046     char upper[100];
   1047 
   1048     if ((name == NULL) || (alias == NULL))
   1049 	return(-1);
   1050 
   1051     for (i = 0;i < 99;i++) {
   1052         upper[i] = toupper(alias[i]);
   1053 	if (upper[i] == 0) break;
   1054     }
   1055     upper[i] = 0;
   1056 
   1057     if (xmlCharEncodingAliases == NULL) {
   1058 	xmlCharEncodingAliasesNb = 0;
   1059 	xmlCharEncodingAliasesMax = 20;
   1060 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
   1061 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
   1062 	if (xmlCharEncodingAliases == NULL)
   1063 	    return(-1);
   1064     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
   1065 	xmlCharEncodingAliasesMax *= 2;
   1066 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
   1067 	      xmlRealloc(xmlCharEncodingAliases,
   1068 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
   1069     }
   1070     /*
   1071      * Walk down the list looking for a definition of the alias
   1072      */
   1073     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
   1074 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
   1075 	    /*
   1076 	     * Replace the definition.
   1077 	     */
   1078 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
   1079 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
   1080 	    return(0);
   1081 	}
   1082     }
   1083     /*
   1084      * Add the definition
   1085      */
   1086     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
   1087     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
   1088     xmlCharEncodingAliasesNb++;
   1089     return(0);
   1090 }
   1091 
   1092 /**
   1093  * xmlDelEncodingAlias:
   1094  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
   1095  *
   1096  * Unregisters an encoding alias @alias
   1097  *
   1098  * Returns 0 in case of success, -1 in case of error
   1099  */
   1100 int
   1101 xmlDelEncodingAlias(const char *alias) {
   1102     int i;
   1103 
   1104     if (alias == NULL)
   1105 	return(-1);
   1106 
   1107     if (xmlCharEncodingAliases == NULL)
   1108 	return(-1);
   1109     /*
   1110      * Walk down the list looking for a definition of the alias
   1111      */
   1112     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
   1113 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
   1114 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
   1115 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
   1116 	    xmlCharEncodingAliasesNb--;
   1117 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
   1118 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
   1119 	    return(0);
   1120 	}
   1121     }
   1122     return(-1);
   1123 }
   1124 
   1125 /**
   1126  * xmlParseCharEncoding:
   1127  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
   1128  *
   1129  * Compare the string to the encoding schemes already known. Note
   1130  * that the comparison is case insensitive accordingly to the section
   1131  * [XML] 4.3.3 Character Encoding in Entities.
   1132  *
   1133  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
   1134  * if not recognized.
   1135  */
   1136 xmlCharEncoding
   1137 xmlParseCharEncoding(const char* name)
   1138 {
   1139     const char *alias;
   1140     char upper[500];
   1141     int i;
   1142 
   1143     if (name == NULL)
   1144 	return(XML_CHAR_ENCODING_NONE);
   1145 
   1146     /*
   1147      * Do the alias resolution
   1148      */
   1149     alias = xmlGetEncodingAlias(name);
   1150     if (alias != NULL)
   1151 	name = alias;
   1152 
   1153     for (i = 0;i < 499;i++) {
   1154         upper[i] = toupper(name[i]);
   1155 	if (upper[i] == 0) break;
   1156     }
   1157     upper[i] = 0;
   1158 
   1159     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
   1160     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
   1161     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
   1162 
   1163     /*
   1164      * NOTE: if we were able to parse this, the endianness of UTF16 is
   1165      *       already found and in use
   1166      */
   1167     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
   1168     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
   1169 
   1170     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
   1171     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
   1172     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
   1173 
   1174     /*
   1175      * NOTE: if we were able to parse this, the endianness of UCS4 is
   1176      *       already found and in use
   1177      */
   1178     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
   1179     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
   1180     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
   1181 
   1182 
   1183     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
   1184     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
   1185     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
   1186 
   1187     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
   1188     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
   1189     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
   1190 
   1191     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
   1192     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
   1193     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
   1194     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
   1195     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
   1196     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
   1197     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
   1198 
   1199     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
   1200     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
   1201     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
   1202 
   1203 #ifdef DEBUG_ENCODING
   1204     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
   1205 #endif
   1206     return(XML_CHAR_ENCODING_ERROR);
   1207 }
   1208 
   1209 /**
   1210  * xmlGetCharEncodingName:
   1211  * @enc:  the encoding
   1212  *
   1213  * The "canonical" name for XML encoding.
   1214  * C.f. http://www.w3.org/TR/REC-xml#charencoding
   1215  * Section 4.3.3  Character Encoding in Entities
   1216  *
   1217  * Returns the canonical name for the given encoding
   1218  */
   1219 
   1220 const char*
   1221 xmlGetCharEncodingName(xmlCharEncoding enc) {
   1222     switch (enc) {
   1223         case XML_CHAR_ENCODING_ERROR:
   1224 	    return(NULL);
   1225         case XML_CHAR_ENCODING_NONE:
   1226 	    return(NULL);
   1227         case XML_CHAR_ENCODING_UTF8:
   1228 	    return("UTF-8");
   1229         case XML_CHAR_ENCODING_UTF16LE:
   1230 	    return("UTF-16");
   1231         case XML_CHAR_ENCODING_UTF16BE:
   1232 	    return("UTF-16");
   1233         case XML_CHAR_ENCODING_EBCDIC:
   1234             return("EBCDIC");
   1235         case XML_CHAR_ENCODING_UCS4LE:
   1236             return("ISO-10646-UCS-4");
   1237         case XML_CHAR_ENCODING_UCS4BE:
   1238             return("ISO-10646-UCS-4");
   1239         case XML_CHAR_ENCODING_UCS4_2143:
   1240             return("ISO-10646-UCS-4");
   1241         case XML_CHAR_ENCODING_UCS4_3412:
   1242             return("ISO-10646-UCS-4");
   1243         case XML_CHAR_ENCODING_UCS2:
   1244             return("ISO-10646-UCS-2");
   1245         case XML_CHAR_ENCODING_8859_1:
   1246 	    return("ISO-8859-1");
   1247         case XML_CHAR_ENCODING_8859_2:
   1248 	    return("ISO-8859-2");
   1249         case XML_CHAR_ENCODING_8859_3:
   1250 	    return("ISO-8859-3");
   1251         case XML_CHAR_ENCODING_8859_4:
   1252 	    return("ISO-8859-4");
   1253         case XML_CHAR_ENCODING_8859_5:
   1254 	    return("ISO-8859-5");
   1255         case XML_CHAR_ENCODING_8859_6:
   1256 	    return("ISO-8859-6");
   1257         case XML_CHAR_ENCODING_8859_7:
   1258 	    return("ISO-8859-7");
   1259         case XML_CHAR_ENCODING_8859_8:
   1260 	    return("ISO-8859-8");
   1261         case XML_CHAR_ENCODING_8859_9:
   1262 	    return("ISO-8859-9");
   1263         case XML_CHAR_ENCODING_2022_JP:
   1264             return("ISO-2022-JP");
   1265         case XML_CHAR_ENCODING_SHIFT_JIS:
   1266             return("Shift-JIS");
   1267         case XML_CHAR_ENCODING_EUC_JP:
   1268             return("EUC-JP");
   1269 	case XML_CHAR_ENCODING_ASCII:
   1270 	    return(NULL);
   1271     }
   1272     return(NULL);
   1273 }
   1274 
   1275 /************************************************************************
   1276  *									*
   1277  *			Char encoding handlers				*
   1278  *									*
   1279  ************************************************************************/
   1280 
   1281 
   1282 /* the size should be growable, but it's not a big deal ... */
   1283 #define MAX_ENCODING_HANDLERS 50
   1284 static xmlCharEncodingHandlerPtr *handlers = NULL;
   1285 static int nbCharEncodingHandler = 0;
   1286 
   1287 /*
   1288  * The default is UTF-8 for XML, that's also the default used for the
   1289  * parser internals, so the default encoding handler is NULL
   1290  */
   1291 
   1292 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
   1293 
   1294 /**
   1295  * xmlNewCharEncodingHandler:
   1296  * @name:  the encoding name, in UTF-8 format (ASCII actually)
   1297  * @input:  the xmlCharEncodingInputFunc to read that encoding
   1298  * @output:  the xmlCharEncodingOutputFunc to write that encoding
   1299  *
   1300  * Create and registers an xmlCharEncodingHandler.
   1301  *
   1302  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
   1303  */
   1304 xmlCharEncodingHandlerPtr
   1305 xmlNewCharEncodingHandler(const char *name,
   1306                           xmlCharEncodingInputFunc input,
   1307                           xmlCharEncodingOutputFunc output) {
   1308     xmlCharEncodingHandlerPtr handler;
   1309     const char *alias;
   1310     char upper[500];
   1311     int i;
   1312     char *up = NULL;
   1313 
   1314     /*
   1315      * Do the alias resolution
   1316      */
   1317     alias = xmlGetEncodingAlias(name);
   1318     if (alias != NULL)
   1319 	name = alias;
   1320 
   1321     /*
   1322      * Keep only the uppercase version of the encoding.
   1323      */
   1324     if (name == NULL) {
   1325         xmlEncodingErr(XML_I18N_NO_NAME,
   1326 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
   1327 	return(NULL);
   1328     }
   1329     for (i = 0;i < 499;i++) {
   1330         upper[i] = toupper(name[i]);
   1331 	if (upper[i] == 0) break;
   1332     }
   1333     upper[i] = 0;
   1334     up = xmlMemStrdup(upper);
   1335     if (up == NULL) {
   1336         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
   1337 	return(NULL);
   1338     }
   1339 
   1340     /*
   1341      * allocate and fill-up an handler block.
   1342      */
   1343     handler = (xmlCharEncodingHandlerPtr)
   1344               xmlMalloc(sizeof(xmlCharEncodingHandler));
   1345     if (handler == NULL) {
   1346         xmlFree(up);
   1347         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
   1348 	return(NULL);
   1349     }
   1350     memset(handler, 0, sizeof(xmlCharEncodingHandler));
   1351     handler->input = input;
   1352     handler->output = output;
   1353     handler->name = up;
   1354 
   1355 #ifdef LIBXML_ICONV_ENABLED
   1356     handler->iconv_in = NULL;
   1357     handler->iconv_out = NULL;
   1358 #endif
   1359 #ifdef LIBXML_ICU_ENABLED
   1360     handler->uconv_in = NULL;
   1361     handler->uconv_out = NULL;
   1362 #endif
   1363 
   1364     /*
   1365      * registers and returns the handler.
   1366      */
   1367     xmlRegisterCharEncodingHandler(handler);
   1368 #ifdef DEBUG_ENCODING
   1369     xmlGenericError(xmlGenericErrorContext,
   1370 	    "Registered encoding handler for %s\n", name);
   1371 #endif
   1372     return(handler);
   1373 }
   1374 
   1375 /**
   1376  * xmlInitCharEncodingHandlers:
   1377  *
   1378  * Initialize the char encoding support, it registers the default
   1379  * encoding supported.
   1380  * NOTE: while public, this function usually doesn't need to be called
   1381  *       in normal processing.
   1382  */
   1383 void
   1384 xmlInitCharEncodingHandlers(void) {
   1385     unsigned short int tst = 0x1234;
   1386     unsigned char *ptr = (unsigned char *) &tst;
   1387 
   1388     if (handlers != NULL) return;
   1389 
   1390     handlers = (xmlCharEncodingHandlerPtr *)
   1391         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
   1392 
   1393     if (*ptr == 0x12) xmlLittleEndian = 0;
   1394     else if (*ptr == 0x34) xmlLittleEndian = 1;
   1395     else {
   1396         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
   1397 	               "Odd problem at endianness detection\n", NULL);
   1398     }
   1399 
   1400     if (handlers == NULL) {
   1401         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
   1402 	return;
   1403     }
   1404     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
   1405 #ifdef LIBXML_OUTPUT_ENABLED
   1406     xmlUTF16LEHandler =
   1407           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
   1408     xmlUTF16BEHandler =
   1409           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
   1410     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
   1411     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
   1412     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
   1413     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
   1414 #ifdef LIBXML_HTML_ENABLED
   1415     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
   1416 #endif
   1417 #else
   1418     xmlUTF16LEHandler =
   1419           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
   1420     xmlUTF16BEHandler =
   1421           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
   1422     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
   1423     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
   1424     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
   1425     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
   1426 #endif /* LIBXML_OUTPUT_ENABLED */
   1427 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
   1428 #ifdef LIBXML_ISO8859X_ENABLED
   1429     xmlRegisterCharEncodingHandlersISO8859x ();
   1430 #endif
   1431 #endif
   1432 
   1433 }
   1434 
   1435 /**
   1436  * xmlCleanupCharEncodingHandlers:
   1437  *
   1438  * Cleanup the memory allocated for the char encoding support, it
   1439  * unregisters all the encoding handlers and the aliases.
   1440  */
   1441 void
   1442 xmlCleanupCharEncodingHandlers(void) {
   1443     xmlCleanupEncodingAliases();
   1444 
   1445     if (handlers == NULL) return;
   1446 
   1447     for (;nbCharEncodingHandler > 0;) {
   1448         nbCharEncodingHandler--;
   1449 	if (handlers[nbCharEncodingHandler] != NULL) {
   1450 	    if (handlers[nbCharEncodingHandler]->name != NULL)
   1451 		xmlFree(handlers[nbCharEncodingHandler]->name);
   1452 	    xmlFree(handlers[nbCharEncodingHandler]);
   1453 	}
   1454     }
   1455     xmlFree(handlers);
   1456     handlers = NULL;
   1457     nbCharEncodingHandler = 0;
   1458     xmlDefaultCharEncodingHandler = NULL;
   1459 }
   1460 
   1461 /**
   1462  * xmlRegisterCharEncodingHandler:
   1463  * @handler:  the xmlCharEncodingHandlerPtr handler block
   1464  *
   1465  * Register the char encoding handler, surprising, isn't it ?
   1466  */
   1467 void
   1468 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
   1469     if (handlers == NULL) xmlInitCharEncodingHandlers();
   1470     if ((handler == NULL) || (handlers == NULL)) {
   1471         xmlEncodingErr(XML_I18N_NO_HANDLER,
   1472 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
   1473 	return;
   1474     }
   1475 
   1476     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
   1477         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
   1478 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
   1479 	               "MAX_ENCODING_HANDLERS");
   1480 	return;
   1481     }
   1482     handlers[nbCharEncodingHandler++] = handler;
   1483 }
   1484 
   1485 /**
   1486  * xmlGetCharEncodingHandler:
   1487  * @enc:  an xmlCharEncoding value.
   1488  *
   1489  * Search in the registered set the handler able to read/write that encoding.
   1490  *
   1491  * Returns the handler or NULL if not found
   1492  */
   1493 xmlCharEncodingHandlerPtr
   1494 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
   1495     xmlCharEncodingHandlerPtr handler;
   1496 
   1497     if (handlers == NULL) xmlInitCharEncodingHandlers();
   1498     switch (enc) {
   1499         case XML_CHAR_ENCODING_ERROR:
   1500 	    return(NULL);
   1501         case XML_CHAR_ENCODING_NONE:
   1502 	    return(NULL);
   1503         case XML_CHAR_ENCODING_UTF8:
   1504 	    return(NULL);
   1505         case XML_CHAR_ENCODING_UTF16LE:
   1506 	    return(xmlUTF16LEHandler);
   1507         case XML_CHAR_ENCODING_UTF16BE:
   1508 	    return(xmlUTF16BEHandler);
   1509         case XML_CHAR_ENCODING_EBCDIC:
   1510             handler = xmlFindCharEncodingHandler("EBCDIC");
   1511             if (handler != NULL) return(handler);
   1512             handler = xmlFindCharEncodingHandler("ebcdic");
   1513             if (handler != NULL) return(handler);
   1514             handler = xmlFindCharEncodingHandler("EBCDIC-US");
   1515             if (handler != NULL) return(handler);
   1516 	    break;
   1517         case XML_CHAR_ENCODING_UCS4BE:
   1518             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
   1519             if (handler != NULL) return(handler);
   1520             handler = xmlFindCharEncodingHandler("UCS-4");
   1521             if (handler != NULL) return(handler);
   1522             handler = xmlFindCharEncodingHandler("UCS4");
   1523             if (handler != NULL) return(handler);
   1524 	    break;
   1525         case XML_CHAR_ENCODING_UCS4LE:
   1526             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
   1527             if (handler != NULL) return(handler);
   1528             handler = xmlFindCharEncodingHandler("UCS-4");
   1529             if (handler != NULL) return(handler);
   1530             handler = xmlFindCharEncodingHandler("UCS4");
   1531             if (handler != NULL) return(handler);
   1532 	    break;
   1533         case XML_CHAR_ENCODING_UCS4_2143:
   1534 	    break;
   1535         case XML_CHAR_ENCODING_UCS4_3412:
   1536 	    break;
   1537         case XML_CHAR_ENCODING_UCS2:
   1538             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
   1539             if (handler != NULL) return(handler);
   1540             handler = xmlFindCharEncodingHandler("UCS-2");
   1541             if (handler != NULL) return(handler);
   1542             handler = xmlFindCharEncodingHandler("UCS2");
   1543             if (handler != NULL) return(handler);
   1544 	    break;
   1545 
   1546 	    /*
   1547 	     * We used to keep ISO Latin encodings native in the
   1548 	     * generated data. This led to so many problems that
   1549 	     * this has been removed. One can still change this
   1550 	     * back by registering no-ops encoders for those
   1551 	     */
   1552         case XML_CHAR_ENCODING_8859_1:
   1553 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
   1554 	    if (handler != NULL) return(handler);
   1555 	    break;
   1556         case XML_CHAR_ENCODING_8859_2:
   1557 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
   1558 	    if (handler != NULL) return(handler);
   1559 	    break;
   1560         case XML_CHAR_ENCODING_8859_3:
   1561 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
   1562 	    if (handler != NULL) return(handler);
   1563 	    break;
   1564         case XML_CHAR_ENCODING_8859_4:
   1565 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
   1566 	    if (handler != NULL) return(handler);
   1567 	    break;
   1568         case XML_CHAR_ENCODING_8859_5:
   1569 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
   1570 	    if (handler != NULL) return(handler);
   1571 	    break;
   1572         case XML_CHAR_ENCODING_8859_6:
   1573 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
   1574 	    if (handler != NULL) return(handler);
   1575 	    break;
   1576         case XML_CHAR_ENCODING_8859_7:
   1577 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
   1578 	    if (handler != NULL) return(handler);
   1579 	    break;
   1580         case XML_CHAR_ENCODING_8859_8:
   1581 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
   1582 	    if (handler != NULL) return(handler);
   1583 	    break;
   1584         case XML_CHAR_ENCODING_8859_9:
   1585 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
   1586 	    if (handler != NULL) return(handler);
   1587 	    break;
   1588 
   1589 
   1590         case XML_CHAR_ENCODING_2022_JP:
   1591             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
   1592             if (handler != NULL) return(handler);
   1593 	    break;
   1594         case XML_CHAR_ENCODING_SHIFT_JIS:
   1595             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
   1596             if (handler != NULL) return(handler);
   1597             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
   1598             if (handler != NULL) return(handler);
   1599             handler = xmlFindCharEncodingHandler("Shift_JIS");
   1600             if (handler != NULL) return(handler);
   1601 	    break;
   1602         case XML_CHAR_ENCODING_EUC_JP:
   1603             handler = xmlFindCharEncodingHandler("EUC-JP");
   1604             if (handler != NULL) return(handler);
   1605 	    break;
   1606 	default:
   1607 	    break;
   1608     }
   1609 
   1610 #ifdef DEBUG_ENCODING
   1611     xmlGenericError(xmlGenericErrorContext,
   1612 	    "No handler found for encoding %d\n", enc);
   1613 #endif
   1614     return(NULL);
   1615 }
   1616 
   1617 /**
   1618  * xmlFindCharEncodingHandler:
   1619  * @name:  a string describing the char encoding.
   1620  *
   1621  * Search in the registered set the handler able to read/write that encoding.
   1622  *
   1623  * Returns the handler or NULL if not found
   1624  */
   1625 xmlCharEncodingHandlerPtr
   1626 xmlFindCharEncodingHandler(const char *name) {
   1627     const char *nalias;
   1628     const char *norig;
   1629     xmlCharEncoding alias;
   1630 #ifdef LIBXML_ICONV_ENABLED
   1631     xmlCharEncodingHandlerPtr enc;
   1632     iconv_t icv_in, icv_out;
   1633 #endif /* LIBXML_ICONV_ENABLED */
   1634 #ifdef LIBXML_ICU_ENABLED
   1635     xmlCharEncodingHandlerPtr encu;
   1636     uconv_t *ucv_in, *ucv_out;
   1637 #endif /* LIBXML_ICU_ENABLED */
   1638     char upper[100];
   1639     int i;
   1640 
   1641     if (handlers == NULL) xmlInitCharEncodingHandlers();
   1642     if (name == NULL) return(xmlDefaultCharEncodingHandler);
   1643     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
   1644 
   1645     /*
   1646      * Do the alias resolution
   1647      */
   1648     norig = name;
   1649     nalias = xmlGetEncodingAlias(name);
   1650     if (nalias != NULL)
   1651 	name = nalias;
   1652 
   1653     /*
   1654      * Check first for directly registered encoding names
   1655      */
   1656     for (i = 0;i < 99;i++) {
   1657         upper[i] = toupper(name[i]);
   1658 	if (upper[i] == 0) break;
   1659     }
   1660     upper[i] = 0;
   1661 
   1662     if (handlers != NULL) {
   1663         for (i = 0;i < nbCharEncodingHandler; i++) {
   1664             if (!strcmp(upper, handlers[i]->name)) {
   1665 #ifdef DEBUG_ENCODING
   1666                 xmlGenericError(xmlGenericErrorContext,
   1667                         "Found registered handler for encoding %s\n", name);
   1668 #endif
   1669                 return(handlers[i]);
   1670             }
   1671         }
   1672     }
   1673 
   1674 #ifdef LIBXML_ICONV_ENABLED
   1675     /* check whether iconv can handle this */
   1676     icv_in = iconv_open("UTF-8", name);
   1677     icv_out = iconv_open(name, "UTF-8");
   1678     if (icv_in == (iconv_t) -1) {
   1679         icv_in = iconv_open("UTF-8", upper);
   1680     }
   1681     if (icv_out == (iconv_t) -1) {
   1682 	icv_out = iconv_open(upper, "UTF-8");
   1683     }
   1684     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
   1685 	    enc = (xmlCharEncodingHandlerPtr)
   1686 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
   1687 	    if (enc == NULL) {
   1688 	        iconv_close(icv_in);
   1689 	        iconv_close(icv_out);
   1690 		return(NULL);
   1691 	    }
   1692             memset(enc, 0, sizeof(xmlCharEncodingHandler));
   1693 	    enc->name = xmlMemStrdup(name);
   1694 	    enc->input = NULL;
   1695 	    enc->output = NULL;
   1696 	    enc->iconv_in = icv_in;
   1697 	    enc->iconv_out = icv_out;
   1698 #ifdef DEBUG_ENCODING
   1699             xmlGenericError(xmlGenericErrorContext,
   1700 		    "Found iconv handler for encoding %s\n", name);
   1701 #endif
   1702 	    return enc;
   1703     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
   1704 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
   1705 		    "iconv : problems with filters for '%s'\n", name);
   1706     }
   1707 #endif /* LIBXML_ICONV_ENABLED */
   1708 #ifdef LIBXML_ICU_ENABLED
   1709     /* check whether icu can handle this */
   1710     ucv_in = openIcuConverter(name, 1);
   1711     ucv_out = openIcuConverter(name, 0);
   1712     if (ucv_in != NULL && ucv_out != NULL) {
   1713 	    encu = (xmlCharEncodingHandlerPtr)
   1714 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
   1715 	    if (encu == NULL) {
   1716                 closeIcuConverter(ucv_in);
   1717                 closeIcuConverter(ucv_out);
   1718 		return(NULL);
   1719 	    }
   1720             memset(encu, 0, sizeof(xmlCharEncodingHandler));
   1721 	    encu->name = xmlMemStrdup(name);
   1722 	    encu->input = NULL;
   1723 	    encu->output = NULL;
   1724 	    encu->uconv_in = ucv_in;
   1725 	    encu->uconv_out = ucv_out;
   1726 #ifdef DEBUG_ENCODING
   1727             xmlGenericError(xmlGenericErrorContext,
   1728 		    "Found ICU converter handler for encoding %s\n", name);
   1729 #endif
   1730 	    return encu;
   1731     } else if (ucv_in != NULL || ucv_out != NULL) {
   1732             closeIcuConverter(ucv_in);
   1733             closeIcuConverter(ucv_out);
   1734 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
   1735 		    "ICU converter : problems with filters for '%s'\n", name);
   1736     }
   1737 #endif /* LIBXML_ICU_ENABLED */
   1738 
   1739 #ifdef DEBUG_ENCODING
   1740     xmlGenericError(xmlGenericErrorContext,
   1741 	    "No handler found for encoding %s\n", name);
   1742 #endif
   1743 
   1744     /*
   1745      * Fallback using the canonical names
   1746      */
   1747     alias = xmlParseCharEncoding(norig);
   1748     if (alias != XML_CHAR_ENCODING_ERROR) {
   1749         const char* canon;
   1750         canon = xmlGetCharEncodingName(alias);
   1751         if ((canon != NULL) && (strcmp(name, canon))) {
   1752 	    return(xmlFindCharEncodingHandler(canon));
   1753         }
   1754     }
   1755 
   1756     /* If "none of the above", give up */
   1757     return(NULL);
   1758 }
   1759 
   1760 /************************************************************************
   1761  *									*
   1762  *		ICONV based generic conversion functions		*
   1763  *									*
   1764  ************************************************************************/
   1765 
   1766 #ifdef LIBXML_ICONV_ENABLED
   1767 /**
   1768  * xmlIconvWrapper:
   1769  * @cd:		iconv converter data structure
   1770  * @out:  a pointer to an array of bytes to store the result
   1771  * @outlen:  the length of @out
   1772  * @in:  a pointer to an array of ISO Latin 1 chars
   1773  * @inlen:  the length of @in
   1774  *
   1775  * Returns 0 if success, or
   1776  *     -1 by lack of space, or
   1777  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   1778  *        the result of transformation can't fit into the encoding we want), or
   1779  *     -3 if there the last byte can't form a single output char.
   1780  *
   1781  * The value of @inlen after return is the number of octets consumed
   1782  *     as the return value is positive, else unpredictable.
   1783  * The value of @outlen after return is the number of ocetes consumed.
   1784  */
   1785 static int
   1786 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
   1787                 const unsigned char *in, int *inlen) {
   1788     size_t icv_inlen, icv_outlen;
   1789     const char *icv_in = (const char *) in;
   1790     char *icv_out = (char *) out;
   1791     int ret;
   1792 
   1793     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
   1794         if (outlen != NULL) *outlen = 0;
   1795         return(-1);
   1796     }
   1797     icv_inlen = *inlen;
   1798     icv_outlen = *outlen;
   1799     ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
   1800     *inlen -= icv_inlen;
   1801     *outlen -= icv_outlen;
   1802     if ((icv_inlen != 0) || (ret == -1)) {
   1803 #ifdef EILSEQ
   1804         if (errno == EILSEQ) {
   1805             return -2;
   1806         } else
   1807 #endif
   1808 #ifdef E2BIG
   1809         if (errno == E2BIG) {
   1810             return -1;
   1811         } else
   1812 #endif
   1813 #ifdef EINVAL
   1814         if (errno == EINVAL) {
   1815             return -3;
   1816         } else
   1817 #endif
   1818         {
   1819             return -3;
   1820         }
   1821     }
   1822     return 0;
   1823 }
   1824 #endif /* LIBXML_ICONV_ENABLED */
   1825 
   1826 /************************************************************************
   1827  *									*
   1828  *		ICU based generic conversion functions	         	*
   1829  *									*
   1830  ************************************************************************/
   1831 
   1832 #ifdef LIBXML_ICU_ENABLED
   1833 /**
   1834  * xmlUconvWrapper:
   1835  * @cd: ICU uconverter data structure
   1836  * @toUnicode : non-zero if toUnicode. 0 otherwise.
   1837  * @out:  a pointer to an array of bytes to store the result
   1838  * @outlen:  the length of @out
   1839  * @in:  a pointer to an array of ISO Latin 1 chars
   1840  * @inlen:  the length of @in
   1841  *
   1842  * Returns 0 if success, or
   1843  *     -1 by lack of space, or
   1844  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   1845  *        the result of transformation can't fit into the encoding we want), or
   1846  *     -3 if there the last byte can't form a single output char.
   1847  *
   1848  * The value of @inlen after return is the number of octets consumed
   1849  *     as the return value is positive, else unpredictable.
   1850  * The value of @outlen after return is the number of ocetes consumed.
   1851  */
   1852 static int
   1853 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
   1854                 const unsigned char *in, int *inlen) {
   1855     const char *ucv_in = (const char *) in;
   1856     char *ucv_out = (char *) out;
   1857     UErrorCode err = U_ZERO_ERROR;
   1858 
   1859     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
   1860         if (outlen != NULL) *outlen = 0;
   1861         return(-1);
   1862     }
   1863 
   1864     /*
   1865      * TODO(jungshik)
   1866      * 1. is ucnv_convert(To|From)Algorithmic better?
   1867      * 2. had we better use an explicit pivot buffer?
   1868      * 3. error returned comes from 'fromUnicode' only even
   1869      *    when toUnicode is true !
   1870      */
   1871     if (toUnicode) {
   1872         /* encoding => UTF-16 => UTF-8 */
   1873         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
   1874                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
   1875                        0, TRUE, &err);
   1876     } else {
   1877         /* UTF-8 => UTF-16 => encoding */
   1878         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
   1879                        &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
   1880                        0, TRUE, &err);
   1881     }
   1882     *inlen = ucv_in - (const char*) in;
   1883     *outlen = ucv_out - (char *) out;
   1884     if (U_SUCCESS(err))
   1885         return 0;
   1886     if (err == U_BUFFER_OVERFLOW_ERROR)
   1887         return -1;
   1888     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
   1889         return -2;
   1890     /* if (err == U_TRUNCATED_CHAR_FOUND) */
   1891     return -3;
   1892 }
   1893 #endif /* LIBXML_ICU_ENABLED */
   1894 
   1895 /************************************************************************
   1896  *									*
   1897  *		The real API used by libxml for on-the-fly conversion	*
   1898  *									*
   1899  ************************************************************************/
   1900 int
   1901 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
   1902                        xmlBufferPtr in, int len);
   1903 
   1904 /**
   1905  * xmlCharEncFirstLineInt:
   1906  * @handler:	char enconding transformation data structure
   1907  * @out:  an xmlBuffer for the output.
   1908  * @in:  an xmlBuffer for the input
   1909  * @len:  number of bytes to convert for the first line, or -1
   1910  *
   1911  * Front-end for the encoding handler input function, but handle only
   1912  * the very first line, i.e. limit itself to 45 chars.
   1913  *
   1914  * Returns the number of byte written if success, or
   1915  *     -1 general error
   1916  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   1917  *        the result of transformation can't fit into the encoding we want), or
   1918  */
   1919 int
   1920 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
   1921                        xmlBufferPtr in, int len) {
   1922     int ret = -2;
   1923     int written;
   1924     int toconv;
   1925 
   1926     if (handler == NULL) return(-1);
   1927     if (out == NULL) return(-1);
   1928     if (in == NULL) return(-1);
   1929 
   1930     /* calculate space available */
   1931     written = out->size - out->use - 1; /* count '\0' */
   1932     toconv = in->use;
   1933     /*
   1934      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
   1935      * 45 chars should be sufficient to reach the end of the encoding
   1936      * declaration without going too far inside the document content.
   1937      * on UTF-16 this means 90bytes, on UCS4 this means 180
   1938      * The actual value depending on guessed encoding is passed as @len
   1939      * if provided
   1940      */
   1941     if (len >= 0) {
   1942         if (toconv > len)
   1943             toconv = len;
   1944     } else {
   1945         if (toconv > 180)
   1946             toconv = 180;
   1947     }
   1948     if (toconv * 2 >= written) {
   1949         xmlBufferGrow(out, toconv);
   1950 	written = out->size - out->use - 1;
   1951     }
   1952 
   1953     if (handler->input != NULL) {
   1954 	ret = handler->input(&out->content[out->use], &written,
   1955 	                     in->content, &toconv);
   1956 	xmlBufferShrink(in, toconv);
   1957 	out->use += written;
   1958 	out->content[out->use] = 0;
   1959     }
   1960 #ifdef LIBXML_ICONV_ENABLED
   1961     else if (handler->iconv_in != NULL) {
   1962 	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
   1963 	                      &written, in->content, &toconv);
   1964 	xmlBufferShrink(in, toconv);
   1965 	out->use += written;
   1966 	out->content[out->use] = 0;
   1967 	if (ret == -1) ret = -3;
   1968     }
   1969 #endif /* LIBXML_ICONV_ENABLED */
   1970 #ifdef LIBXML_ICU_ENABLED
   1971     else if (handler->uconv_in != NULL) {
   1972 	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
   1973 	                      &written, in->content, &toconv);
   1974 	xmlBufferShrink(in, toconv);
   1975 	out->use += written;
   1976 	out->content[out->use] = 0;
   1977 	if (ret == -1) ret = -3;
   1978     }
   1979 #endif /* LIBXML_ICU_ENABLED */
   1980 #ifdef DEBUG_ENCODING
   1981     switch (ret) {
   1982         case 0:
   1983 	    xmlGenericError(xmlGenericErrorContext,
   1984 		    "converted %d bytes to %d bytes of input\n",
   1985 	            toconv, written);
   1986 	    break;
   1987         case -1:
   1988 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
   1989 	            toconv, written, in->use);
   1990 	    break;
   1991         case -2:
   1992 	    xmlGenericError(xmlGenericErrorContext,
   1993 		    "input conversion failed due to input error\n");
   1994 	    break;
   1995         case -3:
   1996 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
   1997 	            toconv, written, in->use);
   1998 	    break;
   1999 	default:
   2000 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
   2001     }
   2002 #endif /* DEBUG_ENCODING */
   2003     /*
   2004      * Ignore when input buffer is not on a boundary
   2005      */
   2006     if (ret == -3) ret = 0;
   2007     if (ret == -1) ret = 0;
   2008     return(ret);
   2009 }
   2010 
   2011 /**
   2012  * xmlCharEncFirstLine:
   2013  * @handler:	char enconding transformation data structure
   2014  * @out:  an xmlBuffer for the output.
   2015  * @in:  an xmlBuffer for the input
   2016  *
   2017  * Front-end for the encoding handler input function, but handle only
   2018  * the very first line, i.e. limit itself to 45 chars.
   2019  *
   2020  * Returns the number of byte written if success, or
   2021  *     -1 general error
   2022  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2023  *        the result of transformation can't fit into the encoding we want), or
   2024  */
   2025 int
   2026 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
   2027                  xmlBufferPtr in) {
   2028     return(xmlCharEncFirstLineInt(handler, out, in, -1));
   2029 }
   2030 
   2031 /**
   2032  * xmlCharEncInFunc:
   2033  * @handler:	char encoding transformation data structure
   2034  * @out:  an xmlBuffer for the output.
   2035  * @in:  an xmlBuffer for the input
   2036  *
   2037  * Generic front-end for the encoding handler input function
   2038  *
   2039  * Returns the number of byte written if success, or
   2040  *     -1 general error
   2041  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2042  *        the result of transformation can't fit into the encoding we want), or
   2043  */
   2044 int
   2045 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
   2046                  xmlBufferPtr in)
   2047 {
   2048     int ret = -2;
   2049     int written;
   2050     int toconv;
   2051 
   2052     if (handler == NULL)
   2053         return (-1);
   2054     if (out == NULL)
   2055         return (-1);
   2056     if (in == NULL)
   2057         return (-1);
   2058 
   2059     toconv = in->use;
   2060     if (toconv == 0)
   2061         return (0);
   2062     written = out->size - out->use -1; /* count '\0' */
   2063     if (toconv * 2 >= written) {
   2064         xmlBufferGrow(out, out->size + toconv * 2);
   2065         written = out->size - out->use - 1;
   2066     }
   2067     if (handler->input != NULL) {
   2068         ret = handler->input(&out->content[out->use], &written,
   2069                              in->content, &toconv);
   2070         xmlBufferShrink(in, toconv);
   2071         out->use += written;
   2072         out->content[out->use] = 0;
   2073     }
   2074 #ifdef LIBXML_ICONV_ENABLED
   2075     else if (handler->iconv_in != NULL) {
   2076         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
   2077                               &written, in->content, &toconv);
   2078         xmlBufferShrink(in, toconv);
   2079         out->use += written;
   2080         out->content[out->use] = 0;
   2081         if (ret == -1)
   2082             ret = -3;
   2083     }
   2084 #endif /* LIBXML_ICONV_ENABLED */
   2085 #ifdef LIBXML_ICU_ENABLED
   2086     else if (handler->uconv_in != NULL) {
   2087         ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
   2088                               &written, in->content, &toconv);
   2089         xmlBufferShrink(in, toconv);
   2090         out->use += written;
   2091         out->content[out->use] = 0;
   2092         if (ret == -1)
   2093             ret = -3;
   2094     }
   2095 #endif /* LIBXML_ICU_ENABLED */
   2096     switch (ret) {
   2097         case 0:
   2098 #ifdef DEBUG_ENCODING
   2099             xmlGenericError(xmlGenericErrorContext,
   2100                             "converted %d bytes to %d bytes of input\n",
   2101                             toconv, written);
   2102 #endif
   2103             break;
   2104         case -1:
   2105 #ifdef DEBUG_ENCODING
   2106             xmlGenericError(xmlGenericErrorContext,
   2107                          "converted %d bytes to %d bytes of input, %d left\n",
   2108                             toconv, written, in->use);
   2109 #endif
   2110             break;
   2111         case -3:
   2112 #ifdef DEBUG_ENCODING
   2113             xmlGenericError(xmlGenericErrorContext,
   2114                         "converted %d bytes to %d bytes of input, %d left\n",
   2115                             toconv, written, in->use);
   2116 #endif
   2117             break;
   2118         case -2: {
   2119             char buf[50];
   2120 
   2121 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
   2122 		     in->content[0], in->content[1],
   2123 		     in->content[2], in->content[3]);
   2124 	    buf[49] = 0;
   2125 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
   2126 		    "input conversion failed due to input error, bytes %s\n",
   2127 		           buf);
   2128         }
   2129     }
   2130     /*
   2131      * Ignore when input buffer is not on a boundary
   2132      */
   2133     if (ret == -3)
   2134         ret = 0;
   2135     return (written? written : ret);
   2136 }
   2137 
   2138 /**
   2139  * xmlCharEncOutFunc:
   2140  * @handler:	char enconding transformation data structure
   2141  * @out:  an xmlBuffer for the output.
   2142  * @in:  an xmlBuffer for the input
   2143  *
   2144  * Generic front-end for the encoding handler output function
   2145  * a first call with @in == NULL has to be made firs to initiate the
   2146  * output in case of non-stateless encoding needing to initiate their
   2147  * state or the output (like the BOM in UTF16).
   2148  * In case of UTF8 sequence conversion errors for the given encoder,
   2149  * the content will be automatically remapped to a CharRef sequence.
   2150  *
   2151  * Returns the number of byte written if success, or
   2152  *     -1 general error
   2153  *     -2 if the transcoding fails (for *in is not valid utf8 string or
   2154  *        the result of transformation can't fit into the encoding we want), or
   2155  */
   2156 int
   2157 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
   2158                   xmlBufferPtr in) {
   2159     int ret = -2;
   2160     int written;
   2161     int writtentot = 0;
   2162     int toconv;
   2163     int output = 0;
   2164 
   2165     if (handler == NULL) return(-1);
   2166     if (out == NULL) return(-1);
   2167 
   2168 retry:
   2169 
   2170     written = out->size - out->use;
   2171 
   2172     if (written > 0)
   2173 	written--; /* Gennady: count '/0' */
   2174 
   2175     /*
   2176      * First specific handling of in = NULL, i.e. the initialization call
   2177      */
   2178     if (in == NULL) {
   2179         toconv = 0;
   2180 	if (handler->output != NULL) {
   2181 	    ret = handler->output(&out->content[out->use], &written,
   2182 				  NULL, &toconv);
   2183 	    if (ret >= 0) { /* Gennady: check return value */
   2184 		out->use += written;
   2185 		out->content[out->use] = 0;
   2186 	    }
   2187 	}
   2188 #ifdef LIBXML_ICONV_ENABLED
   2189 	else if (handler->iconv_out != NULL) {
   2190 	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
   2191 				  &written, NULL, &toconv);
   2192 	    out->use += written;
   2193 	    out->content[out->use] = 0;
   2194 	}
   2195 #endif /* LIBXML_ICONV_ENABLED */
   2196 #ifdef LIBXML_ICU_ENABLED
   2197 	else if (handler->uconv_out != NULL) {
   2198 	    ret = xmlUconvWrapper(handler->uconv_out, 0,
   2199                               &out->content[out->use],
   2200  				              &written, NULL, &toconv);
   2201 	    out->use += written;
   2202 	    out->content[out->use] = 0;
   2203 	}
   2204 #endif /* LIBXML_ICU_ENABLED */
   2205 #ifdef DEBUG_ENCODING
   2206 	xmlGenericError(xmlGenericErrorContext,
   2207 		"initialized encoder\n");
   2208 #endif
   2209         return(0);
   2210     }
   2211 
   2212     /*
   2213      * Conversion itself.
   2214      */
   2215     toconv = in->use;
   2216     if (toconv == 0)
   2217 	return(0);
   2218     if (toconv * 4 >= written) {
   2219         xmlBufferGrow(out, toconv * 4);
   2220 	written = out->size - out->use - 1;
   2221     }
   2222     if (handler->output != NULL) {
   2223 	ret = handler->output(&out->content[out->use], &written,
   2224 	                      in->content, &toconv);
   2225 	if (written > 0) {
   2226 	    xmlBufferShrink(in, toconv);
   2227 	    out->use += written;
   2228 	    writtentot += written;
   2229 	}
   2230 	out->content[out->use] = 0;
   2231     }
   2232 #ifdef LIBXML_ICONV_ENABLED
   2233     else if (handler->iconv_out != NULL) {
   2234 	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
   2235 	                      &written, in->content, &toconv);
   2236 	xmlBufferShrink(in, toconv);
   2237 	out->use += written;
   2238 	writtentot += written;
   2239 	out->content[out->use] = 0;
   2240 	if (ret == -1) {
   2241 	    if (written > 0) {
   2242 		/*
   2243 		 * Can be a limitation of iconv
   2244 		 */
   2245 		goto retry;
   2246 	    }
   2247 	    ret = -3;
   2248 	}
   2249     }
   2250 #endif /* LIBXML_ICONV_ENABLED */
   2251 #ifdef LIBXML_ICU_ENABLED
   2252     else if (handler->uconv_out != NULL) {
   2253 	ret = xmlUconvWrapper(handler->uconv_out, 0,
   2254                               &out->content[out->use],
   2255 	                      &written, in->content, &toconv);
   2256 	xmlBufferShrink(in, toconv);
   2257 	out->use += written;
   2258 	writtentot += written;
   2259 	out->content[out->use] = 0;
   2260 	if (ret == -1) {
   2261 	    if (written > 0) {
   2262 		/*
   2263 		 * Can be a limitation of iconv
   2264 		 */
   2265 		goto retry;
   2266 	    }
   2267 	    ret = -3;
   2268 	}
   2269     }
   2270 #endif /* LIBXML_ICU_ENABLED */
   2271     else {
   2272 	xmlEncodingErr(XML_I18N_NO_OUTPUT,
   2273 		       "xmlCharEncOutFunc: no output function !\n", NULL);
   2274 	return(-1);
   2275     }
   2276 
   2277     if (ret >= 0) output += ret;
   2278 
   2279     /*
   2280      * Attempt to handle error cases
   2281      */
   2282     switch (ret) {
   2283         case 0:
   2284 #ifdef DEBUG_ENCODING
   2285 	    xmlGenericError(xmlGenericErrorContext,
   2286 		    "converted %d bytes to %d bytes of output\n",
   2287 	            toconv, written);
   2288 #endif
   2289 	    break;
   2290         case -1:
   2291 #ifdef DEBUG_ENCODING
   2292 	    xmlGenericError(xmlGenericErrorContext,
   2293 		    "output conversion failed by lack of space\n");
   2294 #endif
   2295 	    break;
   2296         case -3:
   2297 #ifdef DEBUG_ENCODING
   2298 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
   2299 	            toconv, written, in->use);
   2300 #endif
   2301 	    break;
   2302         case -2: {
   2303 	    int len = in->use;
   2304 	    const xmlChar *utf = (const xmlChar *) in->content;
   2305 	    int cur;
   2306 
   2307 	    cur = xmlGetUTF8Char(utf, &len);
   2308 	    if (cur > 0) {
   2309 		xmlChar charref[20];
   2310 
   2311 #ifdef DEBUG_ENCODING
   2312 		xmlGenericError(xmlGenericErrorContext,
   2313 			"handling output conversion error\n");
   2314 		xmlGenericError(xmlGenericErrorContext,
   2315 			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
   2316 			in->content[0], in->content[1],
   2317 			in->content[2], in->content[3]);
   2318 #endif
   2319 		/*
   2320 		 * Removes the UTF8 sequence, and replace it by a charref
   2321 		 * and continue the transcoding phase, hoping the error
   2322 		 * did not mangle the encoder state.
   2323 		 */
   2324 		snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
   2325 		xmlBufferShrink(in, len);
   2326 		xmlBufferAddHead(in, charref, -1);
   2327 
   2328 		goto retry;
   2329 	    } else {
   2330 		char buf[50];
   2331 
   2332 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
   2333 			 in->content[0], in->content[1],
   2334 			 in->content[2], in->content[3]);
   2335 		buf[49] = 0;
   2336 		xmlEncodingErr(XML_I18N_CONV_FAILED,
   2337 		    "output conversion failed due to conv error, bytes %s\n",
   2338 			       buf);
   2339 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
   2340 		    in->content[0] = ' ';
   2341 	    }
   2342 	    break;
   2343 	}
   2344     }
   2345     return(ret);
   2346 }
   2347 
   2348 /**
   2349  * xmlCharEncCloseFunc:
   2350  * @handler:	char enconding transformation data structure
   2351  *
   2352  * Generic front-end for encoding handler close function
   2353  *
   2354  * Returns 0 if success, or -1 in case of error
   2355  */
   2356 int
   2357 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
   2358     int ret = 0;
   2359     int tofree = 0;
   2360     if (handler == NULL) return(-1);
   2361     if (handler->name == NULL) return(-1);
   2362 #ifdef LIBXML_ICONV_ENABLED
   2363     /*
   2364      * Iconv handlers can be used only once, free the whole block.
   2365      * and the associated icon resources.
   2366      */
   2367     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
   2368         tofree = 1;
   2369 	if (handler->iconv_out != NULL) {
   2370 	    if (iconv_close(handler->iconv_out))
   2371 		ret = -1;
   2372 	    handler->iconv_out = NULL;
   2373 	}
   2374 	if (handler->iconv_in != NULL) {
   2375 	    if (iconv_close(handler->iconv_in))
   2376 		ret = -1;
   2377 	    handler->iconv_in = NULL;
   2378 	}
   2379     }
   2380 #endif /* LIBXML_ICONV_ENABLED */
   2381 #ifdef LIBXML_ICU_ENABLED
   2382     if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
   2383         tofree = 1;
   2384 	if (handler->uconv_out != NULL) {
   2385 	    closeIcuConverter(handler->uconv_out);
   2386 	    handler->uconv_out = NULL;
   2387 	}
   2388 	if (handler->uconv_in != NULL) {
   2389 	    closeIcuConverter(handler->uconv_in);
   2390 	    handler->uconv_in = NULL;
   2391 	}
   2392     }
   2393 #endif
   2394     if (tofree) {
   2395         /* free up only dynamic handlers iconv/uconv */
   2396         if (handler->name != NULL)
   2397             xmlFree(handler->name);
   2398         handler->name = NULL;
   2399         xmlFree(handler);
   2400     }
   2401 #ifdef DEBUG_ENCODING
   2402     if (ret)
   2403         xmlGenericError(xmlGenericErrorContext,
   2404 		"failed to close the encoding handler\n");
   2405     else
   2406         xmlGenericError(xmlGenericErrorContext,
   2407 		"closed the encoding handler\n");
   2408 #endif
   2409 
   2410     return(ret);
   2411 }
   2412 
   2413 /**
   2414  * xmlByteConsumed:
   2415  * @ctxt: an XML parser context
   2416  *
   2417  * This function provides the current index of the parser relative
   2418  * to the start of the current entity. This function is computed in
   2419  * bytes from the beginning starting at zero and finishing at the
   2420  * size in byte of the file if parsing a file. The function is
   2421  * of constant cost if the input is UTF-8 but can be costly if run
   2422  * on non-UTF-8 input.
   2423  *
   2424  * Returns the index in bytes from the beginning of the entity or -1
   2425  *         in case the index could not be computed.
   2426  */
   2427 long
   2428 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
   2429     xmlParserInputPtr in;
   2430 
   2431     if (ctxt == NULL) return(-1);
   2432     in = ctxt->input;
   2433     if (in == NULL)  return(-1);
   2434     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
   2435         unsigned int unused = 0;
   2436 	xmlCharEncodingHandler * handler = in->buf->encoder;
   2437         /*
   2438 	 * Encoding conversion, compute the number of unused original
   2439 	 * bytes from the input not consumed and substract that from
   2440 	 * the raw consumed value, this is not a cheap operation
   2441 	 */
   2442         if (in->end - in->cur > 0) {
   2443 	    unsigned char convbuf[32000];
   2444 	    const unsigned char *cur = (const unsigned char *)in->cur;
   2445 	    int toconv = in->end - in->cur, written = 32000;
   2446 
   2447 	    int ret;
   2448 
   2449 	    if (handler->output != NULL) {
   2450 	        do {
   2451 		    toconv = in->end - cur;
   2452 		    written = 32000;
   2453 		    ret = handler->output(&convbuf[0], &written,
   2454 				      cur, &toconv);
   2455 		    if (ret == -1) return(-1);
   2456 		    unused += written;
   2457 		    cur += toconv;
   2458 		} while (ret == -2);
   2459 #ifdef LIBXML_ICONV_ENABLED
   2460 	    } else if (handler->iconv_out != NULL) {
   2461 	        do {
   2462 		    toconv = in->end - cur;
   2463 		    written = 32000;
   2464 		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
   2465 	                      &written, cur, &toconv);
   2466 		    if (ret < 0) {
   2467 		        if (written > 0)
   2468 			    ret = -2;
   2469 			else
   2470 			    return(-1);
   2471 		    }
   2472 		    unused += written;
   2473 		    cur += toconv;
   2474 		} while (ret == -2);
   2475 #endif
   2476 #ifdef LIBXML_ICU_ENABLED
   2477 	    } else if (handler->uconv_out != NULL) {
   2478 	        do {
   2479 		    toconv = in->end - cur;
   2480 		    written = 32000;
   2481 		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
   2482 	                      &written, cur, &toconv);
   2483 		    if (ret < 0) {
   2484 		        if (written > 0)
   2485 			    ret = -2;
   2486 			else
   2487 			    return(-1);
   2488 		    }
   2489 		    unused += written;
   2490 		    cur += toconv;
   2491 		} while (ret == -2);
   2492 #endif
   2493             } else {
   2494 	        /* could not find a converter */
   2495 	        return(-1);
   2496 	    }
   2497 	}
   2498 	if (in->buf->rawconsumed < unused)
   2499 	    return(-1);
   2500 	return(in->buf->rawconsumed - unused);
   2501     }
   2502     return(in->consumed + (in->cur - in->base));
   2503 }
   2504 
   2505 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
   2506 #ifdef LIBXML_ISO8859X_ENABLED
   2507 
   2508 /**
   2509  * UTF8ToISO8859x:
   2510  * @out:  a pointer to an array of bytes to store the result
   2511  * @outlen:  the length of @out
   2512  * @in:  a pointer to an array of UTF-8 chars
   2513  * @inlen:  the length of @in
   2514  * @xlattable: the 2-level transcoding table
   2515  *
   2516  * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
   2517  * block of chars out.
   2518  *
   2519  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
   2520  * The value of @inlen after return is the number of octets consumed
   2521  *     as the return value is positive, else unpredictable.
   2522  * The value of @outlen after return is the number of ocetes consumed.
   2523  */
   2524 static int
   2525 UTF8ToISO8859x(unsigned char* out, int *outlen,
   2526               const unsigned char* in, int *inlen,
   2527               unsigned char const *xlattable) {
   2528     const unsigned char* outstart = out;
   2529     const unsigned char* inend;
   2530     const unsigned char* instart = in;
   2531     const unsigned char* processed = in;
   2532 
   2533     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
   2534         (xlattable == NULL))
   2535 	return(-1);
   2536     if (in == NULL) {
   2537         /*
   2538         * initialization nothing to do
   2539         */
   2540         *outlen = 0;
   2541         *inlen = 0;
   2542         return(0);
   2543     }
   2544     inend = in + (*inlen);
   2545     while (in < inend) {
   2546         unsigned char d = *in++;
   2547         if  (d < 0x80)  {
   2548             *out++ = d;
   2549         } else if (d < 0xC0) {
   2550             /* trailing byte in leading position */
   2551             *outlen = out - outstart;
   2552             *inlen = processed - instart;
   2553             return(-2);
   2554         } else if (d < 0xE0) {
   2555             unsigned char c;
   2556             if (!(in < inend)) {
   2557                 /* trailing byte not in input buffer */
   2558                 *outlen = out - outstart;
   2559                 *inlen = processed - instart;
   2560                 return(-3);
   2561             }
   2562             c = *in++;
   2563             if ((c & 0xC0) != 0x80) {
   2564                 /* not a trailing byte */
   2565                 *outlen = out - outstart;
   2566                 *inlen = processed - instart;
   2567                 return(-2);
   2568             }
   2569             c = c & 0x3F;
   2570             d = d & 0x1F;
   2571             d = xlattable [48 + c + xlattable [d] * 64];
   2572             if (d == 0) {
   2573                 /* not in character set */
   2574                 *outlen = out - outstart;
   2575                 *inlen = processed - instart;
   2576                 return(-2);
   2577             }
   2578             *out++ = d;
   2579         } else if (d < 0xF0) {
   2580             unsigned char c1;
   2581             unsigned char c2;
   2582             if (!(in < inend - 1)) {
   2583                 /* trailing bytes not in input buffer */
   2584                 *outlen = out - outstart;
   2585                 *inlen = processed - instart;
   2586                 return(-3);
   2587             }
   2588             c1 = *in++;
   2589             if ((c1 & 0xC0) != 0x80) {
   2590                 /* not a trailing byte (c1) */
   2591                 *outlen = out - outstart;
   2592                 *inlen = processed - instart;
   2593                 return(-2);
   2594             }
   2595             c2 = *in++;
   2596             if ((c2 & 0xC0) != 0x80) {
   2597                 /* not a trailing byte (c2) */
   2598                 *outlen = out - outstart;
   2599                 *inlen = processed - instart;
   2600                 return(-2);
   2601             }
   2602             c1 = c1 & 0x3F;
   2603             c2 = c2 & 0x3F;
   2604 	    d = d & 0x0F;
   2605 	    d = xlattable [48 + c2 + xlattable [48 + c1 +
   2606 	    		xlattable [32 + d] * 64] * 64];
   2607             if (d == 0) {
   2608                 /* not in character set */
   2609                 *outlen = out - outstart;
   2610                 *inlen = processed - instart;
   2611                 return(-2);
   2612             }
   2613             *out++ = d;
   2614         } else {
   2615             /* cannot transcode >= U+010000 */
   2616             *outlen = out - outstart;
   2617             *inlen = processed - instart;
   2618             return(-2);
   2619         }
   2620         processed = in;
   2621     }
   2622     *outlen = out - outstart;
   2623     *inlen = processed - instart;
   2624     return(*outlen);
   2625 }
   2626 
   2627 /**
   2628  * ISO8859xToUTF8
   2629  * @out:  a pointer to an array of bytes to store the result
   2630  * @outlen:  the length of @out
   2631  * @in:  a pointer to an array of ISO Latin 1 chars
   2632  * @inlen:  the length of @in
   2633  *
   2634  * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
   2635  * block of chars out.
   2636  * Returns 0 if success, or -1 otherwise
   2637  * The value of @inlen after return is the number of octets consumed
   2638  * The value of @outlen after return is the number of ocetes produced.
   2639  */
   2640 static int
   2641 ISO8859xToUTF8(unsigned char* out, int *outlen,
   2642               const unsigned char* in, int *inlen,
   2643               unsigned short const *unicodetable) {
   2644     unsigned char* outstart = out;
   2645     unsigned char* outend;
   2646     const unsigned char* instart = in;
   2647     const unsigned char* inend;
   2648     const unsigned char* instop;
   2649     unsigned int c;
   2650 
   2651     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
   2652         (in == NULL) || (unicodetable == NULL))
   2653 	return(-1);
   2654     outend = out + *outlen;
   2655     inend = in + *inlen;
   2656     instop = inend;
   2657 
   2658     while ((in < inend) && (out < outend - 2)) {
   2659         if (*in >= 0x80) {
   2660             c = unicodetable [*in - 0x80];
   2661             if (c == 0) {
   2662                 /* undefined code point */
   2663                 *outlen = out - outstart;
   2664                 *inlen = in - instart;
   2665                 return (-1);
   2666             }
   2667             if (c < 0x800) {
   2668                 *out++ = ((c >>  6) & 0x1F) | 0xC0;
   2669                 *out++ = (c & 0x3F) | 0x80;
   2670             } else {
   2671                 *out++ = ((c >>  12) & 0x0F) | 0xE0;
   2672                 *out++ = ((c >>  6) & 0x3F) | 0x80;
   2673                 *out++ = (c & 0x3F) | 0x80;
   2674             }
   2675             ++in;
   2676         }
   2677         if (instop - in > outend - out) instop = in + (outend - out);
   2678         while ((*in < 0x80) && (in < instop)) {
   2679             *out++ = *in++;
   2680         }
   2681     }
   2682     if ((in < inend) && (out < outend) && (*in < 0x80)) {
   2683         *out++ =  *in++;
   2684     }
   2685     if ((in < inend) && (out < outend) && (*in < 0x80)) {
   2686         *out++ =  *in++;
   2687     }
   2688     *outlen = out - outstart;
   2689     *inlen = in - instart;
   2690     return (*outlen);
   2691 }
   2692 
   2693 
   2694 /************************************************************************
   2695  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
   2696  ************************************************************************/
   2697 
   2698 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-2 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   2699     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   2700     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   2701     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   2702     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   2703     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
   2704     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
   2705     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
   2706     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
   2707     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
   2708     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
   2709     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
   2710     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
   2711     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
   2712     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
   2713     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
   2714     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
   2715 };
   2716 
   2717 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
            /*
             * 48 index bytes followed by 6 blocks of 64 bytes.  The shape
             * matches the @xlattable argument of UTF8ToISO8859x(): bytes 0..31
             * are indexed by the low 5 bits of a 2-byte UTF-8 lead byte, bytes
             * 32..47 by the low 4 bits of a 3-byte lead byte; each selects a
             * 64-byte block (blocks start at offset 48) indexed by the low
             * 6 bits of the next byte.  The last block reached holds the
             * output byte, 0 meaning "not in character set".
             * Presumably the UTF-8 to ISO-8859-2 table (going by the name) --
             * the user of this table is not shown here.
             */
   2718     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
   2719     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2720     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2721     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2722     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2723     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2724     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2725     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   2726     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   2727     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
   2728     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
   2729     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
   2730     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
   2731     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2732     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
   2733     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
   2734     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
   2735     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2736     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2737     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
   2738     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
   2739     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
   2740     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
   2741     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
   2742     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
   2743     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
   2744     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
   2745 };
   2746 
   2747 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-3 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   2748     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   2749     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   2750     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   2751     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   2752     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
   2753     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
   2754     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
   2755     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
   2756     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
   2757     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   2758     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
   2759     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
   2760     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
   2761     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   2762     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
   2763     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
   2764 };
   2765 
   2766 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
            /*
             * 48 index bytes followed by 7 blocks of 64 bytes.  The shape
             * matches the @xlattable argument of UTF8ToISO8859x(): bytes 0..31
             * are indexed by the low 5 bits of a 2-byte UTF-8 lead byte, bytes
             * 32..47 by the low 4 bits of a 3-byte lead byte; each selects a
             * 64-byte block (blocks start at offset 48) indexed by the low
             * 6 bits of the next byte.  The last block reached holds the
             * output byte, 0 meaning "not in character set".
             * Presumably the UTF-8 to ISO-8859-3 table (going by the name) --
             * the user of this table is not shown here.
             */
   2767     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
   2768     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2769     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2770     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2771     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2772     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2773     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2774     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   2775     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   2776     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
   2777     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
   2778     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
   2779     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
   2780     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
   2781     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2782     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2783     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
   2784     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2785     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2786     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2787     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2788     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2789     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2790     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2791     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
   2792     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
   2793     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
   2794     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   2795     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
   2796     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   2797     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
   2798 };
   2799 
   2800 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-4 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   2801     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   2802     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   2803     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   2804     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   2805     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
   2806     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
   2807     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
   2808     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
   2809     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
   2810     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
   2811     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
   2812     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
   2813     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
   2814     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
   2815     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
   2816     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
   2817 };
   2818 
   2819 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
            /*
             * 48 index bytes followed by 6 blocks of 64 bytes.  The shape
             * matches the @xlattable argument of UTF8ToISO8859x(): bytes 0..31
             * are indexed by the low 5 bits of a 2-byte UTF-8 lead byte, bytes
             * 32..47 by the low 4 bits of a 3-byte lead byte; each selects a
             * 64-byte block (blocks start at offset 48) indexed by the low
             * 6 bits of the next byte.  The last block reached holds the
             * output byte, 0 meaning "not in character set".
             * Presumably the UTF-8 to ISO-8859-4 table (going by the name) --
             * the user of this table is not shown here.
             */
   2820     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
   2821     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2822     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2823     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2824     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2825     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2826     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2827     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   2828     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   2829     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
   2830     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
   2831     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
   2832     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
   2833     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
   2834     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
   2835     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
   2836     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
   2837     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
   2838     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
   2839     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
   2840     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
   2841     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2842     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2843     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
   2844     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
   2845     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
   2846     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
   2847 };
   2848 
   2849 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-5 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   2850     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   2851     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   2852     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   2853     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   2854     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
   2855     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
   2856     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
   2857     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
   2858     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
   2859     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
   2860     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
   2861     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
   2862     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
   2863     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
   2864     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
   2865     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
   2866 };
   2867 
   2868 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
            /*
             * 48 index bytes followed by 6 blocks of 64 bytes.  The shape
             * matches the @xlattable argument of UTF8ToISO8859x(): bytes 0..31
             * are indexed by the low 5 bits of a 2-byte UTF-8 lead byte, bytes
             * 32..47 by the low 4 bits of a 3-byte lead byte; each selects a
             * 64-byte block (blocks start at offset 48) indexed by the low
             * 6 bits of the next byte.  The last block reached holds the
             * output byte, 0 meaning "not in character set".
             * Presumably the UTF-8 to ISO-8859-5 table (going by the name) --
             * the user of this table is not shown here.
             */
   2869     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2870     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2871     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2872     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2873     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2874     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2875     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2876     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   2877     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   2878     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
   2879     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2880     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
   2881     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
   2882     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   2883     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
   2884     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   2885     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
   2886     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2887     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2888     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2889     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2890     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2891     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2892     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2893     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2894     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2895     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2896 };
   2897 
   2898 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-6 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   2899     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   2900     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   2901     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   2902     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   2903     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
   2904     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
   2905     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   2906     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
   2907     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
   2908     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
   2909     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
   2910     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   2911     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
   2912     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
   2913     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   2914     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   2915 };
   2916 
   2917 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
            /*
             * 48 index bytes followed by 5 blocks of 64 bytes.  The shape
             * matches the @xlattable argument of UTF8ToISO8859x(): bytes 0..31
             * are indexed by the low 5 bits of a 2-byte UTF-8 lead byte, bytes
             * 32..47 by the low 4 bits of a 3-byte lead byte; each selects a
             * 64-byte block (blocks start at offset 48) indexed by the low
             * 6 bits of the next byte.  The last block reached holds the
             * output byte, 0 meaning "not in character set".
             * Presumably the UTF-8 to ISO-8859-6 table (going by the name) --
             * the user of this table is not shown here.
             */
   2918     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2919     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
   2920     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2921     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2922     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2923     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2924     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2925     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   2926     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   2927     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
   2928     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2929     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2930     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2931     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2932     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2933     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
   2934     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
   2935     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   2936     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
   2937     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   2938     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2939     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2940     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2941 };
   2942 
   2943 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-7 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   2944     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   2945     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   2946     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   2947     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   2948     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
   2949     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
   2950     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
   2951     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
   2952     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
   2953     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
   2954     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
   2955     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
   2956     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
   2957     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
   2958     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
   2959     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
   2960 };
   2961 
   2962 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
            /*
             * 48 index bytes followed by 7 blocks of 64 bytes.  The shape
             * matches the @xlattable argument of UTF8ToISO8859x(): bytes 0..31
             * are indexed by the low 5 bits of a 2-byte UTF-8 lead byte, bytes
             * 32..47 by the low 4 bits of a 3-byte lead byte; each selects a
             * 64-byte block (blocks start at offset 48) indexed by the low
             * 6 bits of the next byte.  The last block reached holds the
             * output byte, 0 meaning "not in character set".
             * Presumably the UTF-8 to ISO-8859-7 table (going by the name) --
             * the user of this table is not shown here.
             */
   2963     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
   2964     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2965     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2966     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2967     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2968     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2969     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2970     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   2971     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   2972     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
   2973     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
   2974     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2975     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2976     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2977     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2978     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2979     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
   2980     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2981     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2982     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2983     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2984     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2985     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2986     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
   2987     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   2988     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
   2989     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   2990     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
   2991     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2992     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2993     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   2994 };
   2995 
   2996 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-8 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   2997     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   2998     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   2999     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3000     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3001     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
   3002     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
   3003     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
   3004     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
   3005     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3006     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3007     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   3008     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
   3009     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
   3010     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
   3011     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
   3012     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
   3013 };
   3014 
   3015 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
            /*
             * 48 index bytes followed by 7 blocks of 64 bytes.  The shape
             * matches the @xlattable argument of UTF8ToISO8859x(): bytes 0..31
             * are indexed by the low 5 bits of a 2-byte UTF-8 lead byte, bytes
             * 32..47 by the low 4 bits of a 3-byte lead byte; each selects a
             * 64-byte block (blocks start at offset 48) indexed by the low
             * 6 bits of the next byte.  The last block reached holds the
             * output byte, 0 meaning "not in character set".
             * Presumably the UTF-8 to ISO-8859-8 table (going by the name) --
             * the user of this table is not shown here.
             */
   3016     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3017     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
   3018     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3019     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3020     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3021     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3022     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3023     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3024     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3025     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
   3026     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
   3027     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3028     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3029     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3030     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3031     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3032     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
   3033     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3034     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
   3035     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3036     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3037     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3038     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3039     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
   3040     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
   3041     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3042     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3043     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3044     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3045     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
   3046     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3047 };
   3048 
   3049 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
            /*
             * 128 entries of 16 bits.  The shape matches the @unicodetable
             * argument of ISO8859xToUTF8(), which indexes it with
             * (byte - 0x80) and treats a 0 entry as an undefined code point.
             * Presumably the Unicode values of ISO-8859-9 bytes 0x80..0xFF
             * (going by the name) -- the user of this table is not shown here.
             */
   3050     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3051     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3052     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3053     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3054     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
   3055     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
   3056     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
   3057     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
   3058     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
   3059     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3060     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
   3061     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
   3062     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
   3063     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3064     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
   3065     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
   3066 };
   3067 
   3068 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
   3069     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3070     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3071     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3072     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3073     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3074     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3075     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3076     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3077     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3078     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
   3079     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
   3080     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3081     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
   3082     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3083     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
   3084     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3085     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
   3086     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3087     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3088     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3089     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
   3090     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3091     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3092 };
   3093 
   3094 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
   3095     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3096     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3097     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3098     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3099     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
   3100     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
   3101     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
   3102     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
   3103     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
   3104     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
   3105     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
   3106     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
   3107     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
   3108     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
   3109     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
   3110     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
   3111 };
   3112 
   3113 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
   3114     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3115     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3116     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3117     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3118     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3119     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3120     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3121     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3122     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3123     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
   3124     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
   3125     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
   3126     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
   3127     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
   3128     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
   3129     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
   3130     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3131     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
   3132     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
   3133     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3134     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3135     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3136     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3137     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3138     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3139     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3140     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3141     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
   3142     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
   3143     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
   3144     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
   3145 };
   3146 
   3147 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
   3148     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3149     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3150     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3151     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3152     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
   3153     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
   3154     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
   3155     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
   3156     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
   3157     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
   3158     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
   3159     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
   3160     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
   3161     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
   3162     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
   3163     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
   3164 };
   3165 
   3166 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
   3167     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3168     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3169     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3170     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3171     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3172     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3173     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3174     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3175     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3176     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3177     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3178     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3179     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3180     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3181     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
   3182     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
   3183     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
   3184     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3185     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
   3186     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3187     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3188     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3189     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3190     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3191     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
   3192     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3193     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3194 };
   3195 
   3196 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
   3197     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3198     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3199     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3200     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3201     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
   3202     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
   3203     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
   3204     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
   3205     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
   3206     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
   3207     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
   3208     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
   3209     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
   3210     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
   3211     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
   3212     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
   3213 };
   3214 
   3215 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
   3216     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3217     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3218     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3219     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3220     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3221     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3222     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3223     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3224     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3225     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
   3226     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
   3227     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3228     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3229     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3230     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3231     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3232     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
   3233     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3234     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3235     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
   3236     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
   3237     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
   3238     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
   3239     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
   3240     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
   3241     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
   3242     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
   3243     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
   3244     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
   3245     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
   3246     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
   3247 };
   3248 
   3249 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
   3250     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3251     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3252     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3253     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3254     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
   3255     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
   3256     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
   3257     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
   3258     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
   3259     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3260     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
   3261     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
   3262     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
   3263     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3264     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
   3265     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
   3266 };
   3267 
   3268 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
   3269     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3270     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3271     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3272     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3273     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3274     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3275     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3276     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3277     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3278     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
   3279     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3280     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3281     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3282     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3283     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
   3284     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
   3285     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
   3286     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3287     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3288     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
   3289     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3290     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3291     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3292     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3293     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3294     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3295     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3296     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3297     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3298     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3299     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3300     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3301     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3302     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3303     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
   3304     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3305     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
   3306     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
   3307     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3308     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3309     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
   3310     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3311     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
   3312 };
   3313 
   3314 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
   3315     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3316     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3317     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3318     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3319     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
   3320     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
   3321     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
   3322     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
   3323     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
   3324     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3325     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
   3326     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
   3327     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
   3328     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3329     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
   3330     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
   3331 };
   3332 
   3333 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
   3334     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3335     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3336     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3337     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3338     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3339     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3340     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3341     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3342     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3343     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
   3344     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
   3345     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3346     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3347     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3348     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3349     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3350     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3351     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
   3352     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3353     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3354     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3355     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3356     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
   3357     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3358     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
   3359     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3360     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
   3361 };
   3362 
   3363 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
   3364     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
   3365     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
   3366     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
   3367     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
   3368     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
   3369     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
   3370     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
   3371     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
   3372     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
   3373     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
   3374     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
   3375     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
   3376     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
   3377     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
   3378     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
   3379     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
   3380 };
   3381 
   3382 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
   3383     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
   3384     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3385     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3386     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3387     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3388     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3389     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3390     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
   3391     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
   3392     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
   3393     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
   3394     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
   3395     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
   3396     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3397     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3398     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3399     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
   3400     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3401     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
   3402     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3403     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3404     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3405     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3406     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3407     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3408     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
   3409     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3410     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3411     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
   3412     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3413     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3414     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3415     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
   3416     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3417     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
   3418     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
   3419     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
   3420     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
   3421     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
   3422 };
   3423 
   3424 
   3425 /*
   3426  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
   3427  */
   3428 
   3429 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
   3430     const unsigned char* in, int *inlen) {
   3431     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
   3432 }
   3433 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
   3434     const unsigned char* in, int *inlen) {
   3435     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
   3436 }
   3437 
   3438 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
   3439     const unsigned char* in, int *inlen) {
   3440     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
   3441 }
   3442 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
   3443     const unsigned char* in, int *inlen) {
   3444     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
   3445 }
   3446 
   3447 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
   3448     const unsigned char* in, int *inlen) {
   3449     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
   3450 }
   3451 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
   3452     const unsigned char* in, int *inlen) {
   3453     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
   3454 }
   3455 
   3456 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
   3457     const unsigned char* in, int *inlen) {
   3458     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
   3459 }
   3460 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
   3461     const unsigned char* in, int *inlen) {
   3462     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
   3463 }
   3464 
   3465 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
   3466     const unsigned char* in, int *inlen) {
   3467     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
   3468 }
   3469 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
   3470     const unsigned char* in, int *inlen) {
   3471     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
   3472 }
   3473 
   3474 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
   3475     const unsigned char* in, int *inlen) {
   3476     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
   3477 }
   3478 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
   3479     const unsigned char* in, int *inlen) {
   3480     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
   3481 }
   3482 
   3483 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
   3484     const unsigned char* in, int *inlen) {
   3485     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
   3486 }
   3487 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
   3488     const unsigned char* in, int *inlen) {
   3489     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
   3490 }
   3491 
   3492 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
   3493     const unsigned char* in, int *inlen) {
   3494     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
   3495 }
   3496 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
   3497     const unsigned char* in, int *inlen) {
   3498     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
   3499 }
   3500 
   3501 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
   3502     const unsigned char* in, int *inlen) {
   3503     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
   3504 }
   3505 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
   3506     const unsigned char* in, int *inlen) {
   3507     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
   3508 }
   3509 
   3510 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
   3511     const unsigned char* in, int *inlen) {
   3512     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
   3513 }
   3514 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
   3515     const unsigned char* in, int *inlen) {
   3516     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
   3517 }
   3518 
   3519 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
   3520     const unsigned char* in, int *inlen) {
   3521     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
   3522 }
   3523 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
   3524     const unsigned char* in, int *inlen) {
   3525     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
   3526 }
   3527 
   3528 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
   3529     const unsigned char* in, int *inlen) {
   3530     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
   3531 }
   3532 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
   3533     const unsigned char* in, int *inlen) {
   3534     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
   3535 }
   3536 
   3537 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
   3538     const unsigned char* in, int *inlen) {
   3539     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
   3540 }
   3541 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
   3542     const unsigned char* in, int *inlen) {
   3543     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
   3544 }
   3545 
   3546 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
   3547     const unsigned char* in, int *inlen) {
   3548     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
   3549 }
   3550 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
   3551     const unsigned char* in, int *inlen) {
   3552     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
   3553 }
   3554 
   3555 static void
   3556 xmlRegisterCharEncodingHandlersISO8859x (void) {
   3557     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
   3558     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
   3559     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
   3560     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
   3561     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
   3562     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
   3563     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
   3564     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
   3565     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
   3566     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
   3567     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
   3568     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
   3569     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
   3570     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
   3571 }
   3572 
   3573 #endif
   3574 #endif
   3575 
   3576 #define bottom_encoding
   3577 #include "elfgcchack.h"
   3578