Cross Reference: /external/libxml2/encoding.c

Lines Matching defs:in
7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
12  *                described in Unicode Technical Report #4.
162  * @in:  a pointer to an array of ASCII chars
163  * @inlen:  the length of @in
165  * Take a block of ASCII chars in and try to convert it to an UTF-8
174               const unsigned char* in, int *inlen) {
176     const unsigned char* base = in;
177     const unsigned char* processed = in;
182     inend = in + (*inlen);
183     while ((in < inend) && (out - outstart + 5 < *outlen)) {
184 	c= *in++;
196 	processed = (const unsigned char*) in;
208  * @in:  a pointer to an array of UTF-8 chars
209  * @inlen:  the length of @in
211  * Take a block of UTF-8 chars in and try to convert it to an ASCII
221               const unsigned char* in, int *inlen) {
222     const unsigned char* processed = in;
225     const unsigned char* instart = in;
231     if (in == NULL) {
239     inend = in + (*inlen);
241     while (in < inend) {
242 	d = *in++;
245 	    /* trailing byte in leading position */
253 	    /* no chance for this in Ascii */
259 	if (inend - in < trailing) {
264 	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
276 	    /* no chance for this in Ascii */
281 	processed = in;
293  * @in:  a pointer to an array of ISO Latin 1 chars
294  * @inlen:  the length of @in
296  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
305               const unsigned char* in, int *inlen) {
307     const unsigned char* base = in;
312     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
316     inend = in + (*inlen);
319     while ((in < inend) && (out < outend - 1)) {
320 	if (*in >= 0x80) {
321 	    *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
322             *out++ = ((*in) & 0x3F) | 0x80;
323 	    ++in;
325 	if ((instop - in) > (outend - out)) instop = in + (outend - out);
326 	while ((in < instop) && (*in < 0x80)) {
327 	    *out++ = *in++;
330     if ((in < inend) && (out < outend) && (*in < 0x80)) {
331         *out++ = *in++;
334     *inlen = in - base;
343  * @inlenb:  the length of @in in UTF-8 chars
380  * @in:  a pointer to an array of UTF-8 chars
381  * @inlen:  the length of @in
383  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
394               const unsigned char* in, int *inlen) {
395     const unsigned char* processed = in;
398     const unsigned char* instart = in;
404     if (in == NULL) {
412     inend = in + (*inlen);
414     while (in < inend) {
415 	d = *in++;
418 	    /* trailing byte in leading position */
426 	    /* no chance for this in IsoLat1 */
432 	if (inend - in < trailing) {
437 	    if (in >= inend)
439 	    if (((d= *in++) & 0xC0) != 0x80) {
454 	    /* no chance for this in IsoLat1 */
459 	processed = in;
472  * @inlenb:  the length of @in in UTF-16LE chars
474  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
480  *     if the transcoding fails (if *in is not a valid utf16 string)
491     unsigned short* in = (unsigned short*) inb;
500     inend = in + inlen;
501     while ((in < inend) && (out - outstart + 5 < *outlen)) {
503 	    c= *in++;
505 	    tmp = (unsigned char *) in;
508 	    in++;
511 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
515 		d = *in++;
517 		tmp = (unsigned char *) in;
520 		in++;
548 	processed = (const unsigned char*) in;
560  * @in:  a pointer to an array of UTF-8 chars
561  * @inlen:  the length of @in
563  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
571             const unsigned char* in, int *inlen)
574     const unsigned char* processed = in;
575     const unsigned char *const instart = in;
586     if (in == NULL) {
591     inend= in + *inlen;
593     while (in < inend) {
594       d= *in++;
597           /* trailing byte in leading position */
605 	/* no chance for this in UTF-16 */
611       if (inend - in < trailing) {
616           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
658 	processed = in;
669  * @in:  a pointer to an array of UTF-8 chars
670  * @inlen:  the length of @in
672  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
680             const unsigned char* in, int *inlen)
682     if (in == NULL) {
701     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
710  * @inlenb:  the length of @in in UTF-16 chars
712  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
718  *     if the transcoding fails (if *in is not a valid utf16 string)
729     unsigned short* in = (unsigned short*) inb;
738     inend= in + inlen;
739     while (in < inend) {
741 	    tmp = (unsigned char *) in;
745 	    in++;
747 	    c= *in++;
750 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
756 		tmp = (unsigned char *) in;
760 		in++;
762 		d= *in++;
790 	processed = (const unsigned char*) in;
802  * @in:  a pointer to an array of UTF-8 chars
803  * @inlen:  the length of @in
805  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
813             const unsigned char* in, int *inlen)
816     const unsigned char* processed = in;
817     const unsigned char *const instart = in;
828     if (in == NULL) {
833     inend= in + *inlen;
835     while (in < inend) {
836       d= *in++;
839           /* trailing byte in leading position */
847           /* no chance for this in UTF-16 */
853       if (inend - in < trailing) {
858           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
897 	processed = in;
913  * @in:  a pointer to the first bytes of the XML entity, must be at least
923 xmlDetectCharEncoding(const unsigned char* in, int len)
925     if (in == NULL)
928 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
929 	    (in[2] == 0x00) && (in[3] == 0x3C))
931 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
932 	    (in[2] == 0x00) && (in[3] == 0x00))
934 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
935 	    (in[2] == 0x3C) && (in[3] == 0x00))
937 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
938 	    (in[2] == 0x00) && (in[3] == 0x00))
940 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
941 	    (in[2] == 0xA7) && (in[3] == 0x94))
943 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
944 	    (in[2] == 0x78) && (in[3] == 0x6D))
951 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 	    (in[2] == 0x3F) && (in[3] == 0x00))
954 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
955 	    (in[2] == 0x00) && (in[3] == 0x3F))
963 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
964 	    (in[2] == 0xBF))
969 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
971 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
1003  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1039  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1040  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1045  * Returns 0 in case of success, -1 in case of error
1098  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1102  * Returns 0 in case of success, -1 in case of error
1131  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1135  * [XML] 4.3.3 Character Encoding in Entities.
1169      *       already found and in use
1180      *       already found and in use
1219  * Section 4.3.3  Character Encoding in Entities
1300  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1306  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1385  *       in normal processing.
1493  * Search in the registered set the handler able to read/write that encoding.
1553 	     * We used to keep ISO Latin encodings native in the
1627  * Search in the registered set the handler able to read/write that encoding.
1778  * @in:  a pointer to an array of ISO Latin 1 chars
1779  * @inlen:  the length of @in
1783  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1793                 const unsigned char *in, int *inlen) {
1795     const char *icv_in = (const char *) in;
1799     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1845  * @in:  a pointer to an array of ISO Latin 1 chars
1846  * @inlen:  the length of @in
1850  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1860                 const unsigned char *in, int *inlen) {
1861     const char *ucv_in = (const char *) in;
1865     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1888     *inlen = ucv_in - (const char*) in;
1911  * @in:  an xmlBuffer for the input
1919  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1924                        xmlBufferPtr in, int len) {
1931     if (in == NULL) return(-1);
1935     toconv = in->use;
1958 	                     in->content, &toconv);
1959 	xmlBufferShrink(in, toconv);
1966 	                      &written, in->content, &toconv);
1967 	xmlBufferShrink(in, toconv);
1976 	                      &written, in->content, &toconv);
1977 	xmlBufferShrink(in, toconv);
1992 	            toconv, written, in->use);
2000 	            toconv, written, in->use);
2018  * @in:  an xmlBuffer for the input
2025  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2030                  xmlBufferPtr in) {
2031     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2046  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2057     xmlBufPtr in;
2064     in = input->raw;
2066     toconv = xmlBufUse(in);
2096                                     xmlBufContent(in), &c_in);
2097         xmlBufShrink(in, c_in);
2103                               &c_out, xmlBufContent(in), &c_in);
2104         xmlBufShrink(in, c_in);
2113                               &c_out, xmlBufContent(in), &c_in);
2114         xmlBufShrink(in, c_in);
2132                             c_in, c_out, (int)xmlBufUse(in));
2139                             c_in, c_out, (int)xmlBufUse(in));
2144             const xmlChar *content = xmlBufContent(in);
2172  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2183     xmlBufPtr in;
2190     in = input->raw;
2192     toconv = xmlBufUse(in);
2213                                     xmlBufContent(in), &c_in);
2214         xmlBufShrink(in, c_in);
2220                               &c_out, xmlBufContent(in), &c_in);
2221         xmlBufShrink(in, c_in);
2230                               &c_out, xmlBufContent(in), &c_in);
2231         xmlBufShrink(in, c_in);
2249                             c_in, c_out, (int)xmlBufUse(in));
2256                             c_in, c_out, (int)xmlBufUse(in));
2261             const xmlChar *content = xmlBufContent(in);
2284  * @in:  an xmlBuffer for the input
2290  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2295                  xmlBufferPtr in)
2305     if (in == NULL)
2308     toconv = in->use;
2318                              in->content, &toconv);
2319         xmlBufferShrink(in, toconv);
2326                               &written, in->content, &toconv);
2327         xmlBufferShrink(in, toconv);
2337                               &written, in->content, &toconv);
2338         xmlBufferShrink(in, toconv);
2357                             toconv, written, in->use);
2364                             toconv, written, in->use);
2371 		     in->content[0], in->content[1],
2372 		     in->content[2], in->content[3]);
2395  * output in case of non-stateless encoding needing to initiate their
2396  * state or the output (like the BOM in UTF16).
2397  * In case of UTF8 sequence conversion errors for the given encoder,
2402  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2414     xmlBufPtr in;
2422     in = output->buffer;
2466     toconv = xmlBufUse(in);
2482                                       xmlBufContent(in), &c_in);
2484             xmlBufShrink(in, c_in);
2492                               &c_out, xmlBufContent(in), &c_in);
2493         xmlBufShrink(in, c_in);
2511                               &c_out, xmlBufContent(in), &c_in);
2512         xmlBufShrink(in, c_in);
2555 	            c_in, c_out, (int) xmlBufUse(in));
2559 	    int len = (int) xmlBufUse(in);
2560             xmlChar *content = xmlBufContent(in);
2571 		xmlBufShrink(in, charref_len - c_out);
2594 		xmlBufShrink(in, len);
2595 		xmlBufAddHead(in, charref, -1);
2608 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2622  * @in:  an xmlBuffer for the input
2625  * a first call with @in == NULL has to be made firs to initiate the
2626  * output in case of non-stateless encoding needing to initiate their
2627  * state or the output (like the BOM in UTF16).
2628  * In case of UTF8 sequence conversion errors for the given encoder,
2633  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2638                   xmlBufferPtr in) {
2657      * First specific handling of in = NULL, i.e. the initialization call
2659     if (in == NULL) {
2696     toconv = in->use;
2705 	                      in->content, &toconv);
2707 	    xmlBufferShrink(in, toconv);
2716 	                      &written, in->content, &toconv);
2717 	xmlBufferShrink(in, toconv);
2737 	                      &written, in->content, &toconv);
2738 	xmlBufferShrink(in, toconv);
2782 	            toconv, written, in->use);
2786 	    int len = in->use;
2787 	    const xmlChar *utf = (const xmlChar *) in->content;
2798 		xmlBufferShrink(in, charref_len - written);
2811 			in->content[0], in->content[1],
2812 			in->content[2], in->content[3]);
2821 		xmlBufferShrink(in, len);
2822 		xmlBufferAddHead(in, charref, -1);
2829 			 in->content[0], in->content[1],
2830 			 in->content[2], in->content[3]);
2835 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2836 		    in->content[0] = ' ';
2850  * Returns 0 if success, or -1 in case of error
2926  * to the start of the current entity. This function is computed in
2928  * size in byte of the file if parsing a file. The function is
2932  * Returns the index in bytes from the beginning of the entity or -1
2933  *         in case the index could not be computed.
2937     xmlParserInputPtr in;
2940     in = ctxt->input;
2941     if (in == NULL)  return(-1);
2942     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2944 	xmlCharEncodingHandler * handler = in->buf->encoder;
2950         if (in->end - in->cur > 0) {
2952 	    const unsigned char *cur = (const unsigned char *)in->cur;
2953 	    int toconv = in->end - in->cur, written = 32000;
2959 		    toconv = in->end - cur;
2970 		    toconv = in->end - cur;
2987 		    toconv = in->end - cur;
3006 	if (in->buf->rawconsumed < unused)
3008 	return(in->buf->rawconsumed - unused);
3010     return(in->consumed + (in->cur - in->base));
3020  * @in:  a pointer to an array of UTF-8 chars
3021  * @inlen:  the length of @in
3024  * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
3034               const unsigned char* in, int *inlen,
3038     const unsigned char* instart = in;
3039     const unsigned char* processed = in;
3044     if (in == NULL) {
3052     inend = in + (*inlen);
3053     while (in < inend) {
3054         unsigned char d = *in++;
3058             /* trailing byte in leading position */
3064             if (!(in < inend)) {
3065                 /* trailing byte not in input buffer */
3070             c = *in++;
3081                 /* not in character set */
3090             if (!(in < inend - 1)) {
3091                 /* trailing bytes not in input buffer */
3096             c1 = *in++;
3103             c2 = *in++;
3116                 /* not in character set */
3128         processed = in;
3139  * @in:  a pointer to an array of ISO Latin 1 chars
3140  * @inlen:  the length of @in
3142  * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
3150               const unsigned char* in, int *inlen,
3154     const unsigned char* instart = in;
3160         (in == NULL) || (unicodetable == NULL))
3163     inend = in + *inlen;
3166     while ((in < inend) && (out < outend - 2)) {
3167         if (*in >= 0x80) {
3168             c = unicodetable [*in - 0x80];
3172                 *inlen = in - instart;
3183             ++in;
3185         if (instop - in > outend - out) instop = in + (outend - out);
3186         while ((*in < 0x80) && (in < instop)) {
3187             *out++ = *in++;
3190     if ((in < inend) && (out < outend) && (*in < 0x80)) {
3191         *out++ =  *in++;
3193     if ((in < inend) && (out < outend) && (*in < 0x80)) {
3194         *out++ =  *in++;
3197     *inlen = in - instart;
3938     const unsigned char* in, int *inlen) {
3939     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3942     const unsigned char* in, int *inlen) {
3943     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3947     const unsigned char* in, int *inlen) {
3948     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3951     const unsigned char* in, int *inlen) {
3952     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3956     const unsigned char* in, int *inlen) {
3957     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3960     const unsigned char* in, int *inlen) {
3961     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3965     const unsigned char* in, int *inlen) {
3966     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3969     const unsigned char* in, int *inlen) {
3970     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3974     const unsigned char* in, int *inlen) {
3975     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3978     const unsigned char* in, int *inlen) {
3979     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3983     const unsigned char* in, int *inlen) {
3984     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3987     const unsigned char* in, int *inlen) {
3988     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3992     const unsigned char* in, int *inlen) {
3993     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3996     const unsigned char* in, int *inlen) {
3997     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
4001     const unsigned char* in, int *inlen) {
4002     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
4005     const unsigned char* in, int *inlen) {
4006     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
4010     const unsigned char* in, int *inlen) {
4011     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
4014     const unsigned char* in, int *inlen) {
4015     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
4019     const unsigned char* in, int *inlen) {
4020     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
4023     const unsigned char* in, int *inlen) {
4024     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
4028     const unsigned char* in, int *inlen) {
4029     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
4032     const unsigned char* in, int *inlen) {
4033     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
4037     const unsigned char* in, int *inlen) {
4038     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
4041     const unsigned char* in, int *inlen) {
4042     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
4046     const unsigned char* in, int *inlen) {
4047     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
4050     const unsigned char* in, int *inlen) {
4051     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
4055     const unsigned char* in, int *inlen) {
4056     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
4059     const unsigned char* in, int *inlen) {
4060     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
OpenGrok