Cross Reference: /external/libxml2/encoding.c

Lines Matching defs:in
7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
12  *                described in Unicode Technical Report #4.
162  * @in:  a pointer to an array of ASCII chars
163  * @inlen:  the length of @in
165  * Take a block of ASCII chars in and try to convert it to an UTF-8
174               const unsigned char* in, int *inlen) {
176     const unsigned char* base = in;
177     const unsigned char* processed = in;
182     inend = in + (*inlen);
183     while ((in < inend) && (out - outstart + 5 < *outlen)) {
184 	c= *in++;
196 	processed = (const unsigned char*) in;
208  * @in:  a pointer to an array of UTF-8 chars
209  * @inlen:  the length of @in
211  * Take a block of UTF-8 chars in and try to convert it to an ASCII
221               const unsigned char* in, int *inlen) {
222     const unsigned char* processed = in;
225     const unsigned char* instart = in;
231     if (in == NULL) {
239     inend = in + (*inlen);
241     while (in < inend) {
242 	d = *in++;
245 	    /* trailing byte in leading position */
253 	    /* no chance for this in Ascii */
259 	if (inend - in < trailing) {
264 	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
276 	    /* no chance for this in Ascii */
281 	processed = in;
293  * @in:  a pointer to an array of ISO Latin 1 chars
294  * @inlen:  the length of @in
296  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
305               const unsigned char* in, int *inlen) {
307     const unsigned char* base = in;
312     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
316     inend = in + (*inlen);
319     while ((in < inend) && (out < outend - 1)) {
320 	if (*in >= 0x80) {
321 	    *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
322             *out++ = ((*in) & 0x3F) | 0x80;
323 	    ++in;
325 	if ((instop - in) > (outend - out)) instop = in + (outend - out);
326 	while ((in < instop) && (*in < 0x80)) {
327 	    *out++ = *in++;
330     if ((in < inend) && (out < outend) && (*in < 0x80)) {
331         *out++ = *in++;
334     *inlen = in - base;
343  * @inlenb:  the length of @in in UTF-8 chars
380  * @in:  a pointer to an array of UTF-8 chars
381  * @inlen:  the length of @in
383  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
394               const unsigned char* in, int *inlen) {
395     const unsigned char* processed = in;
398     const unsigned char* instart = in;
404     if (in == NULL) {
412     inend = in + (*inlen);
414     while (in < inend) {
415 	d = *in++;
418 	    /* trailing byte in leading position */
426 	    /* no chance for this in IsoLat1 */
432 	if (inend - in < trailing) {
437 	    if (in >= inend)
439 	    if (((d= *in++) & 0xC0) != 0x80) {
454 	    /* no chance for this in IsoLat1 */
459 	processed = in;
472  * @inlenb:  the length of @in in UTF-16LE chars
474  * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
480  *     if the transcoding fails (if *in is not a valid utf16 string)
491     unsigned short* in = (unsigned short*) inb;
500     inend = in + inlen;
501     while ((in < inend) && (out - outstart + 5 < *outlen)) {
503 	    c= *in++;
505 	    tmp = (unsigned char *) in;
508 	    in++;
511 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
515 		d = *in++;
517 		tmp = (unsigned char *) in;
520 		in++;
548 	processed = (const unsigned char*) in;
560  * @in:  a pointer to an array of UTF-8 chars
561  * @inlen:  the length of @in
563  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
571             const unsigned char* in, int *inlen)
574     const unsigned char* processed = in;
575     const unsigned char *const instart = in;
586     if (in == NULL) {
591     inend= in + *inlen;
593     while (in < inend) {
594       d= *in++;
597           /* trailing byte in leading position */
605 	/* no chance for this in UTF-16 */
611       if (inend - in < trailing) {
616           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
658 	processed = in;
669  * @in:  a pointer to an array of UTF-8 chars
670  * @inlen:  the length of @in
672  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
680             const unsigned char* in, int *inlen)
682     if (in == NULL) {
701     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
710  * @inlenb:  the length of @in in UTF-16 chars
712  * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
718  *     if the transcoding fails (if *in is not a valid utf16 string)
729     unsigned short* in = (unsigned short*) inb;
738     inend= in + inlen;
739     while (in < inend) {
741 	    tmp = (unsigned char *) in;
745 	    in++;
747 	    c= *in++;
750 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
756 		tmp = (unsigned char *) in;
760 		in++;
762 		d= *in++;
790 	processed = (const unsigned char*) in;
802  * @in:  a pointer to an array of UTF-8 chars
803  * @inlen:  the length of @in
805  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
813             const unsigned char* in, int *inlen)
816     const unsigned char* processed = in;
817     const unsigned char *const instart = in;
828     if (in == NULL) {
833     inend= in + *inlen;
835     while (in < inend) {
836       d= *in++;
839           /* trailing byte in leading position */
847           /* no chance for this in UTF-16 */
853       if (inend - in < trailing) {
858           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
897 	processed = in;
913  * @in:  a pointer to the first bytes of the XML entity, must be at least
923 xmlDetectCharEncoding(const unsigned char* in, int len)
925     if (in == NULL)
928 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
929 	    (in[2] == 0x00) && (in[3] == 0x3C))
931 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
932 	    (in[2] == 0x00) && (in[3] == 0x00))
934 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
935 	    (in[2] == 0x3C) && (in[3] == 0x00))
937 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
938 	    (in[2] == 0x00) && (in[3] == 0x00))
940 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
941 	    (in[2] == 0xA7) && (in[3] == 0x94))
943 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
944 	    (in[2] == 0x78) && (in[3] == 0x6D))
951 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 	    (in[2] == 0x3F) && (in[3] == 0x00))
954 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
955 	    (in[2] == 0x00) && (in[3] == 0x3F))
963 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
964 	    (in[2] == 0xBF))
969 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
971 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
1003  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1039  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1040  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1045  * Returns 0 in case of success, -1 in case of error
1098  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1102  * Returns 0 in case of success, -1 in case of error
1131  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1135  * [XML] 4.3.3 Character Encoding in Entities.
1169      *       already found and in use
1180      *       already found and in use
1219  * Section 4.3.3  Character Encoding in Entities
1300  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1306  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1385  *       in normal processing.
1493  * Search in the registered set the handler able to read/write that encoding.
1553 	     * We used to keep ISO Latin encodings native in the
1627  * Search in the registered set the handler able to read/write that encoding.
1778  * @in:  a pointer to an array of ISO Latin 1 chars
1779  * @inlen:  the length of @in
1783  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1793                 const unsigned char *in, int *inlen) {
1795     const char *icv_in = (const char *) in;
1799     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1845  * @in:  a pointer to an array of ISO Latin 1 chars
1846  * @inlen:  the length of @in
1850  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1860                 const unsigned char *in, int *inlen) {
1861     const char *ucv_in = (const char *) in;
1865     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1888     *inlen = ucv_in - (const char*) in;
1911  * @in:  an xmlBuffer for the input
1919  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1924                        xmlBufferPtr in, int len) {
1931     if (in == NULL) return(-1);
1935     toconv = in->use;
1958 	                     in->content, &toconv);
1959 	xmlBufferShrink(in, toconv);
1966 	                      &written, in->content, &toconv);
1967 	xmlBufferShrink(in, toconv);
1976 	                      &written, in->content, &toconv);
1977 	xmlBufferShrink(in, toconv);
1992 	            toconv, written, in->use);
2000 	            toconv, written, in->use);
2018  * @in:  an xmlBuffer for the input
2025  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2030                  xmlBufferPtr in) {
2031     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2046  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2057     xmlBufPtr in;
2064     in = input->raw;
2066     toconv = xmlBufUse(in);
2096                                     xmlBufContent(in), &c_in);
2097         xmlBufShrink(in, c_in);
2103                               &c_out, xmlBufContent(in), &c_in);
2104         xmlBufShrink(in, c_in);
2113                               &c_out, xmlBufContent(in), &c_in);
2114         xmlBufShrink(in, c_in);
2132                             c_in, c_out, (int)xmlBufUse(in));
2139                             c_in, c_out, (int)xmlBufUse(in));
2144             const xmlChar *content = xmlBufContent(in);
2172  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2183     xmlBufPtr in;
2190     in = input->raw;
2192     toconv = xmlBufUse(in);
2213                                     xmlBufContent(in), &c_in);
2214         xmlBufShrink(in, c_in);
2220                               &c_out, xmlBufContent(in), &c_in);
2221         xmlBufShrink(in, c_in);
2230                               &c_out, xmlBufContent(in), &c_in);
2231         xmlBufShrink(in, c_in);
2249                             c_in, c_out, (int)xmlBufUse(in));
2256                             c_in, c_out, (int)xmlBufUse(in));
2261             const xmlChar *content = xmlBufContent(in);
2284  * @in:  an xmlBuffer for the input
2290  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2295                  xmlBufferPtr in)
2305     if (in == NULL)
2308     toconv = in->use;
2318                              in->content, &toconv);
2319         xmlBufferShrink(in, toconv);
2326                               &written, in->content, &toconv);
2327         xmlBufferShrink(in, toconv);
2337                               &written, in->content, &toconv);
2338         xmlBufferShrink(in, toconv);
2357                             toconv, written, in->use);
2364                             toconv, written, in->use);
2371 		     in->content[0], in->content[1],
2372 		     in->content[2], in->content[3]);
2394  * output in case of non-stateless encoding needing to initiate their
2395  * state or the output (like the BOM in UTF16).
2396  * In case of UTF8 sequence conversion errors for the given encoder,
2401  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2413     xmlBufPtr in;
2421     in = output->buffer;
2465     toconv = xmlBufUse(in);
2481                                       xmlBufContent(in), &c_in);
2483             xmlBufShrink(in, c_in);
2491                               &c_out, xmlBufContent(in), &c_in);
2492         xmlBufShrink(in, c_in);
2510                               &c_out, xmlBufContent(in), &c_in);
2511         xmlBufShrink(in, c_in);
2554 	            c_in, c_out, (int) xmlBufUse(in));
2558 	    int len = (int) xmlBufUse(in);
2559             xmlChar *content = xmlBufContent(in);
2570 		xmlBufShrink(in, charref_len - c_out);
2593 		xmlBufShrink(in, len);
2594 		xmlBufAddHead(in, charref, -1);
2607 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2620  * @in:  an xmlBuffer for the input
2623  * a first call with @in == NULL has to be made firs to initiate the
2624  * output in case of non-stateless encoding needing to initiate their
2625  * state or the output (like the BOM in UTF16).
2626  * In case of UTF8 sequence conversion errors for the given encoder,
2631  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2636                   xmlBufferPtr in) {
2655      * First specific handling of in = NULL, i.e. the initialization call
2657     if (in == NULL) {
2694     toconv = in->use;
2703 	                      in->content, &toconv);
2705 	    xmlBufferShrink(in, toconv);
2714 	                      &written, in->content, &toconv);
2715 	xmlBufferShrink(in, toconv);
2735 	                      &written, in->content, &toconv);
2736 	xmlBufferShrink(in, toconv);
2780 	            toconv, written, in->use);
2784 	    int len = in->use;
2785 	    const xmlChar *utf = (const xmlChar *) in->content;
2796 		xmlBufferShrink(in, charref_len - written);
2809 			in->content[0], in->content[1],
2810 			in->content[2], in->content[3]);
2819 		xmlBufferShrink(in, len);
2820 		xmlBufferAddHead(in, charref, -1);
2827 			 in->content[0], in->content[1],
2828 			 in->content[2], in->content[3]);
2833 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2834 		    in->content[0] = ' ';
2848  * Returns 0 if success, or -1 in case of error
2912  * to the start of the current entity. This function is computed in
2914  * size in byte of the file if parsing a file. The function is
2918  * Returns the index in bytes from the beginning of the entity or -1
2919  *         in case the index could not be computed.
2923     xmlParserInputPtr in;
2926     in = ctxt->input;
2927     if (in == NULL)  return(-1);
2928     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2930 	xmlCharEncodingHandler * handler = in->buf->encoder;
2936         if (in->end - in->cur > 0) {
2938 	    const unsigned char *cur = (const unsigned char *)in->cur;
2939 	    int toconv = in->end - in->cur, written = 32000;
2945 		    toconv = in->end - cur;
2956 		    toconv = in->end - cur;
2973 		    toconv = in->end - cur;
2992 	if (in->buf->rawconsumed < unused)
2994 	return(in->buf->rawconsumed - unused);
2996     return(in->consumed + (in->cur - in->base));
3006  * @in:  a pointer to an array of UTF-8 chars
3007  * @inlen:  the length of @in
3010  * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
3020               const unsigned char* in, int *inlen,
3024     const unsigned char* instart = in;
3025     const unsigned char* processed = in;
3030     if (in == NULL) {
3038     inend = in + (*inlen);
3039     while (in < inend) {
3040         unsigned char d = *in++;
3044             /* trailing byte in leading position */
3050             if (!(in < inend)) {
3051                 /* trailing byte not in input buffer */
3056             c = *in++;
3067                 /* not in character set */
3076             if (!(in < inend - 1)) {
3077                 /* trailing bytes not in input buffer */
3082             c1 = *in++;
3089             c2 = *in++;
3102                 /* not in character set */
3114         processed = in;
3125  * @in:  a pointer to an array of ISO Latin 1 chars
3126  * @inlen:  the length of @in
3128  * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
3136               const unsigned char* in, int *inlen,
3140     const unsigned char* instart = in;
3146         (in == NULL) || (unicodetable == NULL))
3149     inend = in + *inlen;
3152     while ((in < inend) && (out < outend - 2)) {
3153         if (*in >= 0x80) {
3154             c = unicodetable [*in - 0x80];
3158                 *inlen = in - instart;
3169             ++in;
3171         if (instop - in > outend - out) instop = in + (outend - out);
3172         while ((*in < 0x80) && (in < instop)) {
3173             *out++ = *in++;
3176     if ((in < inend) && (out < outend) && (*in < 0x80)) {
3177         *out++ =  *in++;
3179     if ((in < inend) && (out < outend) && (*in < 0x80)) {
3180         *out++ =  *in++;
3183     *inlen = in - instart;
3924     const unsigned char* in, int *inlen) {
3925     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3928     const unsigned char* in, int *inlen) {
3929     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3933     const unsigned char* in, int *inlen) {
3934     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3937     const unsigned char* in, int *inlen) {
3938     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3942     const unsigned char* in, int *inlen) {
3943     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3946     const unsigned char* in, int *inlen) {
3947     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3951     const unsigned char* in, int *inlen) {
3952     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3955     const unsigned char* in, int *inlen) {
3956     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3960     const unsigned char* in, int *inlen) {
3961     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3964     const unsigned char* in, int *inlen) {
3965     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3969     const unsigned char* in, int *inlen) {
3970     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3973     const unsigned char* in, int *inlen) {
3974     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3978     const unsigned char* in, int *inlen) {
3979     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3982     const unsigned char* in, int *inlen) {
3983     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3987     const unsigned char* in, int *inlen) {
3988     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3991     const unsigned char* in, int *inlen) {
3992     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3996     const unsigned char* in, int *inlen) {
3997     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
4000     const unsigned char* in, int *inlen) {
4001     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
4005     const unsigned char* in, int *inlen) {
4006     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
4009     const unsigned char* in, int *inlen) {
4010     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
4014     const unsigned char* in, int *inlen) {
4015     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
4018     const unsigned char* in, int *inlen) {
4019     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
4023     const unsigned char* in, int *inlen) {
4024     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
4027     const unsigned char* in, int *inlen) {
4028     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
4032     const unsigned char* in, int *inlen) {
4033     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
4036     const unsigned char* in, int *inlen) {
4037     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
4041     const unsigned char* in, int *inlen) {
4042     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
4045     const unsigned char* in, int *inlen) {
4046     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
OpenGrok