Home | History | Annotate | Download | only in libxml2

Lines Matching refs:utf

60  * encoded in UTF-8 or an encoding with 8bit based chars, we assume
512 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
586 * From rfc2044: encoding of the Unicode values on UTF-8: *
588 * UCS-4 range (hex.) UTF-8 octet sequence (binary) *
600 * @utf: pointer to the UTF8 character
607 xmlUTF8Size(const xmlChar *utf) {
611 if (utf == NULL)
613 if (*utf < 0x80)
616 if (!(*utf & 0x40))
621 if (!(*utf & mask))
650 * @utf: a sequence of UTF-8 encoded bytes
658 xmlUTF8Strlen(const xmlChar *utf) {
661 if (utf == NULL)
664 while (*utf != 0) {
665 if (utf[0] & 0x80) {
666 if ((utf[1] & 0xc0) != 0x80)
668 if ((utf[0] & 0xe0) == 0xe0) {
669 if ((utf[2] & 0xc0) != 0x80)
671 if ((utf[0] & 0xf0) == 0xf0) {
672 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
674 utf += 4;
676 utf += 3;
679 utf += 2;
682 utf++;
691 * @utf: a sequence of UTF-8 encoded bytes
696 * Read the first UTF8 character from @utf
702 xmlGetUTF8Char(const unsigned char *utf, int *len) {
705 if (utf == NULL)
712 c = utf[0];
716 if ((utf[1] & 0xc0) != 0x80)
721 if ((utf[2] & 0xc0) != 0x80)
726 if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
730 c = (utf[0] & 0x7) << 18;
731 c |= (utf[1] & 0x3f) << 12;
732 c |= (utf[2] & 0x3f) << 6;
733 c |= utf[3] & 0x3f;
737 c = (utf[0] & 0xf) << 12;
738 c |= (utf[1] & 0x3f) << 6;
739 c |= utf[2] & 0x3f;
744 c = (utf[0] & 0x1f) << 6;
745 c |= utf[1] & 0x3f;
761 * @utf: Pointer to putative UTF-8 encoded string.
763 * Checks @utf for being valid UTF-8. @utf is assumed to be
765 * allow longer UTF-8 sequences than necessary. Note that Java is
770 * Return value: true if @utf is valid.
773 xmlCheckUTF8(const unsigned char *utf)
778 if (utf == NULL)
781 * utf is a string of 1, 2, 3 or 4 bytes. The valid strings
788 for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */
792 if ((utf[ix+1] & 0xc0 ) != 0x80)
796 if (((utf[ix+1] & 0xc0) != 0x80) ||
797 ((utf[ix+2] & 0xc0) != 0x80))
801 if (((utf[ix+1] & 0xc0) != 0x80) ||
802 ((utf[ix+2] & 0xc0) != 0x80) ||
803 ((utf[ix+3] & 0xc0) != 0x80))
814 * @utf: a sequence of UTF-8 encoded bytes
818 * the behaviour is not guaranteed if the input string is not UTF-8
825 xmlUTF8Strsize(const xmlChar *utf, int len) {
826 const xmlChar *ptr=utf;
829 if (utf == NULL)
844 return (ptr - utf);
850 * @utf: the input UTF8 *
851 * @len: the len of @utf (in chars)
858 xmlUTF8Strndup(const xmlChar *utf, int len) {
862 if ((utf == NULL) || (len < 0)) return(NULL);
863 i = xmlUTF8Strsize(utf, len);
871 memcpy(ret, utf, i * sizeof(xmlChar));
878 * @utf: the input UTF8 *
887 xmlUTF8Strpos(const xmlChar *utf, int pos) {
890 if (utf == NULL) return(NULL);
894 if ((ch=*utf++) == 0) return(NULL);
901 if ( (*utf++ & 0xc0) != 0x80 )
905 return((xmlChar *)utf);
910 * @utf: the input UTF8 *
919 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
923 if (utf==NULL || utfchar==NULL) return -1;
925 for(i=0; (ch=*utf) != 0; i++) {
926 if (xmlStrncmp(utf, utfchar, size)==0)
928 utf++;
935 if ( (*utf++ & 0xc0) != 0x80 )
944 * @utf: a sequence of UTF-8 encoded bytes
948 * Create a substring from a given UTF-8 string
949 * Note: positions are given in units of UTF-8 chars
956 xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
960 if (utf == NULL) return(NULL);
968 if ((ch=*utf++) == 0) return(NULL);
975 if ( (*utf++ & 0xc0) != 0x80 )
980 return(xmlUTF8Strndup(utf, len));