Lines Matching refs:utf
60 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
519 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
593 * From rfc2044: encoding of the Unicode values on UTF-8: *
595 * UCS-4 range (hex.) UTF-8 octet sequence (binary) *
607 * @utf: pointer to the UTF8 character
614 xmlUTF8Size(const xmlChar *utf) {
618 if (utf == NULL)
620 if (*utf < 0x80)
623 if (!(*utf & 0x40))
628 if (!(*utf & mask))
657 * @utf: a sequence of UTF-8 encoded bytes
665 xmlUTF8Strlen(const xmlChar *utf) {
668 if (utf == NULL)
671 while (*utf != 0) {
672 if (utf[0] & 0x80) {
673 if ((utf[1] & 0xc0) != 0x80)
675 if ((utf[0] & 0xe0) == 0xe0) {
676 if ((utf[2] & 0xc0) != 0x80)
678 if ((utf[0] & 0xf0) == 0xf0) {
679 if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
681 utf += 4;
683 utf += 3;
686 utf += 2;
689 utf++;
698 * @utf: a sequence of UTF-8 encoded bytes
703 * Read the first UTF8 character from @utf
709 xmlGetUTF8Char(const unsigned char *utf, int *len) {
712 if (utf == NULL)
719 c = utf[0];
723 if ((utf[1] & 0xc0) != 0x80)
728 if ((utf[2] & 0xc0) != 0x80)
733 if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
737 c = (utf[0] & 0x7) << 18;
738 c |= (utf[1] & 0x3f) << 12;
739 c |= (utf[2] & 0x3f) << 6;
740 c |= utf[3] & 0x3f;
744 c = (utf[0] & 0xf) << 12;
745 c |= (utf[1] & 0x3f) << 6;
746 c |= utf[2] & 0x3f;
751 c = (utf[0] & 0x1f) << 6;
752 c |= utf[1] & 0x3f;
768 * @utf: Pointer to putative UTF-8 encoded string.
770 * Checks @utf for being valid UTF-8. @utf is assumed to be
772 * allow longer UTF-8 sequences than necessary. Note that Java is
777 * Return value: true if @utf is valid.
780 xmlCheckUTF8(const unsigned char *utf)
785 if (utf == NULL)
788 * utf is a string of 1, 2, 3 or 4 bytes. The valid strings
795 for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */
799 if ((utf[ix+1] & 0xc0 ) != 0x80)
803 if (((utf[ix+1] & 0xc0) != 0x80) ||
804 ((utf[ix+2] & 0xc0) != 0x80))
808 if (((utf[ix+1] & 0xc0) != 0x80) ||
809 ((utf[ix+2] & 0xc0) != 0x80) ||
810 ((utf[ix+3] & 0xc0) != 0x80))
821 * @utf: a sequence of UTF-8 encoded bytes
825 * the behaviour is not guaranteed if the input string is not UTF-8
832 xmlUTF8Strsize(const xmlChar *utf, int len) {
833 const xmlChar *ptr=utf;
836 if (utf == NULL)
851 return (ptr - utf);
857 * @utf: the input UTF8 *
858 * @len: the len of @utf (in chars)
865 xmlUTF8Strndup(const xmlChar *utf, int len) {
869 if ((utf == NULL) || (len < 0)) return(NULL);
870 i = xmlUTF8Strsize(utf, len);
878 memcpy(ret, utf, i * sizeof(xmlChar));
885 * @utf: the input UTF8 *
894 xmlUTF8Strpos(const xmlChar *utf, int pos) {
897 if (utf == NULL) return(NULL);
901 if ((ch=*utf++) == 0) return(NULL);
908 if ( (*utf++ & 0xc0) != 0x80 )
912 return((xmlChar *)utf);
917 * @utf: the input UTF8 *
926 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
930 if (utf==NULL || utfchar==NULL) return -1;
932 for(i=0; (ch=*utf) != 0; i++) {
933 if (xmlStrncmp(utf, utfchar, size)==0)
935 utf++;
942 if ( (*utf++ & 0xc0) != 0x80 )
951 * @utf: a sequence of UTF-8 encoded bytes
955 * Create a substring from a given UTF-8 string
956 * Note: positions are given in units of UTF-8 chars
963 xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
967 if (utf == NULL) return(NULL);
975 if ((ch=*utf++) == 0) return(NULL);
982 if ( (*utf++ & 0xc0) != 0x80 )
987 return(xmlUTF8Strndup(utf, len));