Lines Matching refs:utf
198 automatically cut out the UTF support if PCRE is built without it. */
1565 * Convert UTF-8 string to value *
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1576 -6 to 0 => malformed UTF-8 character at offset = (-return)
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1625 * Convert character value to UTF-8 *
1629 and encodes it as a UTF-8 character in 0 to 6 bytes.
1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1666 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 in UTF-8 so that values greater than 255 can be handled.
1679 utf true if UTF-8 (to be converted to UTF-16)
1683 OR -1 if a UTF-8 string is malformed
1685 OR -3 if a value > 0xffff is encountered when not in UTF mode
1689 to16(int data, pcre_uint8 *p, int utf, int len)
1707 if (!utf && !data)
1724 if (!utf) return -3;
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1745 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1749 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1758 utf true if UTF-8 (to be converted to UTF-32)
1762 OR -1 if a UTF-8 string is malformed
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1786 if (!utf && !data)
1798 if (utf)
1814 /* Check that a 32-bit character string is valid UTF-32.
1820 Returns: TRUE if the string is a valid UTF-32 string
2016 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2087 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2126 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2129 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2133 (void)(utf); /* Avoid compiler warning */
2499 BOOL utf = (re->options & PCRE_UTF16) != 0;
2549 /* We know that there is only one extra character in UTF-16. */
2625 if (utf) utf16_char = TRUE;
2931 printf(" utf Unicode Transformation Format supported [0, 1]\n");
3237 else if (strcmp(argv[op + 1], "utf") == 0)
3254 vms_setsymbol("UTF",0,yield );
3637 /* Need to know if UTF-8 for printing data strings. */
3958 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3959 "converted to UTF-16\n");
3964 "cannot be converted to UTF-16\n");
3969 "cannot be converted to 16-bit in non-UTF mode\n");
3985 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3986 "converted to UTF-32\n");
3991 "cannot be converted to UTF-32\n");
3995 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4050 /* Compilation succeeded. It is now possible to set the UTF-8 option from
4244 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4549 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4561 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4605 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4606 In non-UTF mode, allow the value of the byte to fall through to later,
4607 where values greater than 127 are turned into UTF-8 when running in
4681 allows UTF-8 characters to be constructed byte by byte, and also allows
4682 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4684 UTF-8 when running in 16/32-bit mode. */
4905 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4906 than 127 in UTF mode must have come from \x{...} or octal constructs
4907 because values from \x.. get this far only in non-UTF mode. */
4918 "and so cannot be converted to UTF-8\n", c);
4929 "and UTF-8 mode is not enabled.\n", c);
4946 "0x10ffff and so cannot be converted to UTF-16\n", c);
4964 "and UTF-16 mode is not enabled.\n", c);
5498 Otherwise, in the case of UTF-8 matching, the advance must be one
5575 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5585 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,