Cross Reference: /external/pcre/dist/pcretest.c

Lines Matching refs:utf
198 automatically cut out the UTF support if PCRE is built without it. */
1565 *            Convert UTF-8 string to value       *
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1576               -6 to 0 => malformed UTF-8 character at offset = (-return)
1593 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1625 *       Convert character value to UTF-8         *
1629 and encodes it as a UTF-8 character in 0 to 6 bytes.
1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1666 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 in UTF-8 so that values greater than 255 can be handled.
1679   utf        true if UTF-8 (to be converted to UTF-16)
1683              OR -1 if a UTF-8 string is malformed
1685              OR -3 if a value > 0xffff is encountered when not in UTF mode
1689 to16(int data, pcre_uint8 *p, int utf, int len)
1707 if (!utf && !data)
1724       if (!utf) return -3;
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1745 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1749 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1758   utf        true if UTF-8 (to be converted to UTF-32)
1762              OR -1 if a UTF-8 string is malformed
1768 to32(int data, pcre_uint8 *p, int utf, int len)
1786 if (!utf && !data)
1798     if (utf)
1814 /* Check that a 32-bit character string is valid UTF-32.
1820 Returns:       TRUE  if the string is a valid UTF-32 string
2016 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2087 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2126 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2129 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
2133 (void)(utf);  /* Avoid compiler warning */
2499 BOOL utf = (re->options & PCRE_UTF16) != 0;
2549       /* We know that there is only one extra character in UTF-16. */
2625     if (utf) utf16_char = TRUE;
2931 printf("     utf          Unicode Transformation Format supported [0, 1]\n");
3237       else if (strcmp(argv[op + 1], "utf") == 0)
3254         vms_setsymbol("UTF",0,yield );
3637     /* Need to know if UTF-8 for printing data strings. */
3958         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3959           "converted to UTF-16\n");
3964           "cannot be converted to UTF-16\n");
3969           "cannot be converted to 16-bit in non-UTF mode\n");
3985         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3986           "converted to UTF-32\n");
3991           "cannot be converted to UTF-32\n");
3995         fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4050     /* Compilation succeeded. It is now possible to set the UTF-8 option from
4244           ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4549     /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4561         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4605       /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4606       In non-UTF mode, allow the value of the byte to fall through to later,
4607       where values greater than 127 are turned into UTF-8 when running in
4681         allows UTF-8 characters to be constructed byte by byte, and also allows
4682         invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4684         UTF-8 when running in 16/32-bit mode. */
4905       In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4906       than 127 in UTF mode must have come from \x{...} or octal constructs
4907       because values from \x.. get this far only in non-UTF mode. */
4918               "and so cannot be converted to UTF-8\n", c);
4929               "and UTF-8 mode is not enabled.\n", c);
4946               "0x10ffff and so cannot be converted to UTF-16\n", c);
4964               "and UTF-16 mode is not enabled.\n", c);
5498       Otherwise, in the case of UTF-8 matching, the advance must be one
5575             fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5585             fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
OpenGrok