Cross Reference: /external/pcre/dist2/src/pcre2test.c

Lines Matching refs:utf
299 /* We need to be able to check input text for UTF-8 validity, whatever code
301 8-bit code units. So we include the UTF validity checking function for 8-bit
623   { "utf",                        MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
926 #define PCHARS(lv, p, offset, len, utf, f) \
928     lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
930     lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
932     lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
934 #define PCHARSV(p, offset, len, utf, f) \
936     (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
938     (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
940     (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1400 #define PCHARS(lv, p, offset, len, utf, f) \
1402     lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1404     lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1406 #define PCHARSV(p, offset, len, utf, f) \
1408     (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
1410     (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1760 #define PCHARS(lv, p, offset, len, utf, f) \
1761   lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1762 #define PCHARSV(p, offset, len, utf, f) \
1763   (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
1855 #define PCHARS(lv, p, offset, len, utf, f) \
1856   lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
1857 #define PCHARSV(p, offset, len, utf, f) \
1858   (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
1950 #define PCHARS(lv, p, offset, len, utf, f) \
1951   lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
1952 #define PCHARSV(p, offset, len, utf, f) \
1953   (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
2447 *      Convert UTF-8 character to code point     *
2450 /* This function reads one or more bytes that represent a UTF-8 character,
2452 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2455 checking, and also for generating 32-bit non-UTF data values above the UTF
2463               -6 to 0 => malformed UTF-8 character at offset = (-return)
2480 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2518   utf          TRUE in UTF mode
2525 pchar(uint32_t c, BOOL utf, FILE *f)
2536   if (utf)
2588 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2592 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
2600   if (utf)
2607       yield += pchar(c, utf, f);
2612   yield += pchar(c, utf, f);
2625 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2629 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
2636   if (utf && c >= 0xD800 && c < 0xDC00 && length > 0)
2646   yield += pchar(c, utf, f);
2659 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2663 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
2666 (void)(utf);  /* Avoid compiler warning */
2671   yield += pchar(c, utf, f);
2682 *       Convert character value to UTF-8         *
2686 and encodes it as a UTF-8 character in 0 to 6 bytes.
2721 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
2724 more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
2725 possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
2726 UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
2730 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
2735   utf        non-zero if converting to UTF-16
2740              OR -1 if a UTF-8 string is malformed
2741              OR -2 if a value > 0x10ffff is encountered in UTF mode
2742              OR -3 if a value > 0xffff is encountered when not in UTF mode
2746 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2766 if (!utf)
2780     if (!utf) return -3;
2800 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
2808 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
2813   utf        true if UTF-8 (to be converted to UTF-32)
2818              OR -1 if a UTF-8 string is malformed
2819              OR -2 if a value > 0x10ffff is encountered in UTF mode
2823 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
2843 if (!utf)
2852   if (utf && c > 0x10ffff) return -2;
2877   utf       TRUE if in UTF mode
2883 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
2885 if (!utf || test_mode == PCRE32_MODE)
3715   ((options & PCRE2_UTF) != 0)? " utf" : "",
3805 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
3815     cb->callout_string_length, utf, outfile);
3970   /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
3971   cluttering up the verification output of non-UTF test files. */
4459 BOOL utf;
4508 utf = (pat_patctl.options & PCRE2_UTF) != 0;
4777   if (utf) cflags |= REG_UTF;
4877 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
4881 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
4887   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
4893     "cannot be converted to UTF\n");
4898     "cannot be converted to 16-bit in non-UTF mode\n");
4966 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5144 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5162     cb->callout_string_length, utf, outfile);
5188         cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
5203 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
5214   current_position - cb->start_match, utf, f);
5219   utf, f);
5223 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
5270     PCHARSV(cb->mark, 0, -1, utf, outfile);
5300   utf       TRUE for utf
5307 copy_and_get(BOOL utf, int capcount)
5345     PCHARSV(copybuffer, 0, length, utf, outfile);
5369   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5372   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5404     PCHARSV(copybuffer, 0, length, utf, outfile);
5431     PCHARSV(gotbuffer, 0, length, utf, outfile);
5456   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
5459   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
5477     PCHARSV(gotbuffer, 0, length, utf, outfile);
5506       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
5541 BOOL utf;
5568 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
5572 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
5582 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
5585 if (utf)
5593     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
5594       "in UTF mode\n");
5625 buffer of the appropriate width. In UTF mode, input can be UTF-8. */
5698     if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
5766     allows UTF-8 characters to be constructed byte by byte, and also allows
5767     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
5777     if (utf && (test_mode == PCRE8_MODE))
5810   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
5811   than 127 in UTF mode must have come from \x{...} or octal constructs
5812   because values from \x.. get this far only in non-UTF mode. */
5817     if (utf)
5822           "and so cannot be converted to UTF-8\n", c);
5832           "and UTF-8 mode is not enabled.\n", c);
5843     if (utf)
5848           "0x10ffff and so cannot be converted to UTF-16\n", c);
5865           "and UTF-16 mode is not enabled.\n", c);
5989           pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile);
5996             utf, outfile);
6164   escape processing is done for replacements. In UTF mode, check for an invalid
6165   UTF-8 input string, and if it is invalid, just copy its code units without
6166   UTF interpretation. This provides a means of checking that an invalid string
6167   is detected. Otherwise, UTF-8 can be used to include wide characters in a
6170   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
6172   /* Not UTF or invalid UTF-8: just copy the code units. */
6174   if (!utf || badutf)
6190   /* Valid UTF-8 replacement string */
6243     PCHARSV(nbuffer, 0, nsize, utf, outfile);
6518           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
6519           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
6520           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
6537           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
6538           PCHARSV(pp, start, end - start, utf, outfile);
6552           PCHARSV(pp, start, end - start, utf, outfile);
6562         PCHARSV(pp, start, end - start, utf, outfile);
6574         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
6585       PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
6591     copy_and_get(utf, capcount);
6609       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
6616     poffset = backchars(pp, ovector[0], maxlookbehind, utf);
6617     PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile);
6618     PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
6634     copy_and_get(utf, 1);
6651   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
6667     else if (utf && test_mode != PCRE32_MODE)
6702           PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
6711       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
6774           if (utf && test_mode != PCRE32_MODE)
6918 printf("     unicode        Unicode and UTF support enabled [0, 1]\n");
7038   printf("  UTF and UCP support (");
OpenGrok