/*************************************************
*             PCRE2 testing program              *
*************************************************/

/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. In 2014
the API was completely revised and '2' was added to the name, because the old
API, which had lasted for 16 years, could not accommodate new requirements. At
the same time, this testing program was re-designed because its original
hacked-up (non-) design had also run out of steam.

                       Written by Philip Hazel
     Original code Copyright (c) 1997-2012 University of Cambridge
    Rewritten code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 41 POSSIBILITY OF SUCH DAMAGE. 42 ----------------------------------------------------------------------------- 43 */ 44 45 46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2 47 libraries in a single program, though its input and output are always 8-bit. 48 It is different from modules such as pcre2_compile.c in the library itself, 49 which are compiled separately for each code unit width. If two widths are 50 enabled, for example, pcre2_compile.c is compiled twice. In contrast, 51 pcre2test.c is compiled only once, and linked with all the enabled libraries. 52 Therefore, it must not make use of any of the macros from pcre2.h or 53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make 54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that 55 it references only the enabled library functions. */ 56 57 #ifdef HAVE_CONFIG_H 58 #include "config.h" 59 #endif 60 61 #include <ctype.h> 62 #include <stdio.h> 63 #include <string.h> 64 #include <stdlib.h> 65 #include <time.h> 66 #include <locale.h> 67 #include <errno.h> 68 69 #if defined NATIVE_ZOS 70 #include "pcrzoscs.h" 71 /* That header is not included in the main PCRE2 distribution because other 72 apparatus is needed to compile pcre2test for z/OS. The header can be found in 73 the special z/OS distribution, which is available from www.zaconsultants.net or 74 from www.cbttape.org. 
*/ 75 #endif 76 77 #ifdef HAVE_UNISTD_H 78 #include <unistd.h> 79 #endif 80 81 /* Debugging code enabler */ 82 83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */ 84 85 /* Both libreadline and libedit are optionally supported. The user-supplied 86 original patch uses readline/readline.h for libedit, but in at least one system 87 it is installed as editline/readline.h, so the configuration code now looks for 88 that first, falling back to readline/readline.h. */ 89 90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 91 #if defined(SUPPORT_LIBREADLINE) 92 #include <readline/readline.h> 93 #include <readline/history.h> 94 #else 95 #if defined(HAVE_EDITLINE_READLINE_H) 96 #include <editline/readline.h> 97 #else 98 #include <readline/readline.h> 99 #endif 100 #endif 101 #endif 102 103 /* Put the test for interactive input into a macro so that it can be changed if 104 required for different environments. */ 105 106 #define INTERACTIVE(f) isatty(fileno(f)) 107 108 109 /* ---------------------- System-specific definitions ---------------------- */ 110 111 /* A number of things vary for Windows builds. Originally, pcretest opened its 112 input and output without "b"; then I was told that "b" was needed in some 113 environments, so it was added for release 5.0 to both the input and output. (It 114 makes no difference on Unix-like systems.) Later I was told that it is wrong 115 for the input on Windows. I've now abstracted the modes into macros that are 116 set here, to make it easier to fiddle with them, and removed "b" from the input 117 mode under Windows. The BINARY versions are used when saving/restoring compiled 118 patterns. 
*/ 119 120 #if defined(_WIN32) || defined(WIN32) 121 #include <io.h> /* For _setmode() */ 122 #include <fcntl.h> /* For _O_BINARY */ 123 #define INPUT_MODE "r" 124 #define OUTPUT_MODE "wb" 125 #define BINARY_INPUT_MODE "rb" 126 #define BINARY_OUTPUT_MODE "wb" 127 128 #ifndef isatty 129 #define isatty _isatty /* This is what Windows calls them, I'm told, */ 130 #endif /* though in some environments they seem to */ 131 /* be already defined, hence the #ifndefs. */ 132 #ifndef fileno 133 #define fileno _fileno 134 #endif 135 136 /* A user sent this fix for Borland Builder 5 under Windows. */ 137 138 #ifdef __BORLANDC__ 139 #define _setmode(handle, mode) setmode(handle, mode) 140 #endif 141 142 /* Not Windows */ 143 144 #else 145 #include <sys/time.h> /* These two includes are needed */ 146 #include <sys/resource.h> /* for setrlimit(). */ 147 #if defined NATIVE_ZOS /* z/OS uses non-binary I/O */ 148 #define INPUT_MODE "r" 149 #define OUTPUT_MODE "w" 150 #define BINARY_INPUT_MODE "rb" 151 #define BINARY_OUTPUT_MODE "wb" 152 #else 153 #define INPUT_MODE "rb" 154 #define OUTPUT_MODE "wb" 155 #define BINARY_INPUT_MODE "rb" 156 #define BINARY_OUTPUT_MODE "wb" 157 #endif 158 #endif 159 160 #ifdef __VMS 161 #include <ssdef.h> 162 void vms_setsymbol( char *, char *, int ); 163 #endif 164 165 /* VC and older compilers don't support %td or %zu. */ 166 167 #if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L 168 #define PTR_FORM "lu" 169 #define SIZ_FORM "lu" 170 #define SIZ_CAST (unsigned long int) 171 #else 172 #define PTR_FORM "td" 173 #define SIZ_FORM "zu" 174 #define SIZ_CAST 175 #endif 176 177 /* ------------------End of system-specific definitions -------------------- */ 178 179 /* Glueing macros that are used in several places below. 
*/ 180 181 #define glue(a,b) a##b 182 #define G(a,b) glue(a,b) 183 184 /* Miscellaneous parameters and manifests */ 185 186 #ifndef CLOCKS_PER_SEC 187 #ifdef CLK_TCK 188 #define CLOCKS_PER_SEC CLK_TCK 189 #else 190 #define CLOCKS_PER_SEC 100 191 #endif 192 #endif 193 194 #define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */ 195 #define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */ 196 #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ 197 #define DEFAULT_OVECCOUNT 15 /* Default ovector count */ 198 #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ 199 #define LOCALESIZE 32 /* Size of locale name */ 200 #define LOOPREPEAT 500000 /* Default loop count for timing */ 201 #define MALLOCLISTSIZE 20 /* For remembering mallocs */ 202 #define PARENS_NEST_DEFAULT 220 /* Default parentheses nest limit */ 203 #define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */ 204 #define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */ 205 #define VERSION_SIZE 64 /* Size of buffer for the version strings */ 206 207 /* Make sure the buffer into which replacement strings are copied is big enough 208 to hold them as 32-bit code units. */ 209 210 #define REPLACE_BUFFSIZE 1024 /* This is a byte value */ 211 212 /* Execution modes */ 213 214 #define PCRE8_MODE 8 215 #define PCRE16_MODE 16 216 #define PCRE32_MODE 32 217 218 /* Processing returns */ 219 220 enum { PR_OK, PR_SKIP, PR_ABEND }; 221 222 /* The macro PRINTABLE determines whether to print an output character as-is or 223 as a hex value when showing compiled patterns. is We use it in cases when the 224 locale has not been explicitly changed, so as to get consistent output from 225 systems that differ in their output from isprint() even in the "C" locale. 
*/ 226 227 #ifdef EBCDIC 228 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) 229 #else 230 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) 231 #endif 232 233 #define PRINTOK(c) ((use_tables != NULL && c < 256)? isprint(c) : PRINTABLE(c)) 234 235 /* We have to include some of the library source files because we need 236 to use some of the macros, internal structure definitions, and other internal 237 values - pcre2test has "inside information" compared to an application program 238 that strictly follows the PCRE2 API. 239 240 Before including pcre2_internal.h we define PRIV so that it does not get 241 defined therein. This ensures that PRIV names in the included files do not 242 clash with those in the libraries. Also, although pcre2_internal.h does itself 243 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h, 244 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not 245 for building the library. */ 246 247 #define PRIV(name) name 248 #define PCRE2_CODE_UNIT_WIDTH 0 249 #include "pcre2.h" 250 #include "pcre2posix.h" 251 #include "pcre2_internal.h" 252 253 /* We need access to some of the data tables that PCRE2 uses. Defining 254 PCRE2_PCRETEST makes some minor changes in the files. The previous definition 255 of PRIV avoids name clashes. */ 256 257 #define PCRE2_PCRE2TEST 258 #include "pcre2_tables.c" 259 #include "pcre2_ucd.c" 260 261 /* 32-bit integer values in the input are read by strtoul() or strtol(). The 262 check needed for overflow depends on whether long ints are in fact longer than 263 ints. They are defined not to be shorter. 
*/ 264 265 #if ULONG_MAX > UINT32_MAX 266 #define U32OVERFLOW(x) (x > UINT32_MAX) 267 #else 268 #define U32OVERFLOW(x) (x == UINT32_MAX) 269 #endif 270 271 #if LONG_MAX > INT32_MAX 272 #define S32OVERFLOW(x) (x > INT32_MAX || x < INT32_MIN) 273 #else 274 #define S32OVERFLOW(x) (x == INT32_MAX || x == INT32_MIN) 275 #endif 276 277 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include 278 pcre2_intmodedep.h, which is where mode-dependent macros and structures are 279 defined. We can now include it for each supported code unit width. Because 280 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will 281 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately 282 while including these files, and then restore it to a no-op. Because LINK_SIZE 283 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of 284 these inclusions should not be changed. */ 285 286 #undef PCRE2_SUFFIX 287 #undef PCRE2_CODE_UNIT_WIDTH 288 289 #ifdef SUPPORT_PCRE2_8 290 #define PCRE2_CODE_UNIT_WIDTH 8 291 #define PCRE2_SUFFIX(a) G(a,8) 292 #include "pcre2_intmodedep.h" 293 #include "pcre2_printint.c" 294 #undef PCRE2_CODE_UNIT_WIDTH 295 #undef PCRE2_SUFFIX 296 #endif /* SUPPORT_PCRE2_8 */ 297 298 #ifdef SUPPORT_PCRE2_16 299 #define PCRE2_CODE_UNIT_WIDTH 16 300 #define PCRE2_SUFFIX(a) G(a,16) 301 #include "pcre2_intmodedep.h" 302 #include "pcre2_printint.c" 303 #undef PCRE2_CODE_UNIT_WIDTH 304 #undef PCRE2_SUFFIX 305 #endif /* SUPPORT_PCRE2_16 */ 306 307 #ifdef SUPPORT_PCRE2_32 308 #define PCRE2_CODE_UNIT_WIDTH 32 309 #define PCRE2_SUFFIX(a) G(a,32) 310 #include "pcre2_intmodedep.h" 311 #include "pcre2_printint.c" 312 #undef PCRE2_CODE_UNIT_WIDTH 313 #undef PCRE2_SUFFIX 314 #endif /* SUPPORT_PCRE2_32 */ 315 316 #define PCRE2_SUFFIX(a) a 317 318 /* We need to be able to check input text for UTF-8 validity, whatever code 319 widths are actually available, because the input to pcre2test is always in 320 8-bit code units. 
So we include the UTF validity checking function for 8-bit 321 code units. */ 322 323 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *); 324 325 #define PCRE2_CODE_UNIT_WIDTH 8 326 #undef PCRE2_SPTR 327 #define PCRE2_SPTR PCRE2_SPTR8 328 #include "pcre2_valid_utf.c" 329 #undef PCRE2_CODE_UNIT_WIDTH 330 #undef PCRE2_SPTR 331 332 /* If we have 8-bit support, default to it; if there is also 16-or 32-bit 333 support, it can be selected by a command-line option. If there is no 8-bit 334 support, there must be 16-bit or 32-bit support, so default to one of them. The 335 config function, JIT stack, contexts, and version string are the same in all 336 modes, so use the form of the first that is available. */ 337 338 #if defined SUPPORT_PCRE2_8 339 #define DEFAULT_TEST_MODE PCRE8_MODE 340 #define VERSION_TYPE PCRE2_UCHAR8 341 #define PCRE2_CONFIG pcre2_config_8 342 #define PCRE2_JIT_STACK pcre2_jit_stack_8 343 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8 344 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8 345 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8 346 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8 347 348 #elif defined SUPPORT_PCRE2_16 349 #define DEFAULT_TEST_MODE PCRE16_MODE 350 #define VERSION_TYPE PCRE2_UCHAR16 351 #define PCRE2_CONFIG pcre2_config_16 352 #define PCRE2_JIT_STACK pcre2_jit_stack_16 353 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16 354 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16 355 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16 356 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16 357 358 #elif defined SUPPORT_PCRE2_32 359 #define DEFAULT_TEST_MODE PCRE32_MODE 360 #define VERSION_TYPE PCRE2_UCHAR32 361 #define PCRE2_CONFIG pcre2_config_32 362 #define PCRE2_JIT_STACK pcre2_jit_stack_32 363 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32 364 #define PCRE2_REAL_COMPILE_CONTEXT 
pcre2_real_compile_context_32 365 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32 366 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32 367 #endif 368 369 /* ------------- Structure and table for handling #-commands ------------- */ 370 371 typedef struct cmdstruct { 372 const char *name; 373 int value; 374 } cmdstruct; 375 376 enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN, 377 CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN }; 378 379 static cmdstruct cmdlist[] = { 380 { "forbid_utf", CMD_FORBID_UTF }, 381 { "load", CMD_LOAD }, 382 { "newline_default", CMD_NEWLINE_DEFAULT }, 383 { "pattern", CMD_PATTERN }, 384 { "perltest", CMD_PERLTEST }, 385 { "pop", CMD_POP }, 386 { "popcopy", CMD_POPCOPY }, 387 { "save", CMD_SAVE }, 388 { "subject", CMD_SUBJECT }}; 389 390 #define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct)) 391 392 /* ------------- Structures and tables for handling modifiers -------------- */ 393 394 /* Table of names for newline types. Must be kept in step with the definitions 395 of PCRE2_NEWLINE_xx in pcre2.h. */ 396 397 static const char *newlines[] = { 398 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" }; 399 400 /* Structure and table for handling pattern conversion types. 
*/ 401 402 typedef struct convertstruct { 403 const char *name; 404 uint32_t option; 405 } convertstruct; 406 407 static convertstruct convertlist[] = { 408 { "glob", PCRE2_CONVERT_GLOB }, 409 { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR }, 410 { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR }, 411 { "posix_basic", PCRE2_CONVERT_POSIX_BASIC }, 412 { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED }, 413 { "unset", CONVERT_UNSET }}; 414 415 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct)) 416 417 /* Modifier types and applicability */ 418 419 enum { MOD_CTC, /* Applies to a compile context */ 420 MOD_CTM, /* Applies to a match context */ 421 MOD_PAT, /* Applies to a pattern */ 422 MOD_PATP, /* Ditto, OK for Perl test */ 423 MOD_DAT, /* Applies to a data line */ 424 MOD_PD, /* Applies to a pattern or a data line */ 425 MOD_PDP, /* As MOD_PD, OK for Perl test */ 426 MOD_PND, /* As MOD_PD, but not for a default pattern */ 427 MOD_PNDP, /* As MOD_PND, OK for Perl test */ 428 MOD_CHR, /* Is a single character */ 429 MOD_CON, /* Is a "convert" type/options list */ 430 MOD_CTL, /* Is a control bit */ 431 MOD_BSR, /* Is a BSR value */ 432 MOD_IN2, /* Is one or two unsigned integers */ 433 MOD_INS, /* Is a signed integer */ 434 MOD_INT, /* Is an unsigned integer */ 435 MOD_IND, /* Is an unsigned integer, but no value => default */ 436 MOD_NL, /* Is a newline value */ 437 MOD_NN, /* Is a number or a name; more than one may occur */ 438 MOD_OPT, /* Is an option bit */ 439 MOD_SIZ, /* Is a PCRE2_SIZE value */ 440 MOD_STR }; /* Is a string */ 441 442 /* Control bits. Some apply to compiling, some to matching, but some can be set 443 either on a pattern or a data line, so they must all be distinct. There are now 444 so many of them that they are split into two fields. 
*/ 445 446 #define CTL_AFTERTEXT 0x00000001u 447 #define CTL_ALLAFTERTEXT 0x00000002u 448 #define CTL_ALLCAPTURES 0x00000004u 449 #define CTL_ALLUSEDTEXT 0x00000008u 450 #define CTL_ALTGLOBAL 0x00000010u 451 #define CTL_BINCODE 0x00000020u 452 #define CTL_CALLOUT_CAPTURE 0x00000040u 453 #define CTL_CALLOUT_INFO 0x00000080u 454 #define CTL_CALLOUT_NONE 0x00000100u 455 #define CTL_DFA 0x00000200u 456 #define CTL_EXPAND 0x00000400u 457 #define CTL_FINDLIMITS 0x00000800u 458 #define CTL_FRAMESIZE 0x00001000u 459 #define CTL_FULLBINCODE 0x00002000u 460 #define CTL_GETALL 0x00004000u 461 #define CTL_GLOBAL 0x00008000u 462 #define CTL_HEXPAT 0x00010000u /* Same word as USE_LENGTH */ 463 #define CTL_INFO 0x00020000u 464 #define CTL_JITFAST 0x00040000u 465 #define CTL_JITVERIFY 0x00080000u 466 #define CTL_MARK 0x00100000u 467 #define CTL_MEMORY 0x00200000u 468 #define CTL_NULLCONTEXT 0x00400000u 469 #define CTL_POSIX 0x00800000u 470 #define CTL_POSIX_NOSUB 0x01000000u 471 #define CTL_PUSH 0x02000000u /* These three must be */ 472 #define CTL_PUSHCOPY 0x04000000u /* all in the same */ 473 #define CTL_PUSHTABLESCOPY 0x08000000u /* word. 
*/ 474 #define CTL_STARTCHAR 0x10000000u 475 #define CTL_USE_LENGTH 0x20000000u /* Same word as HEXPAT */ 476 #define CTL_UTF8_INPUT 0x40000000u 477 #define CTL_ZERO_TERMINATE 0x80000000u 478 479 /* Combinations */ 480 481 #define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */ 482 #define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO) 483 #define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL) 484 485 /* Second control word */ 486 487 #define CTL2_SUBSTITUTE_EXTENDED 0x00000001u 488 #define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u 489 #define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u 490 #define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u 491 #define CTL2_SUBJECT_LITERAL 0x00000010u 492 #define CTL2_CALLOUT_NO_WHERE 0x00000020u 493 #define CTL2_CALLOUT_EXTRA 0x00000040u 494 495 #define CTL2_NL_SET 0x40000000u /* Informational */ 496 #define CTL2_BSR_SET 0x80000000u /* Informational */ 497 498 /* These are the matching controls that may be set either on a pattern or on a 499 data line. They are copied from the pattern controls as initial settings for 500 data line controls. Note that CTL_MEMORY is not included here, because it does 501 different things in the two cases. */ 502 503 #define CTL_ALLPD (CTL_AFTERTEXT|\ 504 CTL_ALLAFTERTEXT|\ 505 CTL_ALLCAPTURES|\ 506 CTL_ALLUSEDTEXT|\ 507 CTL_ALTGLOBAL|\ 508 CTL_GLOBAL|\ 509 CTL_MARK|\ 510 CTL_STARTCHAR|\ 511 CTL_UTF8_INPUT) 512 513 #define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\ 514 CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\ 515 CTL2_SUBSTITUTE_UNKNOWN_UNSET|\ 516 CTL2_SUBSTITUTE_UNSET_EMPTY) 517 518 /* Structures for holding modifier information for patterns and subject strings 519 (data). Fields containing modifiers that can be set either for a pattern or a 520 subject must be at the start and in the same order in both cases so that the 521 same offset in the big table below works for both. */ 522 523 typedef struct patctl { /* Structure for pattern modifiers. 
*/ 524 uint32_t options; /* Must be in same position as datctl */ 525 uint32_t control; /* Must be in same position as datctl */ 526 uint32_t control2; /* Must be in same position as datctl */ 527 uint32_t jitstack; /* Must be in same position as datctl */ 528 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ 529 uint32_t jit; 530 uint32_t stackguard_test; 531 uint32_t tables_id; 532 uint32_t convert_type; 533 uint32_t convert_length; 534 uint32_t convert_glob_escape; 535 uint32_t convert_glob_separator; 536 uint32_t regerror_buffsize; 537 uint8_t locale[LOCALESIZE]; 538 } patctl; 539 540 #define MAXCPYGET 10 541 #define LENCPYGET 64 542 543 typedef struct datctl { /* Structure for data line modifiers. */ 544 uint32_t options; /* Must be in same position as patctl */ 545 uint32_t control; /* Must be in same position as patctl */ 546 uint32_t control2; /* Must be in same position as patctl */ 547 uint32_t jitstack; /* Must be in same position as patctl */ 548 uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ 549 uint32_t startend[2]; 550 uint32_t cerror[2]; 551 uint32_t cfail[2]; 552 int32_t callout_data; 553 int32_t copy_numbers[MAXCPYGET]; 554 int32_t get_numbers[MAXCPYGET]; 555 uint32_t oveccount; 556 uint32_t offset; 557 uint8_t copy_names[LENCPYGET]; 558 uint8_t get_names[LENCPYGET]; 559 } datctl; 560 561 /* Ids for which context to modify. */ 562 563 enum { CTX_PAT, /* Active pattern context */ 564 CTX_POPPAT, /* Ditto, for a popped pattern */ 565 CTX_DEFPAT, /* Default pattern context */ 566 CTX_DAT, /* Active data (match) context */ 567 CTX_DEFDAT }; /* Default data (match) context */ 568 569 /* Macros to simplify the big table below. */ 570 571 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name) 572 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name) 573 #define PO(name) offsetof(patctl, name) 574 #define PD(name) PO(name) 575 #define DO(name) offsetof(datctl, name) 576 577 /* Table of all long-form modifiers. 
Must be in collating sequence of modifier 578 name because it is searched by binary chop. */ 579 580 typedef struct modstruct { 581 const char *name; 582 uint16_t which; 583 uint16_t type; 584 uint32_t value; 585 PCRE2_SIZE offset; 586 } modstruct; 587 588 static modstruct modlist[] = { 589 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) }, 590 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, 591 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) }, 592 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) }, 593 { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) }, 594 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) }, 595 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) }, 596 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) }, 597 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) }, 598 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, 599 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, 600 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, 601 { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) }, 602 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, 603 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, 604 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, 605 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) }, 606 { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) }, 607 { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) }, 608 { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) }, 609 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) }, 610 { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) }, 611 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, 612 { 
"caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, 613 { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) }, 614 { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) }, 615 { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) }, 616 { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) }, 617 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, 618 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) }, 619 { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, 620 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) }, 621 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) }, 622 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) }, 623 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, 624 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, 625 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, 626 { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) }, 627 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) }, 628 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) }, 629 { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) }, 630 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) }, 631 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) }, 632 { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) }, 633 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) }, 634 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) }, 635 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) }, 636 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) }, 637 { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) }, 638 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) }, 639 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, 640 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) }, 641 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) }, 642 { "jitstack", 
MOD_PNDP, MOD_INT, 0, PO(jitstack) }, 643 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) }, 644 { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) }, 645 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) }, 646 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) }, 647 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) }, 648 { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) }, 649 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) }, 650 { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) }, 651 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) }, 652 { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) }, 653 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, 654 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) }, 655 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, 656 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, 657 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, 658 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, 659 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, 660 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, 661 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) }, 662 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, 663 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, 664 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, 665 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) }, 666 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, 667 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, 668 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) }, 669 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, 670 { "offset_limit", MOD_CTM, 
MOD_SIZ, 0, MO(offset_limit)}, 671 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, 672 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, 673 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, 674 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, 675 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, 676 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) }, 677 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) }, 678 { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) }, 679 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, 680 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, 681 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) }, 682 { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) }, 683 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */ 684 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) }, 685 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, 686 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, 687 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, 688 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) }, 689 { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) }, 690 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) }, 691 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) }, 692 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) }, 693 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) }, 694 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, 695 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, 696 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, 697 { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) }, 698 { "use_offset_limit", 
MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) }, 699 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }, 700 { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) }, 701 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) } 702 }; 703 704 #define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct) 705 706 /* Controls and options that are supported for use with the POSIX interface. */ 707 708 #define POSIX_SUPPORTED_COMPILE_OPTIONS ( \ 709 PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_LITERAL|PCRE2_MULTILINE|PCRE2_UCP| \ 710 PCRE2_UTF|PCRE2_UNGREEDY) 711 712 #define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0) 713 714 #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \ 715 CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_HEXPAT|CTL_POSIX| \ 716 CTL_POSIX_NOSUB|CTL_USE_LENGTH) 717 718 #define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0) 719 720 #define POSIX_SUPPORTED_MATCH_OPTIONS ( \ 721 PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL) 722 723 #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) 724 #define POSIX_SUPPORTED_MATCH_CONTROLS2 (0) 725 726 /* Control bits that are not ignored with 'push'. */ 727 728 #define PUSH_SUPPORTED_COMPILE_CONTROLS ( \ 729 CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \ 730 CTL_JITVERIFY|CTL_MEMORY|CTL_FRAMESIZE|CTL_PUSH|CTL_PUSHCOPY| \ 731 CTL_PUSHTABLESCOPY|CTL_USE_LENGTH) 732 733 #define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET|CTL2_NL_SET) 734 735 /* Controls that apply only at compile time with 'push'. */ 736 737 #define PUSH_COMPILE_ONLY_CONTROLS CTL_JITVERIFY 738 #define PUSH_COMPILE_ONLY_CONTROLS2 (0) 739 740 /* Controls that are forbidden with #pop or #popcopy. */ 741 742 #define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \ 743 CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH) 744 745 /* Pattern controls that are mutually exclusive. At present these are all in 746 the first control word. 
Note that CTL_POSIX_NOSUB is always accompanied by 747 CTL_POSIX, so it doesn't need its own entries. */ 748 749 static uint32_t exclusive_pat_controls[] = { 750 CTL_POSIX | CTL_PUSH, 751 CTL_POSIX | CTL_PUSHCOPY, 752 CTL_POSIX | CTL_PUSHTABLESCOPY, 753 CTL_PUSH | CTL_PUSHCOPY, 754 CTL_PUSH | CTL_PUSHTABLESCOPY, 755 CTL_PUSHCOPY | CTL_PUSHTABLESCOPY, 756 CTL_EXPAND | CTL_HEXPAT }; 757 758 /* Data controls that are mutually exclusive. At present these are all in the 759 first control word. */ 760 761 static uint32_t exclusive_dat_controls[] = { 762 CTL_ALLUSEDTEXT | CTL_STARTCHAR, 763 CTL_FINDLIMITS | CTL_NULLCONTEXT }; 764 765 /* Table of single-character abbreviated modifiers. The index field is 766 initialized to -1, but the first time the modifier is encountered, it is filled 767 in with the index of the full entry in modlist, to save repeated searching when 768 processing multiple test items. This short list is searched serially, so its 769 order does not matter. */ 770 771 typedef struct c1modstruct { 772 const char *fullname; 773 uint32_t onechar; 774 int index; 775 } c1modstruct; 776 777 static c1modstruct c1modlist[] = { 778 { "bincode", 'B', -1 }, 779 { "info", 'I', -1 }, 780 { "global", 'g', -1 }, 781 { "caseless", 'i', -1 }, 782 { "multiline", 'm', -1 }, 783 { "no_auto_capture", 'n', -1 }, 784 { "dotall", 's', -1 }, 785 { "extended", 'x', -1 } 786 }; 787 788 #define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct) 789 790 /* Table of arguments for the -C command line option. Use macros to make the 791 table itself easier to read. 
*/ 792 793 #if defined SUPPORT_PCRE2_8 794 #define SUPPORT_8 1 795 #endif 796 #if defined SUPPORT_PCRE2_16 797 #define SUPPORT_16 1 798 #endif 799 #if defined SUPPORT_PCRE2_32 800 #define SUPPORT_32 1 801 #endif 802 803 #ifndef SUPPORT_8 804 #define SUPPORT_8 0 805 #endif 806 #ifndef SUPPORT_16 807 #define SUPPORT_16 0 808 #endif 809 #ifndef SUPPORT_32 810 #define SUPPORT_32 0 811 #endif 812 813 #ifdef EBCDIC 814 #define SUPPORT_EBCDIC 1 815 #define EBCDIC_NL CHAR_LF 816 #else 817 #define SUPPORT_EBCDIC 0 818 #define EBCDIC_NL 0 819 #endif 820 821 #ifdef NEVER_BACKSLASH_C 822 #define BACKSLASH_C 0 823 #else 824 #define BACKSLASH_C 1 825 #endif 826 827 typedef struct coptstruct { 828 const char *name; 829 uint32_t type; 830 uint32_t value; 831 } coptstruct; 832 833 enum { CONF_BSR, 834 CONF_FIX, 835 CONF_FIZ, 836 CONF_INT, 837 CONF_NL 838 }; 839 840 static coptstruct coptlist[] = { 841 { "backslash-C", CONF_FIX, BACKSLASH_C }, 842 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR }, 843 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC }, 844 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL }, 845 { "jit", CONF_INT, PCRE2_CONFIG_JIT }, 846 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE }, 847 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE }, 848 { "pcre2-16", CONF_FIX, SUPPORT_16 }, 849 { "pcre2-32", CONF_FIX, SUPPORT_32 }, 850 { "pcre2-8", CONF_FIX, SUPPORT_8 }, 851 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE } 852 }; 853 854 #define COPTLISTCOUNT sizeof(coptlist)/sizeof(coptstruct) 855 856 #undef SUPPORT_8 857 #undef SUPPORT_16 858 #undef SUPPORT_32 859 #undef SUPPORT_EBCDIC 860 861 862 /* ----------------------- Static variables ------------------------ */ 863 864 static FILE *infile; 865 static FILE *outfile; 866 867 static const void *last_callout_mark; 868 static PCRE2_JIT_STACK *jit_stack = NULL; 869 static size_t jit_stack_size = 0; 870 871 static BOOL first_callout; 872 static BOOL jit_was_used; 873 static BOOL restrict_for_perl_test = FALSE; 874 static BOOL show_memory = FALSE; 875 876 static int 
code_unit_size; /* Bytes */ 877 static int jitrc; /* Return from JIT compile */ 878 static int test_mode = DEFAULT_TEST_MODE; 879 static int timeit = 0; 880 static int timeitm = 0; 881 882 clock_t total_compile_time = 0; 883 clock_t total_jit_compile_time = 0; 884 clock_t total_match_time = 0; 885 886 static uint32_t dfa_matched; 887 static uint32_t forbid_utf = 0; 888 static uint32_t maxlookbehind; 889 static uint32_t max_oveccount; 890 static uint32_t callout_count; 891 892 static uint16_t local_newline_default = 0; 893 894 static VERSION_TYPE jittarget[VERSION_SIZE]; 895 static VERSION_TYPE version[VERSION_SIZE]; 896 static VERSION_TYPE uversion[VERSION_SIZE]; 897 898 static patctl def_patctl; 899 static patctl pat_patctl; 900 static datctl def_datctl; 901 static datctl dat_datctl; 902 903 static void *patstack[PATSTACKSIZE]; 904 static int patstacknext = 0; 905 906 static void *malloclist[MALLOCLISTSIZE]; 907 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE]; 908 static uint32_t malloclistptr = 0; 909 910 #ifdef SUPPORT_PCRE2_8 911 static regex_t preg = { NULL, NULL, 0, 0, 0, 0 }; 912 #endif 913 914 static int *dfa_workspace = NULL; 915 static const uint8_t *locale_tables = NULL; 916 static const uint8_t *use_tables = NULL; 917 static uint8_t locale_name[32]; 918 919 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need 920 rebuilding, but set up the same naming scheme for use in macros. The "buffer" 921 buffer is where all input lines are read. Its size is the same as pbuffer8. 922 Pattern lines are always copied to pbuffer8 for use in callouts, even if they 923 are actually compiled from pbuffer16 or pbuffer32. */ 924 925 static size_t pbuffer8_size = 50000; /* Initial size, bytes */ 926 static uint8_t *pbuffer8 = NULL; 927 static uint8_t *buffer = NULL; 928 929 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it 930 is cast as needed. For long data lines it grows as necessary. 
*/ 931 932 static size_t dbuffer_size = 1u << 14; /* Initial size, bytes */ 933 static uint8_t *dbuffer = NULL; 934 935 936 /* ---------------- Mode-dependent variables -------------------*/ 937 938 #ifdef SUPPORT_PCRE2_8 939 static pcre2_code_8 *compiled_code8; 940 static pcre2_general_context_8 *general_context8, *general_context_copy8; 941 static pcre2_compile_context_8 *pat_context8, *default_pat_context8; 942 static pcre2_convert_context_8 *con_context8, *default_con_context8; 943 static pcre2_match_context_8 *dat_context8, *default_dat_context8; 944 static pcre2_match_data_8 *match_data8; 945 #endif 946 947 #ifdef SUPPORT_PCRE2_16 948 static pcre2_code_16 *compiled_code16; 949 static pcre2_general_context_16 *general_context16, *general_context_copy16; 950 static pcre2_compile_context_16 *pat_context16, *default_pat_context16; 951 static pcre2_convert_context_16 *con_context16, *default_con_context16; 952 static pcre2_match_context_16 *dat_context16, *default_dat_context16; 953 static pcre2_match_data_16 *match_data16; 954 static PCRE2_SIZE pbuffer16_size = 0; /* Set only when needed */ 955 static uint16_t *pbuffer16 = NULL; 956 #endif 957 958 #ifdef SUPPORT_PCRE2_32 959 static pcre2_code_32 *compiled_code32; 960 static pcre2_general_context_32 *general_context32, *general_context_copy32; 961 static pcre2_compile_context_32 *pat_context32, *default_pat_context32; 962 static pcre2_convert_context_32 *con_context32, *default_con_context32; 963 static pcre2_match_context_32 *dat_context32, *default_dat_context32; 964 static pcre2_match_data_32 *match_data32; 965 static PCRE2_SIZE pbuffer32_size = 0; /* Set only when needed */ 966 static uint32_t *pbuffer32 = NULL; 967 #endif 968 969 970 /* ---------------- Macros that work in all modes ----------------- */ 971 972 #define CAST8VAR(x) CASTVAR(uint8_t *, x) 973 #define SET(x,y) SETOP(x,y,=) 974 #define SETPLUS(x,y) SETOP(x,y,+=) 975 #define strlen8(x) strlen((char *)x) 976 977 978 /* ---------------- 
Mode-dependent, runtime-testing macros ------------------*/ 979 980 /* Define macros for variables and functions that must be selected dynamically 981 depending on the mode setting (8, 16, 32). These are dependent on which modes 982 are supported. */ 983 984 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \ 985 defined (SUPPORT_PCRE2_32)) >= 2 986 987 /* ----- All three modes supported ----- */ 988 989 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32) 990 991 #define CASTFLD(t,a,b) ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \ 992 (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : (t)(G(a,32)->b)) 993 994 #define CASTVAR(t,x) ( \ 995 (test_mode == PCRE8_MODE)? (t)G(x,8) : \ 996 (test_mode == PCRE16_MODE)? (t)G(x,16) : (t)G(x,32)) 997 998 #define CODE_UNIT(a,b) ( \ 999 (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \ 1000 (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \ 1001 (uint32_t)(((PCRE2_SPTR32)(a))[b])) 1002 1003 #define CONCTXCPY(a,b) \ 1004 if (test_mode == PCRE8_MODE) \ 1005 memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \ 1006 else if (test_mode == PCRE16_MODE) \ 1007 memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \ 1008 else memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32)) 1009 1010 #define CONVERT_COPY(a,b,c) \ 1011 if (test_mode == PCRE8_MODE) \ 1012 memcpy(G(a,8),(char *)b,c); \ 1013 else if (test_mode == PCRE16_MODE) \ 1014 memcpy(G(a,16),(char *)b,(c)*2); \ 1015 else if (test_mode == PCRE32_MODE) \ 1016 memcpy(G(a,32),(char *)b,(c)*4) 1017 1018 #define DATCTXCPY(a,b) \ 1019 if (test_mode == PCRE8_MODE) \ 1020 memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \ 1021 else if (test_mode == PCRE16_MODE) \ 1022 memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \ 1023 else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) 1024 1025 #define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \ 1026 (test_mode == PCRE16_MODE)? 
G(a,16)->b : G(a,32)->b) 1027 1028 #define PATCTXCPY(a,b) \ 1029 if (test_mode == PCRE8_MODE) \ 1030 memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \ 1031 else if (test_mode == PCRE16_MODE) \ 1032 memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \ 1033 else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) 1034 1035 #define PCHARS(lv, p, offset, len, utf, f) \ 1036 if (test_mode == PCRE32_MODE) \ 1037 lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ 1038 else if (test_mode == PCRE16_MODE) \ 1039 lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ 1040 else \ 1041 lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 1042 1043 #define PCHARSV(p, offset, len, utf, f) \ 1044 if (test_mode == PCRE32_MODE) \ 1045 (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ 1046 else if (test_mode == PCRE16_MODE) \ 1047 (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ 1048 else \ 1049 (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 1050 1051 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 1052 if (test_mode == PCRE8_MODE) \ 1053 a = pcre2_callout_enumerate_8(compiled_code8, \ 1054 (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \ 1055 else if (test_mode == PCRE16_MODE) \ 1056 a = pcre2_callout_enumerate_16(compiled_code16, \ 1057 (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \ 1058 else \ 1059 a = pcre2_callout_enumerate_32(compiled_code32, \ 1060 (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) 1061 1062 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \ 1063 if (test_mode == PCRE8_MODE) \ 1064 G(a,8) = pcre2_code_copy_8(b); \ 1065 else if (test_mode == PCRE16_MODE) \ 1066 G(a,16) = pcre2_code_copy_16(b); \ 1067 else \ 1068 G(a,32) = pcre2_code_copy_32(b) 1069 1070 #define PCRE2_CODE_COPY_TO_VOID(a,b) \ 1071 if (test_mode == PCRE8_MODE) \ 1072 a = (void *)pcre2_code_copy_8(G(b,8)); \ 1073 else if (test_mode == PCRE16_MODE) \ 1074 a = (void *)pcre2_code_copy_16(G(b,16)); \ 1075 else \ 1076 a = 
(void *)pcre2_code_copy_32(G(b,32)) 1077 1078 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \ 1079 if (test_mode == PCRE8_MODE) \ 1080 a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \ 1081 else if (test_mode == PCRE16_MODE) \ 1082 a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \ 1083 else \ 1084 a = (void *)pcre2_code_copy_with_tables_32(G(b,32)) 1085 1086 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 1087 if (test_mode == PCRE8_MODE) \ 1088 G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \ 1089 else if (test_mode == PCRE16_MODE) \ 1090 G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \ 1091 else \ 1092 G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g) 1093 1094 #define PCRE2_CONVERTED_PATTERN_FREE(a) \ 1095 if (test_mode == PCRE8_MODE) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \ 1096 else if (test_mode == PCRE16_MODE) pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \ 1097 else pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a) 1098 1099 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 1100 if (test_mode == PCRE8_MODE) \ 1101 a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \ 1102 else if (test_mode == PCRE16_MODE) \ 1103 a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \ 1104 else \ 1105 a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) 1106 1107 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 1108 if (test_mode == PCRE8_MODE) \ 1109 r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \ 1110 else if (test_mode == PCRE16_MODE) \ 1111 r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \ 1112 else \ 1113 r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4)) 1114 1115 #define PCRE2_GET_OVECTOR_COUNT(a,b) \ 1116 if (test_mode == PCRE8_MODE) \ 1117 a = pcre2_get_ovector_count_8(G(b,8)); \ 1118 else if (test_mode == PCRE16_MODE) \ 1119 a = pcre2_get_ovector_count_16(G(b,16)); \ 1120 else \ 1121 a = pcre2_get_ovector_count_32(G(b,32)) 1122 1123 #define PCRE2_GET_STARTCHAR(a,b) \ 1124 if 
(test_mode == PCRE8_MODE) \ 1125 a = pcre2_get_startchar_8(G(b,8)); \ 1126 else if (test_mode == PCRE16_MODE) \ 1127 a = pcre2_get_startchar_16(G(b,16)); \ 1128 else \ 1129 a = pcre2_get_startchar_32(G(b,32)) 1130 1131 #define PCRE2_JIT_COMPILE(r,a,b) \ 1132 if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \ 1133 else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \ 1134 else r = pcre2_jit_compile_32(G(a,32),b) 1135 1136 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ 1137 if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \ 1138 else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \ 1139 else pcre2_jit_free_unused_memory_32(G(a,32)) 1140 1141 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1142 if (test_mode == PCRE8_MODE) \ 1143 a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ 1144 else if (test_mode == PCRE16_MODE) \ 1145 a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ 1146 else \ 1147 a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 1148 1149 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1150 if (test_mode == PCRE8_MODE) \ 1151 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \ 1152 else if (test_mode == PCRE16_MODE) \ 1153 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \ 1154 else \ 1155 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); 1156 1157 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1158 if (test_mode == PCRE8_MODE) \ 1159 pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \ 1160 else if (test_mode == PCRE16_MODE) \ 1161 pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \ 1162 else \ 1163 pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); 1164 1165 #define PCRE2_JIT_STACK_FREE(a) \ 1166 if (test_mode == PCRE8_MODE) \ 1167 pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \ 1168 else if (test_mode == PCRE16_MODE) \ 1169 pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \ 1170 else \ 1171 
pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); 1172 1173 #define PCRE2_MAKETABLES(a) \ 1174 if (test_mode == PCRE8_MODE) a = pcre2_maketables_8(NULL); \ 1175 else if (test_mode == PCRE16_MODE) a = pcre2_maketables_16(NULL); \ 1176 else a = pcre2_maketables_32(NULL) 1177 1178 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1179 if (test_mode == PCRE8_MODE) \ 1180 a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ 1181 else if (test_mode == PCRE16_MODE) \ 1182 a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ 1183 else \ 1184 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 1185 1186 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \ 1187 if (test_mode == PCRE8_MODE) \ 1188 G(a,8) = pcre2_match_data_create_8(b,c); \ 1189 else if (test_mode == PCRE16_MODE) \ 1190 G(a,16) = pcre2_match_data_create_16(b,c); \ 1191 else \ 1192 G(a,32) = pcre2_match_data_create_32(b,c) 1193 1194 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1195 if (test_mode == PCRE8_MODE) \ 1196 G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \ 1197 else if (test_mode == PCRE16_MODE) \ 1198 G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \ 1199 else \ 1200 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) 1201 1202 #define PCRE2_MATCH_DATA_FREE(a) \ 1203 if (test_mode == PCRE8_MODE) \ 1204 pcre2_match_data_free_8(G(a,8)); \ 1205 else if (test_mode == PCRE16_MODE) \ 1206 pcre2_match_data_free_16(G(a,16)); \ 1207 else \ 1208 pcre2_match_data_free_32(G(a,32)) 1209 1210 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \ 1211 if (test_mode == PCRE8_MODE) \ 1212 a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \ 1213 else if (test_mode == PCRE16_MODE) \ 1214 a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \ 1215 else \ 1216 a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32)) 1217 1218 #define PCRE2_PATTERN_INFO(a,b,c,d) \ 1219 if (test_mode == PCRE8_MODE) \ 1220 a = 
pcre2_pattern_info_8(G(b,8),c,d); \ 1221 else if (test_mode == PCRE16_MODE) \ 1222 a = pcre2_pattern_info_16(G(b,16),c,d); \ 1223 else \ 1224 a = pcre2_pattern_info_32(G(b,32),c,d) 1225 1226 #define PCRE2_PRINTINT(a) \ 1227 if (test_mode == PCRE8_MODE) \ 1228 pcre2_printint_8(compiled_code8,outfile,a); \ 1229 else if (test_mode == PCRE16_MODE) \ 1230 pcre2_printint_16(compiled_code16,outfile,a); \ 1231 else \ 1232 pcre2_printint_32(compiled_code32,outfile,a) 1233 1234 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 1235 if (test_mode == PCRE8_MODE) \ 1236 r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \ 1237 else if (test_mode == PCRE16_MODE) \ 1238 r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \ 1239 else \ 1240 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)) 1241 1242 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 1243 if (test_mode == PCRE8_MODE) \ 1244 r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \ 1245 else if (test_mode == PCRE16_MODE) \ 1246 r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \ 1247 else \ 1248 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)) 1249 1250 #define PCRE2_SERIALIZE_FREE(a) \ 1251 if (test_mode == PCRE8_MODE) \ 1252 pcre2_serialize_free_8(a); \ 1253 else if (test_mode == PCRE16_MODE) \ 1254 pcre2_serialize_free_16(a); \ 1255 else \ 1256 pcre2_serialize_free_32(a) 1257 1258 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 1259 if (test_mode == PCRE8_MODE) \ 1260 r = pcre2_serialize_get_number_of_codes_8(a); \ 1261 else if (test_mode == PCRE16_MODE) \ 1262 r = pcre2_serialize_get_number_of_codes_16(a); \ 1263 else \ 1264 r = pcre2_serialize_get_number_of_codes_32(a); \ 1265 1266 #define PCRE2_SET_CALLOUT(a,b,c) \ 1267 if (test_mode == PCRE8_MODE) \ 1268 pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \ 1269 else if (test_mode == PCRE16_MODE) \ 1270 pcre2_set_callout_16(G(a,16),(int 
(*)(pcre2_callout_block_16 *, void *))b,c); \ 1271 else \ 1272 pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c); 1273 1274 #define PCRE2_SET_CHARACTER_TABLES(a,b) \ 1275 if (test_mode == PCRE8_MODE) \ 1276 pcre2_set_character_tables_8(G(a,8),b); \ 1277 else if (test_mode == PCRE16_MODE) \ 1278 pcre2_set_character_tables_16(G(a,16),b); \ 1279 else \ 1280 pcre2_set_character_tables_32(G(a,32),b) 1281 1282 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 1283 if (test_mode == PCRE8_MODE) \ 1284 pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \ 1285 else if (test_mode == PCRE16_MODE) \ 1286 pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \ 1287 else \ 1288 pcre2_set_compile_recursion_guard_32(G(a,32),b,c) 1289 1290 #define PCRE2_SET_DEPTH_LIMIT(a,b) \ 1291 if (test_mode == PCRE8_MODE) \ 1292 pcre2_set_depth_limit_8(G(a,8),b); \ 1293 else if (test_mode == PCRE16_MODE) \ 1294 pcre2_set_depth_limit_16(G(a,16),b); \ 1295 else \ 1296 pcre2_set_depth_limit_32(G(a,32),b) 1297 1298 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \ 1299 if (test_mode == PCRE8_MODE) \ 1300 r = pcre2_set_glob_separator_8(G(a,8),b); \ 1301 else if (test_mode == PCRE16_MODE) \ 1302 r = pcre2_set_glob_separator_16(G(a,16),b); \ 1303 else \ 1304 r = pcre2_set_glob_separator_32(G(a,32),b) 1305 1306 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) \ 1307 if (test_mode == PCRE8_MODE) \ 1308 r = pcre2_set_glob_escape_8(G(a,8),b); \ 1309 else if (test_mode == PCRE16_MODE) \ 1310 r = pcre2_set_glob_escape_16(G(a,16),b); \ 1311 else \ 1312 r = pcre2_set_glob_escape_32(G(a,32),b) 1313 1314 #define PCRE2_SET_HEAP_LIMIT(a,b) \ 1315 if (test_mode == PCRE8_MODE) \ 1316 pcre2_set_heap_limit_8(G(a,8),b); \ 1317 else if (test_mode == PCRE16_MODE) \ 1318 pcre2_set_heap_limit_16(G(a,16),b); \ 1319 else \ 1320 pcre2_set_heap_limit_32(G(a,32),b) 1321 1322 #define PCRE2_SET_MATCH_LIMIT(a,b) \ 1323 if (test_mode == PCRE8_MODE) \ 1324 pcre2_set_match_limit_8(G(a,8),b); \ 1325 else if (test_mode == 
PCRE16_MODE) \ 1326 pcre2_set_match_limit_16(G(a,16),b); \ 1327 else \ 1328 pcre2_set_match_limit_32(G(a,32),b) 1329 1330 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ 1331 if (test_mode == PCRE8_MODE) \ 1332 pcre2_set_max_pattern_length_8(G(a,8),b); \ 1333 else if (test_mode == PCRE16_MODE) \ 1334 pcre2_set_max_pattern_length_16(G(a,16),b); \ 1335 else \ 1336 pcre2_set_max_pattern_length_32(G(a,32),b) 1337 1338 #define PCRE2_SET_OFFSET_LIMIT(a,b) \ 1339 if (test_mode == PCRE8_MODE) \ 1340 pcre2_set_offset_limit_8(G(a,8),b); \ 1341 else if (test_mode == PCRE16_MODE) \ 1342 pcre2_set_offset_limit_16(G(a,16),b); \ 1343 else \ 1344 pcre2_set_offset_limit_32(G(a,32),b) 1345 1346 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ 1347 if (test_mode == PCRE8_MODE) \ 1348 pcre2_set_parens_nest_limit_8(G(a,8),b); \ 1349 else if (test_mode == PCRE16_MODE) \ 1350 pcre2_set_parens_nest_limit_16(G(a,16),b); \ 1351 else \ 1352 pcre2_set_parens_nest_limit_32(G(a,32),b) 1353 1354 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 1355 if (test_mode == PCRE8_MODE) \ 1356 a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ 1357 (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \ 1358 else if (test_mode == PCRE16_MODE) \ 1359 a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \ 1360 (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \ 1361 else \ 1362 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \ 1363 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l) 1364 1365 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 1366 if (test_mode == PCRE8_MODE) \ 1367 a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \ 1368 else if (test_mode == PCRE16_MODE) \ 1369 a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \ 1370 else \ 1371 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e) 1372 1373 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 1374 if (test_mode == PCRE8_MODE) \ 1375 a = 
pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \ 1376 else if (test_mode == PCRE16_MODE) \ 1377 a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \ 1378 else \ 1379 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e) 1380 1381 #define PCRE2_SUBSTRING_FREE(a) \ 1382 if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \ 1383 else if (test_mode == PCRE16_MODE) \ 1384 pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \ 1385 else pcre2_substring_free_32((PCRE2_UCHAR32 *)a) 1386 1387 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 1388 if (test_mode == PCRE8_MODE) \ 1389 a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \ 1390 else if (test_mode == PCRE16_MODE) \ 1391 a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \ 1392 else \ 1393 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e) 1394 1395 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 1396 if (test_mode == PCRE8_MODE) \ 1397 a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \ 1398 else if (test_mode == PCRE16_MODE) \ 1399 a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \ 1400 else \ 1401 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e) 1402 1403 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 1404 if (test_mode == PCRE8_MODE) \ 1405 a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \ 1406 else if (test_mode == PCRE16_MODE) \ 1407 a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \ 1408 else \ 1409 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d) 1410 1411 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 1412 if (test_mode == PCRE8_MODE) \ 1413 a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \ 1414 else if (test_mode == PCRE16_MODE) \ 1415 a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \ 1416 else \ 1417 a = pcre2_substring_length_bynumber_32(G(b,32),c,d) 1418 1419 #define 
PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 1420 if (test_mode == PCRE8_MODE) \ 1421 a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \ 1422 else if (test_mode == PCRE16_MODE) \ 1423 a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \ 1424 else \ 1425 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d) 1426 1427 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 1428 if (test_mode == PCRE8_MODE) \ 1429 pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \ 1430 else if (test_mode == PCRE16_MODE) \ 1431 pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \ 1432 else \ 1433 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a) 1434 1435 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 1436 if (test_mode == PCRE8_MODE) \ 1437 a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \ 1438 else if (test_mode == PCRE16_MODE) \ 1439 a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \ 1440 else \ 1441 a = pcre2_substring_number_from_name_32(G(b,32),G(c,32)) 1442 1443 #define PTR(x) ( \ 1444 (test_mode == PCRE8_MODE)? (void *)G(x,8) : \ 1445 (test_mode == PCRE16_MODE)? (void *)G(x,16) : \ 1446 (void *)G(x,32)) 1447 1448 #define SETFLD(x,y,z) \ 1449 if (test_mode == PCRE8_MODE) G(x,8)->y = z; \ 1450 else if (test_mode == PCRE16_MODE) G(x,16)->y = z; \ 1451 else G(x,32)->y = z 1452 1453 #define SETFLDVEC(x,y,v,z) \ 1454 if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \ 1455 else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \ 1456 else G(x,32)->y[v] = z 1457 1458 #define SETOP(x,y,z) \ 1459 if (test_mode == PCRE8_MODE) G(x,8) z y; \ 1460 else if (test_mode == PCRE16_MODE) G(x,16) z y; \ 1461 else G(x,32) z y 1462 1463 #define SETCASTPTR(x,y) \ 1464 if (test_mode == PCRE8_MODE) \ 1465 G(x,8) = (uint8_t *)(y); \ 1466 else if (test_mode == PCRE16_MODE) \ 1467 G(x,16) = (uint16_t *)(y); \ 1468 else \ 1469 G(x,32) = (uint32_t *)(y) 1470 1471 #define STRLEN(p) ((test_mode == PCRE8_MODE)? ((int)strlen((char *)p)) : \ 1472 (test_mode == PCRE16_MODE)? 
((int)strlen16((PCRE2_SPTR16)p)) : \ 1473 ((int)strlen32((PCRE2_SPTR32)p))) 1474 1475 #define SUB1(a,b) \ 1476 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \ 1477 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \ 1478 else G(a,32)(G(b,32)) 1479 1480 #define SUB2(a,b,c) \ 1481 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \ 1482 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \ 1483 else G(a,32)(G(b,32),G(c,32)) 1484 1485 #define TEST(x,r,y) ( \ 1486 (test_mode == PCRE8_MODE && G(x,8) r (y)) || \ 1487 (test_mode == PCRE16_MODE && G(x,16) r (y)) || \ 1488 (test_mode == PCRE32_MODE && G(x,32) r (y))) 1489 1490 #define TESTFLD(x,f,r,y) ( \ 1491 (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \ 1492 (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \ 1493 (test_mode == PCRE32_MODE && G(x,32)->f r (y))) 1494 1495 1496 /* ----- Two out of three modes are supported ----- */ 1497 1498 #else 1499 1500 /* We can use some macro trickery to make a single set of definitions work in 1501 the three different cases. */ 1502 1503 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */ 1504 1505 #if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16) 1506 #define BITONE 32 1507 #define BITTWO 16 1508 1509 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */ 1510 1511 #elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8) 1512 #define BITONE 32 1513 #define BITTWO 8 1514 1515 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */ 1516 1517 #else 1518 #define BITONE 16 1519 #define BITTWO 8 1520 #endif 1521 1522 1523 /* ----- Common macros for two-mode cases ----- */ 1524 1525 #define BYTEONE (BITONE/8) 1526 #define BYTETWO (BITTWO/8) 1527 1528 #define CASTFLD(t,a,b) \ 1529 ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \ 1530 (t)(G(a,BITTWO)->b)) 1531 1532 #define CASTVAR(t,x) ( \ 1533 (test_mode == G(G(PCRE,BITONE),_MODE))? 
\ 1534 (t)G(x,BITONE) : (t)G(x,BITTWO)) 1535 1536 #define CODE_UNIT(a,b) ( \ 1537 (test_mode == G(G(PCRE,BITONE),_MODE))? \ 1538 (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \ 1539 (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b])) 1540 1541 #define CONCTXCPY(a,b) \ 1542 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1543 memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \ 1544 else \ 1545 memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO))) 1546 1547 #define CONVERT_COPY(a,b,c) \ 1548 (test_mode == G(G(PCRE,BITONE),_MODE))? \ 1549 memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \ 1550 memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO) 1551 1552 #define DATCTXCPY(a,b) \ 1553 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1554 memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \ 1555 else \ 1556 memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO))) 1557 1558 #define FLD(a,b) \ 1559 ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b) 1560 1561 #define PATCTXCPY(a,b) \ 1562 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1563 memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \ 1564 else \ 1565 memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO))) 1566 1567 #define PCHARS(lv, p, offset, len, utf, f) \ 1568 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1569 lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ 1570 else \ 1571 lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) 1572 1573 #define PCHARSV(p, offset, len, utf, f) \ 1574 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1575 (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ 1576 else \ 1577 (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) 1578 1579 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 1580 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1581 a = G(pcre2_callout_enumerate,BITONE)(G(compiled_code,BITONE), \ 1582 (int 
(*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \ 1583 else \ 1584 a = G(pcre2_callout_enumerate,BITTWO)(G(compiled_code,BITTWO), \ 1585 (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c) 1586 1587 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \ 1588 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1589 G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \ 1590 else \ 1591 G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b) 1592 1593 #define PCRE2_CODE_COPY_TO_VOID(a,b) \ 1594 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1595 a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \ 1596 else \ 1597 a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO)) 1598 1599 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \ 1600 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1601 a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \ 1602 else \ 1603 a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO)) 1604 1605 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 1606 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1607 G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \ 1608 else \ 1609 G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g) 1610 1611 #define PCRE2_CONVERTED_PATTERN_FREE(a) \ 1612 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1613 G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \ 1614 else \ 1615 G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a) 1616 1617 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 1618 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1619 a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 1620 G(g,BITONE),h,i,j); \ 1621 else \ 1622 a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1623 G(g,BITTWO),h,i,j) 1624 1625 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 1626 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1627 r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \ 1628 else \ 1629 r = 
G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO)) 1630 1631 #define PCRE2_GET_OVECTOR_COUNT(a,b) \ 1632 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1633 a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \ 1634 else \ 1635 a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO)) 1636 1637 #define PCRE2_GET_STARTCHAR(a,b) \ 1638 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1639 a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \ 1640 else \ 1641 a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO)) 1642 1643 #define PCRE2_JIT_COMPILE(r,a,b) \ 1644 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1645 r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \ 1646 else \ 1647 r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b) 1648 1649 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ 1650 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1651 G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \ 1652 else \ 1653 G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO)) 1654 1655 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1656 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1657 a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 1658 G(g,BITONE),h); \ 1659 else \ 1660 a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1661 G(g,BITTWO),h) 1662 1663 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1664 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1665 a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \ 1666 else \ 1667 a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d); \ 1668 1669 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1670 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1671 G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \ 1672 else \ 1673 G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c); 1674 1675 #define PCRE2_JIT_STACK_FREE(a) \ 1676 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1677 G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \ 
1678 else \ 1679 G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a); 1680 1681 #define PCRE2_MAKETABLES(a) \ 1682 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1683 a = G(pcre2_maketables_,BITONE)(NULL); \ 1684 else \ 1685 a = G(pcre2_maketables_,BITTWO)(NULL) 1686 1687 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1688 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1689 a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 1690 G(g,BITONE),h); \ 1691 else \ 1692 a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1693 G(g,BITTWO),h) 1694 1695 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \ 1696 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1697 G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,c); \ 1698 else \ 1699 G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c) 1700 1701 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1702 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1703 G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \ 1704 else \ 1705 G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c) 1706 1707 #define PCRE2_MATCH_DATA_FREE(a) \ 1708 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1709 G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \ 1710 else \ 1711 G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO)) 1712 1713 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \ 1714 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1715 a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \ 1716 else \ 1717 a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO)) 1718 1719 #define PCRE2_PATTERN_INFO(a,b,c,d) \ 1720 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1721 a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \ 1722 else \ 1723 a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d) 1724 1725 #define PCRE2_PRINTINT(a) \ 1726 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1727 
G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \ 1728 else \ 1729 G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a) 1730 1731 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 1732 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1733 r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \ 1734 else \ 1735 r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO)) 1736 1737 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 1738 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1739 r = G(pcre2_serialize_encode_,BITONE)((G(const pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \ 1740 else \ 1741 r = G(pcre2_serialize_encode_,BITTWO)((G(const pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO)) 1742 1743 #define PCRE2_SERIALIZE_FREE(a) \ 1744 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1745 G(pcre2_serialize_free_,BITONE)(a); \ 1746 else \ 1747 G(pcre2_serialize_free_,BITTWO)(a) 1748 1749 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 1750 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1751 r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \ 1752 else \ 1753 r = G(pcre2_serialize_get_number_of_codes_,BITTWO)(a) 1754 1755 #define PCRE2_SET_CALLOUT(a,b,c) \ 1756 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1757 G(pcre2_set_callout_,BITONE)(G(a,BITONE), \ 1758 (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \ 1759 else \ 1760 G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \ 1761 (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c); 1762 1763 #define PCRE2_SET_CHARACTER_TABLES(a,b) \ 1764 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1765 G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \ 1766 else \ 1767 G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b) 1768 1769 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 1770 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1771 G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \ 1772 else \ 1773 G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c) 
1774 1775 #define PCRE2_SET_DEPTH_LIMIT(a,b) \ 1776 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1777 G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \ 1778 else \ 1779 G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b) 1780 1781 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) \ 1782 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1783 r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \ 1784 else \ 1785 r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b) 1786 1787 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \ 1788 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1789 r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \ 1790 else \ 1791 r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b) 1792 1793 #define PCRE2_SET_HEAP_LIMIT(a,b) \ 1794 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1795 G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \ 1796 else \ 1797 G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b) 1798 1799 #define PCRE2_SET_MATCH_LIMIT(a,b) \ 1800 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1801 G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \ 1802 else \ 1803 G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b) 1804 1805 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ 1806 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1807 G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \ 1808 else \ 1809 G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b) 1810 1811 #define PCRE2_SET_OFFSET_LIMIT(a,b) \ 1812 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1813 G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \ 1814 else \ 1815 G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b) 1816 1817 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ 1818 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1819 G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \ 1820 else \ 1821 G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b) 1822 1823 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 1824 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1825 a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 
1826 G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \ 1827 (G(PCRE2_UCHAR,BITONE) *)k,l); \ 1828 else \ 1829 a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1830 G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \ 1831 (G(PCRE2_UCHAR,BITTWO) *)k,l) 1832 1833 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 1834 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1835 a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ 1836 (G(PCRE2_UCHAR,BITONE) *)d,e); \ 1837 else \ 1838 a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ 1839 (G(PCRE2_UCHAR,BITTWO) *)d,e) 1840 1841 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 1842 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1843 a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\ 1844 (G(PCRE2_UCHAR,BITONE) *)d,e); \ 1845 else \ 1846 a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\ 1847 (G(PCRE2_UCHAR,BITTWO) *)d,e) 1848 1849 #define PCRE2_SUBSTRING_FREE(a) \ 1850 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1851 G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \ 1852 else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a) 1853 1854 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 1855 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1856 a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ 1857 (G(PCRE2_UCHAR,BITONE) **)d,e); \ 1858 else \ 1859 a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ 1860 (G(PCRE2_UCHAR,BITTWO) **)d,e) 1861 1862 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 1863 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1864 a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\ 1865 (G(PCRE2_UCHAR,BITONE) **)d,e); \ 1866 else \ 1867 a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\ 1868 (G(PCRE2_UCHAR,BITTWO) **)d,e) 1869 1870 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 1871 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1872 a = 
G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \ 1873 else \ 1874 a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d) 1875 1876 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 1877 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1878 a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \ 1879 else \ 1880 a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d) 1881 1882 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 1883 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1884 a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \ 1885 (G(PCRE2_UCHAR,BITONE) ***)c,d); \ 1886 else \ 1887 a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \ 1888 (G(PCRE2_UCHAR,BITTWO) ***)c,d) 1889 1890 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 1891 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1892 G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a); \ 1893 else \ 1894 G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a) 1895 1896 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 1897 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1898 a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \ 1899 else \ 1900 a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO)) 1901 1902 #define PTR(x) ( \ 1903 (test_mode == G(G(PCRE,BITONE),_MODE))? 
(void *)G(x,BITONE) : \ 1904 (void *)G(x,BITTWO)) 1905 1906 #define SETFLD(x,y,z) \ 1907 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \ 1908 else G(x,BITTWO)->y = z 1909 1910 #define SETFLDVEC(x,y,v,z) \ 1911 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \ 1912 else G(x,BITTWO)->y[v] = z 1913 1914 #define SETOP(x,y,z) \ 1915 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \ 1916 else G(x,BITTWO) z y 1917 1918 #define SETCASTPTR(x,y) \ 1919 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1920 G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \ 1921 else \ 1922 G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y) 1923 1924 #define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \ 1925 G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \ 1926 G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p)) 1927 1928 #define SUB1(a,b) \ 1929 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1930 G(a,BITONE)(G(b,BITONE)); \ 1931 else \ 1932 G(a,BITTWO)(G(b,BITTWO)) 1933 1934 #define SUB2(a,b,c) \ 1935 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1936 G(a,BITONE))(G(b,BITONE),G(c,BITONE)); \ 1937 else \ 1938 G(a,BITTWO))(G(b,BITTWO),G(c,BITTWO)) 1939 1940 #define TEST(x,r,y) ( \ 1941 (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \ 1942 (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y))) 1943 1944 #define TESTFLD(x,f,r,y) ( \ 1945 (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \ 1946 (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y))) 1947 1948 1949 #endif /* Two out of three modes */ 1950 1951 /* ----- End of cases where more than one mode is supported ----- */ 1952 1953 1954 /* ----- Only 8-bit mode is supported ----- */ 1955 1956 #elif defined SUPPORT_PCRE2_8 1957 #define CASTFLD(t,a,b) (t)(G(a,8)->b) 1958 #define CASTVAR(t,x) (t)G(x,8) 1959 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b]) 1960 #define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)) 1961 #define CONVERT_COPY(a,b,c) 
memcpy(G(a,8),(char *)b, c) 1962 #define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)) 1963 #define FLD(a,b) G(a,8)->b 1964 #define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)) 1965 #define PCHARS(lv, p, offset, len, utf, f) \ 1966 lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 1967 #define PCHARSV(p, offset, len, utf, f) \ 1968 (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 1969 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 1970 a = pcre2_callout_enumerate_8(compiled_code8, \ 1971 (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c) 1972 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b) 1973 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8)) 1974 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8)) 1975 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 1976 G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g) 1977 #define PCRE2_CONVERTED_PATTERN_FREE(a) \ 1978 pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a) 1979 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 1980 a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j) 1981 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 1982 r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)) 1983 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8)) 1984 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8)) 1985 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b) 1986 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8)) 1987 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1988 a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) 1989 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1990 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); 1991 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1992 pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); 1993 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 
*)a); 1994 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL) 1995 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1996 a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) 1997 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c) 1998 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1999 G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c) 2000 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8)) 2001 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)) 2002 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d) 2003 #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a) 2004 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 2005 r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)) 2006 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 2007 r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)) 2008 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a) 2009 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 2010 r = pcre2_serialize_get_number_of_codes_8(a) 2011 #define PCRE2_SET_CALLOUT(a,b,c) \ 2012 pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c) 2013 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b) 2014 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 2015 pcre2_set_compile_recursion_guard_8(G(a,8),b,c) 2016 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b) 2017 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b) 2018 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b) 2019 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b) 2020 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) 2021 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b) 2022 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b) 2023 
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) 2024 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 2025 a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ 2026 (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l) 2027 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 2028 a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e) 2029 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 2030 a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e) 2031 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a) 2032 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 2033 a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e) 2034 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 2035 a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e) 2036 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 2037 a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d) 2038 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 2039 a = pcre2_substring_length_bynumber_8(G(b,8),c,d) 2040 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 2041 a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d) 2042 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 2043 pcre2_substring_list_free_8((PCRE2_SPTR8 *)a) 2044 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 2045 a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); 2046 #define PTR(x) (void *)G(x,8) 2047 #define SETFLD(x,y,z) G(x,8)->y = z 2048 #define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z 2049 #define SETOP(x,y,z) G(x,8) z y 2050 #define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y) 2051 #define STRLEN(p) (int)strlen((char *)p) 2052 #define SUB1(a,b) G(a,8)(G(b,8)) 2053 #define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8)) 2054 #define TEST(x,r,y) (G(x,8) r (y)) 2055 #define TESTFLD(x,f,r,y) (G(x,8)->f r (y)) 2056 2057 2058 /* ----- Only 16-bit mode is supported ----- */ 2059 2060 #elif defined SUPPORT_PCRE2_16 2061 #define CASTFLD(t,a,b) (t)(G(a,16)->b) 2062 #define CASTVAR(t,x) 
(t)G(x,16) 2063 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b]) 2064 #define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)) 2065 #define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2) 2066 #define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)) 2067 #define FLD(a,b) G(a,16)->b 2068 #define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)) 2069 #define PCHARS(lv, p, offset, len, utf, f) \ 2070 lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) 2071 #define PCHARSV(p, offset, len, utf, f) \ 2072 (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) 2073 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 2074 a = pcre2_callout_enumerate_16(compiled_code16, \ 2075 (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c) 2076 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b) 2077 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16)) 2078 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16)) 2079 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 2080 G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g) 2081 #define PCRE2_CONVERTED_PATTERN_FREE(a) \ 2082 pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a) 2083 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 2084 a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j) 2085 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 2086 r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)) 2087 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16)) 2088 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16)) 2089 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b) 2090 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16)) 2091 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 2092 a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) 2093 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 2094 a = 
(PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); 2095 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 2096 pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); 2097 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); 2098 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL) 2099 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 2100 a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) 2101 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c) 2102 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 2103 G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c) 2104 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16)) 2105 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)) 2106 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d) 2107 #define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a) 2108 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 2109 r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)) 2110 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 2111 r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)) 2112 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a) 2113 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 2114 r = pcre2_serialize_get_number_of_codes_16(a) 2115 #define PCRE2_SET_CALLOUT(a,b,c) \ 2116 pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); 2117 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b) 2118 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 2119 pcre2_set_compile_recursion_guard_16(G(a,16),b,c) 2120 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b) 2121 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b) 2122 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b) 2123 #define 
PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b) 2124 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) 2125 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b) 2126 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b) 2127 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) 2128 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 2129 a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \ 2130 (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l) 2131 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 2132 a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e) 2133 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 2134 a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e) 2135 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a) 2136 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 2137 a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e) 2138 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 2139 a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e) 2140 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 2141 a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d) 2142 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 2143 a = pcre2_substring_length_bynumber_16(G(b,16),c,d) 2144 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 2145 a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d) 2146 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 2147 pcre2_substring_list_free_16((PCRE2_SPTR16 *)a) 2148 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 2149 a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); 2150 #define PTR(x) (void *)G(x,16) 2151 #define SETFLD(x,y,z) G(x,16)->y = z 2152 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z 2153 #define SETOP(x,y,z) G(x,16) z y 2154 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y) 2155 #define STRLEN(p) 
(int)strlen16((PCRE2_SPTR16)p) 2156 #define SUB1(a,b) G(a,16)(G(b,16)) 2157 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16)) 2158 #define TEST(x,r,y) (G(x,16) r (y)) 2159 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y)) 2160 2161 2162 /* ----- Only 32-bit mode is supported ----- */ 2163 2164 #elif defined SUPPORT_PCRE2_32 2165 #define CASTFLD(t,a,b) (t)(G(a,32)->b) 2166 #define CASTVAR(t,x) (t)G(x,32) 2167 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b]) 2168 #define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32)) 2169 #define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4) 2170 #define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) 2171 #define FLD(a,b) G(a,32)->b 2172 #define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) 2173 #define PCHARS(lv, p, offset, len, utf, f) \ 2174 lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) 2175 #define PCHARSV(p, offset, len, utf, f) \ 2176 (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) 2177 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 2178 a = pcre2_callout_enumerate_32(compiled_code32, \ 2179 (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) 2180 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b) 2181 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32)) 2182 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32)) 2183 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 2184 G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g) 2185 #define PCRE2_CONVERTED_PATTERN_FREE(a) \ 2186 pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a) 2187 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 2188 a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) 2189 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 2190 r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4)) 2191 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32)) 2192 #define 
PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32)) 2193 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b) 2194 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32)) 2195 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 2196 a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 2197 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 2198 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); 2199 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 2200 pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); 2201 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); 2202 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL) 2203 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 2204 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 2205 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c) 2206 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 2207 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) 2208 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32)) 2209 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32)) 2210 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d) 2211 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a) 2212 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 2213 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)) 2214 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 2215 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)) 2216 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a) 2217 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 2218 r = pcre2_serialize_get_number_of_codes_32(a) 2219 #define PCRE2_SET_CALLOUT(a,b,c) \ 2220 pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c); 2221 #define PCRE2_SET_CHARACTER_TABLES(a,b) 
pcre2_set_character_tables_32(G(a,32),b) 2222 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 2223 pcre2_set_compile_recursion_guard_32(G(a,32),b,c) 2224 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b) 2225 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b) 2226 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b) 2227 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b) 2228 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) 2229 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b) 2230 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b) 2231 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) 2232 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 2233 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \ 2234 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l) 2235 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 2236 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e) 2237 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 2238 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e); 2239 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a) 2240 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 2241 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e) 2242 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 2243 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e) 2244 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 2245 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d) 2246 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 2247 a = pcre2_substring_length_bynumber_32(G(b,32),c,d) 2248 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 2249 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d) 2250 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 2251 
pcre2_substring_list_free_32((PCRE2_SPTR32 *)a) 2252 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 2253 a = pcre2_substring_number_from_name_32(G(b,32),G(c,32)); 2254 #define PTR(x) (void *)G(x,32) 2255 #define SETFLD(x,y,z) G(x,32)->y = z 2256 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z 2257 #define SETOP(x,y,z) G(x,32) z y 2258 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y) 2259 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p) 2260 #define SUB1(a,b) G(a,32)(G(b,32)) 2261 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32)) 2262 #define TEST(x,r,y) (G(x,32) r (y)) 2263 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y)) 2264 2265 #endif 2266 2267 /* ----- End of mode-specific function call macros ----- */ 2268 2269 2270 2271 2272 /************************************************* 2273 * Alternate character tables * 2274 *************************************************/ 2275 2276 /* By default, the "tables" pointer in the compile context when calling 2277 pcre2_compile() is not set (= NULL), thereby using the default tables of the 2278 library. However, the tables modifier can be used to select alternate sets of 2279 tables, for different kinds of testing. Note that the locale modifier also 2280 adjusts the tables. */ 2281 2282 /* This is the set of tables distributed as default with PCRE2. It recognizes 2283 only ASCII characters. */ 2284 2285 static const uint8_t tables1[] = { 2286 2287 /* This table is a lower casing table. 
*/ 2288 2289 0, 1, 2, 3, 4, 5, 6, 7, 2290 8, 9, 10, 11, 12, 13, 14, 15, 2291 16, 17, 18, 19, 20, 21, 22, 23, 2292 24, 25, 26, 27, 28, 29, 30, 31, 2293 32, 33, 34, 35, 36, 37, 38, 39, 2294 40, 41, 42, 43, 44, 45, 46, 47, 2295 48, 49, 50, 51, 52, 53, 54, 55, 2296 56, 57, 58, 59, 60, 61, 62, 63, 2297 64, 97, 98, 99,100,101,102,103, 2298 104,105,106,107,108,109,110,111, 2299 112,113,114,115,116,117,118,119, 2300 120,121,122, 91, 92, 93, 94, 95, 2301 96, 97, 98, 99,100,101,102,103, 2302 104,105,106,107,108,109,110,111, 2303 112,113,114,115,116,117,118,119, 2304 120,121,122,123,124,125,126,127, 2305 128,129,130,131,132,133,134,135, 2306 136,137,138,139,140,141,142,143, 2307 144,145,146,147,148,149,150,151, 2308 152,153,154,155,156,157,158,159, 2309 160,161,162,163,164,165,166,167, 2310 168,169,170,171,172,173,174,175, 2311 176,177,178,179,180,181,182,183, 2312 184,185,186,187,188,189,190,191, 2313 192,193,194,195,196,197,198,199, 2314 200,201,202,203,204,205,206,207, 2315 208,209,210,211,212,213,214,215, 2316 216,217,218,219,220,221,222,223, 2317 224,225,226,227,228,229,230,231, 2318 232,233,234,235,236,237,238,239, 2319 240,241,242,243,244,245,246,247, 2320 248,249,250,251,252,253,254,255, 2321 2322 /* This table is a case flipping table. 
*/ 2323 2324 0, 1, 2, 3, 4, 5, 6, 7, 2325 8, 9, 10, 11, 12, 13, 14, 15, 2326 16, 17, 18, 19, 20, 21, 22, 23, 2327 24, 25, 26, 27, 28, 29, 30, 31, 2328 32, 33, 34, 35, 36, 37, 38, 39, 2329 40, 41, 42, 43, 44, 45, 46, 47, 2330 48, 49, 50, 51, 52, 53, 54, 55, 2331 56, 57, 58, 59, 60, 61, 62, 63, 2332 64, 97, 98, 99,100,101,102,103, 2333 104,105,106,107,108,109,110,111, 2334 112,113,114,115,116,117,118,119, 2335 120,121,122, 91, 92, 93, 94, 95, 2336 96, 65, 66, 67, 68, 69, 70, 71, 2337 72, 73, 74, 75, 76, 77, 78, 79, 2338 80, 81, 82, 83, 84, 85, 86, 87, 2339 88, 89, 90,123,124,125,126,127, 2340 128,129,130,131,132,133,134,135, 2341 136,137,138,139,140,141,142,143, 2342 144,145,146,147,148,149,150,151, 2343 152,153,154,155,156,157,158,159, 2344 160,161,162,163,164,165,166,167, 2345 168,169,170,171,172,173,174,175, 2346 176,177,178,179,180,181,182,183, 2347 184,185,186,187,188,189,190,191, 2348 192,193,194,195,196,197,198,199, 2349 200,201,202,203,204,205,206,207, 2350 208,209,210,211,212,213,214,215, 2351 216,217,218,219,220,221,222,223, 2352 224,225,226,227,228,229,230,231, 2353 232,233,234,235,236,237,238,239, 2354 240,241,242,243,244,245,246,247, 2355 248,249,250,251,252,253,254,255, 2356 2357 /* This table contains bit maps for various character classes. Each map is 32 2358 bytes long and the bits run from the least significant end of each byte. The 2359 classes that have their own maps are: space, xdigit, digit, upper, lower, word, 2360 graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ 2361 2362 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 2363 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2364 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2365 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2366 2367 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 2368 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, 2369 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2370 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2371 2372 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 2373 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2374 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2375 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2376 2377 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2378 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, 2379 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2380 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2381 2382 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2383 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, 2384 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2385 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2386 2387 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 2388 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, 2389 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2390 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2391 2392 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, 2393 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 2394 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2395 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2396 2397 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, 2398 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 2399 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2400 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2401 2402 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, 2403 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, 2404 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2405 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2406 2407 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, 2408 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, 2409 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2410 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2411 2412 /* This table identifies various classes of character by individual bits: 2413 0x01 white space 
character 2414 0x02 letter 2415 0x04 decimal digit 2416 0x08 hexadecimal digit 2417 0x10 alphanumeric or '_' 2418 0x80 regular expression metacharacter or binary zero 2419 */ 2420 2421 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 2422 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ 2423 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 2424 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 2425 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ 2426 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ 2427 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 2428 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ 2429 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 2430 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 2431 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ 2432 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ 2433 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 2434 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 2435 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ 2436 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ 2437 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 2438 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 2439 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 2440 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ 2441 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ 2442 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ 2443 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ 2444 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 2445 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ 2446 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ 2447 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ 2448 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ 2449 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ 2450 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ 2451 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 
240-247 */ 2452 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ 2453 2454 /* This is a set of tables that came originally from a Windows user. It seems 2455 to be at least an approximation of ISO 8859. In particular, there are 2456 characters greater than 128 that are marked as spaces, letters, etc. */ 2457 2458 static const uint8_t tables2[] = { 2459 0,1,2,3,4,5,6,7, 2460 8,9,10,11,12,13,14,15, 2461 16,17,18,19,20,21,22,23, 2462 24,25,26,27,28,29,30,31, 2463 32,33,34,35,36,37,38,39, 2464 40,41,42,43,44,45,46,47, 2465 48,49,50,51,52,53,54,55, 2466 56,57,58,59,60,61,62,63, 2467 64,97,98,99,100,101,102,103, 2468 104,105,106,107,108,109,110,111, 2469 112,113,114,115,116,117,118,119, 2470 120,121,122,91,92,93,94,95, 2471 96,97,98,99,100,101,102,103, 2472 104,105,106,107,108,109,110,111, 2473 112,113,114,115,116,117,118,119, 2474 120,121,122,123,124,125,126,127, 2475 128,129,130,131,132,133,134,135, 2476 136,137,138,139,140,141,142,143, 2477 144,145,146,147,148,149,150,151, 2478 152,153,154,155,156,157,158,159, 2479 160,161,162,163,164,165,166,167, 2480 168,169,170,171,172,173,174,175, 2481 176,177,178,179,180,181,182,183, 2482 184,185,186,187,188,189,190,191, 2483 224,225,226,227,228,229,230,231, 2484 232,233,234,235,236,237,238,239, 2485 240,241,242,243,244,245,246,215, 2486 248,249,250,251,252,253,254,223, 2487 224,225,226,227,228,229,230,231, 2488 232,233,234,235,236,237,238,239, 2489 240,241,242,243,244,245,246,247, 2490 248,249,250,251,252,253,254,255, 2491 0,1,2,3,4,5,6,7, 2492 8,9,10,11,12,13,14,15, 2493 16,17,18,19,20,21,22,23, 2494 24,25,26,27,28,29,30,31, 2495 32,33,34,35,36,37,38,39, 2496 40,41,42,43,44,45,46,47, 2497 48,49,50,51,52,53,54,55, 2498 56,57,58,59,60,61,62,63, 2499 64,97,98,99,100,101,102,103, 2500 104,105,106,107,108,109,110,111, 2501 112,113,114,115,116,117,118,119, 2502 120,121,122,91,92,93,94,95, 2503 96,65,66,67,68,69,70,71, 2504 72,73,74,75,76,77,78,79, 2505 80,81,82,83,84,85,86,87, 2506 88,89,90,123,124,125,126,127, 2507 
128,129,130,131,132,133,134,135, 2508 136,137,138,139,140,141,142,143, 2509 144,145,146,147,148,149,150,151, 2510 152,153,154,155,156,157,158,159, 2511 160,161,162,163,164,165,166,167, 2512 168,169,170,171,172,173,174,175, 2513 176,177,178,179,180,181,182,183, 2514 184,185,186,187,188,189,190,191, 2515 224,225,226,227,228,229,230,231, 2516 232,233,234,235,236,237,238,239, 2517 240,241,242,243,244,245,246,215, 2518 248,249,250,251,252,253,254,223, 2519 192,193,194,195,196,197,198,199, 2520 200,201,202,203,204,205,206,207, 2521 208,209,210,211,212,213,214,247, 2522 216,217,218,219,220,221,222,255, 2523 0,62,0,0,1,0,0,0, 2524 0,0,0,0,0,0,0,0, 2525 32,0,0,0,1,0,0,0, 2526 0,0,0,0,0,0,0,0, 2527 0,0,0,0,0,0,255,3, 2528 126,0,0,0,126,0,0,0, 2529 0,0,0,0,0,0,0,0, 2530 0,0,0,0,0,0,0,0, 2531 0,0,0,0,0,0,255,3, 2532 0,0,0,0,0,0,0,0, 2533 0,0,0,0,0,0,12,2, 2534 0,0,0,0,0,0,0,0, 2535 0,0,0,0,0,0,0,0, 2536 254,255,255,7,0,0,0,0, 2537 0,0,0,0,0,0,0,0, 2538 255,255,127,127,0,0,0,0, 2539 0,0,0,0,0,0,0,0, 2540 0,0,0,0,254,255,255,7, 2541 0,0,0,0,0,4,32,4, 2542 0,0,0,128,255,255,127,255, 2543 0,0,0,0,0,0,255,3, 2544 254,255,255,135,254,255,255,7, 2545 0,0,0,0,0,4,44,6, 2546 255,255,127,255,255,255,127,255, 2547 0,0,0,0,254,255,255,255, 2548 255,255,255,255,255,255,255,127, 2549 0,0,0,0,254,255,255,255, 2550 255,255,255,255,255,255,255,255, 2551 0,2,0,0,255,255,255,255, 2552 255,255,255,255,255,255,255,127, 2553 0,0,0,0,255,255,255,255, 2554 255,255,255,255,255,255,255,255, 2555 0,0,0,0,254,255,0,252, 2556 1,0,0,248,1,0,0,120, 2557 0,0,0,0,254,255,255,255, 2558 0,0,128,0,0,0,128,0, 2559 255,255,255,255,0,0,0,0, 2560 0,0,0,0,0,0,0,128, 2561 255,255,255,255,0,0,0,0, 2562 0,0,0,0,0,0,0,0, 2563 128,0,0,0,0,0,0,0, 2564 0,1,1,0,1,1,0,0, 2565 0,0,0,0,0,0,0,0, 2566 0,0,0,0,0,0,0,0, 2567 1,0,0,0,128,0,0,0, 2568 128,128,128,128,0,0,128,0, 2569 28,28,28,28,28,28,28,28, 2570 28,28,0,0,0,0,0,128, 2571 0,26,26,26,26,26,26,18, 2572 18,18,18,18,18,18,18,18, 2573 18,18,18,18,18,18,18,18, 2574 
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
*    Emulated memmove() for systems without it   *
*************************************************/

/* Fallback for environments that lack memmove(). When bcopy() is available it
does the work; otherwise the bytes are copied by hand, choosing the copy
direction from the relative position of the two pointers so that overlapping
regions are handled correctly.

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t k;

if (to > from)
  {
  /* Destination is above the source: copy from the top downwards so that
  bytes are read before they can be overwritten. */
  for (k = n; k > 0; k--) to[k-1] = from[k-1];
  }
else
  {
  /* Destination is at or below the source: a plain ascending copy is safe. */
  for (k = 0; k < n; k++) to[k] = from[k];
  }
#endif   /* not HAVE_BCOPY */
return d;
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
They may no longer be around, but just in case, we can try to 2645 provide the same facility by this simple alternative function. */ 2646 2647 extern int sys_nerr; 2648 extern char *sys_errlist[]; 2649 2650 char * 2651 strerror(int n) 2652 { 2653 if (n < 0 || n >= sys_nerr) return "unknown error number"; 2654 return sys_errlist[n]; 2655 } 2656 #endif /* HAVE_STRERROR */ 2657 2658 2659 2660 /************************************************* 2661 * Local memory functions * 2662 *************************************************/ 2663 2664 /* Alternative memory functions, to test functionality. */ 2665 2666 static void *my_malloc(PCRE2_SIZE size, void *data) 2667 { 2668 void *block = malloc(size); 2669 (void)data; 2670 if (show_memory) 2671 { 2672 if (block == NULL) 2673 { 2674 fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size); 2675 } 2676 else 2677 { 2678 fprintf(outfile, "malloc %5" SIZ_FORM, SIZ_CAST size); 2679 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES 2680 fprintf(outfile, " %p", block); /* Not portable */ 2681 #endif 2682 if (malloclistptr < MALLOCLISTSIZE) 2683 { 2684 malloclist[malloclistptr] = block; 2685 malloclistlength[malloclistptr++] = size; 2686 } 2687 else 2688 fprintf(outfile, " (not remembered)"); 2689 fprintf(outfile, "\n"); 2690 } 2691 } 2692 return block; 2693 } 2694 2695 static void my_free(void *block, void *data) 2696 { 2697 (void)data; 2698 if (show_memory) 2699 { 2700 uint32_t i, j; 2701 BOOL found = FALSE; 2702 2703 fprintf(outfile, "free"); 2704 for (i = 0; i < malloclistptr; i++) 2705 { 2706 if (block == malloclist[i]) 2707 { 2708 fprintf(outfile, " %5" SIZ_FORM, SIZ_CAST malloclistlength[i]); 2709 malloclistptr--; 2710 for (j = i; j < malloclistptr; j++) 2711 { 2712 malloclist[j] = malloclist[j+1]; 2713 malloclistlength[j] = malloclistlength[j+1]; 2714 } 2715 found = TRUE; 2716 break; 2717 } 2718 } 2719 if (!found) fprintf(outfile, " unremembered block"); 2720 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES 2721 fprintf(outfile, " %p", 
block); /* Not portable */ 2722 #endif 2723 fprintf(outfile, "\n"); 2724 } 2725 free(block); 2726 } 2727 2728 2729 2730 /************************************************* 2731 * Callback function for stack guard * 2732 *************************************************/ 2733 2734 /* This is set up to be called from pcre2_compile() when the stackguard=n 2735 modifier sets a value greater than zero. The test we do is whether the 2736 parenthesis nesting depth is greater than the value set by the modifier. 2737 2738 Argument: the current parenthesis nesting depth 2739 Returns: non-zero to kill the compilation 2740 */ 2741 2742 static int 2743 stack_guard(uint32_t depth, void *user_data) 2744 { 2745 (void)user_data; 2746 return depth > pat_patctl.stackguard_test; 2747 } 2748 2749 2750 /************************************************* 2751 * JIT memory callback * 2752 *************************************************/ 2753 2754 static PCRE2_JIT_STACK* 2755 jit_callback(void *arg) 2756 { 2757 jit_was_used = TRUE; 2758 return (PCRE2_JIT_STACK *)arg; 2759 } 2760 2761 2762 /************************************************* 2763 * Convert UTF-8 character to code point * 2764 *************************************************/ 2765 2766 /* This function reads one or more bytes that represent a UTF-8 character, 2767 and returns the codepoint of that character. Note that the function supports 2768 the original UTF-8 definition of RFC 2279, allowing for values in the range 0 2769 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate 2770 codepoints greater than 0x10ffff which are useful for testing PCRE2's error 2771 checking, and also for generating 32-bit non-UTF data values above the UTF 2772 limit. 
2773 2774 Argument: 2775 utf8bytes a pointer to the byte vector 2776 vptr a pointer to an int to receive the value 2777 2778 Returns: > 0 => the number of bytes consumed 2779 -6 to 0 => malformed UTF-8 character at offset = (-return) 2780 */ 2781 2782 static int 2783 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr) 2784 { 2785 uint32_t c = *utf8bytes++; 2786 uint32_t d = c; 2787 int i, j, s; 2788 2789 for (i = -1; i < 6; i++) /* i is number of additional bytes */ 2790 { 2791 if ((d & 0x80) == 0) break; 2792 d <<= 1; 2793 } 2794 2795 if (i == -1) { *vptr = c; return 1; } /* ascii character */ 2796 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ 2797 2798 /* i now has a value in the range 1-5 */ 2799 2800 s = 6*i; 2801 d = (c & utf8_table3[i]) << s; 2802 2803 for (j = 0; j < i; j++) 2804 { 2805 c = *utf8bytes++; 2806 if ((c & 0xc0) != 0x80) return -(j+1); 2807 s -= 6; 2808 d |= (c & 0x3f) << s; 2809 } 2810 2811 /* Check that encoding was the correct unique one */ 2812 2813 for (j = 0; j < utf8_table1_size; j++) 2814 if (d <= (uint32_t)utf8_table1[j]) break; 2815 if (j != i) return -(i+1); 2816 2817 /* Valid value */ 2818 2819 *vptr = d; 2820 return i+1; 2821 } 2822 2823 2824 2825 /************************************************* 2826 * Print one character * 2827 *************************************************/ 2828 2829 /* Print a single character either literally, or as a hex escape, and count how 2830 many printed characters are used. 
2831 2832 Arguments: 2833 c the character 2834 utf TRUE in UTF mode 2835 f the FILE to print to, or NULL just to count characters 2836 2837 Returns: number of characters written 2838 */ 2839 2840 static int 2841 pchar(uint32_t c, BOOL utf, FILE *f) 2842 { 2843 int n = 0; 2844 char tempbuffer[16]; 2845 2846 if (PRINTOK(c)) 2847 { 2848 if (f != NULL) fprintf(f, "%c", c); 2849 return 1; 2850 } 2851 2852 if (c < 0x100) 2853 { 2854 if (utf) 2855 { 2856 if (f != NULL) fprintf(f, "\\x{%02x}", c); 2857 return 6; 2858 } 2859 else 2860 { 2861 if (f != NULL) fprintf(f, "\\x%02x", c); 2862 return 4; 2863 } 2864 } 2865 2866 if (f != NULL) n = fprintf(f, "\\x{%02x}", c); 2867 else n = sprintf(tempbuffer, "\\x{%02x}", c); 2868 2869 return n >= 0 ? n : 0; 2870 } 2871 2872 2873 2874 #ifdef SUPPORT_PCRE2_16 2875 /************************************************* 2876 * Find length of 0-terminated 16-bit string * 2877 *************************************************/ 2878 2879 static size_t strlen16(PCRE2_SPTR16 p) 2880 { 2881 PCRE2_SPTR16 pp = p; 2882 while (*pp != 0) pp++; 2883 return (int)(pp - p); 2884 } 2885 #endif /* SUPPORT_PCRE2_16 */ 2886 2887 2888 2889 #ifdef SUPPORT_PCRE2_32 2890 /************************************************* 2891 * Find length of 0-terminated 32-bit string * 2892 *************************************************/ 2893 2894 static size_t strlen32(PCRE2_SPTR32 p) 2895 { 2896 PCRE2_SPTR32 pp = p; 2897 while (*pp != 0) pp++; 2898 return (int)(pp - p); 2899 } 2900 #endif /* SUPPORT_PCRE2_32 */ 2901 2902 2903 #ifdef SUPPORT_PCRE2_8 2904 /************************************************* 2905 * Print 8-bit character string * 2906 *************************************************/ 2907 2908 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. 2909 For printing *MARK strings, a negative length is given. If handed a NULL file, 2910 just counts chars without printing (because pchar() does that). 
#ifdef SUPPORT_PCRE2_8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a string of bytes, one character at a time via pchar(). In UTF mode
each UTF-8 sequence is decoded first; a byte that does not start a valid
sequence, or whose sequence would run past the end of the string, is printed
on its own. For printing *MARK strings a negative length is given, and the
real length is then taken from the code unit just before the string. If handed
a NULL file, just counts chars without printing (because pchar() does that).

Arguments:
  p          the string
  length     number of bytes, or negative to read it from p[-1]
  utf        TRUE in UTF mode
  f          the FILE to print to, or NULL

Returns:     number of characters printed
*/

static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = (length < 0)? p[-1] : length;

while (remaining > 0)
  {
  uint32_t ch = 0;
  int used = utf? utf82ord(p, &ch) : 0;

  /* Not decodable, or the sequence would overrun: take a single byte. */

  if (used <= 0 || used > remaining)
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(ch, utf, f);
  }

return total;
}
#endif


#ifdef SUPPORT_PCRE2_16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Print a string of 16-bit code units, one character at a time via pchar().
In UTF mode a high surrogate that is followed, within the string, by a low
surrogate is combined with it into a single code point. For printing *MARK
strings a negative length is given, and the real length is then taken from the
code unit just before the string. If handed a NULL file, just counts chars
without printing.

Arguments:
  p          the string
  length     number of code units, or negative to read it from p[-1]
  utf        TRUE in UTF mode
  f          the FILE to print to, or NULL

Returns:     number of characters printed
*/

static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;

if (length < 0) length = p[-1];

for (; length > 0; length--)
  {
  uint32_t ch = *p++ & 0xffff;

  /* A pair needs at least one more unit after this one. */

  if (utf && length > 1 && ch >= 0xD800 && ch < 0xDC00)
    {
    int low = *p & 0xffff;
    if (low >= 0xDC00 && low <= 0xDFFF)
      {
      ch = ((ch & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      p++;
      length--;
      }
    }

  total += pchar(ch, utf, f);
  }

return total;
}
#endif  /* SUPPORT_PCRE2_16 */
#ifdef SUPPORT_PCRE2_32
/*************************************************
*           Print 32-bit character string        *
*************************************************/

/* Print a string of 32-bit code units, one per call to pchar(). For printing
*MARK strings a negative length is given, and the real length is then taken
from the code unit just before the string. If handed a NULL file, just counts
chars without printing.

Arguments:
  p          the string
  length     number of code units, or negative to read it from p[-1]
  utf        TRUE in UTF mode (passed through to pchar())
  f          the FILE to print to, or NULL

Returns:     number of characters printed
*/

static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = (length < 0)? (int)p[-1] : length;

for (; remaining > 0; remaining--) total += pchar(*p++, utf, f);
return total;
}
#endif  /* SUPPORT_PCRE2_32 */




#ifdef SUPPORT_PCRE2_8
/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff and encodes
it as a UTF-8 character in 1 to 6 bytes. Larger values are refused.

Arguments:
  cvalue     the character value
  utf8bytes  pointer to buffer for result - at least 6 bytes long

Returns:     number of bytes placed in the buffer, or -1 (nothing written)
             if cvalue is greater than 0x7fffffff
*/

static int
ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
{
int extra = 0;       /* number of continuation bytes needed */
int k;

if (cvalue > 0x7fffffffu) return -1;

while (extra < utf8_table1_size && cvalue > (uint32_t)utf8_table1[extra])
  extra++;

/* Fill in the continuation bytes from the last one backwards, six bits at a
time; whatever remains goes into the lead byte along with its marker bits. */

for (k = extra; k > 0; k--)
  {
  utf8bytes[k] = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }
utf8bytes[0] = utf8_table2[extra] | cvalue;

return extra + 1;
}
#endif  /* SUPPORT_PCRE2_8 */
#ifdef SUPPORT_PCRE2_16
/*************************************************
*           Convert string to 16-bit             *
*************************************************/

/* Convert a byte string to 16-bit code units, leaving the zero-terminated
result in pbuffer16.

In UTF mode the input is always interpreted as a string of UTF-8 bytes using
the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
code values from 0 to 0x7fffffff. However, values greater than the later UTF
limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
UTF-8 if the utf8_input modifier is set, but an error is generated for values
greater than 0xffff. Otherwise each byte becomes one 16-bit unit.

An input byte never gives rise to more than one 16-bit unit: a character that
needs two units (a surrogate pair) is one that occupies at least 4 bytes of
UTF-8. Two bytes of output per input byte, plus a terminator, is therefore
always enough. A minimum buffer size is imposed to save repeated re-sizing.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Arguments:
  p          points to a byte string
  utf        true in UTF mode
  lenptr     points to number of bytes in the string (excluding trailing zero)

Returns:     0 on success, with the length updated to the number of 16-bit
               data items used (excluding the trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode
             OR -3 if a value > 0xffff is encountered when not in UTF mode

The negative codes are returned through the unsigned PCRE2_SIZE return type;
NOTE(review): callers presumably store the result in a signed int - confirm.
*/

static PCRE2_SIZE
to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
PCRE2_SIZE remaining = *lenptr;
PCRE2_SIZE needed = 2*remaining + 2;
uint16_t *out;

/* Replace the buffer if it is too small; the old contents are not needed. */

if (pbuffer16_size < needed)
  {
  if (pbuffer16 != NULL) free(pbuffer16);
  pbuffer16_size = (needed < 4096)? 4096 : needed;
  pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
  if (pbuffer16 == NULL)
    {
    fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
      SIZ_CAST pbuffer16_size);
    exit(1);
    }
  }

out = pbuffer16;

if (utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0)
  {
  /* The input is UTF-8: decode a character at a time. */

  while (remaining > 0)
    {
    uint32_t c;
    int chlen = utf82ord(p, &c);

    if (chlen <= 0) return -1;
    if (!utf && c > 0xffff) return -3;
    if (c > 0x10ffff) return -2;

    p += chlen;
    remaining -= chlen;

    if (c >= 0x10000)
      {
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }
else
  {
  /* Plain bytes: widen each one. */

  for (; remaining > 0; remaining--) *out++ = *p++;
  }

*out = 0;
*lenptr = out - pbuffer16;
return 0;
}
#endif
#ifdef SUPPORT_PCRE2_32
/*************************************************
*           Convert string to 32-bit             *
*************************************************/

/* Convert a byte string to 32-bit code units, leaving the zero-terminated
result in pbuffer32.

In UTF mode the input is always interpreted as a string of UTF-8 bytes using
the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
code values from 0 to 0x7fffffff. However, values greater than the later UTF
limit of 0x10ffff cause an error.

In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
is set, and no limit is imposed. There is special interpretation of the 0xff
byte (which is illegal in UTF-8) in this case: it causes the top bit of the
next character to be set. This provides a way of generating 32-bit characters
greater than 0x7fffffff. Without utf8_input, each byte becomes one 32-bit unit.

The number of characters cannot exceed the number of bytes, so four bytes of
output per input byte, plus a terminator, is always enough. A minimum buffer
size is imposed to save repeated re-sizing.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-32 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Arguments:
  p          points to a byte string
  utf        true in UTF mode
  lenptr     points to number of bytes in the string (excluding trailing zero)

Returns:     0 on success, with the length updated to the number of 32-bit
               data items used (excluding the trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered in UTF mode

The negative codes are returned through the unsigned PCRE2_SIZE return type;
NOTE(review): callers presumably store the result in a signed int - confirm.
*/

static PCRE2_SIZE
to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
PCRE2_SIZE remaining = *lenptr;
PCRE2_SIZE needed = 4*remaining + 4;
uint32_t *out;

/* Replace the buffer if it is too small; the old contents are not needed. */

if (pbuffer32_size < needed)
  {
  if (pbuffer32 != NULL) free(pbuffer32);
  pbuffer32_size = (needed < 8192)? 8192 : needed;
  pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
  if (pbuffer32 == NULL)
    {
    fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
      SIZ_CAST pbuffer32_size);
    exit(1);
    }
  }

out = pbuffer32;

if (utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0)
  {
  /* The input is UTF-8: decode a character at a time. */

  while (remaining > 0)
    {
    uint32_t c;
    uint32_t topbit = 0;
    int chlen;

    /* Outside UTF mode, a 0xff byte that is not the last byte is a prefix
    asking for the top bit of the following character to be set. */

    if (!utf && remaining > 1 && *p == 0xff)
      {
      topbit = 0x80000000u;
      p++;
      remaining--;
      }

    chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (utf && c > 0x10ffff) return -2;

    p += chlen;
    remaining -= chlen;
    *out++ = c | topbit;
    }
  }
else
  {
  /* Plain bytes: widen each one. */

  for (; remaining > 0; remaining--) *out++ = *p++;
  }

*out = 0;
*lenptr = out - pbuffer32;
return 0;
}
#endif /* SUPPORT_PCRE2_32 */
3219 3220 Arguments: 3221 subject pointer to the string 3222 offset start offset 3223 count count to move back by 3224 utf TRUE if in UTF mode 3225 3226 Returns: a possibly changed offset 3227 */ 3228 3229 static PCRE2_SIZE 3230 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf) 3231 { 3232 if (!utf || test_mode == PCRE32_MODE) 3233 return (count >= offset)? 0 : (offset - count); 3234 3235 else if (test_mode == PCRE8_MODE) 3236 { 3237 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset; 3238 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--) 3239 { 3240 pp--; 3241 while ((*pp & 0xc0) == 0x80) pp--; 3242 } 3243 return pp - (PCRE2_SPTR8)subject; 3244 } 3245 3246 else /* 16-bit mode */ 3247 { 3248 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset; 3249 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--) 3250 { 3251 pp--; 3252 if ((*pp & 0xfc00) == 0xdc00) pp--; 3253 } 3254 return pp - (PCRE2_SPTR16)subject; 3255 } 3256 } 3257 3258 3259 3260 /************************************************* 3261 * Expand input buffers * 3262 *************************************************/ 3263 3264 /* This function doubles the size of the input buffer and the buffer for 3265 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to 3266 the new ones. 
3267 3268 Arguments: none 3269 Returns: nothing (aborts if malloc() fails) 3270 */ 3271 3272 static void 3273 expand_input_buffers(void) 3274 { 3275 int new_pbuffer8_size = 2*pbuffer8_size; 3276 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size); 3277 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size); 3278 3279 if (new_buffer == NULL || new_pbuffer8 == NULL) 3280 { 3281 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size); 3282 exit(1); 3283 } 3284 3285 memcpy(new_buffer, buffer, pbuffer8_size); 3286 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size); 3287 3288 pbuffer8_size = new_pbuffer8_size; 3289 3290 free(buffer); 3291 free(pbuffer8); 3292 3293 buffer = new_buffer; 3294 pbuffer8 = new_pbuffer8; 3295 } 3296 3297 3298 3299 /************************************************* 3300 * Read or extend an input line * 3301 *************************************************/ 3302 3303 /* Input lines are read into buffer, but both patterns and data lines can be 3304 continued over multiple input lines. In addition, if the buffer fills up, we 3305 want to automatically expand it so as to be able to handle extremely large 3306 lines that are needed for certain stress tests, although this is less likely 3307 now that there are repetition features for both patterns and data. When the 3308 input buffer is expanded, the other two buffers must also be expanded likewise, 3309 and the contents of pbuffer, which are a copy of the input for callouts, must 3310 be preserved (for when expansion happens for a data line). This is not the most 3311 optimal way of handling this, but hey, this is just a test program! 
3312 3313 Arguments: 3314 f the file to read 3315 start where in buffer to start (this *must* be within buffer) 3316 prompt for stdin or readline() 3317 3318 Returns: pointer to the start of new data 3319 could be a copy of start, or could be moved 3320 NULL if no data read and EOF reached 3321 */ 3322 3323 static uint8_t * 3324 extend_inputline(FILE *f, uint8_t *start, const char *prompt) 3325 { 3326 uint8_t *here = start; 3327 3328 for (;;) 3329 { 3330 size_t rlen = (size_t)(pbuffer8_size - (here - buffer)); 3331 3332 if (rlen > 1000) 3333 { 3334 size_t dlen; 3335 3336 /* If libreadline or libedit support is required, use readline() to read a 3337 line if the input is a terminal. Note that readline() removes the trailing 3338 newline, so we must put it back again, to be compatible with fgets(). */ 3339 3340 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 3341 if (INTERACTIVE(f)) 3342 { 3343 size_t len; 3344 char *s = readline(prompt); 3345 if (s == NULL) return (here == start)? NULL : start; 3346 len = strlen(s); 3347 if (len > 0) add_history(s); 3348 if (len > rlen - 1) len = rlen - 1; 3349 memcpy(here, s, len); 3350 here[len] = '\n'; 3351 here[len+1] = 0; 3352 free(s); 3353 } 3354 else 3355 #endif 3356 3357 /* Read the next line by normal means, prompting if the file is a tty. */ 3358 3359 { 3360 if (INTERACTIVE(f)) printf("%s", prompt); 3361 if (fgets((char *)here, rlen, f) == NULL) 3362 return (here == start)? NULL : start; 3363 } 3364 3365 dlen = strlen((char *)here); 3366 here += dlen; 3367 3368 /* Check for end of line reached. Take care not to read data from before 3369 start (dlen will be zero for a file starting with a binary zero). */ 3370 3371 if (here > start && here[-1] == '\n') return start; 3372 3373 /* If we have not read a newline when reading a file, we have either filled 3374 the buffer or reached the end of the file. We can detect the former by 3375 checking that the string fills the buffer, and the latter by feof(). 
If 3376 neither of these is true, it means we read a binary zero which has caused 3377 strlen() to give a short length. This is a hard error because pcre2test 3378 expects to work with C strings. */ 3379 3380 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f)) 3381 { 3382 fprintf(outfile, "** Binary zero encountered in input\n"); 3383 fprintf(outfile, "** pcre2test run abandoned\n"); 3384 exit(1); 3385 } 3386 } 3387 3388 else 3389 { 3390 size_t start_offset = start - buffer; 3391 size_t here_offset = here - buffer; 3392 expand_input_buffers(); 3393 start = buffer + start_offset; 3394 here = buffer + here_offset; 3395 } 3396 } 3397 3398 /* Control never gets here */ 3399 } 3400 3401 3402 3403 /************************************************* 3404 * Case-independent strncmp() function * 3405 *************************************************/ 3406 3407 /* 3408 Arguments: 3409 s first string 3410 t second string 3411 n number of characters to compare 3412 3413 Returns: < 0, = 0, or > 0, according to the comparison 3414 */ 3415 3416 static int 3417 strncmpic(const uint8_t *s, const uint8_t *t, int n) 3418 { 3419 while (n--) 3420 { 3421 int c = tolower(*s++) - tolower(*t++); 3422 if (c != 0) return c; 3423 } 3424 return 0; 3425 } 3426 3427 3428 3429 /************************************************* 3430 * Scan the main modifier list * 3431 *************************************************/ 3432 3433 /* This function searches the modifier list for a long modifier name. 3434 3435 Argument: 3436 p start of the name 3437 lenp length of the name 3438 3439 Returns: an index in the modifier list, or -1 on failure 3440 */ 3441 3442 static int 3443 scan_modifiers(const uint8_t *p, unsigned int len) 3444 { 3445 int bot = 0; 3446 int top = MODLISTCOUNT; 3447 3448 while (top > bot) 3449 { 3450 int mid = (bot + top)/2; 3451 unsigned int mlen = strlen(modlist[mid].name); 3452 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? 
len : mlen); 3453 if (c == 0) 3454 { 3455 if (len == mlen) return mid; 3456 c = (int)len - (int)mlen; 3457 } 3458 if (c > 0) bot = mid + 1; else top = mid; 3459 } 3460 3461 return -1; 3462 3463 } 3464 3465 3466 3467 /************************************************* 3468 * Check a modifer and find its field * 3469 *************************************************/ 3470 3471 /* This function is called when a modifier has been identified. We check that 3472 it is allowed here and find the field that is to be changed. 3473 3474 Arguments: 3475 m the modifier list entry 3476 ctx CTX_PAT => pattern context 3477 CTX_POPPAT => pattern context for popped pattern 3478 CTX_DEFPAT => default pattern context 3479 CTX_DAT => data context 3480 CTX_DEFDAT => default data context 3481 pctl point to pattern control block 3482 dctl point to data control block 3483 c a single character or 0 3484 3485 Returns: a field pointer or NULL 3486 */ 3487 3488 static void * 3489 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c) 3490 { 3491 void *field = NULL; 3492 PCRE2_SIZE offset = m->offset; 3493 3494 if (restrict_for_perl_test) switch(m->which) 3495 { 3496 case MOD_PNDP: 3497 case MOD_PATP: 3498 case MOD_PDP: 3499 break; 3500 3501 default: 3502 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n", 3503 m->name); 3504 return NULL; 3505 } 3506 3507 switch (m->which) 3508 { 3509 case MOD_CTC: /* Compile context modifier */ 3510 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context); 3511 else if (ctx == CTX_PAT) field = PTR(pat_context); 3512 break; 3513 3514 case MOD_CTM: /* Match context modifier */ 3515 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context); 3516 else if (ctx == CTX_DAT) field = PTR(dat_context); 3517 break; 3518 3519 case MOD_DAT: /* Data line modifier */ 3520 if (dctl != NULL) field = dctl; 3521 break; 3522 3523 case MOD_PAT: /* Pattern modifier */ 3524 case MOD_PATP: /* Allowed for Perl test */ 3525 if (pctl != NULL) field 
= pctl; 3526 break; 3527 3528 case MOD_PD: /* Pattern or data line modifier */ 3529 case MOD_PDP: /* Ditto, allowed for Perl test */ 3530 case MOD_PND: /* Ditto, but not default pattern */ 3531 case MOD_PNDP: /* Ditto, allowed for Perl test */ 3532 if (dctl != NULL) field = dctl; 3533 else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP || 3534 ctx != CTX_DEFPAT)) 3535 field = pctl; 3536 break; 3537 } 3538 3539 if (field == NULL) 3540 { 3541 if (c == 0) 3542 fprintf(outfile, "** '%s' is not valid here\n", m->name); 3543 else 3544 fprintf(outfile, "** /%c is not valid here\n", c); 3545 return NULL; 3546 } 3547 3548 return (char *)field + offset; 3549 } 3550 3551 3552 3553 /************************************************* 3554 * Decode a modifier list * 3555 *************************************************/ 3556 3557 /* A pointer to a control block is NULL when called in cases when that block is 3558 not relevant. They are never all relevant in one call. At least one of patctl 3559 and datctl is NULL. The second argument specifies which context to use for 3560 modifiers that apply to contexts. 3561 3562 Arguments: 3563 p point to modifier string 3564 ctx CTX_PAT => pattern context 3565 CTX_POPPAT => pattern context for popped pattern 3566 CTX_DEFPAT => default pattern context 3567 CTX_DAT => data context 3568 CTX_DEFDAT => default data context 3569 pctl point to pattern control block 3570 dctl point to data control block 3571 3572 Returns: TRUE if successful decode, FALSE otherwise 3573 */ 3574 3575 static BOOL 3576 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl) 3577 { 3578 uint8_t *ep, *pp; 3579 long li; 3580 unsigned long uli; 3581 BOOL first = TRUE; 3582 3583 for (;;) 3584 { 3585 void *field; 3586 modstruct *m; 3587 BOOL off = FALSE; 3588 unsigned int i, len; 3589 int index; 3590 char *endptr; 3591 3592 /* Skip white space and commas. 
*/ 3593 3594 while (isspace(*p) || *p == ',') p++; 3595 if (*p == 0) break; 3596 3597 /* Find the end of the item; lose trailing whitespace at end of line. */ 3598 3599 for (ep = p; *ep != 0 && *ep != ','; ep++); 3600 if (*ep == 0) 3601 { 3602 while (ep > p && isspace(ep[-1])) ep--; 3603 *ep = 0; 3604 } 3605 3606 /* Remember if the first character is '-'. */ 3607 3608 if (*p == '-') 3609 { 3610 off = TRUE; 3611 p++; 3612 } 3613 3614 /* Find the length of a full-length modifier name, and scan for it. */ 3615 3616 pp = p; 3617 while (pp < ep && *pp != '=') pp++; 3618 index = scan_modifiers(p, pp - p); 3619 3620 /* If the first modifier is unrecognized, try to interpret it as a sequence 3621 of single-character abbreviated modifiers. None of these modifiers have any 3622 associated data. They just set options or control bits. */ 3623 3624 if (index < 0) 3625 { 3626 uint32_t cc; 3627 uint8_t *mp = p; 3628 3629 if (!first) 3630 { 3631 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p); 3632 if (ep - p == 1) 3633 fprintf(outfile, "** Single-character modifiers must come first\n"); 3634 return FALSE; 3635 } 3636 3637 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p)) 3638 { 3639 for (i = 0; i < C1MODLISTCOUNT; i++) 3640 if (cc == c1modlist[i].onechar) break; 3641 3642 if (i >= C1MODLISTCOUNT) 3643 { 3644 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n", 3645 *p, (int)(ep-mp), mp); 3646 return FALSE; 3647 } 3648 3649 if (c1modlist[i].index >= 0) 3650 { 3651 index = c1modlist[i].index; 3652 } 3653 3654 else 3655 { 3656 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname), 3657 strlen(c1modlist[i].fullname)); 3658 if (index < 0) 3659 { 3660 fprintf(outfile, "** Internal error: single-character equivalent " 3661 "modifier '%s' not found\n", c1modlist[i].fullname); 3662 return FALSE; 3663 } 3664 c1modlist[i].index = index; /* Cache for next time */ 3665 } 3666 3667 field = check_modifier(modlist + index, ctx, pctl, dctl, *p); 
3668 if (field == NULL) return FALSE; 3669 3670 /* /x is a special case; a second appearance changes PCRE2_EXTENDED to 3671 PCRE2_EXTENDED_MORE. */ 3672 3673 if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0) 3674 { 3675 *((uint32_t *)field) &= ~PCRE2_EXTENDED; 3676 *((uint32_t *)field) |= PCRE2_EXTENDED_MORE; 3677 } 3678 else 3679 *((uint32_t *)field) |= modlist[index].value; 3680 } 3681 3682 continue; /* With tne next (fullname) modifier */ 3683 } 3684 3685 /* We have a match on a full-name modifier. Check for the existence of data 3686 when needed. */ 3687 3688 m = modlist + index; /* Save typing */ 3689 if (m->type != MOD_CTL && m->type != MOD_OPT && 3690 (m->type != MOD_IND || *pp == '=')) 3691 { 3692 if (*pp++ != '=') 3693 { 3694 fprintf(outfile, "** '=' expected after '%s'\n", m->name); 3695 return FALSE; 3696 } 3697 if (off) 3698 { 3699 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name); 3700 return FALSE; 3701 } 3702 } 3703 3704 /* These on/off types have no data. */ 3705 3706 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0) 3707 { 3708 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p); 3709 return FALSE; 3710 } 3711 3712 /* Set the data length for those types that have data. Then find the field 3713 that is to be set. If check_modifier() returns NULL, it has already output an 3714 error message. */ 3715 3716 len = ep - pp; 3717 field = check_modifier(m, ctx, pctl, dctl, 0); 3718 if (field == NULL) return FALSE; 3719 3720 /* Process according to data type. 
*/ 3721 3722 switch (m->type) 3723 { 3724 case MOD_CTL: 3725 case MOD_OPT: 3726 if (off) *((uint32_t *)field) &= ~m->value; 3727 else *((uint32_t *)field) |= m->value; 3728 break; 3729 3730 case MOD_BSR: 3731 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0) 3732 { 3733 #ifdef BSR_ANYCRLF 3734 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF; 3735 #else 3736 *((uint16_t *)field) = PCRE2_BSR_UNICODE; 3737 #endif 3738 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET; 3739 else dctl->control2 &= ~CTL2_BSR_SET; 3740 } 3741 else 3742 { 3743 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0) 3744 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF; 3745 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0) 3746 *((uint16_t *)field) = PCRE2_BSR_UNICODE; 3747 else goto INVALID_VALUE; 3748 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET; 3749 else dctl->control2 |= CTL2_BSR_SET; 3750 } 3751 pp = ep; 3752 break; 3753 3754 case MOD_CHR: /* A single character */ 3755 *((uint32_t *)field) = *pp++; 3756 break; 3757 3758 case MOD_CON: /* A convert type/options list */ 3759 for (;; pp++) 3760 { 3761 uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':'); 3762 len = ((colon != NULL && colon < ep)? 
colon:ep) - pp; 3763 for (i = 0; i < convertlistcount; i++) 3764 { 3765 if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0) 3766 { 3767 if (*((uint32_t *)field) == CONVERT_UNSET) 3768 *((uint32_t *)field) = convertlist[i].option; 3769 else 3770 *((uint32_t *)field) |= convertlist[i].option; 3771 break; 3772 } 3773 } 3774 if (i >= convertlistcount) goto INVALID_VALUE; 3775 pp += len; 3776 if (*pp != ':') break; 3777 } 3778 break; 3779 3780 case MOD_IN2: /* One or two unsigned integers */ 3781 if (!isdigit(*pp)) goto INVALID_VALUE; 3782 uli = strtoul((const char *)pp, &endptr, 10); 3783 if (U32OVERFLOW(uli)) goto INVALID_VALUE; 3784 ((uint32_t *)field)[0] = (uint32_t)uli; 3785 if (*endptr == ':') 3786 { 3787 uli = strtoul((const char *)endptr+1, &endptr, 10); 3788 if (U32OVERFLOW(uli)) goto INVALID_VALUE; 3789 ((uint32_t *)field)[1] = (uint32_t)uli; 3790 } 3791 else ((uint32_t *)field)[1] = 0; 3792 pp = (uint8_t *)endptr; 3793 break; 3794 3795 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or 3796 less than ULONG_MAX. So first test for overflowing the long int, and then 3797 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. 
*/ 3798 3799 case MOD_SIZ: /* PCRE2_SIZE value */ 3800 if (!isdigit(*pp)) goto INVALID_VALUE; 3801 uli = strtoul((const char *)pp, &endptr, 10); 3802 if (uli == ULONG_MAX) goto INVALID_VALUE; 3803 #if ULONG_MAX > PCRE2_SIZE_MAX 3804 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE; 3805 #endif 3806 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli; 3807 pp = (uint8_t *)endptr; 3808 break; 3809 3810 case MOD_IND: /* Unsigned integer with default */ 3811 if (len == 0) 3812 { 3813 *((uint32_t *)field) = (uint32_t)(m->value); 3814 break; 3815 } 3816 /* Fall through */ 3817 3818 case MOD_INT: /* Unsigned integer */ 3819 if (!isdigit(*pp)) goto INVALID_VALUE; 3820 uli = strtoul((const char *)pp, &endptr, 10); 3821 if (U32OVERFLOW(uli)) goto INVALID_VALUE; 3822 *((uint32_t *)field) = (uint32_t)uli; 3823 pp = (uint8_t *)endptr; 3824 break; 3825 3826 case MOD_INS: /* Signed integer */ 3827 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE; 3828 li = strtol((const char *)pp, &endptr, 10); 3829 if (S32OVERFLOW(li)) goto INVALID_VALUE; 3830 *((int32_t *)field) = (int32_t)li; 3831 pp = (uint8_t *)endptr; 3832 break; 3833 3834 case MOD_NL: 3835 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++) 3836 if (len == strlen(newlines[i]) && 3837 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break; 3838 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE; 3839 if (i == 0) 3840 { 3841 *((uint16_t *)field) = NEWLINE_DEFAULT; 3842 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET; 3843 else dctl->control2 &= ~CTL2_NL_SET; 3844 } 3845 else 3846 { 3847 *((uint16_t *)field) = i; 3848 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET; 3849 else dctl->control2 |= CTL2_NL_SET; 3850 } 3851 pp = ep; 3852 break; 3853 3854 case MOD_NN: /* Name or (signed) number; may be several */ 3855 if (isdigit(*pp) || *pp == '-') 3856 { 3857 int ct = MAXCPYGET - 1; 3858 int32_t value; 3859 li = strtol((const char *)pp, &endptr, 10); 3860 if (S32OVERFLOW(li)) 
goto INVALID_VALUE; 3861 value = (int32_t)li; 3862 field = (char *)field - m->offset + m->value; /* Adjust field ptr */ 3863 if (value >= 0) /* Add new number */ 3864 { 3865 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */ 3866 field = (char *)field + sizeof(int32_t); 3867 if (ct <= 0) 3868 { 3869 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name); 3870 return FALSE; 3871 } 3872 } 3873 *((int32_t *)field) = value; 3874 if (ct > 0) ((int32_t *)field)[1] = -1; 3875 pp = (uint8_t *)endptr; 3876 } 3877 3878 /* Multiple strings are put end to end. */ 3879 3880 else 3881 { 3882 char *nn = (char *)field; 3883 if (len > 0) /* Add new name */ 3884 { 3885 if (len > MAX_NAME_SIZE) 3886 { 3887 fprintf(outfile, "** Group name in '%s' is too long\n", m->name); 3888 return FALSE; 3889 } 3890 while (*nn != 0) nn += strlen(nn) + 1; 3891 if (nn + len + 2 - (char *)field > LENCPYGET) 3892 { 3893 fprintf(outfile, "** Too many characters in named '%s' modifiers\n", 3894 m->name); 3895 return FALSE; 3896 } 3897 memcpy(nn, pp, len); 3898 } 3899 nn[len] = 0 ; 3900 nn[len+1] = 0; 3901 pp = ep; 3902 } 3903 break; 3904 3905 case MOD_STR: 3906 if (len + 1 > m->value) 3907 { 3908 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n", 3909 m->name, m->value - 1); 3910 return FALSE; 3911 } 3912 memcpy(field, pp, len); 3913 ((uint8_t *)field)[len] = 0; 3914 pp = ep; 3915 break; 3916 } 3917 3918 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0) 3919 { 3920 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name); 3921 return FALSE; 3922 } 3923 3924 p = pp; 3925 first = FALSE; 3926 3927 if (ctx == CTX_POPPAT && 3928 (pctl->options != 0 || 3929 pctl->tables_id != 0 || 3930 pctl->locale[0] != 0 || 3931 (pctl->control & NOTPOP_CONTROLS) != 0)) 3932 { 3933 fprintf(outfile, "** '%s' is not valid here\n", m->name); 3934 return FALSE; 3935 } 3936 } 3937 3938 return TRUE; 3939 3940 INVALID_VALUE: 3941 fprintf(outfile, "** Invalid value in 
'%.*s'\n", (int)(ep-p), p); 3942 return FALSE; 3943 } 3944 3945 3946 /************************************************* 3947 * Get info from a pattern * 3948 *************************************************/ 3949 3950 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled 3951 pattern. 3952 3953 Arguments: 3954 what code for the required information 3955 where where to put the answer 3956 unsetok PCRE2_ERROR_UNSET is an "expected" result 3957 3958 Returns: the return from pcre2_pattern_info() 3959 */ 3960 3961 static int 3962 pattern_info(int what, void *where, BOOL unsetok) 3963 { 3964 int rc; 3965 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */ 3966 PCRE2_PATTERN_INFO(rc, compiled_code, what, where); 3967 if (rc >= 0) return 0; 3968 if (rc != PCRE2_ERROR_UNSET || !unsetok) 3969 { 3970 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode, 3971 what); 3972 if (rc == PCRE2_ERROR_BADMODE) 3973 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " 3974 "%d-bit mode\n", test_mode, 3975 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK)); 3976 } 3977 return rc; 3978 } 3979 3980 3981 3982 #ifdef SUPPORT_PCRE2_8 3983 /************************************************* 3984 * Show something in a list * 3985 *************************************************/ 3986 3987 /* This function just helps to keep the code that uses it tidier. It's used for 3988 various lists of things where there needs to be introductory text before the 3989 first item. As these calls are all in the POSIX-support code, they happen only 3990 when 8-bit mode is supported. 
*/ 3991 3992 static void 3993 prmsg(const char **msg, const char *s) 3994 { 3995 fprintf(outfile, "%s %s", *msg, s); 3996 *msg = ""; 3997 } 3998 #endif /* SUPPORT_PCRE2_8 */ 3999 4000 4001 4002 /************************************************* 4003 * Show control bits * 4004 *************************************************/ 4005 4006 /* Called for mutually exclusive controls and for unsupported POSIX controls. 4007 Because the bits are unique, this can be used for both pattern and data control 4008 words. 4009 4010 Arguments: 4011 controls control bits 4012 controls2 more control bits 4013 before text to print before 4014 4015 Returns: nothing 4016 */ 4017 4018 static void 4019 show_controls(uint32_t controls, uint32_t controls2, const char *before) 4020 { 4021 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 4022 before, 4023 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", 4024 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", 4025 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "", 4026 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "", 4027 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "", 4028 ((controls & CTL_BINCODE) != 0)? " bincode" : "", 4029 ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "", 4030 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "", 4031 ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "", 4032 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "", 4033 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "", 4034 ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "", 4035 ((controls & CTL_DFA) != 0)? " dfa" : "", 4036 ((controls & CTL_EXPAND) != 0)? " expand" : "", 4037 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "", 4038 ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "", 4039 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "", 4040 ((controls & CTL_GETALL) != 0)? 
" getall" : "", 4041 ((controls & CTL_GLOBAL) != 0)? " global" : "", 4042 ((controls & CTL_HEXPAT) != 0)? " hex" : "", 4043 ((controls & CTL_INFO) != 0)? " info" : "", 4044 ((controls & CTL_JITFAST) != 0)? " jitfast" : "", 4045 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", 4046 ((controls & CTL_MARK) != 0)? " mark" : "", 4047 ((controls & CTL_MEMORY) != 0)? " memory" : "", 4048 ((controls2 & CTL2_NL_SET) != 0)? " newline" : "", 4049 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "", 4050 ((controls & CTL_POSIX) != 0)? " posix" : "", 4051 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "", 4052 ((controls & CTL_PUSH) != 0)? " push" : "", 4053 ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "", 4054 ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "", 4055 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "", 4056 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "", 4057 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "", 4058 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "", 4059 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "", 4060 ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "", 4061 ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "", 4062 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : ""); 4063 } 4064 4065 4066 4067 /************************************************* 4068 * Show compile options * 4069 *************************************************/ 4070 4071 /* Called from show_pattern_info() and for unsupported POSIX options. 
4072 4073 Arguments: 4074 options an options word 4075 before text to print before 4076 after text to print after 4077 4078 Returns: nothing 4079 */ 4080 4081 static void 4082 show_compile_options(uint32_t options, const char *before, const char *after) 4083 { 4084 if (options == 0) fprintf(outfile, "%s <none>%s", before, after); 4085 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 4086 before, 4087 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", 4088 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", 4089 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "", 4090 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "", 4091 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", 4092 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "", 4093 ((options & PCRE2_CASELESS) != 0)? " caseless" : "", 4094 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", 4095 ((options & PCRE2_DOTALL) != 0)? " dotall" : "", 4096 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "", 4097 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", 4098 ((options & PCRE2_EXTENDED) != 0)? " extended" : "", 4099 ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "", 4100 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "", 4101 ((options & PCRE2_LITERAL) != 0)? " literal" : "", 4102 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "", 4103 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "", 4104 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "", 4105 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "", 4106 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "", 4107 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "", 4108 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "", 4109 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "", 4110 ((options & PCRE2_NO_UTF_CHECK) != 0)? 
" no_utf_check" : "", 4111 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", 4112 ((options & PCRE2_UCP) != 0)? " ucp" : "", 4113 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "", 4114 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "", 4115 ((options & PCRE2_UTF) != 0)? " utf" : "", 4116 after); 4117 } 4118 4119 4120 /************************************************* 4121 * Show compile extra options * 4122 *************************************************/ 4123 4124 /* Called from show_pattern_info() and for unsupported POSIX options. 4125 4126 Arguments: 4127 options an options word 4128 before text to print before 4129 after text to print after 4130 4131 Returns: nothing 4132 */ 4133 4134 static void 4135 show_compile_extra_options(uint32_t options, const char *before, 4136 const char *after) 4137 { 4138 if (options == 0) fprintf(outfile, "%s <none>%s", before, after); 4139 else fprintf(outfile, "%s%s%s%s%s%s", 4140 before, 4141 ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "", 4142 ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "", 4143 ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "", 4144 ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "", 4145 after); 4146 } 4147 4148 4149 4150 #ifdef SUPPORT_PCRE2_8 4151 /************************************************* 4152 * Show match options * 4153 *************************************************/ 4154 4155 /* Called for unsupported POSIX options. */ 4156 4157 static void 4158 show_match_options(uint32_t options) 4159 { 4160 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s", 4161 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", 4162 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "", 4163 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "", 4164 ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "", 4165 ((options & PCRE2_NO_UTF_CHECK) != 0)? 
" no_utf_check" : "", 4166 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "", 4167 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "", 4168 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "", 4169 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "", 4170 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "", 4171 ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : ""); 4172 } 4173 #endif /* SUPPORT_PCRE2_8 */ 4174 4175 4176 4177 /************************************************* 4178 * Show memory usage info for a pattern * 4179 *************************************************/ 4180 4181 static void 4182 show_memory_info(void) 4183 { 4184 uint32_t name_count, name_entry_size; 4185 size_t size, cblock_size; 4186 4187 /* One of the test_mode values will always be true, but to stop a compiler 4188 warning we must initialize cblock_size. */ 4189 4190 cblock_size = 0; 4191 #ifdef SUPPORT_PCRE2_8 4192 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8); 4193 #endif 4194 #ifdef SUPPORT_PCRE2_16 4195 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16); 4196 #endif 4197 #ifdef SUPPORT_PCRE2_32 4198 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32); 4199 #endif 4200 4201 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE); 4202 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE); 4203 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE); 4204 fprintf(outfile, "Memory allocation (code space): %d\n", 4205 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size)); 4206 if (pat_patctl.jit != 0) 4207 { 4208 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE); 4209 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size); 4210 } 4211 } 4212 4213 4214 4215 /************************************************* 4216 * Show frame size info for a pattern * 4217 *************************************************/ 4218 4219 static void 4220 show_framesize(void) 4221 
{ 4222 size_t frame_size; 4223 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE); 4224 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size); 4225 } 4226 4227 4228 4229 /************************************************* 4230 * Get and output an error message * 4231 *************************************************/ 4232 4233 static BOOL 4234 print_error_message(int errorcode, const char *before, const char *after) 4235 { 4236 int len; 4237 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer); 4238 if (len < 0) 4239 { 4240 fprintf(outfile, "\n** pcre2test internal error: cannot interpret error " 4241 "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len); 4242 } 4243 else 4244 { 4245 fprintf(outfile, "%s", before); 4246 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile); 4247 fprintf(outfile, "%s", after); 4248 } 4249 return len >= 0; 4250 } 4251 4252 4253 /************************************************* 4254 * Callback function for callout enumeration * 4255 *************************************************/ 4256 4257 /* The only differences in the callout emumeration block for different code 4258 unit widths are that the pointers to the subject, the most recent MARK, and a 4259 callout argument string point to strings of the appropriate width. Casts can be 4260 used to deal with this. 
4261 4262 Argument: 4263 cb pointer to enumerate block 4264 callout_data user data 4265 4266 Returns: 0 4267 */ 4268 4269 static int callout_callback(pcre2_callout_enumerate_block_8 *cb, 4270 void *callout_data) 4271 { 4272 uint32_t i; 4273 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; 4274 4275 (void)callout_data; /* Not currently displayed */ 4276 4277 fprintf(outfile, "Callout "); 4278 if (cb->callout_string != NULL) 4279 { 4280 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); 4281 fprintf(outfile, "%c", delimiter); 4282 PCHARSV(cb->callout_string, 0, 4283 cb->callout_string_length, utf, outfile); 4284 for (i = 0; callout_start_delims[i] != 0; i++) 4285 if (delimiter == callout_start_delims[i]) 4286 { 4287 delimiter = callout_end_delims[i]; 4288 break; 4289 } 4290 fprintf(outfile, "%c ", delimiter); 4291 } 4292 else fprintf(outfile, "%d ", cb->callout_number); 4293 4294 fprintf(outfile, "%.*s\n", 4295 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length), 4296 pbuffer8 + cb->pattern_position); 4297 4298 return 0; 4299 } 4300 4301 4302 4303 /************************************************* 4304 * Show information about a pattern * 4305 *************************************************/ 4306 4307 /* This function is called after a pattern has been compiled if any of the 4308 information-requesting controls have been set. 
4309 4310 Arguments: none 4311 4312 Returns: PR_OK continue processing next line 4313 PR_SKIP skip to a blank line 4314 PR_ABEND abort the pcre2test run 4315 */ 4316 4317 static int 4318 show_pattern_info(void) 4319 { 4320 uint32_t compile_options, overall_options, extra_options; 4321 4322 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0) 4323 { 4324 fprintf(outfile, "------------------------------------------------------------------\n"); 4325 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0); 4326 } 4327 4328 if ((pat_patctl.control & CTL_INFO) != 0) 4329 { 4330 int rc; 4331 void *nametable; 4332 uint8_t *start_bits; 4333 BOOL heap_limit_set, match_limit_set, depth_limit_set; 4334 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, 4335 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, 4336 depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount, 4337 newline_convention; 4338 4339 /* Exercise the error route. */ 4340 4341 PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL); 4342 (void)rc; 4343 4344 /* These info requests may return PCRE2_ERROR_UNSET. */ 4345 4346 switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE)) 4347 { 4348 case 0: 4349 heap_limit_set = TRUE; 4350 break; 4351 4352 case PCRE2_ERROR_UNSET: 4353 heap_limit_set = FALSE; 4354 break; 4355 4356 default: 4357 return PR_ABEND; 4358 } 4359 4360 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE)) 4361 { 4362 case 0: 4363 match_limit_set = TRUE; 4364 break; 4365 4366 case PCRE2_ERROR_UNSET: 4367 match_limit_set = FALSE; 4368 break; 4369 4370 default: 4371 return PR_ABEND; 4372 } 4373 4374 switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE)) 4375 { 4376 case 0: 4377 depth_limit_set = TRUE; 4378 break; 4379 4380 case PCRE2_ERROR_UNSET: 4381 depth_limit_set = FALSE; 4382 break; 4383 4384 default: 4385 return PR_ABEND; 4386 } 4387 4388 /* These info requests should always succeed. 
*/ 4389 4390 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) + 4391 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) + 4392 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) + 4393 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) + 4394 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) + 4395 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) + 4396 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) + 4397 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) + 4398 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) + 4399 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) + 4400 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) + 4401 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) + 4402 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) + 4403 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) + 4404 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) + 4405 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) + 4406 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE) 4407 != 0) 4408 return PR_ABEND; 4409 4410 fprintf(outfile, "Capturing subpattern count = %d\n", capture_count); 4411 4412 if (backrefmax > 0) 4413 fprintf(outfile, "Max back reference = %d\n", backrefmax); 4414 4415 if (maxlookbehind > 0) 4416 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); 4417 4418 if (heap_limit_set) 4419 fprintf(outfile, "Heap limit = %u\n", heap_limit); 4420 4421 if (match_limit_set) 4422 fprintf(outfile, "Match limit = %u\n", match_limit); 4423 4424 if (depth_limit_set) 4425 fprintf(outfile, "Depth limit = %u\n", depth_limit); 4426 4427 if (namecount > 0) 4428 { 4429 fprintf(outfile, "Named capturing subpatterns:\n"); 4430 for (; namecount > 0; namecount--) 4431 { 4432 int imm2_size = test_mode == PCRE8_MODE ? 
2 : 1; 4433 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size); 4434 fprintf(outfile, " "); 4435 PCHARSV(nametable, imm2_size, length, FALSE, outfile); 4436 while (length++ < nameentrysize - imm2_size) putc(' ', outfile); 4437 #ifdef SUPPORT_PCRE2_32 4438 if (test_mode == PCRE32_MODE) 4439 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0])); 4440 #endif 4441 #ifdef SUPPORT_PCRE2_16 4442 if (test_mode == PCRE16_MODE) 4443 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0])); 4444 #endif 4445 #ifdef SUPPORT_PCRE2_8 4446 if (test_mode == PCRE8_MODE) 4447 fprintf(outfile, "%3d\n", (int)( 4448 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1])); 4449 #endif 4450 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size); 4451 } 4452 } 4453 4454 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); 4455 if (hasbackslashc) fprintf(outfile, "Contains \\C\n"); 4456 if (match_empty) fprintf(outfile, "May match empty string\n"); 4457 4458 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); 4459 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); 4460 pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE); 4461 4462 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves 4463 cluttering up the verification output of non-UTF test files. 
*/ 4464 4465 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0) 4466 { 4467 compile_options &= ~PCRE2_NEVER_UTF; 4468 overall_options &= ~PCRE2_NEVER_UTF; 4469 } 4470 4471 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0) 4472 { 4473 compile_options &= ~PCRE2_NEVER_UCP; 4474 overall_options &= ~PCRE2_NEVER_UCP; 4475 } 4476 4477 if ((compile_options|overall_options) != 0) 4478 { 4479 if (compile_options == overall_options) 4480 show_compile_options(compile_options, "Options:", "\n"); 4481 else 4482 { 4483 show_compile_options(compile_options, "Compile options:", "\n"); 4484 show_compile_options(overall_options, "Overall options:", "\n"); 4485 } 4486 } 4487 4488 if (extra_options != 0) 4489 show_compile_extra_options(extra_options, "Extra options:", "\n"); 4490 4491 if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); 4492 4493 if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 || 4494 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0) 4495 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? 
4496 "any Unicode newline" : "CR, LF, or CRLF"); 4497 4498 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0) 4499 { 4500 switch (newline_convention) 4501 { 4502 case PCRE2_NEWLINE_CR: 4503 fprintf(outfile, "Forced newline is CR\n"); 4504 break; 4505 4506 case PCRE2_NEWLINE_LF: 4507 fprintf(outfile, "Forced newline is LF\n"); 4508 break; 4509 4510 case PCRE2_NEWLINE_CRLF: 4511 fprintf(outfile, "Forced newline is CRLF\n"); 4512 break; 4513 4514 case PCRE2_NEWLINE_ANYCRLF: 4515 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n"); 4516 break; 4517 4518 case PCRE2_NEWLINE_ANY: 4519 fprintf(outfile, "Forced newline is any Unicode newline\n"); 4520 break; 4521 4522 case PCRE2_NEWLINE_NUL: 4523 fprintf(outfile, "Forced newline is NUL\n"); 4524 break; 4525 4526 default: 4527 break; 4528 } 4529 } 4530 4531 if (first_ctype == 2) 4532 { 4533 fprintf(outfile, "First code unit at start or follows newline\n"); 4534 } 4535 else if (first_ctype == 1) 4536 { 4537 const char *caseless = 4538 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)? 4539 "" : " (caseless)"; 4540 if (PRINTOK(first_cunit)) 4541 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless); 4542 else 4543 { 4544 fprintf(outfile, "First code unit = "); 4545 pchar(first_cunit, FALSE, outfile); 4546 fprintf(outfile, "%s\n", caseless); 4547 } 4548 } 4549 else if (start_bits != NULL) 4550 { 4551 int i; 4552 int c = 24; 4553 fprintf(outfile, "Starting code units: "); 4554 for (i = 0; i < 256; i++) 4555 { 4556 if ((start_bits[i/8] & (1<<(i&7))) != 0) 4557 { 4558 if (c > 75) 4559 { 4560 fprintf(outfile, "\n "); 4561 c = 2; 4562 } 4563 if (PRINTOK(i) && i != ' ') 4564 { 4565 fprintf(outfile, "%c ", i); 4566 c += 2; 4567 } 4568 else 4569 { 4570 fprintf(outfile, "\\x%02x ", i); 4571 c += 5; 4572 } 4573 } 4574 } 4575 fprintf(outfile, "\n"); 4576 } 4577 4578 if (last_ctype != 0) 4579 { 4580 const char *caseless = 4581 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)? 
4582 "" : " (caseless)"; 4583 if (PRINTOK(last_cunit)) 4584 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless); 4585 else 4586 { 4587 fprintf(outfile, "Last code unit = "); 4588 pchar(last_cunit, FALSE, outfile); 4589 fprintf(outfile, "%s\n", caseless); 4590 } 4591 } 4592 4593 fprintf(outfile, "Subject length lower bound = %d\n", minlength); 4594 4595 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) 4596 { 4597 if (FLD(compiled_code, executable_jit) != NULL) 4598 fprintf(outfile, "JIT compilation was successful\n"); 4599 else 4600 { 4601 #ifdef SUPPORT_JIT 4602 fprintf(outfile, "JIT compilation was not successful"); 4603 if (jitrc != 0 && !print_error_message(jitrc, " (", ")")) 4604 return PR_ABEND; 4605 fprintf(outfile, "\n"); 4606 #else 4607 fprintf(outfile, "JIT support is not available in this version of PCRE2\n"); 4608 #endif 4609 } 4610 } 4611 } 4612 4613 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0) 4614 { 4615 int errorcode; 4616 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0); 4617 if (errorcode != 0) 4618 { 4619 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode); 4620 if (errorcode < 0 && !print_error_message(errorcode, "", "\n")) 4621 return PR_ABEND; 4622 return PR_SKIP; 4623 } 4624 } 4625 4626 return PR_OK; 4627 } 4628 4629 4630 4631 /************************************************* 4632 * Handle serialization error * 4633 *************************************************/ 4634 4635 /* Print an error message after a serialization failure. 
4636 4637 Arguments: 4638 rc the error code 4639 msg an initial message for what failed 4640 4641 Returns: FALSE if print_error_message() fails 4642 */ 4643 4644 static BOOL 4645 serial_error(int rc, const char *msg) 4646 { 4647 fprintf(outfile, "%s failed: error %d: ", msg, rc); 4648 return print_error_message(rc, "", "\n"); 4649 } 4650 4651 4652 4653 /************************************************* 4654 * Open file for save/load commands * 4655 *************************************************/ 4656 4657 /* This function decodes the file name and opens the file. 4658 4659 Arguments: 4660 buffptr point after the #command 4661 mode open mode 4662 fptr points to the FILE variable 4663 4664 Returns: PR_OK or PR_ABEND 4665 */ 4666 4667 static int 4668 open_file(uint8_t *buffptr, const char *mode, FILE **fptr) 4669 { 4670 char *endf; 4671 char *filename = (char *)buffptr; 4672 while (isspace(*filename)) filename++; 4673 endf = filename + strlen8(filename); 4674 while (endf > filename && isspace(endf[-1])) endf--; 4675 4676 if (endf == filename) 4677 { 4678 fprintf(outfile, "** File name expected after #save\n"); 4679 return PR_ABEND; 4680 } 4681 4682 *endf = 0; 4683 *fptr = fopen((const char *)filename, mode); 4684 if (*fptr == NULL) 4685 { 4686 fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno)); 4687 return PR_ABEND; 4688 } 4689 4690 return PR_OK; 4691 } 4692 4693 4694 4695 /************************************************* 4696 * Process command line * 4697 *************************************************/ 4698 4699 /* This function is called for lines beginning with # and a character that is 4700 not ! or whitespace, when encountered between tests, which means that there is 4701 no compiled pattern (compiled_code is NULL). The line is in buffer. 
4702 4703 Arguments: none 4704 4705 Returns: PR_OK continue processing next line 4706 PR_SKIP skip to a blank line 4707 PR_ABEND abort the pcre2test run 4708 */ 4709 4710 static int 4711 process_command(void) 4712 { 4713 FILE *f; 4714 PCRE2_SIZE serial_size; 4715 size_t i; 4716 int rc, cmd, cmdlen, yield; 4717 uint16_t first_listed_newline; 4718 const char *cmdname; 4719 uint8_t *argptr, *serial; 4720 4721 yield = PR_OK; 4722 cmd = CMD_UNKNOWN; 4723 cmdlen = 0; 4724 4725 for (i = 0; i < cmdlistcount; i++) 4726 { 4727 cmdname = cmdlist[i].name; 4728 cmdlen = strlen(cmdname); 4729 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 && 4730 isspace(buffer[cmdlen+1])) 4731 { 4732 cmd = cmdlist[i].value; 4733 break; 4734 } 4735 } 4736 4737 argptr = buffer + cmdlen + 1; 4738 4739 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT) 4740 { 4741 fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname); 4742 return PR_ABEND; 4743 } 4744 4745 switch(cmd) 4746 { 4747 case CMD_UNKNOWN: 4748 fprintf(outfile, "** Unknown command: %s", buffer); 4749 break; 4750 4751 case CMD_FORBID_UTF: 4752 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; 4753 break; 4754 4755 case CMD_PERLTEST: 4756 restrict_for_perl_test = TRUE; 4757 break; 4758 4759 /* Set default pattern modifiers */ 4760 4761 case CMD_PATTERN: 4762 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL); 4763 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0) 4764 def_patctl.jit = 7; 4765 break; 4766 4767 /* Set default subject modifiers */ 4768 4769 case CMD_SUBJECT: 4770 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl); 4771 break; 4772 4773 /* Check the default newline, and if not one of those listed, set up the 4774 first one to be forced. An empty list unsets. 
*/ 4775 4776 case CMD_NEWLINE_DEFAULT: 4777 local_newline_default = 0; /* Unset */ 4778 first_listed_newline = 0; 4779 for (;;) 4780 { 4781 while (isspace(*argptr)) argptr++; 4782 if (*argptr == 0) break; 4783 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++) 4784 { 4785 size_t nlen = strlen(newlines[i]); 4786 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 && 4787 isspace(argptr[nlen])) 4788 { 4789 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */ 4790 if (first_listed_newline == 0) first_listed_newline = i; 4791 } 4792 } 4793 while (*argptr != 0 && !isspace(*argptr)) argptr++; 4794 } 4795 local_newline_default = first_listed_newline; 4796 break; 4797 4798 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect 4799 the compiled pattern (e.g. to give information) are permitted. The default 4800 pattern modifiers are ignored. */ 4801 4802 case CMD_POP: 4803 case CMD_POPCOPY: 4804 if (patstacknext <= 0) 4805 { 4806 fprintf(outfile, "** Can't pop off an empty stack\n"); 4807 return PR_SKIP; 4808 } 4809 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */ 4810 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL)) 4811 return PR_SKIP; 4812 4813 if (cmd == CMD_POP) 4814 { 4815 SET(compiled_code, patstack[--patstacknext]); 4816 } 4817 else 4818 { 4819 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]); 4820 } 4821 4822 if (pat_patctl.jit != 0) 4823 { 4824 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); 4825 } 4826 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info(); 4827 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize(); 4828 if ((pat_patctl.control & CTL_ANYINFO) != 0) 4829 { 4830 rc = show_pattern_info(); 4831 if (rc != PR_OK) return rc; 4832 } 4833 break; 4834 4835 /* Save the stack of compiled patterns to a file, then empty the stack. 
*/ 4836 4837 case CMD_SAVE: 4838 if (patstacknext <= 0) 4839 { 4840 fprintf(outfile, "** No stacked patterns to save\n"); 4841 return PR_OK; 4842 } 4843 4844 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f); 4845 if (rc != PR_OK) return rc; 4846 4847 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size, 4848 general_context); 4849 if (rc < 0) 4850 { 4851 fclose(f); 4852 if (!serial_error(rc, "Serialization")) return PR_ABEND; 4853 break; 4854 } 4855 4856 /* Write the length at the start of the file to make it straightforward to 4857 get the right memory when re-loading. This saves having to read the file size 4858 in different operating systems. To allow for different endianness (even 4859 though reloading with the opposite endianness does not work), write the 4860 length byte-by-byte. */ 4861 4862 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f); 4863 if (fwrite(serial, 1, serial_size, f) != serial_size) 4864 { 4865 fprintf(outfile, "** Wrong return from fwrite()\n"); 4866 fclose(f); 4867 return PR_ABEND; 4868 } 4869 4870 fclose(f); 4871 PCRE2_SERIALIZE_FREE(serial); 4872 while(patstacknext > 0) 4873 { 4874 SET(compiled_code, patstack[--patstacknext]); 4875 SUB1(pcre2_code_free, compiled_code); 4876 } 4877 SET(compiled_code, NULL); 4878 break; 4879 4880 /* Load a set of compiled patterns from a file onto the stack */ 4881 4882 case CMD_LOAD: 4883 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f); 4884 if (rc != PR_OK) return rc; 4885 4886 serial_size = 0; 4887 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8); 4888 4889 serial = malloc(serial_size); 4890 if (serial == NULL) 4891 { 4892 fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n", 4893 SIZ_CAST serial_size); 4894 fclose(f); 4895 return PR_ABEND; 4896 } 4897 4898 i = fread(serial, 1, serial_size, f); 4899 fclose(f); 4900 4901 if (i != serial_size) 4902 { 4903 fprintf(outfile, "** Wrong return from fread()\n"); 4904 yield = PR_ABEND; 4905 } 4906 
else 4907 { 4908 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial); 4909 if (rc < 0) 4910 { 4911 if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND; 4912 } 4913 else 4914 { 4915 if (rc + patstacknext > PATSTACKSIZE) 4916 { 4917 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n", 4918 rc, (rc == 1)? "" : "s"); 4919 rc = PATSTACKSIZE - patstacknext; 4920 fprintf(outfile, "** Decoding %d pattern%s\n", rc, 4921 (rc == 1)? "" : "s"); 4922 } 4923 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial, 4924 general_context); 4925 if (rc < 0) 4926 { 4927 if (!serial_error(rc, "Deserialization")) yield = PR_ABEND; 4928 } 4929 else patstacknext += rc; 4930 } 4931 } 4932 4933 free(serial); 4934 break; 4935 } 4936 4937 return yield; 4938 } 4939 4940 4941 4942 /************************************************* 4943 * Process pattern line * 4944 *************************************************/ 4945 4946 /* This function is called when the input buffer contains the start of a 4947 pattern. The first character is known to be a valid delimiter. The pattern is 4948 read, modifiers are interpreted, and a suitable local context is set up for 4949 this test. The pattern is then compiled. 4950 4951 Arguments: none 4952 4953 Returns: PR_OK continue processing next line 4954 PR_SKIP skip to a blank line 4955 PR_ABEND abort the pcre2test run 4956 */ 4957 4958 static int 4959 process_pattern(void) 4960 { 4961 BOOL utf; 4962 uint32_t k; 4963 uint8_t *p = buffer; 4964 unsigned int delimiter = *p++; 4965 int errorcode; 4966 void *use_pat_context; 4967 uint32_t use_forbid_utf = forbid_utf; 4968 PCRE2_SIZE patlen; 4969 PCRE2_SIZE valgrind_access_length; 4970 PCRE2_SIZE erroroffset; 4971 4972 /* Initialize the context and pattern/data controls for this test from the 4973 defaults. 
*/ 4974 4975 PATCTXCPY(pat_context, default_pat_context); 4976 memcpy(&pat_patctl, &def_patctl, sizeof(patctl)); 4977 4978 /* Find the end of the pattern, reading more lines if necessary. */ 4979 4980 for(;;) 4981 { 4982 while (*p != 0) 4983 { 4984 if (*p == '\\' && p[1] != 0) p++; 4985 else if (*p == delimiter) break; 4986 p++; 4987 } 4988 if (*p != 0) break; 4989 if ((p = extend_inputline(infile, p, " > ")) == NULL) 4990 { 4991 fprintf(outfile, "** Unexpected EOF\n"); 4992 return PR_ABEND; 4993 } 4994 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p); 4995 } 4996 4997 /* If the first character after the delimiter is backslash, make the pattern 4998 end with backslash. This is purely to provide a way of testing for the error 4999 message when a pattern ends with backslash. */ 5000 5001 if (p[1] == '\\') *p++ = '\\'; 5002 5003 /* Terminate the pattern at the delimiter, and compute the length. */ 5004 5005 *p++ = 0; 5006 patlen = p - buffer - 2; 5007 5008 /* Look for modifiers and options after the final delimiter. */ 5009 5010 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP; 5011 utf = (pat_patctl.options & PCRE2_UTF) != 0; 5012 5013 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually 5014 exclusive with the utf modifier. */ 5015 5016 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0) 5017 { 5018 if (test_mode == PCRE8_MODE) 5019 { 5020 fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n"); 5021 return PR_SKIP; 5022 } 5023 if (utf) 5024 { 5025 fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n"); 5026 return PR_SKIP; 5027 } 5028 } 5029 5030 /* The convert and posix modifiers are mutually exclusive. 
*/ 5031 5032 if (pat_patctl.convert_type != CONVERT_UNSET && 5033 (pat_patctl.control & CTL_POSIX) != 0) 5034 { 5035 fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n"); 5036 return PR_SKIP; 5037 } 5038 5039 /* Check for mutually exclusive control modifiers. At present, these are all in 5040 the first control word. */ 5041 5042 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++) 5043 { 5044 uint32_t c = pat_patctl.control & exclusive_pat_controls[k]; 5045 if (c != 0 && c != (c & (~c+1))) 5046 { 5047 show_controls(c, 0, "** Not allowed together:"); 5048 fprintf(outfile, "\n"); 5049 return PR_SKIP; 5050 } 5051 } 5052 5053 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was 5054 specified. */ 5055 5056 if (pat_patctl.jit == 0 && 5057 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0) 5058 pat_patctl.jit = 7; 5059 5060 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting 5061 in callouts. Convert from hex if requested (literal strings in quotes may be 5062 present within the hexadecimal pairs). The result must necessarily be fewer 5063 characters so will always fit in pbuffer8. 
*/ 5064 5065 if ((pat_patctl.control & CTL_HEXPAT) != 0) 5066 { 5067 uint8_t *pp, *pt; 5068 uint32_t c, d; 5069 5070 pt = pbuffer8; 5071 for (pp = buffer + 1; *pp != 0; pp++) 5072 { 5073 if (isspace(*pp)) continue; 5074 c = *pp++; 5075 5076 /* Handle a literal substring */ 5077 5078 if (c == '\'' || c == '"') 5079 { 5080 uint8_t *pq = pp; 5081 for (;; pp++) 5082 { 5083 d = *pp; 5084 if (d == 0) 5085 { 5086 fprintf(outfile, "** Missing closing quote in hex pattern: " 5087 "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2); 5088 return PR_SKIP; 5089 } 5090 if (d == c) break; 5091 *pt++ = d; 5092 } 5093 } 5094 5095 /* Expect a hex pair */ 5096 5097 else 5098 { 5099 if (!isxdigit(c)) 5100 { 5101 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %" 5102 PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2); 5103 return PR_SKIP; 5104 } 5105 if (*pp == 0) 5106 { 5107 fprintf(outfile, "** Odd number of digits in hex pattern\n"); 5108 return PR_SKIP; 5109 } 5110 d = *pp; 5111 if (!isxdigit(d)) 5112 { 5113 fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %" 5114 PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1); 5115 return PR_SKIP; 5116 } 5117 c = toupper(c); 5118 d = toupper(d); 5119 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) + 5120 (isdigit(d)? (d - '0') : (d - 'A' + 10)); 5121 } 5122 } 5123 *pt = 0; 5124 patlen = pt - pbuffer8; 5125 } 5126 5127 /* If not a hex string, process for repetition expansion if requested. */ 5128 5129 else if ((pat_patctl.control & CTL_EXPAND) != 0) 5130 { 5131 uint8_t *pp, *pt; 5132 5133 pt = pbuffer8; 5134 for (pp = buffer + 1; *pp != 0; pp++) 5135 { 5136 uint8_t *pc = pp; 5137 uint32_t count = 1; 5138 size_t length = 1; 5139 5140 /* Check for replication syntax; if not found, the defaults just set will 5141 prevail and one character will be copied. 
*/ 5142 5143 if (pp[0] == '\\' && pp[1] == '[') 5144 { 5145 uint8_t *pe; 5146 for (pe = pp + 2; *pe != 0; pe++) 5147 { 5148 if (pe[0] == ']' && pe[1] == '{') 5149 { 5150 uint32_t clen = pe - pc - 2; 5151 uint32_t i = 0; 5152 unsigned long uli; 5153 char *endptr; 5154 5155 pe += 2; 5156 uli = strtoul((const char *)pe, &endptr, 10); 5157 if (U32OVERFLOW(uli)) 5158 { 5159 fprintf(outfile, "** Pattern repeat count too large\n"); 5160 return PR_SKIP; 5161 } 5162 5163 i = (uint32_t)uli; 5164 pe = (uint8_t *)endptr; 5165 if (*pe == '}') 5166 { 5167 if (i == 0) 5168 { 5169 fprintf(outfile, "** Zero repeat not allowed\n"); 5170 return PR_SKIP; 5171 } 5172 pc += 2; 5173 count = i; 5174 length = clen; 5175 pp = pe; 5176 break; 5177 } 5178 } 5179 } 5180 } 5181 5182 /* Add to output. If the buffer is too small expand it. The function for 5183 expanding buffers always keeps buffer and pbuffer8 in step as far as their 5184 size goes. */ 5185 5186 while (pt + count * length > pbuffer8 + pbuffer8_size) 5187 { 5188 size_t pc_offset = pc - buffer; 5189 size_t pp_offset = pp - buffer; 5190 size_t pt_offset = pt - pbuffer8; 5191 expand_input_buffers(); 5192 pc = buffer + pc_offset; 5193 pp = buffer + pp_offset; 5194 pt = pbuffer8 + pt_offset; 5195 } 5196 5197 for (; count > 0; count--) 5198 { 5199 memcpy(pt, pc, length); 5200 pt += length; 5201 } 5202 } 5203 5204 *pt = 0; 5205 patlen = pt - pbuffer8; 5206 5207 if ((pat_patctl.control & CTL_INFO) != 0) 5208 fprintf(outfile, "Expanded: %s\n", pbuffer8); 5209 } 5210 5211 /* Neither hex nor expanded, just copy the input verbatim. 
*/ 5212 5213 else 5214 { 5215 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1); 5216 } 5217 5218 /* Sort out character tables */ 5219 5220 if (pat_patctl.locale[0] != 0) 5221 { 5222 if (pat_patctl.tables_id != 0) 5223 { 5224 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n"); 5225 return PR_SKIP; 5226 } 5227 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL) 5228 { 5229 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale); 5230 return PR_SKIP; 5231 } 5232 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0) 5233 { 5234 strcpy((char *)locale_name, (char *)pat_patctl.locale); 5235 if (locale_tables != NULL) free((void *)locale_tables); 5236 PCRE2_MAKETABLES(locale_tables); 5237 } 5238 use_tables = locale_tables; 5239 } 5240 5241 else switch (pat_patctl.tables_id) 5242 { 5243 case 0: use_tables = NULL; break; 5244 case 1: use_tables = tables1; break; 5245 case 2: use_tables = tables2; break; 5246 default: 5247 fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n"); 5248 return PR_SKIP; 5249 } 5250 5251 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables); 5252 5253 /* Set up for the stackguard test. */ 5254 5255 if (pat_patctl.stackguard_test != 0) 5256 { 5257 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL); 5258 } 5259 5260 /* Handle compiling via the POSIX interface, which doesn't support the 5261 timing, showing, or debugging options, nor the ability to pass over 5262 local character tables. Neither does it have 16-bit or 32-bit support. 
*/ 5263 5264 if ((pat_patctl.control & CTL_POSIX) != 0) 5265 { 5266 #ifdef SUPPORT_PCRE2_8 5267 int rc; 5268 int cflags = 0; 5269 const char *msg = "** Ignored with POSIX interface:"; 5270 #endif 5271 5272 if (test_mode != PCRE8_MODE) 5273 { 5274 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n"); 5275 return PR_SKIP; 5276 } 5277 5278 #ifdef SUPPORT_PCRE2_8 5279 /* Check for features that the POSIX interface does not support. */ 5280 5281 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale"); 5282 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace"); 5283 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables"); 5284 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard"); 5285 if (timeit > 0) prmsg(&msg, "timing"); 5286 if (pat_patctl.jit != 0) prmsg(&msg, "JIT"); 5287 5288 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0) 5289 { 5290 show_compile_options( 5291 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, ""); 5292 msg = ""; 5293 } 5294 5295 if ((FLD(pat_context, extra_options) & 5296 ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0) 5297 { 5298 show_compile_extra_options( 5299 FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS, 5300 msg, ""); 5301 msg = ""; 5302 } 5303 5304 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 || 5305 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0) 5306 { 5307 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, 5308 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg); 5309 msg = ""; 5310 } 5311 5312 if (local_newline_default != 0) prmsg(&msg, "#newline_default"); 5313 if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET) 5314 prmsg(&msg, "max_pattern_length"); 5315 if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT) 5316 prmsg(&msg, "parens_nest_limit"); 5317 5318 if (msg[0] == 0) fprintf(outfile, "\n"); 5319 5320 /* Translate PCRE2 options to POSIX options and then compile. 
*/ 5321 5322 if (utf) cflags |= REG_UTF; 5323 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB; 5324 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP; 5325 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE; 5326 if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC; 5327 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE; 5328 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL; 5329 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY; 5330 5331 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0) 5332 { 5333 preg.re_endp = (char *)pbuffer8 + patlen; 5334 cflags |= REG_PEND; 5335 } 5336 5337 rc = regcomp(&preg, (char *)pbuffer8, cflags); 5338 5339 /* Compiling failed */ 5340 5341 if (rc != 0) 5342 { 5343 size_t bsize, usize; 5344 int psize; 5345 5346 preg.re_pcre2_code = NULL; /* In case something was left in there */ 5347 preg.re_match_data = NULL; 5348 5349 bsize = (pat_patctl.regerror_buffsize != 0)? 5350 pat_patctl.regerror_buffsize : pbuffer8_size; 5351 if (bsize + 8 < pbuffer8_size) 5352 memcpy(pbuffer8 + bsize, "DEADBEEF", 8); 5353 usize = regerror(rc, &preg, (char *)pbuffer8, bsize); 5354 5355 /* Inside regerror(), snprintf() is used. If the buffer is too small, some 5356 versions of snprintf() put a zero byte at the end, but others do not. 5357 Therefore, we print a maximum of one less than the size of the buffer. */ 5358 5359 psize = (int)bsize - 1; 5360 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8); 5361 if (usize > bsize) 5362 { 5363 fprintf(outfile, "** regerror() message truncated\n"); 5364 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0) 5365 fprintf(outfile, "** regerror() buffer overflow\n"); 5366 } 5367 return PR_SKIP; 5368 } 5369 5370 /* Compiling succeeded. Check that the values in the preg block are sensible. 
5371 It can happen that pcre2test is accidentally linked with a different POSIX 5372 library which succeeds, but of course puts different things into preg. In 5373 this situation, calling regfree() may cause a segfault (or invalid free() in 5374 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the 5375 calling of regfree() on exit. */ 5376 5377 if (preg.re_pcre2_code == NULL || 5378 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER || 5379 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub || 5380 preg.re_match_data == NULL || 5381 preg.re_cflags != cflags) 5382 { 5383 fprintf(outfile, 5384 "** The regcomp() function returned zero (success), but the values set\n" 5385 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n" 5386 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n" 5387 "** some other POSIX regex library.\n**\n"); 5388 preg.re_pcre2_code = NULL; 5389 return PR_ABEND; 5390 } 5391 5392 return PR_OK; 5393 #endif /* SUPPORT_PCRE2_8 */ 5394 } 5395 5396 /* Handle compiling via the native interface. Controls that act later are 5397 ignored with "push". Replacements are locked out. 
*/ 5398 5399 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) 5400 { 5401 if (pat_patctl.replacement[0] != 0) 5402 { 5403 fprintf(outfile, "** Replacement text is not supported with 'push'.\n"); 5404 return PR_OK; 5405 } 5406 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 || 5407 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0) 5408 { 5409 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS, 5410 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2, 5411 "** Ignored when compiled pattern is stacked with 'push':"); 5412 fprintf(outfile, "\n"); 5413 } 5414 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 || 5415 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0) 5416 { 5417 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS, 5418 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2, 5419 "** Applies only to compile when pattern is stacked with 'push':"); 5420 fprintf(outfile, "\n"); 5421 } 5422 } 5423 5424 /* Convert the input in non-8-bit modes. */ 5425 5426 errorcode = 0; 5427 5428 #ifdef SUPPORT_PCRE2_16 5429 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen); 5430 #endif 5431 5432 #ifdef SUPPORT_PCRE2_32 5433 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen); 5434 #endif 5435 5436 switch(errorcode) 5437 { 5438 case -1: 5439 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be " 5440 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 
16:32); 5441 return PR_SKIP; 5442 5443 case -2: 5444 fprintf(outfile, "** Failed: character value greater than 0x10ffff " 5445 "cannot be converted to UTF\n"); 5446 return PR_SKIP; 5447 5448 case -3: 5449 fprintf(outfile, "** Failed: character value greater than 0xffff " 5450 "cannot be converted to 16-bit in non-UTF mode\n"); 5451 return PR_SKIP; 5452 5453 default: 5454 break; 5455 } 5456 5457 /* The pattern is now in pbuffer[8|16|32], with the length in code units in 5458 patlen. If it is to be converted, copy the result back afterwards so that it 5459 ends up back in the usual place. */ 5460 5461 if (pat_patctl.convert_type != CONVERT_UNSET) 5462 { 5463 int rc; 5464 int convert_return = PR_OK; 5465 uint32_t convert_options = pat_patctl.convert_type; 5466 void *converted_pattern; 5467 PCRE2_SIZE converted_length; 5468 5469 if (pat_patctl.convert_length != 0) 5470 { 5471 converted_length = pat_patctl.convert_length; 5472 converted_pattern = malloc(converted_length * code_unit_size); 5473 if (converted_pattern == NULL) 5474 { 5475 fprintf(outfile, "** Failed: malloc failed for converted pattern\n"); 5476 return PR_SKIP; 5477 } 5478 } 5479 else converted_pattern = NULL; /* Let the library allocate */ 5480 5481 if (utf) convert_options |= PCRE2_CONVERT_UTF; 5482 if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0) 5483 convert_options |= PCRE2_CONVERT_NO_UTF_CHECK; 5484 5485 CONCTXCPY(con_context, default_con_context); 5486 5487 if (pat_patctl.convert_glob_escape != 0) 5488 { 5489 uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 
0 : 5490 pat_patctl.convert_glob_escape; 5491 PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape); 5492 if (rc != 0) 5493 { 5494 fprintf(outfile, "** Invalid glob escape '%c'\n", 5495 pat_patctl.convert_glob_escape); 5496 convert_return = PR_SKIP; 5497 goto CONVERT_FINISH; 5498 } 5499 } 5500 5501 if (pat_patctl.convert_glob_separator != 0) 5502 { 5503 PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator); 5504 if (rc != 0) 5505 { 5506 fprintf(outfile, "** Invalid glob separator '%c'\n", 5507 pat_patctl.convert_glob_separator); 5508 convert_return = PR_SKIP; 5509 goto CONVERT_FINISH; 5510 } 5511 } 5512 5513 PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options, 5514 &converted_pattern, &converted_length, con_context); 5515 5516 if (rc != 0) 5517 { 5518 fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ", 5519 SIZ_CAST converted_length); 5520 convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND; 5521 } 5522 5523 /* Output the converted pattern, then copy it. */ 5524 5525 else 5526 { 5527 PCHARSV(converted_pattern, 0, converted_length, utf, outfile); 5528 fprintf(outfile, "\n"); 5529 patlen = converted_length; 5530 CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1); 5531 } 5532 5533 /* Free the converted pattern. */ 5534 5535 CONVERT_FINISH: 5536 if (pat_patctl.convert_length != 0) 5537 free(converted_pattern); 5538 else 5539 PCRE2_CONVERTED_PATTERN_FREE(converted_pattern); 5540 5541 /* Return if conversion was unsuccessful. */ 5542 5543 if (convert_return != PR_OK) return convert_return; 5544 } 5545 5546 /* By default we pass a zero-terminated pattern, but a length is passed if 5547 "use_length" was specified or this is a hex pattern (which might contain binary 5548 zeros). When valgrind is supported, arrange for the unused part of the buffer 5549 to be marked as no access. 
*/ 5550 5551 valgrind_access_length = patlen; 5552 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0) 5553 { 5554 patlen = PCRE2_ZERO_TERMINATED; 5555 valgrind_access_length += 1; /* For the terminating zero */ 5556 } 5557 5558 #ifdef SUPPORT_VALGRIND 5559 #ifdef SUPPORT_PCRE2_8 5560 if (test_mode == PCRE8_MODE && pbuffer8 != NULL) 5561 { 5562 VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length, 5563 pbuffer8_size - valgrind_access_length); 5564 } 5565 #endif 5566 #ifdef SUPPORT_PCRE2_16 5567 if (test_mode == PCRE16_MODE && pbuffer16 != NULL) 5568 { 5569 VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length, 5570 pbuffer16_size - valgrind_access_length*sizeof(uint16_t)); 5571 } 5572 #endif 5573 #ifdef SUPPORT_PCRE2_32 5574 if (test_mode == PCRE32_MODE && pbuffer32 != NULL) 5575 { 5576 VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length, 5577 pbuffer32_size - valgrind_access_length*sizeof(uint32_t)); 5578 } 5579 #endif 5580 #else /* Valgrind not supported */ 5581 (void)valgrind_access_length; /* Avoid compiler warning */ 5582 #endif 5583 5584 /* If #newline_default has been used and the library was not compiled with an 5585 appropriate default newline setting, local_newline_default will be non-zero. We 5586 use this if there is no explicit newline modifier. */ 5587 5588 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0) 5589 { 5590 SETFLD(pat_context, newline_convention, local_newline_default); 5591 } 5592 5593 /* The null_context modifier is used to test calling pcre2_compile() with a 5594 NULL context. */ 5595 5596 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)? 5597 NULL : PTR(pat_context); 5598 5599 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF 5600 and PCRE2_NEVER_UCP are invalid with it. */ 5601 5602 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0; 5603 5604 /* Compile many times when timing. 
*/ 5605 5606 if (timeit > 0) 5607 { 5608 int i; 5609 clock_t time_taken = 0; 5610 for (i = 0; i < timeit; i++) 5611 { 5612 clock_t start_time = clock(); 5613 PCRE2_COMPILE(compiled_code, pbuffer, patlen, 5614 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, 5615 use_pat_context); 5616 time_taken += clock() - start_time; 5617 if (TEST(compiled_code, !=, NULL)) 5618 { SUB1(pcre2_code_free, compiled_code); } 5619 } 5620 total_compile_time += time_taken; 5621 fprintf(outfile, "Compile time %.4f milliseconds\n", 5622 (((double)time_taken * 1000.0) / (double)timeit) / 5623 (double)CLOCKS_PER_SEC); 5624 } 5625 5626 /* A final compile that is used "for real". */ 5627 5628 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf, 5629 &errorcode, &erroroffset, use_pat_context); 5630 5631 /* Call the JIT compiler if requested. When timing, we must free and recompile 5632 the pattern each time because that is the only way to free the JIT compiled 5633 code. We know that compilation will always succeed. */ 5634 5635 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0) 5636 { 5637 if (timeit > 0) 5638 { 5639 int i; 5640 clock_t time_taken = 0; 5641 for (i = 0; i < timeit; i++) 5642 { 5643 clock_t start_time; 5644 SUB1(pcre2_code_free, compiled_code); 5645 PCRE2_COMPILE(compiled_code, pbuffer, patlen, 5646 pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset, 5647 use_pat_context); 5648 start_time = clock(); 5649 PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit); 5650 time_taken += clock() - start_time; 5651 } 5652 total_jit_compile_time += time_taken; 5653 fprintf(outfile, "JIT compile %.4f milliseconds\n", 5654 (((double)time_taken * 1000.0) / (double)timeit) / 5655 (double)CLOCKS_PER_SEC); 5656 } 5657 else 5658 { 5659 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); 5660 } 5661 } 5662 5663 /* If valgrind is supported, mark the pbuffer as accessible again. 
The 16-bit 5664 and 32-bit buffers can be marked completely undefined, but we must leave the 5665 pattern in the 8-bit buffer defined because it may be read from a callout 5666 during matching. */ 5667 5668 #ifdef SUPPORT_VALGRIND 5669 #ifdef SUPPORT_PCRE2_8 5670 if (test_mode == PCRE8_MODE) 5671 { 5672 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length, 5673 pbuffer8_size - valgrind_access_length); 5674 } 5675 #endif 5676 #ifdef SUPPORT_PCRE2_16 5677 if (test_mode == PCRE16_MODE) 5678 { 5679 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size); 5680 } 5681 #endif 5682 #ifdef SUPPORT_PCRE2_32 5683 if (test_mode == PCRE32_MODE) 5684 { 5685 VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size); 5686 } 5687 #endif 5688 #endif 5689 5690 /* Compilation failed; go back for another re, skipping to blank line 5691 if non-interactive. */ 5692 5693 if (TEST(compiled_code, ==, NULL)) 5694 { 5695 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode, 5696 (int)erroroffset); 5697 if (!print_error_message(errorcode, "", "\n")) return PR_ABEND; 5698 return PR_SKIP; 5699 } 5700 5701 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are 5702 locked out at compile time, but we must also check for occurrences of \P, \p, 5703 and \X, which are only supported when Unicode is supported. */ 5704 5705 if (forbid_utf != 0) 5706 { 5707 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0) 5708 { 5709 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the " 5710 "#forbid_utf command\n"); 5711 return PR_SKIP; 5712 } 5713 } 5714 5715 /* Remember the maximum lookbehind, for partial matching. */ 5716 5717 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0) 5718 return PR_ABEND; 5719 5720 /* If an explicit newline modifier was given, set the information flag in the 5721 pattern so that it is preserved over push/pop. 
*/ 5722 5723 if ((pat_patctl.control2 & CTL2_NL_SET) != 0) 5724 { 5725 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET); 5726 } 5727 5728 /* Output code size and other information if requested. */ 5729 5730 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info(); 5731 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize(); 5732 if ((pat_patctl.control & CTL_ANYINFO) != 0) 5733 { 5734 int rc = show_pattern_info(); 5735 if (rc != PR_OK) return rc; 5736 } 5737 5738 /* The "push" control requests that the compiled pattern be remembered on a 5739 stack. This is mainly for testing the serialization functionality. */ 5740 5741 if ((pat_patctl.control & CTL_PUSH) != 0) 5742 { 5743 if (patstacknext >= PATSTACKSIZE) 5744 { 5745 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); 5746 return PR_ABEND; 5747 } 5748 patstack[patstacknext++] = PTR(compiled_code); 5749 SET(compiled_code, NULL); 5750 } 5751 5752 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a 5753 copy of the pattern, the latter with a copy of its character tables. This tests 5754 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */ 5755 5756 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) 5757 { 5758 if (patstacknext >= PATSTACKSIZE) 5759 { 5760 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); 5761 return PR_ABEND; 5762 } 5763 if ((pat_patctl.control & CTL_PUSHCOPY) != 0) 5764 { 5765 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code); 5766 } 5767 else 5768 { 5769 PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++], 5770 compiled_code); } 5771 } 5772 5773 return PR_OK; 5774 } 5775 5776 5777 5778 /************************************************* 5779 * Check heap, match or depth limit * 5780 *************************************************/ 5781 5782 /* This is used for DFA, normal, and JIT fast matching. 
For DFA matching it 5783 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT. 5784 5785 Arguments: 5786 pp the subject string 5787 ulen length of subject or PCRE2_ZERO_TERMINATED 5788 errnumber defines which limit to test 5789 msg string to include in final message 5790 5791 Returns: the return from the final match function call 5792 */ 5793 5794 static int 5795 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg) 5796 { 5797 int capcount; 5798 uint32_t min = 0; 5799 uint32_t mid = 64; 5800 uint32_t max = UINT32_MAX; 5801 5802 PCRE2_SET_MATCH_LIMIT(dat_context, max); 5803 PCRE2_SET_DEPTH_LIMIT(dat_context, max); 5804 PCRE2_SET_HEAP_LIMIT(dat_context, max); 5805 5806 for (;;) 5807 { 5808 uint32_t stack_start = 0; 5809 5810 if (errnumber == PCRE2_ERROR_HEAPLIMIT) 5811 { 5812 PCRE2_SET_HEAP_LIMIT(dat_context, mid); 5813 } 5814 else if (errnumber == PCRE2_ERROR_MATCHLIMIT) 5815 { 5816 PCRE2_SET_MATCH_LIMIT(dat_context, mid); 5817 } 5818 else 5819 { 5820 PCRE2_SET_DEPTH_LIMIT(dat_context, mid); 5821 } 5822 5823 if ((dat_datctl.control & CTL_DFA) != 0) 5824 { 5825 stack_start = DFA_START_RWS_SIZE/1024; 5826 if (dfa_workspace == NULL) 5827 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 5828 if (dfa_matched++ == 0) 5829 dfa_workspace[0] = -1; /* To catch bad restart */ 5830 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, 5831 dat_datctl.options, match_data, 5832 PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION); 5833 } 5834 5835 else if ((pat_patctl.control & CTL_JITFAST) != 0) 5836 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, 5837 dat_datctl.options, match_data, PTR(dat_context)); 5838 5839 else 5840 { 5841 stack_start = START_FRAMES_SIZE/1024; 5842 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, 5843 dat_datctl.options, match_data, PTR(dat_context)); 5844 } 5845 5846 if (capcount == errnumber) 5847 { 5848 if ((mid & 0x80000000u) != 0) 5849 { 
5850 fprintf(outfile, "Can't find minimum %s limit: check pattern for " 5851 "restriction\n", msg); 5852 break; 5853 } 5854 5855 min = mid; 5856 mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2; 5857 } 5858 else if (capcount >= 0 || 5859 capcount == PCRE2_ERROR_NOMATCH || 5860 capcount == PCRE2_ERROR_PARTIAL) 5861 { 5862 /* If we've not hit the error with a heap limit less than the size of the 5863 initial stack frame vector (for pcre2_match()) or the initial stack 5864 workspace vector (for pcre2_dfa_match()), the heap is not being used, so 5865 the minimum limit is zero; there's no need to go on. The other limits are 5866 always greater than zero. */ 5867 5868 if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start) 5869 { 5870 fprintf(outfile, "Minimum %s limit = 0\n", msg); 5871 break; 5872 } 5873 if (mid == min + 1) 5874 { 5875 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid); 5876 break; 5877 } 5878 max = mid; 5879 mid = (min + max)/2; 5880 } 5881 else break; /* Some other error */ 5882 } 5883 5884 return capcount; 5885 } 5886 5887 5888 5889 /************************************************* 5890 * Callout function * 5891 *************************************************/ 5892 5893 /* Called from a PCRE2 library as a result of the (?C) item. We print out where 5894 we are in the match (unless suppressed). Yield zero unless more callouts than 5895 the fail count, or the callout data is not zero. The only differences in the 5896 callout block for different code unit widths are that the pointers to the 5897 subject, the most recent MARK, and a callout argument string point to strings 5898 of the appropriate width. Casts can be used to deal with this. 
5899 5900 Argument: a pointer to a callout block 5901 Return: 5902 */ 5903 5904 static int 5905 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr) 5906 { 5907 FILE *f, *fdefault; 5908 uint32_t i, pre_start, post_start, subject_length; 5909 PCRE2_SIZE current_position; 5910 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; 5911 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0; 5912 BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0; 5913 5914 /* The FILE f is used for echoing the subject string if it is non-NULL. This 5915 happens only once in simple cases, but we want to repeat after any additional 5916 output caused by CALLOUT_EXTRA. */ 5917 5918 fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)? 5919 NULL : outfile; 5920 5921 if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0) 5922 { 5923 f = outfile; 5924 switch (cb->callout_flags) 5925 { 5926 case PCRE2_CALLOUT_BACKTRACK: 5927 fprintf(f, "Backtrack\n"); 5928 break; 5929 5930 case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK: 5931 fprintf(f, "Backtrack\nNo other matching paths\n"); 5932 /* Fall through */ 5933 5934 case PCRE2_CALLOUT_STARTMATCH: 5935 fprintf(f, "New match attempt\n"); 5936 break; 5937 5938 default: 5939 f = fdefault; 5940 break; 5941 } 5942 } 5943 else f = fdefault; 5944 5945 /* For a callout with a string argument, show the string first because there 5946 isn't a tidy way to fit it in the rest of the data. 
*/ 5947 5948 if (cb->callout_string != NULL) 5949 { 5950 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); 5951 fprintf(outfile, "Callout (%" SIZ_FORM "): %c", 5952 SIZ_CAST cb->callout_string_offset, delimiter); 5953 PCHARSV(cb->callout_string, 0, 5954 cb->callout_string_length, utf, outfile); 5955 for (i = 0; callout_start_delims[i] != 0; i++) 5956 if (delimiter == callout_start_delims[i]) 5957 { 5958 delimiter = callout_end_delims[i]; 5959 break; 5960 } 5961 fprintf(outfile, "%c", delimiter); 5962 if (!callout_capture) fprintf(outfile, "\n"); 5963 } 5964 5965 /* Show captured strings if required */ 5966 5967 if (callout_capture) 5968 { 5969 if (cb->callout_string == NULL) 5970 fprintf(outfile, "Callout %d:", cb->callout_number); 5971 fprintf(outfile, " last capture = %d\n", cb->capture_last); 5972 for (i = 2; i < cb->capture_top * 2; i += 2) 5973 { 5974 fprintf(outfile, "%2d: ", i/2); 5975 if (cb->offset_vector[i] == PCRE2_UNSET) 5976 fprintf(outfile, "<unset>"); 5977 else 5978 { 5979 PCHARSV(cb->subject, cb->offset_vector[i], 5980 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f); 5981 } 5982 fprintf(outfile, "\n"); 5983 } 5984 } 5985 5986 /* Unless suppressed, re-print the subject in canonical form (with escapes for 5987 non-printing characters), the first time, or if giving full details. On 5988 subsequent calls in the same match, we use PCHARS() just to find the printed 5989 lengths of the substrings. */ 5990 5991 if (callout_where) 5992 { 5993 if (f != NULL) fprintf(f, "--->"); 5994 5995 /* The subject before the match start. */ 5996 5997 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f); 5998 5999 /* If a lookbehind is involved, the current position may be earlier than the 6000 match start. If so, use the match start instead. */ 6001 6002 current_position = (cb->current_position >= cb->start_match)? 6003 cb->current_position : cb->start_match; 6004 6005 /* The subject between the match start and the current position. 
*/ 6006 6007 PCHARS(post_start, cb->subject, cb->start_match, 6008 current_position - cb->start_match, utf, f); 6009 6010 /* Print from the current position to the end. */ 6011 6012 PCHARSV(cb->subject, current_position, cb->subject_length - current_position, 6013 utf, f); 6014 6015 /* Calculate the total subject printed length (no print). */ 6016 6017 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); 6018 6019 if (f != NULL) fprintf(f, "\n"); 6020 6021 /* For automatic callouts, show the pattern offset. Otherwise, for a 6022 numerical callout whose number has not already been shown with captured 6023 strings, show the number here. A callout with a string argument has been 6024 displayed above. */ 6025 6026 if (cb->callout_number == 255) 6027 { 6028 fprintf(outfile, "%+3d ", (int)cb->pattern_position); 6029 if (cb->pattern_position > 99) fprintf(outfile, "\n "); 6030 } 6031 else 6032 { 6033 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " "); 6034 else fprintf(outfile, "%3d ", cb->callout_number); 6035 } 6036 6037 /* Now show position indicators */ 6038 6039 for (i = 0; i < pre_start; i++) fprintf(outfile, " "); 6040 fprintf(outfile, "^"); 6041 6042 if (post_start > 0) 6043 { 6044 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); 6045 fprintf(outfile, "^"); 6046 } 6047 6048 for (i = 0; i < subject_length - pre_start - post_start + 4; i++) 6049 fprintf(outfile, " "); 6050 6051 if (cb->next_item_length != 0) 6052 fprintf(outfile, "%.*s", (int)(cb->next_item_length), 6053 pbuffer8 + cb->pattern_position); 6054 else 6055 fprintf(outfile, "End of pattern"); 6056 6057 fprintf(outfile, "\n"); 6058 } 6059 6060 first_callout = FALSE; 6061 6062 /* Show any mark info */ 6063 6064 if (cb->mark != last_callout_mark) 6065 { 6066 if (cb->mark == NULL) 6067 fprintf(outfile, "Latest Mark: <unset>\n"); 6068 else 6069 { 6070 fprintf(outfile, "Latest Mark: "); 6071 PCHARSV(cb->mark, 0, -1, utf, outfile); 6072 putc('\n', outfile); 
6073 } 6074 last_callout_mark = cb->mark; 6075 } 6076 6077 /* Show callout data */ 6078 6079 if (callout_data_ptr != NULL) 6080 { 6081 int callout_data = *((int32_t *)callout_data_ptr); 6082 if (callout_data != 0) 6083 { 6084 fprintf(outfile, "Callout data = %d\n", callout_data); 6085 return callout_data; 6086 } 6087 } 6088 6089 /* Keep count and give the appropriate return code */ 6090 6091 callout_count++; 6092 6093 if (cb->callout_number == dat_datctl.cerror[0] && 6094 callout_count >= dat_datctl.cerror[1]) 6095 return PCRE2_ERROR_CALLOUT; 6096 6097 if (cb->callout_number == dat_datctl.cfail[0] && 6098 callout_count >= dat_datctl.cfail[1]) 6099 return 1; 6100 6101 return 0; 6102 } 6103 6104 6105 6106 /************************************************* 6107 * Handle *MARK and copy/get tests * 6108 *************************************************/ 6109 6110 /* This function is called after complete and partial matches. It runs the 6111 tests for substring extraction. 6112 6113 Arguments: 6114 utf TRUE for utf 6115 capcount return from pcre2_match() 6116 6117 Returns: FALSE if print_error_message() fails 6118 */ 6119 6120 static BOOL 6121 copy_and_get(BOOL utf, int capcount) 6122 { 6123 int i; 6124 uint8_t *nptr; 6125 6126 /* Test copy strings by number */ 6127 6128 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++) 6129 { 6130 int rc; 6131 PCRE2_SIZE length, length2; 6132 uint32_t copybuffer[256]; 6133 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]); 6134 length = sizeof(copybuffer)/code_unit_size; 6135 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length); 6136 if (rc < 0) 6137 { 6138 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc); 6139 if (!print_error_message(rc, "", "\n")) return FALSE; 6140 } 6141 else 6142 { 6143 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2); 6144 if (rc < 0) 6145 { 6146 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc); 6147 if (!print_error_message(rc, "", "\n")) 
return FALSE; 6148 } 6149 else if (length2 != length) 6150 { 6151 fprintf(outfile, "Mismatched substring lengths: %" 6152 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2); 6153 } 6154 fprintf(outfile, "%2dC ", n); 6155 PCHARSV(copybuffer, 0, length, utf, outfile); 6156 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length); 6157 } 6158 } 6159 6160 /* Test copy strings by name */ 6161 6162 nptr = dat_datctl.copy_names; 6163 for (;;) 6164 { 6165 int rc; 6166 int groupnumber; 6167 PCRE2_SIZE length, length2; 6168 uint32_t copybuffer[256]; 6169 int namelen = strlen((const char *)nptr); 6170 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 6171 PCRE2_SIZE cnl = namelen; 6172 #endif 6173 if (namelen == 0) break; 6174 6175 #ifdef SUPPORT_PCRE2_8 6176 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr); 6177 #endif 6178 #ifdef SUPPORT_PCRE2_16 6179 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl); 6180 #endif 6181 #ifdef SUPPORT_PCRE2_32 6182 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl); 6183 #endif 6184 6185 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); 6186 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) 6187 fprintf(outfile, "Number not found for group '%s'\n", nptr); 6188 6189 length = sizeof(copybuffer)/code_unit_size; 6190 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length); 6191 if (rc < 0) 6192 { 6193 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc); 6194 if (!print_error_message(rc, "", "\n")) return FALSE; 6195 } 6196 else 6197 { 6198 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2); 6199 if (rc < 0) 6200 { 6201 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc); 6202 if (!print_error_message(rc, "", "\n")) return FALSE; 6203 } 6204 else if (length2 != length) 6205 { 6206 fprintf(outfile, "Mismatched substring lengths: %" 6207 SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST 
length2); 6208 } 6209 fprintf(outfile, " C "); 6210 PCHARSV(copybuffer, 0, length, utf, outfile); 6211 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr); 6212 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); 6213 else fprintf(outfile, " (non-unique)\n"); 6214 } 6215 nptr += namelen + 1; 6216 } 6217 6218 /* Test get strings by number */ 6219 6220 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++) 6221 { 6222 int rc; 6223 PCRE2_SIZE length; 6224 void *gotbuffer; 6225 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]); 6226 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length); 6227 if (rc < 0) 6228 { 6229 fprintf(outfile, "Get substring %d failed (%d): ", n, rc); 6230 if (!print_error_message(rc, "", "\n")) return FALSE; 6231 } 6232 else 6233 { 6234 fprintf(outfile, "%2dG ", n); 6235 PCHARSV(gotbuffer, 0, length, utf, outfile); 6236 fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length); 6237 PCRE2_SUBSTRING_FREE(gotbuffer); 6238 } 6239 } 6240 6241 /* Test get strings by name */ 6242 6243 nptr = dat_datctl.get_names; 6244 for (;;) 6245 { 6246 PCRE2_SIZE length; 6247 void *gotbuffer; 6248 int rc; 6249 int groupnumber; 6250 int namelen = strlen((const char *)nptr); 6251 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 6252 PCRE2_SIZE cnl = namelen; 6253 #endif 6254 if (namelen == 0) break; 6255 6256 #ifdef SUPPORT_PCRE2_8 6257 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr); 6258 #endif 6259 #ifdef SUPPORT_PCRE2_16 6260 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl); 6261 #endif 6262 #ifdef SUPPORT_PCRE2_32 6263 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl); 6264 #endif 6265 6266 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); 6267 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) 6268 fprintf(outfile, "Number not found for group '%s'\n", nptr); 6269 6270 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, 
&gotbuffer, &length); 6271 if (rc < 0) 6272 { 6273 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc); 6274 if (!print_error_message(rc, "", "\n")) return FALSE; 6275 } 6276 else 6277 { 6278 fprintf(outfile, " G "); 6279 PCHARSV(gotbuffer, 0, length, utf, outfile); 6280 fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr); 6281 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); 6282 else fprintf(outfile, " (non-unique)\n"); 6283 PCRE2_SUBSTRING_FREE(gotbuffer); 6284 } 6285 nptr += namelen + 1; 6286 } 6287 6288 /* Test getting the complete list of captured strings. */ 6289 6290 if ((dat_datctl.control & CTL_GETALL) != 0) 6291 { 6292 int rc; 6293 void **stringlist; 6294 PCRE2_SIZE *lengths; 6295 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths); 6296 if (rc < 0) 6297 { 6298 fprintf(outfile, "get substring list failed (%d): ", rc); 6299 if (!print_error_message(rc, "", "\n")) return FALSE; 6300 } 6301 else 6302 { 6303 for (i = 0; i < capcount; i++) 6304 { 6305 fprintf(outfile, "%2dL ", i); 6306 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile); 6307 putc('\n', outfile); 6308 } 6309 if (stringlist[i] != NULL) 6310 fprintf(outfile, "string list not terminated by NULL\n"); 6311 PCRE2_SUBSTRING_LIST_FREE(stringlist); 6312 } 6313 } 6314 6315 return TRUE; 6316 } 6317 6318 6319 6320 /************************************************* 6321 * Process a data line * 6322 *************************************************/ 6323 6324 /* The line is in buffer; it will not be empty. 
6325 6326 Arguments: none 6327 6328 Returns: PR_OK continue processing next line 6329 PR_SKIP skip to a blank line 6330 PR_ABEND abort the pcre2test run 6331 */ 6332 6333 static int 6334 process_data(void) 6335 { 6336 PCRE2_SIZE len, ulen, arg_ulen; 6337 uint32_t gmatched; 6338 uint32_t c, k; 6339 uint32_t g_notempty = 0; 6340 uint8_t *p, *pp, *start_rep; 6341 size_t needlen; 6342 void *use_dat_context; 6343 BOOL utf; 6344 BOOL subject_literal; 6345 PCRE2_SIZE ovecsave[3]; 6346 6347 #ifdef SUPPORT_PCRE2_8 6348 uint8_t *q8 = NULL; 6349 #endif 6350 #ifdef SUPPORT_PCRE2_16 6351 uint16_t *q16 = NULL; 6352 #endif 6353 #ifdef SUPPORT_PCRE2_32 6354 uint32_t *q32 = NULL; 6355 #endif 6356 6357 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0; 6358 6359 /* Copy the default context and data control blocks to the active ones. Then 6360 copy from the pattern the controls that can be set in either the pattern or the 6361 data. This allows them to be overridden in the data line. We do not do this for 6362 options because those that are common apply separately to compiling and 6363 matching. */ 6364 6365 DATCTXCPY(dat_context, default_dat_context); 6366 memcpy(&dat_datctl, &def_datctl, sizeof(datctl)); 6367 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD); 6368 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD); 6369 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement); 6370 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack; 6371 6372 /* Initialize for scanning the data line. */ 6373 6374 #ifdef SUPPORT_PCRE2_8 6375 utf = ((((pat_patctl.control & CTL_POSIX) != 0)? 
6376 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options : 6377 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0; 6378 #else 6379 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; 6380 #endif 6381 6382 start_rep = NULL; 6383 len = strlen((const char *)buffer); 6384 while (len > 0 && isspace(buffer[len-1])) len--; 6385 buffer[len] = 0; 6386 p = buffer; 6387 while (isspace(*p)) p++; 6388 6389 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create 6390 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */ 6391 6392 if (utf) 6393 { 6394 uint8_t *q; 6395 uint32_t cc; 6396 int n = 1; 6397 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc); 6398 if (n <= 0) 6399 { 6400 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input " 6401 "in UTF mode\n"); 6402 return PR_OK; 6403 } 6404 } 6405 6406 #ifdef SUPPORT_VALGRIND 6407 /* Mark the dbuffer as addressable but undefined again. */ 6408 if (dbuffer != NULL) 6409 { 6410 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size); 6411 } 6412 #endif 6413 6414 /* Allocate a buffer to hold the data line; len+1 is an upper bound on 6415 the number of code units that will be needed (though the buffer may have to be 6416 extended if replication is involved). */ 6417 6418 needlen = (size_t)((len+1) * code_unit_size); 6419 if (dbuffer == NULL || needlen >= dbuffer_size) 6420 { 6421 while (needlen >= dbuffer_size) dbuffer_size *= 2; 6422 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size); 6423 if (dbuffer == NULL) 6424 { 6425 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size); 6426 exit(1); 6427 } 6428 } 6429 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */ 6430 6431 /* Scan the data line, interpreting data escapes, and put the result into a 6432 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise, 6433 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier. 
6434 */ 6435 6436 while ((c = *p++) != 0) 6437 { 6438 int32_t i = 0; 6439 size_t replen; 6440 6441 /* ] may mark the end of a replicated sequence */ 6442 6443 if (c == ']' && start_rep != NULL) 6444 { 6445 long li; 6446 char *endptr; 6447 size_t qoffset = CAST8VAR(q) - dbuffer; 6448 size_t rep_offset = start_rep - dbuffer; 6449 6450 if (*p++ != '{') 6451 { 6452 fprintf(outfile, "** Expected '{' after \\[....]\n"); 6453 return PR_OK; 6454 } 6455 6456 li = strtol((const char *)p, &endptr, 10); 6457 if (S32OVERFLOW(li)) 6458 { 6459 fprintf(outfile, "** Repeat count too large\n"); 6460 return PR_OK; 6461 } 6462 6463 p = (uint8_t *)endptr; 6464 if (*p++ != '}') 6465 { 6466 fprintf(outfile, "** Expected '}' after \\[...]{...\n"); 6467 return PR_OK; 6468 } 6469 6470 i = (int32_t)li; 6471 if (i-- == 0) 6472 { 6473 fprintf(outfile, "** Zero repeat not allowed\n"); 6474 return PR_OK; 6475 } 6476 6477 replen = CAST8VAR(q) - start_rep; 6478 needlen += replen * i; 6479 6480 if (needlen >= dbuffer_size) 6481 { 6482 while (needlen >= dbuffer_size) dbuffer_size *= 2; 6483 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size); 6484 if (dbuffer == NULL) 6485 { 6486 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size); 6487 exit(1); 6488 } 6489 SETCASTPTR(q, dbuffer + qoffset); 6490 start_rep = dbuffer + rep_offset; 6491 } 6492 6493 while (i-- > 0) 6494 { 6495 memcpy(CAST8VAR(q), start_rep, replen); 6496 SETPLUS(q, replen/code_unit_size); 6497 } 6498 6499 start_rep = NULL; 6500 continue; 6501 } 6502 6503 /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input 6504 set, do the fudge for setting the top bit. 
*/ 6505 6506 if (c != '\\' || subject_literal) 6507 { 6508 uint32_t topbit = 0; 6509 if (test_mode == PCRE32_MODE && c == 0xff && *p != 0) 6510 { 6511 topbit = 0x80000000; 6512 c = *p++; 6513 } 6514 if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) && 6515 HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); } 6516 c |= topbit; 6517 } 6518 6519 /* Handle backslash escapes */ 6520 6521 else switch ((c = *p++)) 6522 { 6523 case '\\': break; 6524 case 'a': c = CHAR_BEL; break; 6525 case 'b': c = '\b'; break; 6526 case 'e': c = CHAR_ESC; break; 6527 case 'f': c = '\f'; break; 6528 case 'n': c = '\n'; break; 6529 case 'r': c = '\r'; break; 6530 case 't': c = '\t'; break; 6531 case 'v': c = '\v'; break; 6532 6533 case '0': case '1': case '2': case '3': 6534 case '4': case '5': case '6': case '7': 6535 c -= '0'; 6536 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') 6537 c = c * 8 + *p++ - '0'; 6538 break; 6539 6540 case 'o': 6541 if (*p == '{') 6542 { 6543 uint8_t *pt = p; 6544 c = 0; 6545 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++) 6546 { 6547 if (++i == 12) 6548 fprintf(outfile, "** Too many octal digits in \\o{...} item; " 6549 "using only the first twelve.\n"); 6550 else c = c * 8 + *pt - '0'; 6551 } 6552 if (*pt == '}') p = pt + 1; 6553 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n"); 6554 } 6555 break; 6556 6557 case 'x': 6558 if (*p == '{') 6559 { 6560 uint8_t *pt = p; 6561 c = 0; 6562 6563 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails 6564 when isxdigit() is a macro that refers to its argument more than 6565 once. This is banned by the C Standard, but apparently happens in at 6566 least one MacOS environment. */ 6567 6568 for (pt++; isxdigit(*pt); pt++) 6569 { 6570 if (++i == 9) 6571 fprintf(outfile, "** Too many hex digits in \\x{...} item; " 6572 "using only the first eight.\n"); 6573 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? 
'0' : 'a' - 10); 6574 } 6575 if (*pt == '}') 6576 { 6577 p = pt + 1; 6578 break; 6579 } 6580 /* Not correct form for \x{...}; fall through */ 6581 } 6582 6583 /* \x without {} always defines just one byte in 8-bit mode. This 6584 allows UTF-8 characters to be constructed byte by byte, and also allows 6585 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode. 6586 Otherwise, pass it down as data. */ 6587 6588 c = 0; 6589 while (i++ < 2 && isxdigit(*p)) 6590 { 6591 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10); 6592 p++; 6593 } 6594 #if defined SUPPORT_PCRE2_8 6595 if (utf && (test_mode == PCRE8_MODE)) 6596 { 6597 *q8++ = c; 6598 continue; 6599 } 6600 #endif 6601 break; 6602 6603 case 0: /* \ followed by EOF allows for an empty line */ 6604 p--; 6605 continue; 6606 6607 case '=': /* \= terminates the data, starts modifiers */ 6608 goto ENDSTRING; 6609 6610 case '[': /* \[ introduces a replicated character sequence */ 6611 if (start_rep != NULL) 6612 { 6613 fprintf(outfile, "** Nested replication is not supported\n"); 6614 return PR_OK; 6615 } 6616 start_rep = CAST8VAR(q); 6617 continue; 6618 6619 default: 6620 if (isalnum(c)) 6621 { 6622 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c); 6623 return PR_OK; 6624 } 6625 } 6626 6627 /* We now have a character value in c that may be greater than 255. 6628 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater 6629 than 127 in UTF mode must have come from \x{...} or octal constructs 6630 because values from \x.. get this far only in non-UTF mode. 
*/ 6631 6632 #ifdef SUPPORT_PCRE2_8 6633 if (test_mode == PCRE8_MODE) 6634 { 6635 if (utf) 6636 { 6637 if (c > 0x7fffffff) 6638 { 6639 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff " 6640 "and so cannot be converted to UTF-8\n", c); 6641 return PR_OK; 6642 } 6643 q8 += ord2utf8(c, q8); 6644 } 6645 else 6646 { 6647 if (c > 0xffu) 6648 { 6649 fprintf(outfile, "** Character \\x{%x} is greater than 255 " 6650 "and UTF-8 mode is not enabled.\n", c); 6651 fprintf(outfile, "** Truncation will probably give the wrong " 6652 "result.\n"); 6653 } 6654 *q8++ = c; 6655 } 6656 } 6657 #endif 6658 #ifdef SUPPORT_PCRE2_16 6659 if (test_mode == PCRE16_MODE) 6660 { 6661 if (utf) 6662 { 6663 if (c > 0x10ffffu) 6664 { 6665 fprintf(outfile, "** Failed: character \\x{%x} is greater than " 6666 "0x10ffff and so cannot be converted to UTF-16\n", c); 6667 return PR_OK; 6668 } 6669 else if (c >= 0x10000u) 6670 { 6671 c-= 0x10000u; 6672 *q16++ = 0xD800 | (c >> 10); 6673 *q16++ = 0xDC00 | (c & 0x3ff); 6674 } 6675 else 6676 *q16++ = c; 6677 } 6678 else 6679 { 6680 if (c > 0xffffu) 6681 { 6682 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff " 6683 "and UTF-16 mode is not enabled.\n", c); 6684 fprintf(outfile, "** Truncation will probably give the wrong " 6685 "result.\n"); 6686 } 6687 6688 *q16++ = c; 6689 } 6690 } 6691 #endif 6692 #ifdef SUPPORT_PCRE2_32 6693 if (test_mode == PCRE32_MODE) 6694 { 6695 *q32++ = c; 6696 } 6697 #endif 6698 } 6699 6700 ENDSTRING: 6701 SET(*q, 0); 6702 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */ 6703 ulen = len/code_unit_size; /* Length in code units */ 6704 arg_ulen = ulen; /* Value to use in match arg */ 6705 6706 /* If the string was terminated by \= we must now interpret modifiers. */ 6707 6708 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl)) 6709 return PR_OK; 6710 6711 /* Check for mutually exclusive modifiers. At present, these are all in the 6712 first control word. 
*/ 6713 6714 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++) 6715 { 6716 c = dat_datctl.control & exclusive_dat_controls[k]; 6717 if (c != 0 && c != (c & (~c+1))) 6718 { 6719 show_controls(c, 0, "** Not allowed together:"); 6720 fprintf(outfile, "\n"); 6721 return PR_OK; 6722 } 6723 } 6724 6725 if (pat_patctl.replacement[0] != 0 && 6726 (dat_datctl.control & CTL_NULLCONTEXT) != 0) 6727 { 6728 fprintf(outfile, "** Replacement text is not supported with null_context.\n"); 6729 return PR_OK; 6730 } 6731 6732 /* We now have the subject in dbuffer, with len containing the byte length, and 6733 ulen containing the code unit length, with a copy in arg_ulen for use in match 6734 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the 6735 zero_terminate modifier is present). 6736 6737 Move the data to the end of the buffer so that a read over the end can be 6738 caught by valgrind or other means. If we have explicit valgrind support, mark 6739 the unused start of the buffer unaddressable. If we are using the POSIX 6740 interface, or testing zero-termination, we must include the terminating zero in 6741 the usable data. */ 6742 6743 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) + 6744 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0); 6745 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c); 6746 #ifdef SUPPORT_VALGRIND 6747 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c)); 6748 #endif 6749 6750 /* Now pp points to the subject string. POSIX matching is only possible in 6751 8-bit mode, and it does not support timing or other fancy features. Some were 6752 checked at compile time, but we need to check the match-time settings here. 
*/ 6753 6754 #ifdef SUPPORT_PCRE2_8 6755 if ((pat_patctl.control & CTL_POSIX) != 0) 6756 { 6757 int rc; 6758 int eflags = 0; 6759 regmatch_t *pmatch = NULL; 6760 const char *msg = "** Ignored with POSIX interface:"; 6761 6762 if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET) 6763 prmsg(&msg, "callout_error"); 6764 if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET) 6765 prmsg(&msg, "callout_fail"); 6766 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0) 6767 prmsg(&msg, "copy"); 6768 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0) 6769 prmsg(&msg, "get"); 6770 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack"); 6771 if (dat_datctl.offset != 0) prmsg(&msg, "offset"); 6772 6773 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0) 6774 { 6775 fprintf(outfile, "%s", msg); 6776 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS); 6777 msg = ""; 6778 } 6779 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 || 6780 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0) 6781 { 6782 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, 6783 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg); 6784 msg = ""; 6785 } 6786 6787 if (msg[0] == 0) fprintf(outfile, "\n"); 6788 6789 if (dat_datctl.oveccount > 0) 6790 { 6791 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount); 6792 if (pmatch == NULL) 6793 { 6794 fprintf(outfile, "** Failed to get memory for recording matching " 6795 "information (size set = %du)\n", dat_datctl.oveccount); 6796 return PR_OK; 6797 } 6798 } 6799 6800 if (dat_datctl.startend[0] != CFORE_UNSET) 6801 { 6802 pmatch[0].rm_so = dat_datctl.startend[0]; 6803 pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)? 
6804 dat_datctl.startend[1] : len; 6805 eflags |= REG_STARTEND; 6806 } 6807 6808 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL; 6809 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL; 6810 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; 6811 6812 rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags); 6813 if (rc != 0) 6814 { 6815 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size); 6816 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8); 6817 } 6818 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) 6819 fprintf(outfile, "Matched with REG_NOSUB\n"); 6820 else if (dat_datctl.oveccount == 0) 6821 fprintf(outfile, "Matched without capture\n"); 6822 else 6823 { 6824 size_t i, j; 6825 size_t last_printed = (size_t)dat_datctl.oveccount; 6826 for (i = 0; i < (size_t)dat_datctl.oveccount; i++) 6827 { 6828 if (pmatch[i].rm_so >= 0) 6829 { 6830 PCRE2_SIZE start = pmatch[i].rm_so; 6831 PCRE2_SIZE end = pmatch[i].rm_eo; 6832 for (j = last_printed + 1; j < i; j++) 6833 fprintf(outfile, "%2d: <unset>\n", (int)j); 6834 last_printed = i; 6835 if (start > end) 6836 { 6837 start = pmatch[i].rm_eo; 6838 end = pmatch[i].rm_so; 6839 fprintf(outfile, "Start of matched string is beyond its end - " 6840 "displaying from end to start.\n"); 6841 } 6842 fprintf(outfile, "%2d: ", (int)i); 6843 PCHARSV(pp, start, end - start, utf, outfile); 6844 fprintf(outfile, "\n"); 6845 6846 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) || 6847 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0) 6848 { 6849 fprintf(outfile, "%2d+ ", (int)i); 6850 /* Note: don't use the start/end variables here because we want to 6851 show the text from what is reported as the end. 
*/ 6852 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile); 6853 fprintf(outfile, "\n"); } 6854 } 6855 } 6856 } 6857 free(pmatch); 6858 return PR_OK; 6859 } 6860 #endif /* SUPPORT_PCRE2_8 */ 6861 6862 /* Handle matching via the native interface. Check for consistency of 6863 modifiers. */ 6864 6865 if (dat_datctl.startend[0] != CFORE_UNSET) 6866 fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n"); 6867 6868 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA 6869 matching, even if the JIT compiler was used. */ 6870 6871 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT && 6872 FLD(compiled_code, executable_jit) != NULL) 6873 { 6874 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n"); 6875 dat_datctl.control &= ~CTL_ALLUSEDTEXT; 6876 } 6877 6878 /* Handle passing the subject as zero-terminated. */ 6879 6880 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) 6881 arg_ulen = PCRE2_ZERO_TERMINATED; 6882 6883 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a 6884 NULL context. */ 6885 6886 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)? 6887 NULL : PTR(dat_context); 6888 6889 /* Enable display of malloc/free if wanted. We can do this only if either the 6890 pattern or the subject is processed with a context. */ 6891 6892 show_memory = (dat_datctl.control & CTL_MEMORY) != 0; 6893 6894 if (show_memory && 6895 (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0) 6896 fprintf(outfile, "** \\=memory requires either a pattern or a subject " 6897 "context: ignored\n"); 6898 6899 /* Create and assign a JIT stack if requested. 
*/ 6900 6901 if (dat_datctl.jitstack != 0) 6902 { 6903 if (dat_datctl.jitstack != jit_stack_size) 6904 { 6905 PCRE2_JIT_STACK_FREE(jit_stack); 6906 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL); 6907 jit_stack_size = dat_datctl.jitstack; 6908 } 6909 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack); 6910 } 6911 6912 /* Or de-assign */ 6913 6914 else if (jit_stack != NULL) 6915 { 6916 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL); 6917 PCRE2_JIT_STACK_FREE(jit_stack); 6918 jit_stack = NULL; 6919 jit_stack_size = 0; 6920 } 6921 6922 /* When no JIT stack is assigned, we must ensure that there is a JIT callback 6923 if we want to verify that JIT was actually used. */ 6924 6925 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL) 6926 { 6927 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL); 6928 } 6929 6930 /* Adjust match_data according to size of offsets required. A size of zero 6931 causes a new match data block to be obtained that exactly fits the pattern. */ 6932 6933 if (dat_datctl.oveccount == 0) 6934 { 6935 PCRE2_MATCH_DATA_FREE(match_data); 6936 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL); 6937 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data); 6938 } 6939 else if (dat_datctl.oveccount <= max_oveccount) 6940 { 6941 SETFLD(match_data, oveccount, dat_datctl.oveccount); 6942 } 6943 else 6944 { 6945 max_oveccount = dat_datctl.oveccount; 6946 PCRE2_MATCH_DATA_FREE(match_data); 6947 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL); 6948 } 6949 6950 if (CASTVAR(void *, match_data) == NULL) 6951 { 6952 fprintf(outfile, "** Failed to get memory for recording matching " 6953 "information (size requested: %d)\n", dat_datctl.oveccount); 6954 max_oveccount = 0; 6955 return PR_OK; 6956 } 6957 6958 /* Replacement processing is ignored for DFA matching. 
*/ 6959 6960 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0) 6961 { 6962 fprintf(outfile, "** Ignored for DFA matching: replace\n"); 6963 dat_datctl.replacement[0] = 0; 6964 } 6965 6966 /* If a replacement string is provided, call pcre2_substitute() instead of one 6967 of the matching functions. First we have to convert the replacement string to 6968 the appropriate width. */ 6969 6970 if (dat_datctl.replacement[0] != 0) 6971 { 6972 int rc; 6973 uint8_t *pr; 6974 uint8_t rbuffer[REPLACE_BUFFSIZE]; 6975 uint8_t nbuffer[REPLACE_BUFFSIZE]; 6976 uint32_t xoptions; 6977 PCRE2_SIZE rlen, nsize, erroroffset; 6978 BOOL badutf = FALSE; 6979 6980 #ifdef SUPPORT_PCRE2_8 6981 uint8_t *r8 = NULL; 6982 #endif 6983 #ifdef SUPPORT_PCRE2_16 6984 uint16_t *r16 = NULL; 6985 #endif 6986 #ifdef SUPPORT_PCRE2_32 6987 uint32_t *r32 = NULL; 6988 #endif 6989 6990 if (timeitm) 6991 fprintf(outfile, "** Timing is not supported with replace: ignored\n"); 6992 6993 if ((dat_datctl.control & CTL_ALTGLOBAL) != 0) 6994 fprintf(outfile, "** Altglobal is not supported with replace: ignored\n"); 6995 6996 xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 : 6997 PCRE2_SUBSTITUTE_GLOBAL) | 6998 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 : 6999 PCRE2_SUBSTITUTE_EXTENDED) | 7000 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 : 7001 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) | 7002 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 : 7003 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) | 7004 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 : 7005 PCRE2_SUBSTITUTE_UNSET_EMPTY); 7006 7007 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */ 7008 pr = dat_datctl.replacement; 7009 7010 /* If the replacement starts with '[<number>]' we interpret that as length 7011 value for the replacement buffer. 
*/ 7012 7013 nsize = REPLACE_BUFFSIZE/code_unit_size; 7014 if (*pr == '[') 7015 { 7016 PCRE2_SIZE n = 0; 7017 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0; 7018 if (*pr++ != ']') 7019 { 7020 fprintf(outfile, "Bad buffer size in replacement string\n"); 7021 return PR_OK; 7022 } 7023 if (n > nsize) 7024 { 7025 fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too " 7026 "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize); 7027 return PR_OK; 7028 } 7029 nsize = n; 7030 } 7031 7032 /* Now copy the replacement string to a buffer of the appropriate width. No 7033 escape processing is done for replacements. In UTF mode, check for an invalid 7034 UTF-8 input string, and if it is invalid, just copy its code units without 7035 UTF interpretation. This provides a means of checking that an invalid string 7036 is detected. Otherwise, UTF-8 can be used to include wide characters in a 7037 replacement. */ 7038 7039 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset); 7040 7041 /* Not UTF or invalid UTF-8: just copy the code units. 
*/ 7042 7043 if (!utf || badutf) 7044 { 7045 while ((c = *pr++) != 0) 7046 { 7047 #ifdef SUPPORT_PCRE2_8 7048 if (test_mode == PCRE8_MODE) *r8++ = c; 7049 #endif 7050 #ifdef SUPPORT_PCRE2_16 7051 if (test_mode == PCRE16_MODE) *r16++ = c; 7052 #endif 7053 #ifdef SUPPORT_PCRE2_32 7054 if (test_mode == PCRE32_MODE) *r32++ = c; 7055 #endif 7056 } 7057 } 7058 7059 /* Valid UTF-8 replacement string */ 7060 7061 else while ((c = *pr++) != 0) 7062 { 7063 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); } 7064 7065 #ifdef SUPPORT_PCRE2_8 7066 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8); 7067 #endif 7068 7069 #ifdef SUPPORT_PCRE2_16 7070 if (test_mode == PCRE16_MODE) 7071 { 7072 if (c >= 0x10000u) 7073 { 7074 c-= 0x10000u; 7075 *r16++ = 0xD800 | (c >> 10); 7076 *r16++ = 0xDC00 | (c & 0x3ff); 7077 } 7078 else *r16++ = c; 7079 } 7080 #endif 7081 7082 #ifdef SUPPORT_PCRE2_32 7083 if (test_mode == PCRE32_MODE) *r32++ = c; 7084 #endif 7085 } 7086 7087 SET(*r, 0); 7088 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) 7089 rlen = PCRE2_ZERO_TERMINATED; 7090 else 7091 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size; 7092 PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, 7093 dat_datctl.options|xoptions, match_data, dat_context, 7094 rbuffer, rlen, nbuffer, &nsize); 7095 7096 if (rc < 0) 7097 { 7098 fprintf(outfile, "Failed: error %d", rc); 7099 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET) 7100 fprintf(outfile, " at offset %ld in replacement", (long int)nsize); 7101 fprintf(outfile, ": "); 7102 if (!print_error_message(rc, "", "")) return PR_ABEND; 7103 if (rc == PCRE2_ERROR_NOMEMORY && 7104 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0) 7105 fprintf(outfile, ": %ld code units are needed", (long int)nsize); 7106 } 7107 else 7108 { 7109 fprintf(outfile, "%2d: ", rc); 7110 PCHARSV(nbuffer, 0, nsize, utf, outfile); 7111 } 7112 7113 fprintf(outfile, "\n"); 7114 show_memory = FALSE; 7115 return PR_OK; 7116 } /* End of substitution handling 
*/ 7117 7118 /* When a replacement string is not provided, run a loop for global matching 7119 with one of the basic matching functions. For altglobal (or first time round 7120 the loop), set an "unset" value for the previous match info. */ 7121 7122 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; 7123 7124 for (gmatched = 0;; gmatched++) 7125 { 7126 PCRE2_SIZE j; 7127 int capcount; 7128 PCRE2_SIZE *ovector; 7129 7130 ovector = FLD(match_data, ovector); 7131 7132 /* Fill the ovector with junk to detect elements that do not get set 7133 when they should be. */ 7134 7135 for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET; 7136 7137 /* When matching is via pcre2_match(), we will detect the use of JIT via the 7138 stack callback function. */ 7139 7140 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0; 7141 7142 /* Do timing if required. */ 7143 7144 if (timeitm > 0) 7145 { 7146 int i; 7147 clock_t start_time, time_taken; 7148 7149 if ((dat_datctl.control & CTL_DFA) != 0) 7150 { 7151 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0) 7152 { 7153 fprintf(outfile, "Timing DFA restarts is not supported\n"); 7154 return PR_OK; 7155 } 7156 if (dfa_workspace == NULL) 7157 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 7158 start_time = clock(); 7159 for (i = 0; i < timeitm; i++) 7160 { 7161 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen, 7162 dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 7163 use_dat_context, dfa_workspace, DFA_WS_DIMENSION); 7164 } 7165 } 7166 7167 else if ((pat_patctl.control & CTL_JITFAST) != 0) 7168 { 7169 start_time = clock(); 7170 for (i = 0; i < timeitm; i++) 7171 { 7172 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, 7173 dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 7174 use_dat_context); 7175 } 7176 } 7177 7178 else 7179 { 7180 start_time = clock(); 7181 for (i = 0; i < timeitm; i++) 7182 { 7183 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, 7184 
dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 7185 use_dat_context); 7186 } 7187 } 7188 total_match_time += (time_taken = clock() - start_time); 7189 fprintf(outfile, "Match time %.4f milliseconds\n", 7190 (((double)time_taken * 1000.0) / (double)timeitm) / 7191 (double)CLOCKS_PER_SEC); 7192 } 7193 7194 /* Find the heap, match and depth limits if requested. The depth and heap 7195 limits are not relevant for JIT. The return from check_match_limit() is the 7196 return from the final call to pcre2_match() or pcre2_dfa_match(). */ 7197 7198 if ((dat_datctl.control & CTL_FINDLIMITS) != 0) 7199 { 7200 capcount = 0; /* This stops compiler warnings */ 7201 7202 if (FLD(compiled_code, executable_jit) == NULL || 7203 (dat_datctl.options & PCRE2_NO_JIT) != 0) 7204 { 7205 (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap"); 7206 } 7207 7208 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, 7209 "match"); 7210 7211 if (FLD(compiled_code, executable_jit) == NULL || 7212 (dat_datctl.options & PCRE2_NO_JIT) != 0 || 7213 (dat_datctl.control & CTL_DFA) != 0) 7214 { 7215 capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT, 7216 "depth"); 7217 } 7218 7219 if (capcount == 0) 7220 { 7221 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); 7222 capcount = dat_datctl.oveccount; 7223 } 7224 } 7225 7226 /* Otherwise just run a single match, setting up a callout if required (the 7227 default). There is a copy of the pattern in pbuffer8 for use by callouts. */ 7228 7229 else 7230 { 7231 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0) 7232 { 7233 PCRE2_SET_CALLOUT(dat_context, callout_function, 7234 (void *)(&dat_datctl.callout_data)); 7235 first_callout = TRUE; 7236 last_callout_mark = NULL; 7237 callout_count = 0; 7238 } 7239 else 7240 { 7241 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */ 7242 } 7243 7244 /* Run a single DFA or NFA match. 
*/ 7245 7246 if ((dat_datctl.control & CTL_DFA) != 0) 7247 { 7248 if (dfa_workspace == NULL) 7249 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 7250 if (dfa_matched++ == 0) 7251 dfa_workspace[0] = -1; /* To catch bad restart */ 7252 PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen, 7253 dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 7254 use_dat_context, dfa_workspace, DFA_WS_DIMENSION); 7255 if (capcount == 0) 7256 { 7257 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); 7258 capcount = dat_datctl.oveccount; 7259 } 7260 } 7261 else 7262 { 7263 if ((pat_patctl.control & CTL_JITFAST) != 0) 7264 PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset, 7265 dat_datctl.options | g_notempty, match_data, use_dat_context); 7266 else 7267 PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset, 7268 dat_datctl.options | g_notempty, match_data, use_dat_context); 7269 if (capcount == 0) 7270 { 7271 fprintf(outfile, "Matched, but too many substrings\n"); 7272 capcount = dat_datctl.oveccount; 7273 } 7274 } 7275 } 7276 7277 /* The result of the match is now in capcount. First handle a successful 7278 match. */ 7279 7280 if (capcount >= 0) 7281 { 7282 int i; 7283 uint32_t oveccount; 7284 7285 /* This is a check against a lunatic return value. */ 7286 7287 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data); 7288 if (capcount > (int)oveccount) 7289 { 7290 fprintf(outfile, 7291 "** PCRE2 error: returned count %d is too big for ovector count %d\n", 7292 capcount, oveccount); 7293 capcount = oveccount; 7294 if ((dat_datctl.control & CTL_ANYGLOB) != 0) 7295 { 7296 fprintf(outfile, "** Global loop abandoned\n"); 7297 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */ 7298 } 7299 } 7300 7301 /* If this is not the first time round a global loop, check that the 7302 returned string has changed. 
If it has not, check for an empty string match 7303 at different starting offset from the previous match. This is a failed test 7304 retry for null-matching patterns that don't match at their starting offset, 7305 for example /(?<=\G.)/. A repeated match at the same point is not such a 7306 pattern, and must be discarded, and we then proceed to seek a non-null 7307 match at the current point. For any other repeated match, there is a bug 7308 somewhere and we must break the loop because it will go on for ever. We 7309 know that there are always at least two elements in the ovector. */ 7310 7311 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) 7312 { 7313 if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset) 7314 { 7315 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; 7316 ovecsave[2] = dat_datctl.offset; 7317 continue; /* Back to the top of the loop */ 7318 } 7319 fprintf(outfile, 7320 "** PCRE2 error: global repeat returned the same string as previous\n"); 7321 fprintf(outfile, "** Global loop abandoned\n"); 7322 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */ 7323 } 7324 7325 /* "allcaptures" requests showing of all captures in the pattern, to check 7326 unset ones at the end. It may be set on the pattern or the data. Implement 7327 by setting capcount to the maximum. This is not relevant for DFA matching, 7328 so ignore it. */ 7329 7330 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) 7331 { 7332 uint32_t maxcapcount; 7333 if ((dat_datctl.control & CTL_DFA) != 0) 7334 { 7335 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n"); 7336 } 7337 else 7338 { 7339 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0) 7340 return PR_SKIP; 7341 capcount = maxcapcount + 1; /* Allow for full match */ 7342 if (capcount > (int)oveccount) capcount = oveccount; 7343 } 7344 } 7345 7346 /* Output the captured substrings. 
Note that, for the matched string, 7347 the use of \K in an assertion can make the start later than the end. */ 7348 7349 for (i = 0; i < 2*capcount; i += 2) 7350 { 7351 PCRE2_SIZE lleft, lmiddle, lright; 7352 PCRE2_SIZE start = ovector[i]; 7353 PCRE2_SIZE end = ovector[i+1]; 7354 7355 if (start > end) 7356 { 7357 start = ovector[i+1]; 7358 end = ovector[i]; 7359 fprintf(outfile, "Start of matched string is beyond its end - " 7360 "displaying from end to start.\n"); 7361 } 7362 7363 fprintf(outfile, "%2d: ", i/2); 7364 7365 /* Check for an unset group */ 7366 7367 if (start == PCRE2_UNSET) 7368 { 7369 fprintf(outfile, "<unset>\n"); 7370 continue; 7371 } 7372 7373 /* Check for silly offsets, in particular, values that have not been 7374 set when they should have been. */ 7375 7376 if (start > ulen || end > ulen) 7377 { 7378 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n", 7379 (unsigned long int)start, (unsigned long int)end); 7380 continue; 7381 } 7382 7383 /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with 7384 JIT, it is disabled above, with a comment.) When the match is done by the 7385 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is 7386 set, and if the leftmost consulted character is before the start of the 7387 match or the rightmost consulted character is past the end of the match, 7388 we want to show all consulted characters for the main matched string, and 7389 indicate which were lookarounds. 
*/ 7390 7391 if (i == 0) 7392 { 7393 BOOL showallused; 7394 PCRE2_SIZE leftchar, rightchar; 7395 7396 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0) 7397 { 7398 leftchar = FLD(match_data, leftchar); 7399 rightchar = FLD(match_data, rightchar); 7400 showallused = i == 0 && (leftchar < start || rightchar > end); 7401 } 7402 else showallused = FALSE; 7403 7404 if (showallused) 7405 { 7406 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile); 7407 PCHARS(lmiddle, pp, start, end - start, utf, outfile); 7408 PCHARS(lright, pp, end, rightchar - end, utf, outfile); 7409 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 7410 fprintf(outfile, " (JIT)"); 7411 fprintf(outfile, "\n "); 7412 for (j = 0; j < lleft; j++) fprintf(outfile, "<"); 7413 for (j = 0; j < lmiddle; j++) fprintf(outfile, " "); 7414 for (j = 0; j < lright; j++) fprintf(outfile, ">"); 7415 } 7416 7417 /* When a pattern contains \K, the start of match position may be 7418 different to the start of the matched string. When this is the case, 7419 show it when requested. */ 7420 7421 else if ((dat_datctl.control & CTL_STARTCHAR) != 0) 7422 { 7423 PCRE2_SIZE startchar; 7424 PCRE2_GET_STARTCHAR(startchar, match_data); 7425 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile); 7426 PCHARSV(pp, start, end - start, utf, outfile); 7427 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 7428 fprintf(outfile, " (JIT)"); 7429 if (startchar != start) 7430 { 7431 fprintf(outfile, "\n "); 7432 for (j = 0; j < lleft; j++) fprintf(outfile, "^"); 7433 } 7434 } 7435 7436 /* Otherwise, just show the matched string. */ 7437 7438 else 7439 { 7440 PCHARSV(pp, start, end - start, utf, outfile); 7441 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 7442 fprintf(outfile, " (JIT)"); 7443 } 7444 } 7445 7446 /* Not the main matched string. Just show it unadorned. 
*/ 7447 7448 else 7449 { 7450 PCHARSV(pp, start, end - start, utf, outfile); 7451 } 7452 7453 fprintf(outfile, "\n"); 7454 7455 /* Note: don't use the start/end variables here because we want to 7456 show the text from what is reported as the end. */ 7457 7458 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 || 7459 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0)) 7460 { 7461 fprintf(outfile, "%2d+ ", i/2); 7462 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile); 7463 fprintf(outfile, "\n"); 7464 } 7465 } 7466 7467 /* Output (*MARK) data if requested */ 7468 7469 if ((dat_datctl.control & CTL_MARK) != 0 && 7470 TESTFLD(match_data, mark, !=, NULL)) 7471 { 7472 fprintf(outfile, "MK: "); 7473 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile); 7474 fprintf(outfile, "\n"); 7475 } 7476 7477 /* Process copy/get strings */ 7478 7479 if (!copy_and_get(utf, capcount)) return PR_ABEND; 7480 7481 } /* End of handling a successful match */ 7482 7483 /* There was a partial match. The value of ovector[0] is the bumpalong point, 7484 that is, startchar, not any \K point that might have been passed. 
*/ 7485 7486 else if (capcount == PCRE2_ERROR_PARTIAL) 7487 { 7488 PCRE2_SIZE poffset; 7489 int backlength; 7490 int rubriclength = 0; 7491 7492 fprintf(outfile, "Partial match"); 7493 if ((dat_datctl.control & CTL_MARK) != 0 && 7494 TESTFLD(match_data, mark, !=, NULL)) 7495 { 7496 fprintf(outfile, ", mark="); 7497 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf, 7498 outfile); 7499 rubriclength += 7; 7500 } 7501 fprintf(outfile, ": "); 7502 rubriclength += 15; 7503 7504 poffset = backchars(pp, ovector[0], maxlookbehind, utf); 7505 PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile); 7506 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile); 7507 7508 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 7509 fprintf(outfile, " (JIT)"); 7510 fprintf(outfile, "\n"); 7511 7512 if (backlength != 0) 7513 { 7514 int i; 7515 for (i = 0; i < rubriclength; i++) fprintf(outfile, " "); 7516 for (i = 0; i < backlength; i++) fprintf(outfile, "<"); 7517 fprintf(outfile, "\n"); 7518 } 7519 7520 /* Process copy/get strings */ 7521 7522 if (!copy_and_get(utf, 1)) return PR_ABEND; 7523 7524 break; /* Out of the /g loop */ 7525 } /* End of handling partial match */ 7526 7527 /* Failed to match. If this is a /g or /G loop, we might previously have 7528 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match. 7529 If that is the case, this is not necessarily the end. We want to advance the 7530 start offset, and continue. We won't be at the end of the string - that was 7531 checked before setting g_notempty. We achieve the effect by pretending that a 7532 single character was matched. 7533 7534 Complication arises in the case when the newline convention is "any", "crlf", 7535 or "anycrlf". If the previous match was at the end of a line terminated by 7536 CRLF, an advance of one character just passes the CR, whereas we should 7537 prefer the longer newline sequence, as does the code in pcre2_match(). 
7538 7539 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one 7540 character, not one byte. */ 7541 7542 else if (g_notempty != 0) /* There was a previous null match */ 7543 { 7544 uint16_t nl = FLD(compiled_code, newline_convention); 7545 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */ 7546 PCRE2_SIZE end_offset = start_offset + 1; 7547 7548 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY || 7549 nl == PCRE2_NEWLINE_ANYCRLF) && 7550 start_offset < ulen - 1 && 7551 CODE_UNIT(pp, start_offset) == '\r' && 7552 CODE_UNIT(pp, end_offset) == '\n') 7553 end_offset++; 7554 7555 else if (utf && test_mode != PCRE32_MODE) 7556 { 7557 if (test_mode == PCRE8_MODE) 7558 { 7559 for (; end_offset < ulen; end_offset++) 7560 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break; 7561 } 7562 else /* 16-bit mode */ 7563 { 7564 for (; end_offset < ulen; end_offset++) 7565 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break; 7566 } 7567 } 7568 7569 SETFLDVEC(match_data, ovector, 0, start_offset); 7570 SETFLDVEC(match_data, ovector, 1, end_offset); 7571 } /* End of handling null match in a global loop */ 7572 7573 /* A "normal" match failure. There will be a negative error number in 7574 capcount. 
*/ 7575 7576 else 7577 { 7578 switch(capcount) 7579 { 7580 case PCRE2_ERROR_NOMATCH: 7581 if (gmatched == 0) 7582 { 7583 fprintf(outfile, "No match"); 7584 if ((dat_datctl.control & CTL_MARK) != 0 && 7585 TESTFLD(match_data, mark, !=, NULL)) 7586 { 7587 fprintf(outfile, ", mark = "); 7588 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile); 7589 } 7590 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 7591 fprintf(outfile, " (JIT)"); 7592 fprintf(outfile, "\n"); 7593 } 7594 break; 7595 7596 case PCRE2_ERROR_BADUTFOFFSET: 7597 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode); 7598 break; 7599 7600 default: 7601 fprintf(outfile, "Failed: error %d: ", capcount); 7602 if (!print_error_message(capcount, "", "")) return PR_ABEND; 7603 if (capcount <= PCRE2_ERROR_UTF8_ERR1 && 7604 capcount >= PCRE2_ERROR_UTF32_ERR2) 7605 { 7606 PCRE2_SIZE startchar; 7607 PCRE2_GET_STARTCHAR(startchar, match_data); 7608 fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar); 7609 } 7610 fprintf(outfile, "\n"); 7611 break; 7612 } 7613 7614 break; /* Out of the /g loop */ 7615 } /* End of failed match handling */ 7616 7617 /* Control reaches here in two circumstances: (a) after a match, and (b) 7618 after a non-match that immediately followed a match on an empty string when 7619 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and 7620 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match 7621 of one character. So effectively we get here only after a match. If we 7622 are not doing a global search, we are done. */ 7623 7624 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else 7625 { 7626 PCRE2_SIZE match_offset = FLD(match_data, ovector)[0]; 7627 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1]; 7628 7629 /* We must now set up for the next iteration of a global search. If we have 7630 matched an empty string, first check to see if we are at the end of the 7631 subject. 
If so, the loop is over. Otherwise, mimic what Perl's /g option 7632 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again 7633 at the same point. If this fails it will be picked up above, where a fake 7634 match is set up so that at this point we advance to the next character. 7635 7636 However, in order to cope with patterns that never match at their starting 7637 offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater 7638 than the starting offset. This means there will be a retry with the 7639 starting offset at the match offset. If this returns the same match again, 7640 it is picked up above and ignored, and the special action is then taken. */ 7641 7642 if (match_offset == end_offset) 7643 { 7644 if (end_offset == ulen) break; /* End of subject */ 7645 if (match_offset <= dat_datctl.offset) 7646 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; 7647 } 7648 7649 /* However, even after matching a non-empty string, there is still one 7650 tricky case. If a pattern contains \K within a lookbehind assertion at the 7651 start, the end of the matched string can be at the offset where the match 7652 started. In the case of a normal /g iteration without special action, this 7653 leads to a loop that keeps on returning the same substring. The loop would 7654 be caught above, but we really want to move on to the next match. 
*/ 7655 7656 else 7657 { 7658 g_notempty = 0; /* Set for a "normal" repeat */ 7659 if ((dat_datctl.control & CTL_GLOBAL) != 0) 7660 { 7661 PCRE2_SIZE startchar; 7662 PCRE2_GET_STARTCHAR(startchar, match_data); 7663 if (end_offset <= startchar) 7664 { 7665 if (startchar >= ulen) break; /* End of subject */ 7666 end_offset = startchar + 1; 7667 if (utf && test_mode != PCRE32_MODE) 7668 { 7669 if (test_mode == PCRE8_MODE) 7670 { 7671 for (; end_offset < ulen; end_offset++) 7672 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break; 7673 } 7674 else /* 16-bit mode */ 7675 { 7676 for (; end_offset < ulen; end_offset++) 7677 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break; 7678 } 7679 } 7680 } 7681 } 7682 } 7683 7684 /* For a normal global (/g) iteration, save the current ovector[0,1] and 7685 the starting offset so that we can check that they do change each time. 7686 Otherwise a matching bug that returns the same string causes an infinite 7687 loop. It has happened! Then update the start offset, leaving other 7688 parameters alone. */ 7689 7690 if ((dat_datctl.control & CTL_GLOBAL) != 0) 7691 { 7692 ovecsave[0] = ovector[0]; 7693 ovecsave[1] = ovector[1]; 7694 ovecsave[2] = dat_datctl.offset; 7695 dat_datctl.offset = end_offset; 7696 } 7697 7698 /* For altglobal, just update the pointer and length. 
*/ 7699 7700 else 7701 { 7702 pp += end_offset * code_unit_size; 7703 len -= end_offset * code_unit_size; 7704 ulen -= end_offset; 7705 if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset; 7706 } 7707 } 7708 } /* End of global loop */ 7709 7710 show_memory = FALSE; 7711 return PR_OK; 7712 } 7713 7714 7715 7716 7717 /************************************************* 7718 * Print PCRE2 version * 7719 *************************************************/ 7720 7721 static void 7722 print_version(FILE *f) 7723 { 7724 VERSION_TYPE *vp; 7725 fprintf(f, "PCRE2 version "); 7726 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp); 7727 fprintf(f, "\n"); 7728 } 7729 7730 7731 7732 /************************************************* 7733 * Print Unicode version * 7734 *************************************************/ 7735 7736 static void 7737 print_unicode_version(FILE *f) 7738 { 7739 VERSION_TYPE *vp; 7740 fprintf(f, "Unicode version "); 7741 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp); 7742 } 7743 7744 7745 7746 /************************************************* 7747 * Print JIT target * 7748 *************************************************/ 7749 7750 static void 7751 print_jit_target(FILE *f) 7752 { 7753 VERSION_TYPE *vp; 7754 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp); 7755 } 7756 7757 7758 7759 /************************************************* 7760 * Print newline configuration * 7761 *************************************************/ 7762 7763 /* Output is always to stdout. 
7764 7765 Arguments: 7766 rc the return code from PCRE2_CONFIG_NEWLINE 7767 isc TRUE if called from "-C newline" 7768 Returns: nothing 7769 */ 7770 7771 static void 7772 print_newline_config(uint32_t optval, BOOL isc) 7773 { 7774 if (!isc) printf(" Newline sequence is "); 7775 if (optval < sizeof(newlines)/sizeof(char *)) 7776 printf("%s\n", newlines[optval]); 7777 else 7778 printf("a non-standard value: %d\n", optval); 7779 } 7780 7781 7782 7783 /************************************************* 7784 * Usage function * 7785 *************************************************/ 7786 7787 static void 7788 usage(void) 7789 { 7790 printf("Usage: pcre2test [options] [<input file> [<output file>]]\n\n"); 7791 printf("Input and output default to stdin and stdout.\n"); 7792 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 7793 printf("If input is a terminal, readline() is used to read from it.\n"); 7794 #else 7795 printf("This version of pcre2test is not linked with readline().\n"); 7796 #endif 7797 printf("\nOptions:\n"); 7798 #ifdef SUPPORT_PCRE2_8 7799 printf(" -8 use the 8-bit library\n"); 7800 #endif 7801 #ifdef SUPPORT_PCRE2_16 7802 printf(" -16 use the 16-bit library\n"); 7803 #endif 7804 #ifdef SUPPORT_PCRE2_32 7805 printf(" -32 use the 32-bit library\n"); 7806 #endif 7807 printf(" -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n"); 7808 printf(" -AC as -ac, but also set subject 'callout_extra' modifier\n"); 7809 printf(" -b set default pattern modifier 'fullbincode'\n"); 7810 printf(" -C show PCRE2 compile-time options and exit\n"); 7811 printf(" -C arg show a specific compile-time option and exit with its\n"); 7812 printf(" value if numeric (else 0). 
The arg can be:\n"); 7813 printf(" backslash-C use of \\C is enabled [0, 1]\n"); 7814 printf(" bsr \\R type [ANYCRLF, ANY]\n"); 7815 printf(" ebcdic compiled for EBCDIC character code [0,1]\n"); 7816 printf(" ebcdic-nl NL code if compiled for EBCDIC\n"); 7817 printf(" jit just-in-time compiler supported [0, 1]\n"); 7818 printf(" linksize internal link size [2, 3, 4]\n"); 7819 printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n"); 7820 printf(" pcre2-8 8 bit library support enabled [0, 1]\n"); 7821 printf(" pcre2-16 16 bit library support enabled [0, 1]\n"); 7822 printf(" pcre2-32 32 bit library support enabled [0, 1]\n"); 7823 printf(" unicode Unicode and UTF support enabled [0, 1]\n"); 7824 printf(" -d set default pattern modifier 'debug'\n"); 7825 printf(" -dfa set default subject modifier 'dfa'\n"); 7826 printf(" -error <n,m,..> show messages for error numbers, then exit\n"); 7827 printf(" -help show usage information\n"); 7828 printf(" -i set default pattern modifier 'info'\n"); 7829 printf(" -jit set default pattern modifier 'jit'\n"); 7830 printf(" -jitverify set default pattern modifier 'jitverify'\n"); 7831 printf(" -LM list pattern and subject modifiers, then exit\n"); 7832 printf(" -q quiet: do not output PCRE2 version number at start\n"); 7833 printf(" -pattern <s> set default pattern modifier fields\n"); 7834 printf(" -subject <s> set default subject modifier fields\n"); 7835 printf(" -S <n> set stack size to <n> mebibytes\n"); 7836 printf(" -t [<n>] time compilation and execution, repeating <n> times\n"); 7837 printf(" -tm [<n>] time execution (matching) only, repeating <n> times\n"); 7838 printf(" -T same as -t, but show total times at the end\n"); 7839 printf(" -TM same as -tm, but show total time at the end\n"); 7840 printf(" -version show PCRE2 version and exit\n"); 7841 } 7842 7843 7844 7845 /************************************************* 7846 * Handle -C option * 7847 *************************************************/ 7848 7849 
/* This option outputs configuration options and sets an appropriate return 7850 code when asked for a single option. The code is abstracted into a separate 7851 function because of its size. Use whichever pcre2_config() function is 7852 available. 7853 7854 Argument: an option name or NULL 7855 Returns: the return code 7856 */ 7857 7858 static int 7859 c_option(const char *arg) 7860 { 7861 uint32_t optval; 7862 unsigned int i = COPTLISTCOUNT; 7863 int yield = 0; 7864 7865 if (arg != NULL && arg[0] != CHAR_MINUS) 7866 { 7867 for (i = 0; i < COPTLISTCOUNT; i++) 7868 if (strcmp(arg, coptlist[i].name) == 0) break; 7869 7870 if (i >= COPTLISTCOUNT) 7871 { 7872 fprintf(stderr, "** Unknown -C option '%s'\n", arg); 7873 return 0; 7874 } 7875 7876 switch (coptlist[i].type) 7877 { 7878 case CONF_BSR: 7879 (void)PCRE2_CONFIG(coptlist[i].value, &optval); 7880 printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY"); 7881 break; 7882 7883 case CONF_FIX: 7884 yield = coptlist[i].value; 7885 printf("%d\n", yield); 7886 break; 7887 7888 case CONF_FIZ: 7889 optval = coptlist[i].value; 7890 printf("%d\n", optval); 7891 break; 7892 7893 case CONF_INT: 7894 (void)PCRE2_CONFIG(coptlist[i].value, &yield); 7895 printf("%d\n", yield); 7896 break; 7897 7898 case CONF_NL: 7899 (void)PCRE2_CONFIG(coptlist[i].value, &optval); 7900 print_newline_config(optval, TRUE); 7901 break; 7902 } 7903 7904 /* For VMS, return the value by setting a symbol, for certain values only. */ 7905 7906 #ifdef __VMS 7907 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT) 7908 { 7909 char ucname[16]; 7910 strcpy(ucname, coptlist[i].name); 7911 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]]; 7912 vms_setsymbol(ucname, 0, optval); 7913 } 7914 #endif 7915 7916 return yield; 7917 } 7918 7919 /* No argument for -C: output all configuration information. 
*/ 7920 7921 print_version(stdout); 7922 printf("Compiled with\n"); 7923 7924 #ifdef EBCDIC 7925 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF); 7926 #if defined NATIVE_ZOS 7927 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion()); 7928 #endif 7929 #endif 7930 7931 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval); 7932 if (optval & 1) printf(" 8-bit support\n"); 7933 if (optval & 2) printf(" 16-bit support\n"); 7934 if (optval & 4) printf(" 32-bit support\n"); 7935 7936 #ifdef SUPPORT_VALGRIND 7937 printf(" Valgrind support\n"); 7938 #endif 7939 7940 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval); 7941 if (optval != 0) 7942 { 7943 printf(" UTF and UCP support ("); 7944 print_unicode_version(stdout); 7945 printf(")\n"); 7946 } 7947 else printf(" No Unicode support\n"); 7948 7949 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval); 7950 if (optval != 0) 7951 { 7952 printf(" Just-in-time compiler support: "); 7953 print_jit_target(stdout); 7954 printf("\n"); 7955 } 7956 else 7957 { 7958 printf(" No just-in-time compiler support\n"); 7959 } 7960 7961 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval); 7962 print_newline_config(optval, FALSE); 7963 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval); 7964 printf(" \\R matches %s\n", 7965 (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" : 7966 "all Unicode newlines"); 7967 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval); 7968 printf(" \\C is %ssupported\n", optval? 
"not ":""); 7969 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval); 7970 printf(" Internal link size = %d\n", optval); 7971 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval); 7972 printf(" Parentheses nest limit = %d\n", optval); 7973 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval); 7974 printf(" Default heap limit = %d\n", optval); 7975 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval); 7976 printf(" Default match limit = %d\n", optval); 7977 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval); 7978 printf(" Default depth limit = %d\n", optval); 7979 return 0; 7980 } 7981 7982 7983 7984 /************************************************* 7985 * Display one modifier * 7986 *************************************************/ 7987 7988 static void 7989 display_one_modifier(modstruct *m, BOOL for_pattern) 7990 { 7991 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))? 7992 '*' : ' '; 7993 printf("%c%s", c, m->name); 7994 } 7995 7996 7997 7998 /************************************************* 7999 * Display pattern or subject modifiers * 8000 *************************************************/ 8001 8002 /* In order to print in two columns, first scan without printing to get a list 8003 of the modifiers that are required. 
8004 8005 Arguments: 8006 for_pattern TRUE for pattern modifiers, FALSE for subject modifiers 8007 title string to be used in title 8008 8009 Returns: nothing 8010 */ 8011 8012 static void 8013 display_selected_modifiers(BOOL for_pattern, const char *title) 8014 { 8015 uint32_t i, j; 8016 uint32_t n = 0; 8017 uint32_t list[MODLISTCOUNT]; 8018 8019 for (i = 0; i < MODLISTCOUNT; i++) 8020 { 8021 BOOL is_pattern = TRUE; 8022 modstruct *m = modlist + i; 8023 8024 switch (m->which) 8025 { 8026 case MOD_CTC: /* Compile context */ 8027 case MOD_PAT: /* Pattern */ 8028 case MOD_PATP: /* Pattern, OK for Perl-compatible test */ 8029 break; 8030 8031 /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect 8032 subjects, but can be given with a pattern. We list them as subject 8033 modifiers, but marked with an asterisk.*/ 8034 8035 case MOD_CTM: /* Match context */ 8036 case MOD_DAT: /* Subject line */ 8037 case MOD_PND: /* As PD, but not default pattern */ 8038 case MOD_PNDP: /* As PND, OK for Perl-compatible test */ 8039 is_pattern = FALSE; 8040 break; 8041 8042 default: printf("** Unknown type for modifier '%s'\n", m->name); 8043 /* Fall through */ 8044 case MOD_PD: /* Pattern or subject */ 8045 case MOD_PDP: /* As PD, OK for Perl-compatible test */ 8046 is_pattern = for_pattern; 8047 break; 8048 } 8049 8050 if (for_pattern == is_pattern) list[n++] = i; 8051 } 8052 8053 /* Now print from the list in two columns. 
*/ 8054 8055 printf("-------------- %s MODIFIERS --------------\n", title); 8056 8057 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++) 8058 { 8059 modstruct *m = modlist + list[i]; 8060 display_one_modifier(m, for_pattern); 8061 if (j < n) 8062 { 8063 uint32_t k = 27 - strlen(m->name); 8064 while (k-- > 0) printf(" "); 8065 display_one_modifier(modlist + list[j], for_pattern); 8066 } 8067 printf("\n"); 8068 } 8069 } 8070 8071 8072 8073 /************************************************* 8074 * Display the list of modifiers * 8075 *************************************************/ 8076 8077 static void 8078 display_modifiers(void) 8079 { 8080 printf( 8081 "An asterisk on a subject modifier means that it may be given on a pattern\n" 8082 "line, in order to apply to all subjects matched by that pattern. Modifiers\n" 8083 "that are listed for both patterns and subjects have different effects in\n" 8084 "each case.\n\n"); 8085 display_selected_modifiers(TRUE, "PATTERN"); 8086 printf("\n"); 8087 display_selected_modifiers(FALSE, "SUBJECT"); 8088 } 8089 8090 8091 8092 /************************************************* 8093 * Main Program * 8094 *************************************************/ 8095 8096 int 8097 main(int argc, char **argv) 8098 { 8099 uint32_t temp; 8100 uint32_t yield = 0; 8101 uint32_t op = 1; 8102 BOOL notdone = TRUE; 8103 BOOL quiet = FALSE; 8104 BOOL showtotaltimes = FALSE; 8105 BOOL skipping = FALSE; 8106 char *arg_subject = NULL; 8107 char *arg_pattern = NULL; 8108 char *arg_error = NULL; 8109 8110 /* The offsets to the options and control bits fields of the pattern and data 8111 control blocks must be the same so that common options and controls such as 8112 "anchored" or "memory" can work for either of them from a single table entry. 8113 We cannot test this till runtime because "offsetof" does not work in the 8114 preprocessor. 
*/ 8115 8116 if (PO(options) != DO(options) || PO(control) != DO(control) || 8117 PO(control2) != DO(control2)) 8118 { 8119 fprintf(stderr, "** Coding error: " 8120 "options and control offsets for pattern and data must be the same.\n"); 8121 return 1; 8122 } 8123 8124 /* Get the PCRE2 and Unicode version number and JIT target information, at the 8125 same time checking that a request for the length gives the same answer. Also 8126 check lengths for non-string items. */ 8127 8128 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != 8129 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) || 8130 8131 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) != 8132 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) || 8133 8134 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) != 8135 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) || 8136 8137 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) || 8138 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t)) 8139 { 8140 fprintf(stderr, "** Error in pcre2_config(): bad length\n"); 8141 return 1; 8142 } 8143 8144 /* Check that bad options are diagnosed. */ 8145 8146 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION || 8147 PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION) 8148 { 8149 fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n"); 8150 return 1; 8151 } 8152 8153 /* This configuration option is now obsolete, but running a quick check ensures 8154 that its code is covered. */ 8155 8156 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp); 8157 8158 /* Get buffers from malloc() so that valgrind will check their misuse when 8159 debugging. They grow automatically when very long lines are read. The 16- 8160 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. 
*/ 8161 8162 buffer = (uint8_t *)malloc(pbuffer8_size); 8163 pbuffer8 = (uint8_t *)malloc(pbuffer8_size); 8164 8165 /* The following _setmode() stuff is some Windows magic that tells its runtime 8166 library to translate CRLF into a single LF character. At least, that's what 8167 I've been told: never having used Windows I take this all on trust. Originally 8168 it set 0x8000, but then I was advised that _O_BINARY was better. */ 8169 8170 #if defined(_WIN32) || defined(WIN32) 8171 _setmode( _fileno( stdout ), _O_BINARY ); 8172 #endif 8173 8174 /* Initialization that does not depend on the running mode. */ 8175 8176 locale_name[0] = 0; 8177 8178 memset(&def_patctl, 0, sizeof(patctl)); 8179 def_patctl.convert_type = CONVERT_UNSET; 8180 8181 memset(&def_datctl, 0, sizeof(datctl)); 8182 def_datctl.oveccount = DEFAULT_OVECCOUNT; 8183 def_datctl.copy_numbers[0] = -1; 8184 def_datctl.get_numbers[0] = -1; 8185 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET; 8186 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET; 8187 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET; 8188 8189 /* Scan command line options. */ 8190 8191 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) 8192 { 8193 char *endptr; 8194 char *arg = argv[op]; 8195 unsigned long uli; 8196 8197 /* List modifiers and exit. */ 8198 8199 if (strcmp(arg, "-LM") == 0) 8200 { 8201 display_modifiers(); 8202 goto EXIT; 8203 } 8204 8205 /* Display and/or set return code for configuration options. */ 8206 8207 if (strcmp(arg, "-C") == 0) 8208 { 8209 yield = c_option(argv[op + 1]); 8210 goto EXIT; 8211 } 8212 8213 /* Select operating mode. Ensure that pcre2_config() is called in 16-bit 8214 and 32-bit modes because that won't happen naturally when 8-bit is also 8215 configured. Also call some other functions that are not otherwise used. This 8216 means that a coverage report won't claim there are uncalled functions. 
*/ 8217 8218 if (strcmp(arg, "-8") == 0) 8219 { 8220 #ifdef SUPPORT_PCRE2_8 8221 test_mode = PCRE8_MODE; 8222 (void)pcre2_set_bsr_8(pat_context8, 999); 8223 (void)pcre2_set_newline_8(pat_context8, 999); 8224 #else 8225 fprintf(stderr, 8226 "** This version of PCRE2 was built without 8-bit support\n"); 8227 exit(1); 8228 #endif 8229 } 8230 8231 else if (strcmp(arg, "-16") == 0) 8232 { 8233 #ifdef SUPPORT_PCRE2_16 8234 test_mode = PCRE16_MODE; 8235 (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL); 8236 (void)pcre2_set_bsr_16(pat_context16, 999); 8237 (void)pcre2_set_newline_16(pat_context16, 999); 8238 #else 8239 fprintf(stderr, 8240 "** This version of PCRE2 was built without 16-bit support\n"); 8241 exit(1); 8242 #endif 8243 } 8244 8245 else if (strcmp(arg, "-32") == 0) 8246 { 8247 #ifdef SUPPORT_PCRE2_32 8248 test_mode = PCRE32_MODE; 8249 (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL); 8250 (void)pcre2_set_bsr_32(pat_context32, 999); 8251 (void)pcre2_set_newline_32(pat_context32, 999); 8252 #else 8253 fprintf(stderr, 8254 "** This version of PCRE2 was built without 32-bit support\n"); 8255 exit(1); 8256 #endif 8257 } 8258 8259 /* Set quiet (no version verification) */ 8260 8261 else if (strcmp(arg, "-q") == 0) quiet = TRUE; 8262 8263 /* Set system stack size */ 8264 8265 else if (strcmp(arg, "-S") == 0 && argc > 2 && 8266 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) 8267 { 8268 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) 8269 fprintf(stderr, "pcre2test: -S is not supported on this OS\n"); 8270 exit(1); 8271 #else 8272 int rc; 8273 uint32_t stack_size; 8274 struct rlimit rlim; 8275 if (U32OVERFLOW(uli)) 8276 { 8277 fprintf(stderr, "** Argument for -S is too big\n"); 8278 exit(1); 8279 } 8280 stack_size = (uint32_t)uli; 8281 getrlimit(RLIMIT_STACK, &rlim); 8282 rlim.rlim_cur = stack_size * 1024 * 1024; 8283 if (rlim.rlim_cur > rlim.rlim_max) 8284 { 8285 fprintf(stderr, 8286 "pcre2test: requested 
stack size %luMiB is greater than hard limit " 8287 "%luMiB\n", (unsigned long int)stack_size, 8288 (unsigned long int)(rlim.rlim_max)); 8289 exit(1); 8290 } 8291 rc = setrlimit(RLIMIT_STACK, &rlim); 8292 if (rc != 0) 8293 { 8294 fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n", 8295 (unsigned long int)stack_size, strerror(errno)); 8296 exit(1); 8297 } 8298 op++; 8299 argc--; 8300 #endif 8301 } 8302 8303 /* Set some common pattern and subject controls */ 8304 8305 else if (strcmp(arg, "-AC") == 0) 8306 { 8307 def_patctl.options |= PCRE2_AUTO_CALLOUT; 8308 def_datctl.control2 |= CTL2_CALLOUT_EXTRA; 8309 } 8310 else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT; 8311 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE; 8312 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG; 8313 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; 8314 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO; 8315 else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0) 8316 { 8317 if (arg[4] != 0) def_patctl.control |= CTL_JITVERIFY; 8318 def_patctl.jit = 7; /* full & partial */ 8319 #ifndef SUPPORT_JIT 8320 fprintf(stderr, "** Warning: JIT support is not available: " 8321 "-jit[verify] calls functions that do nothing.\n"); 8322 #endif 8323 } 8324 8325 /* Set timing parameters */ 8326 8327 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 || 8328 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0) 8329 { 8330 int both = arg[2] == 0; 8331 showtotaltimes = arg[1] == 'T'; 8332 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0)) 8333 { 8334 if (U32OVERFLOW(uli)) 8335 { 8336 fprintf(stderr, "** Argument for %s is too big\n", arg); 8337 exit(1); 8338 } 8339 timeitm = (int)uli; 8340 op++; 8341 argc--; 8342 } 8343 else timeitm = LOOPREPEAT; 8344 if (both) timeit = timeitm; 8345 } 8346 8347 /* Give help */ 8348 8349 else if (strcmp(arg, "-help") == 0 || 
8350 strcmp(arg, "--help") == 0) 8351 { 8352 usage(); 8353 goto EXIT; 8354 } 8355 8356 /* Show version */ 8357 8358 else if (strcmp(arg, "-version") == 0 || 8359 strcmp(arg, "--version") == 0) 8360 { 8361 print_version(stdout); 8362 goto EXIT; 8363 } 8364 8365 /* The following options save their data for processing once we know what 8366 the running mode is. */ 8367 8368 else if (strcmp(arg, "-error") == 0) 8369 { 8370 arg_error = argv[op+1]; 8371 goto CHECK_VALUE_EXISTS; 8372 } 8373 8374 else if (strcmp(arg, "-subject") == 0) 8375 { 8376 arg_subject = argv[op+1]; 8377 goto CHECK_VALUE_EXISTS; 8378 } 8379 8380 else if (strcmp(arg, "-pattern") == 0) 8381 { 8382 arg_pattern = argv[op+1]; 8383 CHECK_VALUE_EXISTS: 8384 if (argc <= 2) 8385 { 8386 fprintf(stderr, "** Missing value for %s\n", arg); 8387 yield = 1; 8388 goto EXIT; 8389 } 8390 op++; 8391 argc--; 8392 } 8393 8394 /* Unrecognized option */ 8395 8396 else 8397 { 8398 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg); 8399 usage(); 8400 yield = 1; 8401 goto EXIT; 8402 } 8403 op++; 8404 argc--; 8405 } 8406 8407 /* If -error was present, get the error numbers, show the messages, and exit. 8408 We wait to do this until we know which mode we are in. */ 8409 8410 if (arg_error != NULL) 8411 { 8412 int len; 8413 int errcode; 8414 char *endptr; 8415 8416 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at 8417 least 128 code units, because it is used for retrieving error messages. 
*/ 8418 8419 #ifdef SUPPORT_PCRE2_16 8420 if (test_mode == PCRE16_MODE) 8421 { 8422 pbuffer16_size = 256; 8423 pbuffer16 = (uint16_t *)malloc(pbuffer16_size); 8424 if (pbuffer16 == NULL) 8425 { 8426 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n", 8427 SIZ_CAST pbuffer16_size); 8428 yield = 1; 8429 goto EXIT; 8430 } 8431 } 8432 #endif 8433 8434 #ifdef SUPPORT_PCRE2_32 8435 if (test_mode == PCRE32_MODE) 8436 { 8437 pbuffer32_size = 512; 8438 pbuffer32 = (uint32_t *)malloc(pbuffer32_size); 8439 if (pbuffer32 == NULL) 8440 { 8441 fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n", 8442 SIZ_CAST pbuffer32_size); 8443 yield = 1; 8444 goto EXIT; 8445 } 8446 } 8447 #endif 8448 8449 /* Loop along a list of error numbers. */ 8450 8451 for (;;) 8452 { 8453 errcode = strtol(arg_error, &endptr, 10); 8454 if (*endptr != 0 && *endptr != CHAR_COMMA) 8455 { 8456 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error); 8457 yield = 1; 8458 goto EXIT; 8459 } 8460 printf("Error %d: ", errcode); 8461 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer); 8462 if (len < 0) 8463 { 8464 switch (len) 8465 { 8466 case PCRE2_ERROR_BADDATA: 8467 printf("PCRE2_ERROR_BADDATA (unknown error number)"); 8468 break; 8469 8470 case PCRE2_ERROR_NOMEMORY: 8471 printf("PCRE2_ERROR_NOMEMORY (buffer too small)"); 8472 break; 8473 8474 default: 8475 printf("Unexpected return (%d) from pcre2_get_error_message()", len); 8476 break; 8477 } 8478 } 8479 else 8480 { 8481 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout); 8482 } 8483 printf("\n"); 8484 if (*endptr == 0) goto EXIT; 8485 arg_error = endptr + 1; 8486 } 8487 /* Control never reaches here */ 8488 } /* End of -error handling */ 8489 8490 /* Initialize things that cannot be done until we know which test mode we are 8491 running in. Exercise the general context copying function, which is not 8492 otherwise used. 
*/ 8493 8494 code_unit_size = test_mode/8; 8495 max_oveccount = DEFAULT_OVECCOUNT; 8496 8497 /* Use macros to save a lot of duplication. */ 8498 8499 #define CREATECONTEXTS \ 8500 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \ 8501 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \ 8502 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \ 8503 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \ 8504 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \ 8505 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \ 8506 G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \ 8507 G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \ 8508 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS)) 8509 8510 #define CONTEXTTESTS \ 8511 (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \ 8512 (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \ 8513 (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \ 8514 (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL) 8515 8516 /* Call the appropriate functions for the current mode, and exercise some 8517 functions that are not otherwise called. 
*/ 8518 8519 #ifdef SUPPORT_PCRE2_8 8520 #undef BITS 8521 #define BITS 8 8522 if (test_mode == PCRE8_MODE) 8523 { 8524 CREATECONTEXTS; 8525 CONTEXTTESTS; 8526 } 8527 #endif 8528 8529 #ifdef SUPPORT_PCRE2_16 8530 #undef BITS 8531 #define BITS 16 8532 if (test_mode == PCRE16_MODE) 8533 { 8534 CREATECONTEXTS; 8535 CONTEXTTESTS; 8536 } 8537 #endif 8538 8539 #ifdef SUPPORT_PCRE2_32 8540 #undef BITS 8541 #define BITS 32 8542 if (test_mode == PCRE32_MODE) 8543 { 8544 CREATECONTEXTS; 8545 CONTEXTTESTS; 8546 } 8547 #endif 8548 8549 /* Set a default parentheses nest limit that is large enough to run the 8550 standard tests (this also exercises the function). */ 8551 8552 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT); 8553 8554 /* Handle command line modifier settings, sending any error messages to 8555 stderr. We need to know the mode before modifying the context, and it is tidier 8556 to do them all in the same way. */ 8557 8558 outfile = stderr; 8559 if ((arg_pattern != NULL && 8560 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) || 8561 (arg_subject != NULL && 8562 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl))) 8563 { 8564 yield = 1; 8565 goto EXIT; 8566 } 8567 8568 /* Sort out the input and output files, defaulting to stdin/stdout. 
*/ 8569 8570 infile = stdin; 8571 outfile = stdout; 8572 8573 if (argc > 1 && strcmp(argv[op], "-") != 0) 8574 { 8575 infile = fopen(argv[op], INPUT_MODE); 8576 if (infile == NULL) 8577 { 8578 printf("** Failed to open '%s': %s\n", argv[op], strerror(errno)); 8579 yield = 1; 8580 goto EXIT; 8581 } 8582 } 8583 8584 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 8585 if (INTERACTIVE(infile)) using_history(); 8586 #endif 8587 8588 if (argc > 2) 8589 { 8590 outfile = fopen(argv[op+1], OUTPUT_MODE); 8591 if (outfile == NULL) 8592 { 8593 printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno)); 8594 yield = 1; 8595 goto EXIT; 8596 } 8597 } 8598 8599 /* Output a heading line unless quiet, then process input lines. */ 8600 8601 if (!quiet) print_version(outfile); 8602 8603 SET(compiled_code, NULL); 8604 8605 #ifdef SUPPORT_PCRE2_8 8606 preg.re_pcre2_code = NULL; 8607 preg.re_match_data = NULL; 8608 #endif 8609 8610 while (notdone) 8611 { 8612 uint8_t *p; 8613 int rc = PR_OK; 8614 BOOL expectdata = TEST(compiled_code, !=, NULL); 8615 #ifdef SUPPORT_PCRE2_8 8616 expectdata |= preg.re_pcre2_code != NULL; 8617 #endif 8618 8619 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL) 8620 break; 8621 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer); 8622 fflush(outfile); 8623 p = buffer; 8624 8625 /* If we have a pattern set up for testing, or we are skipping after a 8626 compile failure, a blank line terminates this test. 
*/ 8627 8628 if (expectdata || skipping) 8629 { 8630 while (isspace(*p)) p++; 8631 if (*p == 0) 8632 { 8633 #ifdef SUPPORT_PCRE2_8 8634 if (preg.re_pcre2_code != NULL) 8635 { 8636 regfree(&preg); 8637 preg.re_pcre2_code = NULL; 8638 preg.re_match_data = NULL; 8639 } 8640 #endif /* SUPPORT_PCRE2_8 */ 8641 if (TEST(compiled_code, !=, NULL)) 8642 { 8643 SUB1(pcre2_code_free, compiled_code); 8644 SET(compiled_code, NULL); 8645 } 8646 skipping = FALSE; 8647 setlocale(LC_CTYPE, "C"); 8648 } 8649 8650 /* Otherwise, if we are not skipping, and the line is not a data comment 8651 line starting with "\=", process a data line. */ 8652 8653 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2]))) 8654 { 8655 rc = process_data(); 8656 } 8657 } 8658 8659 /* We do not have a pattern set up for testing. Lines starting with # are 8660 either comments or special commands. Blank lines are ignored. Otherwise, the 8661 line must start with a valid delimiter. It is then processed as a pattern 8662 line. A copy of the pattern is left in pbuffer8 for use by callouts. Under 8663 valgrind, make the unused part of the buffer undefined, to catch overruns. */ 8664 8665 else if (*p == '#') 8666 { 8667 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue; 8668 rc = process_command(); 8669 } 8670 8671 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL) 8672 { 8673 rc = process_pattern(); 8674 dfa_matched = 0; 8675 } 8676 8677 else 8678 { 8679 while (isspace(*p)) p++; 8680 if (*p != 0) 8681 { 8682 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer, 8683 *buffer); 8684 rc = PR_SKIP; 8685 } 8686 } 8687 8688 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE; 8689 else if (rc == PR_ABEND) 8690 { 8691 fprintf(outfile, "** pcre2test run abandoned\n"); 8692 yield = 1; 8693 goto EXIT; 8694 } 8695 } 8696 8697 /* Finish off a normal run. 
*/ 8698 8699 if (INTERACTIVE(infile)) fprintf(outfile, "\n"); 8700 8701 if (showtotaltimes) 8702 { 8703 const char *pad = ""; 8704 fprintf(outfile, "--------------------------------------\n"); 8705 if (timeit > 0) 8706 { 8707 fprintf(outfile, "Total compile time %.4f milliseconds\n", 8708 (((double)total_compile_time * 1000.0) / (double)timeit) / 8709 (double)CLOCKS_PER_SEC); 8710 if (total_jit_compile_time > 0) 8711 fprintf(outfile, "Total JIT compile %.4f milliseconds\n", 8712 (((double)total_jit_compile_time * 1000.0) / (double)timeit) / 8713 (double)CLOCKS_PER_SEC); 8714 pad = " "; 8715 } 8716 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad, 8717 (((double)total_match_time * 1000.0) / (double)timeitm) / 8718 (double)CLOCKS_PER_SEC); 8719 } 8720 8721 8722 EXIT: 8723 8724 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 8725 if (infile != NULL && INTERACTIVE(infile)) clear_history(); 8726 #endif 8727 8728 if (infile != NULL && infile != stdin) fclose(infile); 8729 if (outfile != NULL && outfile != stdout) fclose(outfile); 8730 8731 free(buffer); 8732 free(dbuffer); 8733 free(pbuffer8); 8734 free(dfa_workspace); 8735 free((void *)locale_tables); 8736 PCRE2_MATCH_DATA_FREE(match_data); 8737 SUB1(pcre2_code_free, compiled_code); 8738 8739 while(patstacknext-- > 0) 8740 { 8741 SET(compiled_code, patstack[patstacknext]); 8742 SUB1(pcre2_code_free, compiled_code); 8743 } 8744 8745 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context); 8746 if (jit_stack != NULL) 8747 { 8748 PCRE2_JIT_STACK_FREE(jit_stack); 8749 } 8750 8751 #define FREECONTEXTS \ 8752 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \ 8753 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \ 8754 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \ 8755 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \ 8756 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \ 8757 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); 
\ 8758 G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \ 8759 G(pcre2_convert_context_free_,BITS)(G(con_context,BITS)); 8760 8761 #ifdef SUPPORT_PCRE2_8 8762 #undef BITS 8763 #define BITS 8 8764 if (preg.re_pcre2_code != NULL) regfree(&preg); 8765 FREECONTEXTS; 8766 #endif 8767 8768 #ifdef SUPPORT_PCRE2_16 8769 #undef BITS 8770 #define BITS 16 8771 free(pbuffer16); 8772 FREECONTEXTS; 8773 #endif 8774 8775 #ifdef SUPPORT_PCRE2_32 8776 #undef BITS 8777 #define BITS 32 8778 free(pbuffer32); 8779 FREECONTEXTS; 8780 #endif 8781 8782 #if defined(__VMS) 8783 yield = SS$_NORMAL; /* Return values via DCL symbols */ 8784 #endif 8785 8786 return yield; 8787 } 8788 8789 /* End of pcre2test.c */ 8790