1 /************************************************* 2 * PCRE2 testing program * 3 *************************************************/ 4 5 /* PCRE2 is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. In 2014 7 the API was completely revised and '2' was added to the name, because the old 8 API, which had lasted for 16 years, could not accommodate new requirements. At 9 the same time, this testing program was re-designed because its original 10 hacked-up (non-) design had also run out of steam. 11 12 Written by Philip Hazel 13 Original code Copyright (c) 1997-2012 University of Cambridge 14 Rewritten code Copyright (c) 2016 University of Cambridge 15 16 ----------------------------------------------------------------------------- 17 Redistribution and use in source and binary forms, with or without 18 modification, are permitted provided that the following conditions are met: 19 20 * Redistributions of source code must retain the above copyright notice, 21 this list of conditions and the following disclaimer. 22 23 * Redistributions in binary form must reproduce the above copyright 24 notice, this list of conditions and the following disclaimer in the 25 documentation and/or other materials provided with the distribution. 26 27 * Neither the name of the University of Cambridge nor the names of its 28 contributors may be used to endorse or promote products derived from 29 this software without specific prior written permission. 30 31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 41 POSSIBILITY OF SUCH DAMAGE. 42 ----------------------------------------------------------------------------- 43 */ 44 45 46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2 47 libraries in a single program, though its input and output are always 8-bit. 48 It is different from modules such as pcre2_compile.c in the library itself, 49 which are compiled separately for each code unit width. If two widths are 50 enabled, for example, pcre2_compile.c is compiled twice. In contrast, 51 pcre2test.c is compiled only once, and linked with all the enabled libraries. 52 Therefore, it must not make use of any of the macros from pcre2.h or 53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make 54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that 55 it references only the enabled library functions. */ 56 57 #ifdef HAVE_CONFIG_H 58 #include "config.h" 59 #endif 60 61 #include <ctype.h> 62 #include <stdio.h> 63 #include <string.h> 64 #include <stdlib.h> 65 #include <time.h> 66 #include <locale.h> 67 #include <errno.h> 68 69 #if defined NATIVE_ZOS 70 #include "pcrzoscs.h" 71 /* That header is not included in the main PCRE2 distribution because other 72 apparatus is needed to compile pcre2test for z/OS. The header can be found in 73 the special z/OS distribution, which is available from www.zaconsultants.net or 74 from www.cbttape.org. 
*/ 75 #endif 76 77 #ifdef HAVE_UNISTD_H 78 #include <unistd.h> 79 #endif 80 81 /* Both libreadline and libedit are optionally supported. The user-supplied 82 original patch uses readline/readline.h for libedit, but in at least one system 83 it is installed as editline/readline.h, so the configuration code now looks for 84 that first, falling back to readline/readline.h. */ 85 86 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 87 #if defined(SUPPORT_LIBREADLINE) 88 #include <readline/readline.h> 89 #include <readline/history.h> 90 #else 91 #if defined(HAVE_EDITLINE_READLINE_H) 92 #include <editline/readline.h> 93 #else 94 #include <readline/readline.h> 95 #endif 96 #endif 97 #endif 98 99 /* Put the test for interactive input into a macro so that it can be changed if 100 required for different environments. */ 101 102 #define INTERACTIVE(f) isatty(fileno(f)) 103 104 105 /* ---------------------- System-specific definitions ---------------------- */ 106 107 /* A number of things vary for Windows builds. Originally, pcretest opened its 108 input and output without "b"; then I was told that "b" was needed in some 109 environments, so it was added for release 5.0 to both the input and output. (It 110 makes no difference on Unix-like systems.) Later I was told that it is wrong 111 for the input on Windows. I've now abstracted the modes into macros that are 112 set here, to make it easier to fiddle with them, and removed "b" from the input 113 mode under Windows. The BINARY versions are used when saving/restoring compiled 114 patterns. 
*/

/* Windows: text-mode input, binary output; binary modes for saved patterns. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE          "r"
#define OUTPUT_MODE         "wb"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to   */
                               /* be already defined, hence the #ifndefs.    */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
#else                          /* Everything else: binary for all four modes */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#define BINARY_INPUT_MODE   "rb"
#define BINARY_OUTPUT_MODE  "wb"
#endif
#endif

/* VMS needs an extra system header and declares vms_setsymbol(), which is
defined elsewhere. */

#ifdef __VMS
#include <ssdef.h>
void vms_setsymbol( char *, char *, int );
#endif

/* ------------------End of system-specific definitions -------------------- */

/* Glueing macros that are used in several places below.
*/ 164 165 #define glue(a,b) a##b 166 #define G(a,b) glue(a,b) 167 168 /* Miscellaneous parameters and manifests */ 169 170 #ifndef CLOCKS_PER_SEC 171 #ifdef CLK_TCK 172 #define CLOCKS_PER_SEC CLK_TCK 173 #else 174 #define CLOCKS_PER_SEC 100 175 #endif 176 #endif 177 178 #define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */ 179 #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ 180 #define DEFAULT_OVECCOUNT 15 /* Default ovector count */ 181 #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ 182 #define LOCALESIZE 32 /* Size of locale name */ 183 #define LOOPREPEAT 500000 /* Default loop count for timing */ 184 #define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */ 185 #define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */ 186 #define VERSION_SIZE 64 /* Size of buffer for the version strings */ 187 188 /* Make sure the buffer into which replacement strings are copied is big enough 189 to hold them as 32-bit code units. */ 190 191 #define REPLACE_BUFFSIZE 1024 /* This is a byte value */ 192 193 /* Execution modes */ 194 195 #define PCRE8_MODE 8 196 #define PCRE16_MODE 16 197 #define PCRE32_MODE 32 198 199 /* Processing returns */ 200 201 enum { PR_OK, PR_SKIP, PR_ABEND }; 202 203 /* The macro PRINTABLE determines whether to print an output character as-is or 204 as a hex value when showing compiled patterns. is We use it in cases when the 205 locale has not been explicitly changed, so as to get consistent output from 206 systems that differ in their output from isprint() even in the "C" locale. */ 207 208 #ifdef EBCDIC 209 #define PRINTABLE(c) ((c) >= 64 && (c) < 255) 210 #else 211 #define PRINTABLE(c) ((c) >= 32 && (c) < 127) 212 #endif 213 214 #define PRINTOK(c) ((locale_tables != NULL)? 
isprint(c) : PRINTABLE(c)) 215 216 /* We have to include some of the library source files because we need 217 to use some of the macros, internal structure definitions, and other internal 218 values - pcre2test has "inside information" compared to an application program 219 that strictly follows the PCRE2 API. 220 221 Before including pcre2_internal.h we define PRIV so that it does not get 222 defined therein. This ensures that PRIV names in the included files do not 223 clash with those in the libraries. Also, although pcre2_internal.h does itself 224 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h, 225 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not 226 for building the library. */ 227 228 #define PRIV(name) name 229 #define PCRE2_CODE_UNIT_WIDTH 0 230 #include "pcre2.h" 231 #include "pcre2posix.h" 232 #include "pcre2_internal.h" 233 234 /* We need access to some of the data tables that PCRE2 uses. Defining 235 PCRE2_PCRETEST makes some minor changes in the files. The previous definition 236 of PRIV avoids name clashes. */ 237 238 #define PCRE2_PCRE2TEST 239 #include "pcre2_tables.c" 240 #include "pcre2_ucd.c" 241 242 /* 32-bit integer values in the input are read by strtoul() or strtol(). The 243 check needed for overflow depends on whether long ints are in fact longer than 244 ints. They are defined not to be shorter. */ 245 246 #if ULONG_MAX > UINT32_MAX 247 #define U32OVERFLOW(x) (x > UINT32_MAX) 248 #else 249 #define U32OVERFLOW(x) (x == UINT32_MAX) 250 #endif 251 252 #if LONG_MAX > INT32_MAX 253 #define S32OVERFLOW(x) (x > INT32_MAX || x < INT32_MIN) 254 #else 255 #define S32OVERFLOW(x) (x == INT32_MAX || x == INT32_MIN) 256 #endif 257 258 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include 259 pcre2_intmodedep.h, which is where mode-dependent macros and structures are 260 defined. We can now include it for each supported code unit width. 
Because 261 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will 262 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately 263 while including these files, and then restore it to a no-op. Because LINK_SIZE 264 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of 265 these inclusions should not be changed. */ 266 267 #undef PCRE2_SUFFIX 268 #undef PCRE2_CODE_UNIT_WIDTH 269 270 #ifdef SUPPORT_PCRE2_8 271 #define PCRE2_CODE_UNIT_WIDTH 8 272 #define PCRE2_SUFFIX(a) G(a,8) 273 #include "pcre2_intmodedep.h" 274 #include "pcre2_printint.c" 275 #undef PCRE2_CODE_UNIT_WIDTH 276 #undef PCRE2_SUFFIX 277 #endif /* SUPPORT_PCRE2_8 */ 278 279 #ifdef SUPPORT_PCRE2_16 280 #define PCRE2_CODE_UNIT_WIDTH 16 281 #define PCRE2_SUFFIX(a) G(a,16) 282 #include "pcre2_intmodedep.h" 283 #include "pcre2_printint.c" 284 #undef PCRE2_CODE_UNIT_WIDTH 285 #undef PCRE2_SUFFIX 286 #endif /* SUPPORT_PCRE2_16 */ 287 288 #ifdef SUPPORT_PCRE2_32 289 #define PCRE2_CODE_UNIT_WIDTH 32 290 #define PCRE2_SUFFIX(a) G(a,32) 291 #include "pcre2_intmodedep.h" 292 #include "pcre2_printint.c" 293 #undef PCRE2_CODE_UNIT_WIDTH 294 #undef PCRE2_SUFFIX 295 #endif /* SUPPORT_PCRE2_32 */ 296 297 #define PCRE2_SUFFIX(a) a 298 299 /* We need to be able to check input text for UTF-8 validity, whatever code 300 widths are actually available, because the input to pcre2test is always in 301 8-bit code units. So we include the UTF validity checking function for 8-bit 302 code units. */ 303 304 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *); 305 306 #define PCRE2_CODE_UNIT_WIDTH 8 307 #undef PCRE2_SPTR 308 #define PCRE2_SPTR PCRE2_SPTR8 309 #include "pcre2_valid_utf.c" 310 #undef PCRE2_CODE_UNIT_WIDTH 311 #undef PCRE2_SPTR 312 313 /* If we have 8-bit support, default to it; if there is also 16-or 32-bit 314 support, it can be selected by a command-line option. 
If there is no 8-bit 315 support, there must be 16- or 32-bit support, so default to one of them. The 316 config function, JIT stack, contexts, and version string are the same in all 317 modes, so use the form of the first that is available. */ 318 319 #if defined SUPPORT_PCRE2_8 320 #define DEFAULT_TEST_MODE PCRE8_MODE 321 #define VERSION_TYPE PCRE2_UCHAR8 322 #define PCRE2_CONFIG pcre2_config_8 323 #define PCRE2_JIT_STACK pcre2_jit_stack_8 324 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8 325 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8 326 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8 327 #define VERSION_TYPE PCRE2_UCHAR8 328 329 #elif defined SUPPORT_PCRE2_16 330 #define DEFAULT_TEST_MODE PCRE16_MODE 331 #define VERSION_TYPE PCRE2_UCHAR16 332 #define PCRE2_CONFIG pcre2_config_16 333 #define PCRE2_JIT_STACK pcre2_jit_stack_16 334 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16 335 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16 336 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16 337 338 #elif defined SUPPORT_PCRE2_32 339 #define DEFAULT_TEST_MODE PCRE32_MODE 340 #define VERSION_TYPE PCRE2_UCHAR32 341 #define PCRE2_CONFIG pcre2_config_32 342 #define PCRE2_JIT_STACK pcre2_jit_stack_32 343 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32 344 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32 345 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32 346 #endif 347 348 /* ------------- Structure and table for handling #-commands ------------- */ 349 350 typedef struct cmdstruct { 351 const char *name; 352 int value; 353 } cmdstruct; 354 355 enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN, 356 CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN }; 357 358 static cmdstruct cmdlist[] = { 359 { "forbid_utf", CMD_FORBID_UTF }, 360 { "load", CMD_LOAD }, 361 { "newline_default", CMD_NEWLINE_DEFAULT }, 362 
{ "pattern", CMD_PATTERN }, 363 { "perltest", CMD_PERLTEST }, 364 { "pop", CMD_POP }, 365 { "popcopy", CMD_POPCOPY }, 366 { "save", CMD_SAVE }, 367 { "subject", CMD_SUBJECT }}; 368 369 #define cmdlistcount sizeof(cmdlist)/sizeof(cmdstruct) 370 371 /* ------------- Structures and tables for handling modifiers -------------- */ 372 373 /* Table of names for newline types. Must be kept in step with the definitions 374 of PCRE2_NEWLINE_xx in pcre2.h. */ 375 376 static const char *newlines[] = { 377 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" }; 378 379 /* Modifier types and applicability */ 380 381 enum { MOD_CTC, /* Applies to a compile context */ 382 MOD_CTM, /* Applies to a match context */ 383 MOD_PAT, /* Applies to a pattern */ 384 MOD_PATP, /* Ditto, OK for Perl test */ 385 MOD_DAT, /* Applies to a data line */ 386 MOD_PD, /* Applies to a pattern or a data line */ 387 MOD_PDP, /* As MOD_PD, OK for Perl test */ 388 MOD_PND, /* As MOD_PD, but not for a default pattern */ 389 MOD_PNDP, /* As MOD_PND, OK for Perl test */ 390 MOD_CTL, /* Is a control bit */ 391 MOD_BSR, /* Is a BSR value */ 392 MOD_IN2, /* Is one or two unsigned integers */ 393 MOD_INS, /* Is a signed integer */ 394 MOD_INT, /* Is an unsigned integer */ 395 MOD_IND, /* Is an unsigned integer, but no value => default */ 396 MOD_NL, /* Is a newline value */ 397 MOD_NN, /* Is a number or a name; more than one may occur */ 398 MOD_OPT, /* Is an option bit */ 399 MOD_SIZ, /* Is a PCRE2_SIZE value */ 400 MOD_STR }; /* Is a string */ 401 402 /* Control bits. Some apply to compiling, some to matching, but some can be set 403 either on a pattern or a data line, so they must all be distinct. There are now 404 so many of them that they are split into two fields. 
*/ 405 406 #define CTL_AFTERTEXT 0x00000001u 407 #define CTL_ALLAFTERTEXT 0x00000002u 408 #define CTL_ALLCAPTURES 0x00000004u 409 #define CTL_ALLUSEDTEXT 0x00000008u 410 #define CTL_ALTGLOBAL 0x00000010u 411 #define CTL_BINCODE 0x00000020u 412 #define CTL_CALLOUT_CAPTURE 0x00000040u 413 #define CTL_CALLOUT_INFO 0x00000080u 414 #define CTL_CALLOUT_NONE 0x00000100u 415 #define CTL_DFA 0x00000200u 416 #define CTL_EXPAND 0x00000400u 417 #define CTL_FINDLIMITS 0x00000800u 418 #define CTL_FULLBINCODE 0x00001000u 419 #define CTL_GETALL 0x00002000u 420 #define CTL_GLOBAL 0x00004000u 421 #define CTL_HEXPAT 0x00008000u 422 #define CTL_INFO 0x00010000u 423 #define CTL_JITFAST 0x00020000u 424 #define CTL_JITVERIFY 0x00040000u 425 #define CTL_MARK 0x00080000u 426 #define CTL_MEMORY 0x00100000u 427 #define CTL_NULLCONTEXT 0x00200000u 428 #define CTL_POSIX 0x00400000u 429 #define CTL_POSIX_NOSUB 0x00800000u 430 #define CTL_PUSH 0x01000000u 431 #define CTL_PUSHCOPY 0x02000000u 432 #define CTL_STARTCHAR 0x04000000u 433 #define CTL_ZERO_TERMINATE 0x08000000u 434 /* Spare 0x10000000u */ 435 /* Spare 0x20000000u */ 436 #define CTL_NL_SET 0x40000000u /* Informational */ 437 #define CTL_BSR_SET 0x80000000u /* Informational */ 438 439 /* Second control word */ 440 441 #define CTL2_SUBSTITUTE_EXTENDED 0x00000001u 442 #define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u 443 #define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u 444 #define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u 445 446 /* Combinations */ 447 448 #define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */ 449 #define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO) 450 #define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL) 451 452 /* These are all the controls that may be set either on a pattern or on a 453 data line. 
*/

#define CTL_ALLPD  (CTL_AFTERTEXT|\
                    CTL_ALLAFTERTEXT|\
                    CTL_ALLCAPTURES|\
                    CTL_ALLUSEDTEXT|\
                    CTL_ALTGLOBAL|\
                    CTL_GLOBAL|\
                    CTL_MARK|\
                    CTL_MEMORY|\
                    CTL_STARTCHAR)

/* The same for the second control word; at present this is every CTL2 bit. */

#define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\
                    CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
                    CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
                    CTL2_SUBSTITUTE_UNSET_EMPTY)

/* Structures for holding modifier information for patterns and subject strings
(data). Fields containing modifiers that can be set either for a pattern or a
subject must be at the start and in the same order in both cases so that the
same offset in the big table below works for both. */

typedef struct patctl {    /* Structure for pattern modifiers. */
  uint32_t  options;       /* Must be in same position as datctl */
  uint32_t  control;       /* Must be in same position as datctl */
  uint32_t  control2;      /* Must be in same position as datctl */
   uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
  uint32_t  jit;                /* Target of the "jit" modifier */
  uint32_t  stackguard_test;    /* Target of the "stackguard" modifier */
  uint32_t  tables_id;          /* Target of the "tables" modifier */
  uint32_t  regerror_buffsize;  /* Target of "regerror_buffsize" */
   uint8_t  locale[LOCALESIZE]; /* Target of the "locale" modifier */
} patctl;

/* Dimensions of the copy/get fields in datctl: how many numbers can be held,
and the size in bytes of each names field. */

#define MAXCPYGET 10
#define LENCPYGET 64

typedef struct datctl {    /* Structure for data line modifiers. */
  uint32_t  options;       /* Must be in same position as patctl */
  uint32_t  control;       /* Must be in same position as patctl */
  uint32_t  control2;      /* Must be in same position as patctl */
   uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
  uint32_t  cfail[2];                  /* Target of "callout_fail" */
   int32_t  callout_data;              /* Target of "callout_data" */
   int32_t  copy_numbers[MAXCPYGET];   /* Numbers given to "copy" */
   int32_t  get_numbers[MAXCPYGET];    /* Numbers given to "get" */
  uint32_t  jitstack;                  /* Target of "jitstack" */
  uint32_t  oveccount;                 /* Target of "ovector" */
  uint32_t  offset;                    /* Target of "offset"/"startoffset" */
  uint8_t   copy_names[LENCPYGET];     /* Names given to "copy" */
  uint8_t   get_names[LENCPYGET];      /* Names given to "get" */
} datctl;

/* Ids for which context to modify.
*/ 507 508 enum { CTX_PAT, /* Active pattern context */ 509 CTX_POPPAT, /* Ditto, for a popped pattern */ 510 CTX_DEFPAT, /* Default pattern context */ 511 CTX_DAT, /* Active data (match) context */ 512 CTX_DEFDAT }; /* Default data (match) context */ 513 514 /* Macros to simplify the big table below. */ 515 516 #define CO(name) offsetof(PCRE2_REAL_COMPILE_CONTEXT, name) 517 #define MO(name) offsetof(PCRE2_REAL_MATCH_CONTEXT, name) 518 #define PO(name) offsetof(patctl, name) 519 #define PD(name) PO(name) 520 #define DO(name) offsetof(datctl, name) 521 522 /* Table of all long-form modifiers. Must be in collating sequence of modifier 523 name because it is searched by binary chop. */ 524 525 typedef struct modstruct { 526 const char *name; 527 uint16_t which; 528 uint16_t type; 529 uint32_t value; 530 PCRE2_SIZE offset; 531 } modstruct; 532 533 static modstruct modlist[] = { 534 { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) }, 535 { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) }, 536 { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) }, 537 { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) }, 538 { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) }, 539 { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) }, 540 { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) }, 541 { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) }, 542 { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) }, 543 { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) }, 544 { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) }, 545 { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) }, 546 { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) }, 547 { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) }, 548 { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) }, 549 { "callout_fail", MOD_DAT, MOD_IN2, 0, 
DO(cfail) }, 550 { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) }, 551 { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, 552 { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, 553 { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, 554 { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) }, 555 { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) }, 556 { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) }, 557 { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) }, 558 { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) }, 559 { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) }, 560 { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) }, 561 { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) }, 562 { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) }, 563 { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) }, 564 { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) }, 565 { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) }, 566 { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) }, 567 { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) }, 568 { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) }, 569 { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) }, 570 { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, 571 { "jit", MOD_PAT, MOD_IND, 7, PO(jit) }, 572 { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) }, 573 { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) }, 574 { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) }, 575 { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) }, 576 { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) }, 577 { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) }, 578 { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) }, 579 { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) }, 580 { "memory", 
MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) }, 581 { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) }, 582 { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) }, 583 { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) }, 584 { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) }, 585 { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) }, 586 { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, 587 { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, 588 { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, 589 { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) }, 590 { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, 591 { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, 592 { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, 593 { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) }, 594 { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, 595 { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, 596 { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) }, 597 { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, 598 { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, 599 { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, 600 { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) }, 601 { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, 602 { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, 603 { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, 604 { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) }, 605 { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) }, 606 { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, 607 { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, 608 { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) 
}, 609 { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) }, 610 { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) }, 611 { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) }, 612 { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, 613 { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, 614 { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) }, 615 { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) }, 616 { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) }, 617 { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) }, 618 { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) }, 619 { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, 620 { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, 621 { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, 622 { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) }, 623 { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }, 624 { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) } 625 }; 626 627 #define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct) 628 629 /* Controls and options that are supported for use with the POSIX interface. */ 630 631 #define POSIX_SUPPORTED_COMPILE_OPTIONS ( \ 632 PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \ 633 PCRE2_UNGREEDY) 634 635 #define POSIX_SUPPORTED_COMPILE_CONTROLS ( \ 636 CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB) 637 638 #define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0) 639 640 #define POSIX_SUPPORTED_MATCH_OPTIONS ( \ 641 PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL) 642 643 #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) 644 #define POSIX_SUPPORTED_MATCH_CONTROLS2 (0) 645 646 /* Control bits that are not ignored with 'push'. 
*/

#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
  CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
  CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET)

/* No second-word controls are in this set. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)

/* Controls that apply only at compile time with 'push'. */

#define PUSH_COMPILE_ONLY_CONTROLS   CTL_JITVERIFY
#define PUSH_COMPILE_ONLY_CONTROLS2  (0)

/* Controls that are forbidden with #pop or #popcopy. */

#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
  CTL_PUSHCOPY)

/* Pattern controls that are mutually exclusive. At present these are all in
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
CTL_POSIX, so it doesn't need its own entries. Each entry is the OR of one
pair of controls that must not be set together. */

static uint32_t exclusive_pat_controls[] = {
  CTL_POSIX  | CTL_HEXPAT,
  CTL_POSIX  | CTL_PUSH,
  CTL_POSIX  | CTL_PUSHCOPY,
  CTL_EXPAND | CTL_HEXPAT };

/* Data controls that are mutually exclusive. At present these are all in the
first control word. Entries have the same form as in the pattern table. */
static uint32_t exclusive_dat_controls[] = {
  CTL_ALLUSEDTEXT | CTL_STARTCHAR,
  CTL_FINDLIMITS  | CTL_NULLCONTEXT };

/* Table of single-character abbreviated modifiers. The index field is
initialized to -1, but the first time the modifier is encountered, it is filled
in with the index of the full entry in modlist, to save repeated searching when
processing multiple test items. This short list is searched serially, so its
order does not matter.
*/ 685 686 typedef struct c1modstruct { 687 const char *fullname; 688 uint32_t onechar; 689 int index; 690 } c1modstruct; 691 692 static c1modstruct c1modlist[] = { 693 { "bincode", 'B', -1 }, 694 { "info", 'I', -1 }, 695 { "global", 'g', -1 }, 696 { "caseless", 'i', -1 }, 697 { "multiline", 'm', -1 }, 698 { "dotall", 's', -1 }, 699 { "extended", 'x', -1 } 700 }; 701 702 #define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct) 703 704 /* Table of arguments for the -C command line option. Use macros to make the 705 table itself easier to read. */ 706 707 #if defined SUPPORT_PCRE2_8 708 #define SUPPORT_8 1 709 #endif 710 #if defined SUPPORT_PCRE2_16 711 #define SUPPORT_16 1 712 #endif 713 #if defined SUPPORT_PCRE2_32 714 #define SUPPORT_32 1 715 #endif 716 717 #ifndef SUPPORT_8 718 #define SUPPORT_8 0 719 #endif 720 #ifndef SUPPORT_16 721 #define SUPPORT_16 0 722 #endif 723 #ifndef SUPPORT_32 724 #define SUPPORT_32 0 725 #endif 726 727 #ifdef EBCDIC 728 #define SUPPORT_EBCDIC 1 729 #define EBCDIC_NL CHAR_LF 730 #else 731 #define SUPPORT_EBCDIC 0 732 #define EBCDIC_NL 0 733 #endif 734 735 #ifdef NEVER_BACKSLASH_C 736 #define BACKSLASH_C 0 737 #else 738 #define BACKSLASH_C 1 739 #endif 740 741 typedef struct coptstruct { 742 const char *name; 743 uint32_t type; 744 uint32_t value; 745 } coptstruct; 746 747 enum { CONF_BSR, 748 CONF_FIX, 749 CONF_FIZ, 750 CONF_INT, 751 CONF_NL 752 }; 753 754 static coptstruct coptlist[] = { 755 { "backslash-C", CONF_FIX, BACKSLASH_C }, 756 { "bsr", CONF_BSR, PCRE2_CONFIG_BSR }, 757 { "ebcdic", CONF_FIX, SUPPORT_EBCDIC }, 758 { "ebcdic-nl", CONF_FIZ, EBCDIC_NL }, 759 { "jit", CONF_INT, PCRE2_CONFIG_JIT }, 760 { "linksize", CONF_INT, PCRE2_CONFIG_LINKSIZE }, 761 { "newline", CONF_NL, PCRE2_CONFIG_NEWLINE }, 762 { "pcre2-16", CONF_FIX, SUPPORT_16 }, 763 { "pcre2-32", CONF_FIX, SUPPORT_32 }, 764 { "pcre2-8", CONF_FIX, SUPPORT_8 }, 765 { "unicode", CONF_INT, PCRE2_CONFIG_UNICODE } 766 }; 767 768 #define COPTLISTCOUNT 
sizeof(coptlist)/sizeof(coptstruct) 769 770 #undef SUPPORT_8 771 #undef SUPPORT_16 772 #undef SUPPORT_32 773 #undef SUPPORT_EBCDIC 774 775 776 /* ----------------------- Static variables ------------------------ */ 777 778 static FILE *infile; 779 static FILE *outfile; 780 781 static const void *last_callout_mark; 782 static PCRE2_JIT_STACK *jit_stack = NULL; 783 static size_t jit_stack_size = 0; 784 785 static BOOL first_callout; 786 static BOOL jit_was_used; 787 static BOOL restrict_for_perl_test = FALSE; 788 static BOOL show_memory = FALSE; 789 790 static int code_unit_size; /* Bytes */ 791 static int jitrc; /* Return from JIT compile */ 792 static int test_mode = DEFAULT_TEST_MODE; 793 static int timeit = 0; 794 static int timeitm = 0; 795 796 clock_t total_compile_time = 0; 797 clock_t total_jit_compile_time = 0; 798 clock_t total_match_time = 0; 799 800 static uint32_t dfa_matched; 801 static uint32_t forbid_utf = 0; 802 static uint32_t maxlookbehind; 803 static uint32_t max_oveccount; 804 static uint32_t callout_count; 805 806 static uint16_t local_newline_default = 0; 807 808 static VERSION_TYPE jittarget[VERSION_SIZE]; 809 static VERSION_TYPE version[VERSION_SIZE]; 810 static VERSION_TYPE uversion[VERSION_SIZE]; 811 812 static patctl def_patctl; 813 static patctl pat_patctl; 814 static datctl def_datctl; 815 static datctl dat_datctl; 816 817 static void *patstack[PATSTACKSIZE]; 818 static int patstacknext = 0; 819 820 #ifdef SUPPORT_PCRE2_8 821 static regex_t preg = { NULL, NULL, 0, 0, 0 }; 822 #endif 823 824 static int *dfa_workspace = NULL; 825 static const uint8_t *locale_tables = NULL; 826 static uint8_t locale_name[32]; 827 828 /* We need buffers for building 16/32-bit strings; 8-bit strings don't need 829 rebuilding, but set up the same naming scheme for use in macros. The "buffer" 830 buffer is where all input lines are read. Its size is the same as pbuffer8. 
831 Pattern lines are always copied to pbuffer8 for use in callouts, even if they 832 are actually compiled from pbuffer16 or pbuffer32. */ 833 834 static size_t pbuffer8_size = 50000; /* Initial size, bytes */ 835 static uint8_t *pbuffer8 = NULL; 836 static uint8_t *buffer = NULL; 837 838 /* The dbuffer is where all processed data lines are put. In non-8-bit modes it 839 is cast as needed. For long data lines it grows as necessary. */ 840 841 static size_t dbuffer_size = 1u << 14; /* Initial size, bytes */ 842 static uint8_t *dbuffer = NULL; 843 844 845 /* ---------------- Mode-dependent variables -------------------*/ 846 847 #ifdef SUPPORT_PCRE2_8 848 static pcre2_code_8 *compiled_code8; 849 static pcre2_general_context_8 *general_context8, *general_context_copy8; 850 static pcre2_compile_context_8 *pat_context8, *default_pat_context8; 851 static pcre2_match_context_8 *dat_context8, *default_dat_context8; 852 static pcre2_match_data_8 *match_data8; 853 #endif 854 855 #ifdef SUPPORT_PCRE2_16 856 static pcre2_code_16 *compiled_code16; 857 static pcre2_general_context_16 *general_context16, *general_context_copy16; 858 static pcre2_compile_context_16 *pat_context16, *default_pat_context16; 859 static pcre2_match_context_16 *dat_context16, *default_dat_context16; 860 static pcre2_match_data_16 *match_data16; 861 static PCRE2_SIZE pbuffer16_size = 0; /* Set only when needed */ 862 static uint16_t *pbuffer16 = NULL; 863 #endif 864 865 #ifdef SUPPORT_PCRE2_32 866 static pcre2_code_32 *compiled_code32; 867 static pcre2_general_context_32 *general_context32, *general_context_copy32; 868 static pcre2_compile_context_32 *pat_context32, *default_pat_context32; 869 static pcre2_match_context_32 *dat_context32, *default_dat_context32; 870 static pcre2_match_data_32 *match_data32; 871 static PCRE2_SIZE pbuffer32_size = 0; /* Set only when needed */ 872 static uint32_t *pbuffer32 = NULL; 873 #endif 874 875 876 /* ---------------- Macros that work in all modes ----------------- 
*/ 877 878 #define CAST8VAR(x) CASTVAR(uint8_t *, x) 879 #define SET(x,y) SETOP(x,y,=) 880 #define SETPLUS(x,y) SETOP(x,y,+=) 881 #define strlen8(x) strlen((char *)x) 882 883 884 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/ 885 886 /* Define macros for variables and functions that must be selected dynamically 887 depending on the mode setting (8, 16, 32). These are dependent on which modes 888 are supported. */ 889 890 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \ 891 defined (SUPPORT_PCRE2_32)) >= 2 892 893 /* ----- All three modes supported ----- */ 894 895 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32) 896 897 #define CASTFLD(t,a,b) ((test_mode == PCRE8_MODE)? (t)(G(a,8)->b) : \ 898 (test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : (t)(G(a,32)->b)) 899 900 #define CASTVAR(t,x) ( \ 901 (test_mode == PCRE8_MODE)? (t)G(x,8) : \ 902 (test_mode == PCRE16_MODE)? (t)G(x,16) : (t)G(x,32)) 903 904 #define CODE_UNIT(a,b) ( \ 905 (test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \ 906 (test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \ 907 (uint32_t)(((PCRE2_SPTR32)(a))[b])) 908 909 #define DATCTXCPY(a,b) \ 910 if (test_mode == PCRE8_MODE) \ 911 memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \ 912 else if (test_mode == PCRE16_MODE) \ 913 memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \ 914 else memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) 915 916 #define FLD(a,b) ((test_mode == PCRE8_MODE)? G(a,8)->b : \ 917 (test_mode == PCRE16_MODE)? 
G(a,16)->b : G(a,32)->b) 918 919 #define PATCTXCPY(a,b) \ 920 if (test_mode == PCRE8_MODE) \ 921 memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \ 922 else if (test_mode == PCRE16_MODE) \ 923 memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \ 924 else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) 925 926 #define PCHARS(lv, p, offset, len, utf, f) \ 927 if (test_mode == PCRE32_MODE) \ 928 lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ 929 else if (test_mode == PCRE16_MODE) \ 930 lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ 931 else \ 932 lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 933 934 #define PCHARSV(p, offset, len, utf, f) \ 935 if (test_mode == PCRE32_MODE) \ 936 (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ 937 else if (test_mode == PCRE16_MODE) \ 938 (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ 939 else \ 940 (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 941 942 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 943 if (test_mode == PCRE8_MODE) \ 944 a = pcre2_callout_enumerate_8(compiled_code8, \ 945 (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \ 946 else if (test_mode == PCRE16_MODE) \ 947 a = pcre2_callout_enumerate_16(compiled_code16, \ 948 (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \ 949 else \ 950 a = pcre2_callout_enumerate_32(compiled_code32, \ 951 (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) 952 953 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \ 954 if (test_mode == PCRE8_MODE) \ 955 G(a,8) = pcre2_code_copy_8(b); \ 956 else if (test_mode == PCRE16_MODE) \ 957 G(a,16) = pcre2_code_copy_16(b); \ 958 else \ 959 G(a,32) = pcre2_code_copy_32(b) 960 961 #define PCRE2_CODE_COPY_TO_VOID(a,b) \ 962 if (test_mode == PCRE8_MODE) \ 963 a = (void *)pcre2_code_copy_8(G(b,8)); \ 964 else if (test_mode == PCRE16_MODE) \ 965 a = (void *)pcre2_code_copy_16(G(b,16)); \ 966 else \ 967 a = (void *)pcre2_code_copy_32(G(b,32)) 968 969 #define 
PCRE2_COMPILE(a,b,c,d,e,f,g) \ 970 if (test_mode == PCRE8_MODE) \ 971 G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \ 972 else if (test_mode == PCRE16_MODE) \ 973 G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \ 974 else \ 975 G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g) 976 977 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 978 if (test_mode == PCRE8_MODE) \ 979 a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \ 980 else if (test_mode == PCRE16_MODE) \ 981 a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \ 982 else \ 983 a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) 984 985 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 986 if (test_mode == PCRE8_MODE) \ 987 r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \ 988 else if (test_mode == PCRE16_MODE) \ 989 r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size)); \ 990 else \ 991 r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size)) 992 993 #define PCRE2_GET_OVECTOR_COUNT(a,b) \ 994 if (test_mode == PCRE8_MODE) \ 995 a = pcre2_get_ovector_count_8(G(b,8)); \ 996 else if (test_mode == PCRE16_MODE) \ 997 a = pcre2_get_ovector_count_16(G(b,16)); \ 998 else \ 999 a = pcre2_get_ovector_count_32(G(b,32)) 1000 1001 #define PCRE2_GET_STARTCHAR(a,b) \ 1002 if (test_mode == PCRE8_MODE) \ 1003 a = pcre2_get_startchar_8(G(b,8)); \ 1004 else if (test_mode == PCRE16_MODE) \ 1005 a = pcre2_get_startchar_16(G(b,16)); \ 1006 else \ 1007 a = pcre2_get_startchar_32(G(b,32)) 1008 1009 #define PCRE2_JIT_COMPILE(r,a,b) \ 1010 if (test_mode == PCRE8_MODE) r = pcre2_jit_compile_8(G(a,8),b); \ 1011 else if (test_mode == PCRE16_MODE) r = pcre2_jit_compile_16(G(a,16),b); \ 1012 else r = pcre2_jit_compile_32(G(a,32),b) 1013 1014 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ 1015 if (test_mode == PCRE8_MODE) pcre2_jit_free_unused_memory_8(G(a,8)); \ 1016 else if (test_mode == PCRE16_MODE) pcre2_jit_free_unused_memory_16(G(a,16)); \ 1017 else pcre2_jit_free_unused_memory_32(G(a,32)) 1018 
1019 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1020 if (test_mode == PCRE8_MODE) \ 1021 a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ 1022 else if (test_mode == PCRE16_MODE) \ 1023 a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ 1024 else \ 1025 a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 1026 1027 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1028 if (test_mode == PCRE8_MODE) \ 1029 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \ 1030 else if (test_mode == PCRE16_MODE) \ 1031 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \ 1032 else \ 1033 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); 1034 1035 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1036 if (test_mode == PCRE8_MODE) \ 1037 pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \ 1038 else if (test_mode == PCRE16_MODE) \ 1039 pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \ 1040 else \ 1041 pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); 1042 1043 #define PCRE2_JIT_STACK_FREE(a) \ 1044 if (test_mode == PCRE8_MODE) \ 1045 pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \ 1046 else if (test_mode == PCRE16_MODE) \ 1047 pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \ 1048 else \ 1049 pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); 1050 1051 #define PCRE2_MAKETABLES(a) \ 1052 if (test_mode == PCRE8_MODE) a = pcre2_maketables_8(NULL); \ 1053 else if (test_mode == PCRE16_MODE) a = pcre2_maketables_16(NULL); \ 1054 else a = pcre2_maketables_32(NULL) 1055 1056 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1057 if (test_mode == PCRE8_MODE) \ 1058 a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \ 1059 else if (test_mode == PCRE16_MODE) \ 1060 a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \ 1061 else \ 1062 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 1063 1064 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \ 1065 if (test_mode == PCRE8_MODE) \ 1066 G(a,8) = 
pcre2_match_data_create_8(b,c); \ 1067 else if (test_mode == PCRE16_MODE) \ 1068 G(a,16) = pcre2_match_data_create_16(b,c); \ 1069 else \ 1070 G(a,32) = pcre2_match_data_create_32(b,c) 1071 1072 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1073 if (test_mode == PCRE8_MODE) \ 1074 G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \ 1075 else if (test_mode == PCRE16_MODE) \ 1076 G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \ 1077 else \ 1078 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) 1079 1080 #define PCRE2_MATCH_DATA_FREE(a) \ 1081 if (test_mode == PCRE8_MODE) \ 1082 pcre2_match_data_free_8(G(a,8)); \ 1083 else if (test_mode == PCRE16_MODE) \ 1084 pcre2_match_data_free_16(G(a,16)); \ 1085 else \ 1086 pcre2_match_data_free_32(G(a,32)) 1087 1088 #define PCRE2_PATTERN_INFO(a,b,c,d) \ 1089 if (test_mode == PCRE8_MODE) \ 1090 a = pcre2_pattern_info_8(G(b,8),c,d); \ 1091 else if (test_mode == PCRE16_MODE) \ 1092 a = pcre2_pattern_info_16(G(b,16),c,d); \ 1093 else \ 1094 a = pcre2_pattern_info_32(G(b,32),c,d) 1095 1096 #define PCRE2_PRINTINT(a) \ 1097 if (test_mode == PCRE8_MODE) \ 1098 pcre2_printint_8(compiled_code8,outfile,a); \ 1099 else if (test_mode == PCRE16_MODE) \ 1100 pcre2_printint_16(compiled_code16,outfile,a); \ 1101 else \ 1102 pcre2_printint_32(compiled_code32,outfile,a) 1103 1104 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 1105 if (test_mode == PCRE8_MODE) \ 1106 r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \ 1107 else if (test_mode == PCRE16_MODE) \ 1108 r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \ 1109 else \ 1110 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)) 1111 1112 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 1113 if (test_mode == PCRE8_MODE) \ 1114 r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \ 1115 else if (test_mode == PCRE16_MODE) \ 1116 r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \ 1117 
else \ 1118 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)) 1119 1120 #define PCRE2_SERIALIZE_FREE(a) \ 1121 if (test_mode == PCRE8_MODE) \ 1122 pcre2_serialize_free_8(a); \ 1123 else if (test_mode == PCRE16_MODE) \ 1124 pcre2_serialize_free_16(a); \ 1125 else \ 1126 pcre2_serialize_free_32(a) 1127 1128 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 1129 if (test_mode == PCRE8_MODE) \ 1130 r = pcre2_serialize_get_number_of_codes_8(a); \ 1131 else if (test_mode == PCRE16_MODE) \ 1132 r = pcre2_serialize_get_number_of_codes_16(a); \ 1133 else \ 1134 r = pcre2_serialize_get_number_of_codes_32(a); \ 1135 1136 #define PCRE2_SET_CALLOUT(a,b,c) \ 1137 if (test_mode == PCRE8_MODE) \ 1138 pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \ 1139 else if (test_mode == PCRE16_MODE) \ 1140 pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \ 1141 else \ 1142 pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c); 1143 1144 #define PCRE2_SET_CHARACTER_TABLES(a,b) \ 1145 if (test_mode == PCRE8_MODE) \ 1146 pcre2_set_character_tables_8(G(a,8),b); \ 1147 else if (test_mode == PCRE16_MODE) \ 1148 pcre2_set_character_tables_16(G(a,16),b); \ 1149 else \ 1150 pcre2_set_character_tables_32(G(a,32),b) 1151 1152 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 1153 if (test_mode == PCRE8_MODE) \ 1154 pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \ 1155 else if (test_mode == PCRE16_MODE) \ 1156 pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \ 1157 else \ 1158 pcre2_set_compile_recursion_guard_32(G(a,32),b,c) 1159 1160 #define PCRE2_SET_MATCH_LIMIT(a,b) \ 1161 if (test_mode == PCRE8_MODE) \ 1162 pcre2_set_match_limit_8(G(a,8),b); \ 1163 else if (test_mode == PCRE16_MODE) \ 1164 pcre2_set_match_limit_16(G(a,16),b); \ 1165 else \ 1166 pcre2_set_match_limit_32(G(a,32),b) 1167 1168 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ 1169 if (test_mode == PCRE8_MODE) \ 1170 
pcre2_set_max_pattern_length_8(G(a,8),b); \ 1171 else if (test_mode == PCRE16_MODE) \ 1172 pcre2_set_max_pattern_length_16(G(a,16),b); \ 1173 else \ 1174 pcre2_set_max_pattern_length_32(G(a,32),b) 1175 1176 #define PCRE2_SET_OFFSET_LIMIT(a,b) \ 1177 if (test_mode == PCRE8_MODE) \ 1178 pcre2_set_offset_limit_8(G(a,8),b); \ 1179 else if (test_mode == PCRE16_MODE) \ 1180 pcre2_set_offset_limit_16(G(a,16),b); \ 1181 else \ 1182 pcre2_set_offset_limit_32(G(a,32),b) 1183 1184 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ 1185 if (test_mode == PCRE8_MODE) \ 1186 pcre2_set_parens_nest_limit_8(G(a,8),b); \ 1187 else if (test_mode == PCRE16_MODE) \ 1188 pcre2_set_parens_nest_limit_16(G(a,16),b); \ 1189 else \ 1190 pcre2_set_parens_nest_limit_32(G(a,32),b) 1191 1192 #define PCRE2_SET_RECURSION_LIMIT(a,b) \ 1193 if (test_mode == PCRE8_MODE) \ 1194 pcre2_set_recursion_limit_8(G(a,8),b); \ 1195 else if (test_mode == PCRE16_MODE) \ 1196 pcre2_set_recursion_limit_16(G(a,16),b); \ 1197 else \ 1198 pcre2_set_recursion_limit_32(G(a,32),b) 1199 1200 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 1201 if (test_mode == PCRE8_MODE) \ 1202 a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ 1203 (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \ 1204 else if (test_mode == PCRE16_MODE) \ 1205 a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \ 1206 (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \ 1207 else \ 1208 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \ 1209 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l) 1210 1211 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 1212 if (test_mode == PCRE8_MODE) \ 1213 a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \ 1214 else if (test_mode == PCRE16_MODE) \ 1215 a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \ 1216 else \ 1217 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e) 1218 1219 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 
1220 if (test_mode == PCRE8_MODE) \ 1221 a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \ 1222 else if (test_mode == PCRE16_MODE) \ 1223 a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \ 1224 else \ 1225 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e) 1226 1227 #define PCRE2_SUBSTRING_FREE(a) \ 1228 if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \ 1229 else if (test_mode == PCRE16_MODE) \ 1230 pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \ 1231 else pcre2_substring_free_32((PCRE2_UCHAR32 *)a) 1232 1233 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 1234 if (test_mode == PCRE8_MODE) \ 1235 a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \ 1236 else if (test_mode == PCRE16_MODE) \ 1237 a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \ 1238 else \ 1239 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e) 1240 1241 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 1242 if (test_mode == PCRE8_MODE) \ 1243 a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \ 1244 else if (test_mode == PCRE16_MODE) \ 1245 a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \ 1246 else \ 1247 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e) 1248 1249 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 1250 if (test_mode == PCRE8_MODE) \ 1251 a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \ 1252 else if (test_mode == PCRE16_MODE) \ 1253 a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \ 1254 else \ 1255 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d) 1256 1257 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 1258 if (test_mode == PCRE8_MODE) \ 1259 a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \ 1260 else if (test_mode == PCRE16_MODE) \ 1261 a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \ 1262 else \ 1263 a = 
pcre2_substring_length_bynumber_32(G(b,32),c,d) 1264 1265 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 1266 if (test_mode == PCRE8_MODE) \ 1267 a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \ 1268 else if (test_mode == PCRE16_MODE) \ 1269 a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \ 1270 else \ 1271 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d) 1272 1273 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 1274 if (test_mode == PCRE8_MODE) \ 1275 pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \ 1276 else if (test_mode == PCRE16_MODE) \ 1277 pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \ 1278 else \ 1279 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a) 1280 1281 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 1282 if (test_mode == PCRE8_MODE) \ 1283 a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \ 1284 else if (test_mode == PCRE16_MODE) \ 1285 a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \ 1286 else \ 1287 a = pcre2_substring_number_from_name_32(G(b,32),G(c,32)) 1288 1289 #define PTR(x) ( \ 1290 (test_mode == PCRE8_MODE)? (void *)G(x,8) : \ 1291 (test_mode == PCRE16_MODE)? (void *)G(x,16) : \ 1292 (void *)G(x,32)) 1293 1294 #define SETFLD(x,y,z) \ 1295 if (test_mode == PCRE8_MODE) G(x,8)->y = z; \ 1296 else if (test_mode == PCRE16_MODE) G(x,16)->y = z; \ 1297 else G(x,32)->y = z 1298 1299 #define SETFLDVEC(x,y,v,z) \ 1300 if (test_mode == PCRE8_MODE) G(x,8)->y[v] = z; \ 1301 else if (test_mode == PCRE16_MODE) G(x,16)->y[v] = z; \ 1302 else G(x,32)->y[v] = z 1303 1304 #define SETOP(x,y,z) \ 1305 if (test_mode == PCRE8_MODE) G(x,8) z y; \ 1306 else if (test_mode == PCRE16_MODE) G(x,16) z y; \ 1307 else G(x,32) z y 1308 1309 #define SETCASTPTR(x,y) \ 1310 if (test_mode == PCRE8_MODE) \ 1311 G(x,8) = (uint8_t *)(y); \ 1312 else if (test_mode == PCRE16_MODE) \ 1313 G(x,16) = (uint16_t *)(y); \ 1314 else \ 1315 G(x,32) = (uint32_t *)(y) 1316 1317 #define STRLEN(p) ((test_mode == PCRE8_MODE)? 
((int)strlen((char *)p)) : \ 1318 (test_mode == PCRE16_MODE)? ((int)strlen16((PCRE2_SPTR16)p)) : \ 1319 ((int)strlen32((PCRE2_SPTR32)p))) 1320 1321 #define SUB1(a,b) \ 1322 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8)); \ 1323 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16)); \ 1324 else G(a,32)(G(b,32)) 1325 1326 #define SUB2(a,b,c) \ 1327 if (test_mode == PCRE8_MODE) G(a,8)(G(b,8),G(c,8)); \ 1328 else if (test_mode == PCRE16_MODE) G(a,16)(G(b,16),G(c,16)); \ 1329 else G(a,32)(G(b,32),G(c,32)) 1330 1331 #define TEST(x,r,y) ( \ 1332 (test_mode == PCRE8_MODE && G(x,8) r (y)) || \ 1333 (test_mode == PCRE16_MODE && G(x,16) r (y)) || \ 1334 (test_mode == PCRE32_MODE && G(x,32) r (y))) 1335 1336 #define TESTFLD(x,f,r,y) ( \ 1337 (test_mode == PCRE8_MODE && G(x,8)->f r (y)) || \ 1338 (test_mode == PCRE16_MODE && G(x,16)->f r (y)) || \ 1339 (test_mode == PCRE32_MODE && G(x,32)->f r (y))) 1340 1341 1342 1343 /* ----- Two out of three modes are supported ----- */ 1344 1345 #else 1346 1347 /* We can use some macro trickery to make a single set of definitions work in 1348 the three different cases. */ 1349 1350 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */ 1351 1352 #if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16) 1353 #define BITONE 32 1354 #define BITTWO 16 1355 1356 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */ 1357 1358 #elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8) 1359 #define BITONE 32 1360 #define BITTWO 8 1361 1362 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */ 1363 1364 #else 1365 #define BITONE 16 1366 #define BITTWO 8 1367 #endif 1368 1369 1370 /* ----- Common macros for two-mode cases ----- */ 1371 1372 #define CASTFLD(t,a,b) \ 1373 ((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \ 1374 (t)(G(a,BITTWO)->b)) 1375 1376 #define CASTVAR(t,x) ( \ 1377 (test_mode == G(G(PCRE,BITONE),_MODE))? 
\ 1378 (t)G(x,BITONE) : (t)G(x,BITTWO)) 1379 1380 #define CODE_UNIT(a,b) ( \ 1381 (test_mode == G(G(PCRE,BITONE),_MODE))? \ 1382 (uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \ 1383 (uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b])) 1384 1385 #define DATCTXCPY(a,b) \ 1386 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1387 memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \ 1388 else \ 1389 memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO))) 1390 1391 #define FLD(a,b) \ 1392 ((test_mode == G(G(PCRE,BITONE),_MODE))? G(a,BITONE)->b : G(a,BITTWO)->b) 1393 1394 #define PATCTXCPY(a,b) \ 1395 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1396 memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \ 1397 else \ 1398 memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO))) 1399 1400 #define PCHARS(lv, p, offset, len, utf, f) \ 1401 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1402 lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ 1403 else \ 1404 lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) 1405 1406 #define PCHARSV(p, offset, len, utf, f) \ 1407 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1408 (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ 1409 else \ 1410 (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) 1411 1412 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 1413 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1414 a = G(pcre2_callout_enumerate,BITONE)(G(compiled_code,BITONE), \ 1415 (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \ 1416 else \ 1417 a = G(pcre2_callout_enumerate,BITTWO)(G(compiled_code,BITTWO), \ 1418 (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c) 1419 1420 #define PCRE2_CODE_COPY_FROM_VOID(a,b) \ 1421 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1422 G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \ 1423 else \ 1424 G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b) 
1425 1426 #define PCRE2_CODE_COPY_TO_VOID(a,b) \ 1427 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1428 a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \ 1429 else \ 1430 a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO)) 1431 1432 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 1433 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1434 G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \ 1435 else \ 1436 G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g) 1437 1438 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 1439 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1440 a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 1441 G(g,BITONE),h,i,j); \ 1442 else \ 1443 a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1444 G(g,BITTWO),h,i,j) 1445 1446 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 1447 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1448 r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size)); \ 1449 else \ 1450 r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size)) 1451 1452 #define PCRE2_GET_OVECTOR_COUNT(a,b) \ 1453 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1454 a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \ 1455 else \ 1456 a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO)) 1457 1458 #define PCRE2_GET_STARTCHAR(a,b) \ 1459 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1460 a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \ 1461 else \ 1462 a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO)) 1463 1464 #define PCRE2_JIT_COMPILE(r,a,b) \ 1465 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1466 r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \ 1467 else \ 1468 r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b) 1469 1470 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \ 1471 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1472 G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \ 1473 else \ 1474 G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO)) 1475 1476 #define 
PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1477 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1478 a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 1479 G(g,BITONE),h); \ 1480 else \ 1481 a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1482 G(g,BITTWO),h) 1483 1484 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1485 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1486 a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \ 1487 else \ 1488 a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d); \ 1489 1490 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1491 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1492 G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \ 1493 else \ 1494 G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c); 1495 1496 #define PCRE2_JIT_STACK_FREE(a) \ 1497 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1498 G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \ 1499 else \ 1500 G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a); 1501 1502 #define PCRE2_MAKETABLES(a) \ 1503 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1504 a = G(pcre2_maketables_,BITONE)(NULL); \ 1505 else \ 1506 a = G(pcre2_maketables_,BITTWO)(NULL) 1507 1508 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1509 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1510 a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 1511 G(g,BITONE),h); \ 1512 else \ 1513 a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1514 G(g,BITTWO),h) 1515 1516 #define PCRE2_MATCH_DATA_CREATE(a,b,c) \ 1517 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1518 G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,c); \ 1519 else \ 1520 G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c) 1521 1522 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1523 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1524 G(a,BITONE) = 
G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \ 1525 else \ 1526 G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c) 1527 1528 #define PCRE2_MATCH_DATA_FREE(a) \ 1529 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1530 G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \ 1531 else \ 1532 G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO)) 1533 1534 #define PCRE2_PATTERN_INFO(a,b,c,d) \ 1535 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1536 a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \ 1537 else \ 1538 a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d) 1539 1540 #define PCRE2_PRINTINT(a) \ 1541 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1542 G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \ 1543 else \ 1544 G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a) 1545 1546 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 1547 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1548 r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \ 1549 else \ 1550 r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO)) 1551 1552 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 1553 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1554 r = G(pcre2_serialize_encode_,BITONE)((G(const pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \ 1555 else \ 1556 r = G(pcre2_serialize_encode_,BITTWO)((G(const pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO)) 1557 1558 #define PCRE2_SERIALIZE_FREE(a) \ 1559 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1560 G(pcre2_serialize_free_,BITONE)(a); \ 1561 else \ 1562 G(pcre2_serialize_free_,BITTWO)(a) 1563 1564 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 1565 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1566 r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \ 1567 else \ 1568 r = G(pcre2_serialize_get_number_of_codes_,BITTWO)(a) 1569 1570 #define PCRE2_SET_CALLOUT(a,b,c) \ 1571 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1572 
G(pcre2_set_callout_,BITONE)(G(a,BITONE), \ 1573 (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \ 1574 else \ 1575 G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \ 1576 (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c); 1577 1578 #define PCRE2_SET_CHARACTER_TABLES(a,b) \ 1579 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1580 G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \ 1581 else \ 1582 G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b) 1583 1584 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 1585 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1586 G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \ 1587 else \ 1588 G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c) 1589 1590 #define PCRE2_SET_MATCH_LIMIT(a,b) \ 1591 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1592 G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \ 1593 else \ 1594 G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b) 1595 1596 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \ 1597 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1598 G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \ 1599 else \ 1600 G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b) 1601 1602 #define PCRE2_SET_OFFSET_LIMIT(a,b) \ 1603 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1604 G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \ 1605 else \ 1606 G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b) 1607 1608 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \ 1609 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1610 G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \ 1611 else \ 1612 G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b) 1613 1614 #define PCRE2_SET_RECURSION_LIMIT(a,b) \ 1615 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1616 G(pcre2_set_recursion_limit_,BITONE)(G(a,BITONE),b); \ 1617 else \ 1618 G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b) 1619 1620 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 1621 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1622 a = 
G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \ 1623 G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \ 1624 (G(PCRE2_UCHAR,BITONE) *)k,l); \ 1625 else \ 1626 a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \ 1627 G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \ 1628 (G(PCRE2_UCHAR,BITTWO) *)k,l) 1629 1630 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 1631 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1632 a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ 1633 (G(PCRE2_UCHAR,BITONE) *)d,e); \ 1634 else \ 1635 a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ 1636 (G(PCRE2_UCHAR,BITTWO) *)d,e) 1637 1638 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 1639 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1640 a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\ 1641 (G(PCRE2_UCHAR,BITONE) *)d,e); \ 1642 else \ 1643 a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\ 1644 (G(PCRE2_UCHAR,BITTWO) *)d,e) 1645 1646 #define PCRE2_SUBSTRING_FREE(a) \ 1647 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1648 G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \ 1649 else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a) 1650 1651 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 1652 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1653 a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ 1654 (G(PCRE2_UCHAR,BITONE) **)d,e); \ 1655 else \ 1656 a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\ 1657 (G(PCRE2_UCHAR,BITTWO) **)d,e) 1658 1659 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 1660 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1661 a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\ 1662 (G(PCRE2_UCHAR,BITONE) **)d,e); \ 1663 else \ 1664 a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\ 1665 (G(PCRE2_UCHAR,BITTWO) **)d,e) 1666 1667 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 1668 if (test_mode 
== G(G(PCRE,BITONE),_MODE)) \ 1669 a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \ 1670 else \ 1671 a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d) 1672 1673 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 1674 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1675 a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \ 1676 else \ 1677 a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d) 1678 1679 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 1680 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1681 a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \ 1682 (G(PCRE2_UCHAR,BITONE) ***)c,d); \ 1683 else \ 1684 a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \ 1685 (G(PCRE2_UCHAR,BITTWO) ***)c,d) 1686 1687 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 1688 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1689 G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a); \ 1690 else \ 1691 G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a) 1692 1693 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 1694 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1695 a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \ 1696 else \ 1697 a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO)) 1698 1699 #define PTR(x) ( \ 1700 (test_mode == G(G(PCRE,BITONE),_MODE))? 
(void *)G(x,BITONE) : \ 1701 (void *)G(x,BITTWO)) 1702 1703 #define SETFLD(x,y,z) \ 1704 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y = z; \ 1705 else G(x,BITTWO)->y = z 1706 1707 #define SETFLDVEC(x,y,v,z) \ 1708 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE)->y[v] = z; \ 1709 else G(x,BITTWO)->y[v] = z 1710 1711 #define SETOP(x,y,z) \ 1712 if (test_mode == G(G(PCRE,BITONE),_MODE)) G(x,BITONE) z y; \ 1713 else G(x,BITTWO) z y 1714 1715 #define SETCASTPTR(x,y) \ 1716 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1717 G(x,BITONE) = (G(G(uint,BITONE),_t) *)(y); \ 1718 else \ 1719 G(x,BITTWO) = (G(G(uint,BITTWO),_t) *)(y) 1720 1721 #define STRLEN(p) ((test_mode == G(G(PCRE,BITONE),_MODE))? \ 1722 G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))p) : \ 1723 G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))p)) 1724 1725 #define SUB1(a,b) \ 1726 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1727 G(a,BITONE)(G(b,BITONE)); \ 1728 else \ 1729 G(a,BITTWO)(G(b,BITTWO)) 1730 1731 #define SUB2(a,b,c) \ 1732 if (test_mode == G(G(PCRE,BITONE),_MODE)) \ 1733 G(a,BITONE))(G(b,BITONE),G(c,BITONE)); \ 1734 else \ 1735 G(a,BITTWO))(G(b,BITTWO),G(c,BITTWO)) 1736 1737 #define TEST(x,r,y) ( \ 1738 (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE) r (y)) || \ 1739 (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO) r (y))) 1740 1741 #define TESTFLD(x,f,r,y) ( \ 1742 (test_mode == G(G(PCRE,BITONE),_MODE) && G(x,BITONE)->f r (y)) || \ 1743 (test_mode == G(G(PCRE,BITTWO),_MODE) && G(x,BITTWO)->f r (y))) 1744 1745 1746 #endif /* Two out of three modes */ 1747 1748 /* ----- End of cases where more than one mode is supported ----- */ 1749 1750 1751 /* ----- Only 8-bit mode is supported ----- */ 1752 1753 #elif defined SUPPORT_PCRE2_8 1754 #define CASTFLD(t,a,b) (t)(G(a,8)->b) 1755 #define CASTVAR(t,x) (t)G(x,8) 1756 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b]) 1757 #define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)) 1758 #define FLD(a,b) G(a,8)->b 1759 #define 
PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)) 1760 #define PCHARS(lv, p, offset, len, utf, f) \ 1761 lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 1762 #define PCHARSV(p, offset, len, utf, f) \ 1763 (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) 1764 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 1765 a = pcre2_callout_enumerate_8(compiled_code8, \ 1766 (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c) 1767 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b) 1768 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8)) 1769 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 1770 G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g) 1771 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 1772 a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j) 1773 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 1774 r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)) 1775 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8)) 1776 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8)) 1777 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b) 1778 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8)) 1779 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1780 a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) 1781 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1782 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); 1783 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1784 pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); 1785 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); 1786 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL) 1787 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1788 a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h) 1789 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c) 1790 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1791 G(a,8) = 
pcre2_match_data_create_from_pattern_8(G(b,8),c) 1792 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8)) 1793 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d) 1794 #define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a) 1795 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 1796 r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)) 1797 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 1798 r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)) 1799 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a) 1800 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 1801 r = pcre2_serialize_get_number_of_codes_8(a) 1802 #define PCRE2_SET_CALLOUT(a,b,c) \ 1803 pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c) 1804 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b) 1805 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 1806 pcre2_set_compile_recursion_guard_8(G(a,8),b,c) 1807 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) 1808 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b) 1809 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b) 1810 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) 1811 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) 1812 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 1813 a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \ 1814 (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l) 1815 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 1816 a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e) 1817 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 1818 a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e) 1819 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a) 1820 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 1821 a = 
pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e) 1822 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 1823 a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e) 1824 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 1825 a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d) 1826 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 1827 a = pcre2_substring_length_bynumber_8(G(b,8),c,d) 1828 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 1829 a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d) 1830 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 1831 pcre2_substring_list_free_8((PCRE2_SPTR8 *)a) 1832 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 1833 a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); 1834 #define PTR(x) (void *)G(x,8) 1835 #define SETFLD(x,y,z) G(x,8)->y = z 1836 #define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z 1837 #define SETOP(x,y,z) G(x,8) z y 1838 #define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y) 1839 #define STRLEN(p) (int)strlen((char *)p) 1840 #define SUB1(a,b) G(a,8)(G(b,8)) 1841 #define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8)) 1842 #define TEST(x,r,y) (G(x,8) r (y)) 1843 #define TESTFLD(x,f,r,y) (G(x,8)->f r (y)) 1844 1845 1846 /* ----- Only 16-bit mode is supported ----- */ 1847 1848 #elif defined SUPPORT_PCRE2_16 1849 #define CASTFLD(t,a,b) (t)(G(a,16)->b) 1850 #define CASTVAR(t,x) (t)G(x,16) 1851 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b]) 1852 #define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)) 1853 #define FLD(a,b) G(a,16)->b 1854 #define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)) 1855 #define PCHARS(lv, p, offset, len, utf, f) \ 1856 lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) 1857 #define PCHARSV(p, offset, len, utf, f) \ 1858 (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) 1859 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 1860 a = pcre2_callout_enumerate_16(compiled_code16, \ 1861 (int (*)(struct pcre2_callout_enumerate_block_16 *, void 
*))b,c) 1862 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b) 1863 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16)) 1864 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 1865 G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g) 1866 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 1867 a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j) 1868 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 1869 r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size)) 1870 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16)) 1871 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16)) 1872 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b) 1873 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16)) 1874 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1875 a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) 1876 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1877 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); 1878 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1879 pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); 1880 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); 1881 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL) 1882 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1883 a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h) 1884 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c) 1885 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1886 G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c) 1887 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16)) 1888 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d) 1889 #define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a) 1890 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 1891 r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)) 1892 #define 
PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 1893 r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)) 1894 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a) 1895 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 1896 r = pcre2_serialize_get_number_of_codes_16(a) 1897 #define PCRE2_SET_CALLOUT(a,b,c) \ 1898 pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); 1899 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b) 1900 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 1901 pcre2_set_compile_recursion_guard_16(G(a,16),b,c) 1902 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) 1903 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b) 1904 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b) 1905 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) 1906 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) 1907 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 1908 a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \ 1909 (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l) 1910 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 1911 a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e) 1912 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 1913 a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e) 1914 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a) 1915 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 1916 a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e) 1917 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 1918 a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e) 1919 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 1920 a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d) 1921 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 1922 a = 
pcre2_substring_length_bynumber_16(G(b,16),c,d) 1923 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 1924 a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d) 1925 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 1926 pcre2_substring_list_free_16((PCRE2_SPTR16 *)a) 1927 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 1928 a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); 1929 #define PTR(x) (void *)G(x,16) 1930 #define SETFLD(x,y,z) G(x,16)->y = z 1931 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z 1932 #define SETOP(x,y,z) G(x,16) z y 1933 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y) 1934 #define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p) 1935 #define SUB1(a,b) G(a,16)(G(b,16)) 1936 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16)) 1937 #define TEST(x,r,y) (G(x,16) r (y)) 1938 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y)) 1939 1940 1941 /* ----- Only 32-bit mode is supported ----- */ 1942 1943 #elif defined SUPPORT_PCRE2_32 1944 #define CASTFLD(t,a,b) (t)(G(a,32)->b) 1945 #define CASTVAR(t,x) (t)G(x,32) 1946 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b]) 1947 #define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) 1948 #define FLD(a,b) G(a,32)->b 1949 #define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) 1950 #define PCHARS(lv, p, offset, len, utf, f) \ 1951 lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) 1952 #define PCHARSV(p, offset, len, utf, f) \ 1953 (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) 1954 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ 1955 a = pcre2_callout_enumerate_32(compiled_code32, \ 1956 (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) 1957 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b) 1958 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32)) 1959 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \ 1960 G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g) 1961 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \ 1962 a = 
pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j) 1963 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \ 1964 r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size)) 1965 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32)) 1966 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32)) 1967 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b) 1968 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32)) 1969 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \ 1970 a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 1971 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \ 1972 a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); 1973 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \ 1974 pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); 1975 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); 1976 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL) 1977 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \ 1978 a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h) 1979 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c) 1980 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \ 1981 G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c) 1982 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32)) 1983 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d) 1984 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a) 1985 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ 1986 r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)) 1987 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ 1988 r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)) 1989 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a) 1990 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \ 1991 r = pcre2_serialize_get_number_of_codes_32(a) 1992 #define PCRE2_SET_CALLOUT(a,b,c) \ 1993 
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c); 1994 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b) 1995 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ 1996 pcre2_set_compile_recursion_guard_32(G(a,32),b,c) 1997 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) 1998 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b) 1999 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b) 2000 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) 2001 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) 2002 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \ 2003 a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \ 2004 (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l) 2005 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ 2006 a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e) 2007 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ 2008 a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e); 2009 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a) 2010 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \ 2011 a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e) 2012 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \ 2013 a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e) 2014 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \ 2015 a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d) 2016 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \ 2017 a = pcre2_substring_length_bynumber_32(G(b,32),c,d) 2018 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \ 2019 a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d) 2020 #define PCRE2_SUBSTRING_LIST_FREE(a) \ 2021 pcre2_substring_list_free_32((PCRE2_SPTR32 *)a) 2022 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \ 2023 a = 
pcre2_substring_number_from_name_32(G(b,32),G(c,32)); 2024 #define PTR(x) (void *)G(x,32) 2025 #define SETFLD(x,y,z) G(x,32)->y = z 2026 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z 2027 #define SETOP(x,y,z) G(x,32) z y 2028 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y) 2029 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p) 2030 #define SUB1(a,b) G(a,32)(G(b,32)) 2031 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32)) 2032 #define TEST(x,r,y) (G(x,32) r (y)) 2033 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y)) 2034 2035 #endif 2036 2037 /* ----- End of mode-specific function call macros ----- */ 2038 2039 2040 2041 2042 /************************************************* 2043 * Alternate character tables * 2044 *************************************************/ 2045 2046 /* By default, the "tables" pointer in the compile context when calling 2047 pcre2_compile() is not set (= NULL), thereby using the default tables of the 2048 library. However, the tables modifier can be used to select alternate sets of 2049 tables, for different kinds of testing. Note that the locale modifier also 2050 adjusts the tables. */ 2051 2052 /* This is the set of tables distributed as default with PCRE2. It recognizes 2053 only ASCII characters. */ 2054 2055 static const uint8_t tables1[] = { 2056 2057 /* This table is a lower casing table. 
*/ 2058 2059 0, 1, 2, 3, 4, 5, 6, 7, 2060 8, 9, 10, 11, 12, 13, 14, 15, 2061 16, 17, 18, 19, 20, 21, 22, 23, 2062 24, 25, 26, 27, 28, 29, 30, 31, 2063 32, 33, 34, 35, 36, 37, 38, 39, 2064 40, 41, 42, 43, 44, 45, 46, 47, 2065 48, 49, 50, 51, 52, 53, 54, 55, 2066 56, 57, 58, 59, 60, 61, 62, 63, 2067 64, 97, 98, 99,100,101,102,103, 2068 104,105,106,107,108,109,110,111, 2069 112,113,114,115,116,117,118,119, 2070 120,121,122, 91, 92, 93, 94, 95, 2071 96, 97, 98, 99,100,101,102,103, 2072 104,105,106,107,108,109,110,111, 2073 112,113,114,115,116,117,118,119, 2074 120,121,122,123,124,125,126,127, 2075 128,129,130,131,132,133,134,135, 2076 136,137,138,139,140,141,142,143, 2077 144,145,146,147,148,149,150,151, 2078 152,153,154,155,156,157,158,159, 2079 160,161,162,163,164,165,166,167, 2080 168,169,170,171,172,173,174,175, 2081 176,177,178,179,180,181,182,183, 2082 184,185,186,187,188,189,190,191, 2083 192,193,194,195,196,197,198,199, 2084 200,201,202,203,204,205,206,207, 2085 208,209,210,211,212,213,214,215, 2086 216,217,218,219,220,221,222,223, 2087 224,225,226,227,228,229,230,231, 2088 232,233,234,235,236,237,238,239, 2089 240,241,242,243,244,245,246,247, 2090 248,249,250,251,252,253,254,255, 2091 2092 /* This table is a case flipping table. 
*/ 2093 2094 0, 1, 2, 3, 4, 5, 6, 7, 2095 8, 9, 10, 11, 12, 13, 14, 15, 2096 16, 17, 18, 19, 20, 21, 22, 23, 2097 24, 25, 26, 27, 28, 29, 30, 31, 2098 32, 33, 34, 35, 36, 37, 38, 39, 2099 40, 41, 42, 43, 44, 45, 46, 47, 2100 48, 49, 50, 51, 52, 53, 54, 55, 2101 56, 57, 58, 59, 60, 61, 62, 63, 2102 64, 97, 98, 99,100,101,102,103, 2103 104,105,106,107,108,109,110,111, 2104 112,113,114,115,116,117,118,119, 2105 120,121,122, 91, 92, 93, 94, 95, 2106 96, 65, 66, 67, 68, 69, 70, 71, 2107 72, 73, 74, 75, 76, 77, 78, 79, 2108 80, 81, 82, 83, 84, 85, 86, 87, 2109 88, 89, 90,123,124,125,126,127, 2110 128,129,130,131,132,133,134,135, 2111 136,137,138,139,140,141,142,143, 2112 144,145,146,147,148,149,150,151, 2113 152,153,154,155,156,157,158,159, 2114 160,161,162,163,164,165,166,167, 2115 168,169,170,171,172,173,174,175, 2116 176,177,178,179,180,181,182,183, 2117 184,185,186,187,188,189,190,191, 2118 192,193,194,195,196,197,198,199, 2119 200,201,202,203,204,205,206,207, 2120 208,209,210,211,212,213,214,215, 2121 216,217,218,219,220,221,222,223, 2122 224,225,226,227,228,229,230,231, 2123 232,233,234,235,236,237,238,239, 2124 240,241,242,243,244,245,246,247, 2125 248,249,250,251,252,253,254,255, 2126 2127 /* This table contains bit maps for various character classes. Each map is 32 2128 bytes long and the bits run from the least significant end of each byte. The 2129 classes that have their own maps are: space, xdigit, digit, upper, lower, word, 2130 graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ 2131 2132 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 2133 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2134 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2135 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2136 2137 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 2138 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, 2139 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2140 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2141 2142 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 2143 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2144 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2145 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2146 2147 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2148 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, 2149 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2150 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2151 2152 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2153 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, 2154 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2155 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2156 2157 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 2158 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, 2159 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2160 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2161 2162 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, 2163 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 2164 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2165 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2166 2167 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, 2168 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 2169 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2170 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2171 2172 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, 2173 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, 2174 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2175 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2176 2177 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, 2178 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, 2179 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2180 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 2181 2182 /* This table identifies various classes of character by individual bits: 2183 0x01 white space 
character 2184 0x02 letter 2185 0x04 decimal digit 2186 0x08 hexadecimal digit 2187 0x10 alphanumeric or '_' 2188 0x80 regular expression metacharacter or binary zero 2189 */ 2190 2191 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 2192 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ 2193 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 2194 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 2195 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ 2196 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ 2197 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 2198 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ 2199 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 2200 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 2201 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ 2202 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ 2203 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 2204 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 2205 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ 2206 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ 2207 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 2208 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 2209 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 2210 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ 2211 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ 2212 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ 2213 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ 2214 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 2215 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ 2216 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ 2217 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ 2218 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ 2219 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ 2220 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ 2221 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 
240-247 */ 2222 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ 2223 2224 /* This is a set of tables that came originally from a Windows user. It seems 2225 to be at least an approximation of ISO 8859. In particular, there are 2226 characters greater than 128 that are marked as spaces, letters, etc. */ 2227 2228 static const uint8_t tables2[] = { 2229 0,1,2,3,4,5,6,7, 2230 8,9,10,11,12,13,14,15, 2231 16,17,18,19,20,21,22,23, 2232 24,25,26,27,28,29,30,31, 2233 32,33,34,35,36,37,38,39, 2234 40,41,42,43,44,45,46,47, 2235 48,49,50,51,52,53,54,55, 2236 56,57,58,59,60,61,62,63, 2237 64,97,98,99,100,101,102,103, 2238 104,105,106,107,108,109,110,111, 2239 112,113,114,115,116,117,118,119, 2240 120,121,122,91,92,93,94,95, 2241 96,97,98,99,100,101,102,103, 2242 104,105,106,107,108,109,110,111, 2243 112,113,114,115,116,117,118,119, 2244 120,121,122,123,124,125,126,127, 2245 128,129,130,131,132,133,134,135, 2246 136,137,138,139,140,141,142,143, 2247 144,145,146,147,148,149,150,151, 2248 152,153,154,155,156,157,158,159, 2249 160,161,162,163,164,165,166,167, 2250 168,169,170,171,172,173,174,175, 2251 176,177,178,179,180,181,182,183, 2252 184,185,186,187,188,189,190,191, 2253 224,225,226,227,228,229,230,231, 2254 232,233,234,235,236,237,238,239, 2255 240,241,242,243,244,245,246,215, 2256 248,249,250,251,252,253,254,223, 2257 224,225,226,227,228,229,230,231, 2258 232,233,234,235,236,237,238,239, 2259 240,241,242,243,244,245,246,247, 2260 248,249,250,251,252,253,254,255, 2261 0,1,2,3,4,5,6,7, 2262 8,9,10,11,12,13,14,15, 2263 16,17,18,19,20,21,22,23, 2264 24,25,26,27,28,29,30,31, 2265 32,33,34,35,36,37,38,39, 2266 40,41,42,43,44,45,46,47, 2267 48,49,50,51,52,53,54,55, 2268 56,57,58,59,60,61,62,63, 2269 64,97,98,99,100,101,102,103, 2270 104,105,106,107,108,109,110,111, 2271 112,113,114,115,116,117,118,119, 2272 120,121,122,91,92,93,94,95, 2273 96,65,66,67,68,69,70,71, 2274 72,73,74,75,76,77,78,79, 2275 80,81,82,83,84,85,86,87, 2276 88,89,90,123,124,125,126,127, 2277 
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Next 320 entries; presumably the ten 32-byte character class bit maps, in
the same order as in tables1 - TODO confirm. */

0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,

0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,

0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,

0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,

0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,

0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,

0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,

0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,

0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,

255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Final 256 entries; presumably the per-character type flags, as in tables1. */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16, 2345 0,26,26,26,26,26,26,18, 2346 18,18,18,18,18,18,18,18, 2347 18,18,18,18,18,18,18,18, 2348 18,18,18,128,128,0,0,0, 2349 0,0,0,0,0,1,0,0, 2350 0,0,0,0,0,0,0,0, 2351 0,0,0,0,0,0,0,0, 2352 0,0,0,0,0,0,0,0, 2353 1,0,0,0,0,0,0,0, 2354 0,0,18,0,0,0,0,0, 2355 0,0,20,20,0,18,0,0, 2356 0,20,18,0,0,0,0,0, 2357 18,18,18,18,18,18,18,18, 2358 18,18,18,18,18,18,18,18, 2359 18,18,18,18,18,18,18,0, 2360 18,18,18,18,18,18,18,18, 2361 18,18,18,18,18,18,18,18, 2362 18,18,18,18,18,18,18,18, 2363 18,18,18,18,18,18,18,0, 2364 18,18,18,18,18,18,18,18 2365 }; 2366 2367 2368 2369 /************************************************* 2370 * Local memory functions * 2371 *************************************************/ 2372 2373 /* Alternative memory functions, to test functionality. */ 2374 2375 static void *my_malloc(size_t size, void *data) 2376 { 2377 void *block = malloc(size); 2378 (void)data; 2379 if (show_memory) 2380 fprintf(outfile, "malloc %3d %p\n", (int)size, block); 2381 return block; 2382 } 2383 2384 static void my_free(void *block, void *data) 2385 { 2386 (void)data; 2387 if (show_memory) 2388 fprintf(outfile, "free %p\n", block); 2389 free(block); 2390 } 2391 2392 /* For recursion malloc/free, to test stacking calls */ 2393 2394 #ifdef HEAP_MATCH_RECURSE 2395 static void *my_stack_malloc(size_t size, void *data) 2396 { 2397 void *block = malloc(size); 2398 (void)data; 2399 if (show_memory) 2400 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block); 2401 return block; 2402 } 2403 2404 static void my_stack_free(void *block, void *data) 2405 { 2406 (void)data; 2407 if (show_memory) 2408 fprintf(outfile, "stack_free %p\n", block); 2409 free(block); 2410 } 2411 #endif /* HEAP_MATCH_RECURSE */ 2412 2413 2414 /************************************************* 2415 * Callback function for stack guard * 2416 *************************************************/ 2417 2418 /* This is set up to be called from pcre2_compile() when the stackguard=n 2419 
modifier sets a value greater than zero. The test we do is whether the 2420 parenthesis nesting depth is greater than the value set by the modifier. 2421 2422 Argument: the current parenthesis nesting depth 2423 Returns: non-zero to kill the compilation 2424 */ 2425 2426 static int 2427 stack_guard(uint32_t depth, void *user_data) 2428 { 2429 (void)user_data; 2430 return depth > pat_patctl.stackguard_test; 2431 } 2432 2433 2434 /************************************************* 2435 * JIT memory callback * 2436 *************************************************/ 2437 2438 static PCRE2_JIT_STACK* 2439 jit_callback(void *arg) 2440 { 2441 jit_was_used = TRUE; 2442 return (PCRE2_JIT_STACK *)arg; 2443 } 2444 2445 2446 /************************************************* 2447 * Convert UTF-8 character to code point * 2448 *************************************************/ 2449 2450 /* This function reads one or more bytes that represent a UTF-8 character, 2451 and returns the codepoint of that character. Note that the function supports 2452 the original UTF-8 definition of RFC 2279, allowing for values in the range 0 2453 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate 2454 codepoints greater than 0x10ffff which are useful for testing PCRE2's error 2455 checking, and also for generating 32-bit non-UTF data values above the UTF 2456 limit. 
2457 2458 Argument: 2459 utf8bytes a pointer to the byte vector 2460 vptr a pointer to an int to receive the value 2461 2462 Returns: > 0 => the number of bytes consumed 2463 -6 to 0 => malformed UTF-8 character at offset = (-return) 2464 */ 2465 2466 static int 2467 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr) 2468 { 2469 uint32_t c = *utf8bytes++; 2470 uint32_t d = c; 2471 int i, j, s; 2472 2473 for (i = -1; i < 6; i++) /* i is number of additional bytes */ 2474 { 2475 if ((d & 0x80) == 0) break; 2476 d <<= 1; 2477 } 2478 2479 if (i == -1) { *vptr = c; return 1; } /* ascii character */ 2480 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ 2481 2482 /* i now has a value in the range 1-5 */ 2483 2484 s = 6*i; 2485 d = (c & utf8_table3[i]) << s; 2486 2487 for (j = 0; j < i; j++) 2488 { 2489 c = *utf8bytes++; 2490 if ((c & 0xc0) != 0x80) return -(j+1); 2491 s -= 6; 2492 d |= (c & 0x3f) << s; 2493 } 2494 2495 /* Check that encoding was the correct unique one */ 2496 2497 for (j = 0; j < utf8_table1_size; j++) 2498 if (d <= (uint32_t)utf8_table1[j]) break; 2499 if (j != i) return -(i+1); 2500 2501 /* Valid value */ 2502 2503 *vptr = d; 2504 return i+1; 2505 } 2506 2507 2508 2509 /************************************************* 2510 * Print one character * 2511 *************************************************/ 2512 2513 /* Print a single character either literally, or as a hex escape, and count how 2514 many printed characters are used. 
2515 2516 Arguments: 2517 c the character 2518 utf TRUE in UTF mode 2519 f the FILE to print to, or NULL just to count characters 2520 2521 Returns: number of characters written 2522 */ 2523 2524 static int 2525 pchar(uint32_t c, BOOL utf, FILE *f) 2526 { 2527 int n = 0; 2528 if (PRINTOK(c)) 2529 { 2530 if (f != NULL) fprintf(f, "%c", c); 2531 return 1; 2532 } 2533 2534 if (c < 0x100) 2535 { 2536 if (utf) 2537 { 2538 if (f != NULL) fprintf(f, "\\x{%02x}", c); 2539 return 6; 2540 } 2541 else 2542 { 2543 if (f != NULL) fprintf(f, "\\x%02x", c); 2544 return 4; 2545 } 2546 } 2547 2548 if (f != NULL) n = fprintf(f, "\\x{%02x}", c); 2549 return n >= 0 ? n : 0; 2550 } 2551 2552 2553 2554 #ifdef SUPPORT_PCRE2_16 2555 /************************************************* 2556 * Find length of 0-terminated 16-bit string * 2557 *************************************************/ 2558 2559 static size_t strlen16(PCRE2_SPTR16 p) 2560 { 2561 PCRE2_SPTR16 pp = p; 2562 while (*pp != 0) pp++; 2563 return (int)(pp - p); 2564 } 2565 #endif /* SUPPORT_PCRE2_16 */ 2566 2567 2568 2569 #ifdef SUPPORT_PCRE2_32 2570 /************************************************* 2571 * Find length of 0-terminated 32-bit string * 2572 *************************************************/ 2573 2574 static size_t strlen32(PCRE2_SPTR32 p) 2575 { 2576 PCRE2_SPTR32 pp = p; 2577 while (*pp != 0) pp++; 2578 return (int)(pp - p); 2579 } 2580 #endif /* SUPPORT_PCRE2_32 */ 2581 2582 2583 #ifdef SUPPORT_PCRE2_8 2584 /************************************************* 2585 * Print 8-bit character string * 2586 *************************************************/ 2587 2588 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. 2589 For printing *MARK strings, a negative length is given. If handed a NULL file, 2590 just counts chars without printing (because pchar() does that). 
*/ 2591 2592 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f) 2593 { 2594 uint32_t c = 0; 2595 int yield = 0; 2596 2597 if (length < 0) length = p[-1]; 2598 while (length-- > 0) 2599 { 2600 if (utf) 2601 { 2602 int rc = utf82ord(p, &c); 2603 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */ 2604 { 2605 length -= rc - 1; 2606 p += rc; 2607 yield += pchar(c, utf, f); 2608 continue; 2609 } 2610 } 2611 c = *p++; 2612 yield += pchar(c, utf, f); 2613 } 2614 2615 return yield; 2616 } 2617 #endif 2618 2619 2620 #ifdef SUPPORT_PCRE2_16 2621 /************************************************* 2622 * Print 16-bit character string * 2623 *************************************************/ 2624 2625 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed. 2626 For printing *MARK strings, a negative length is given. If handed a NULL file, 2627 just counts chars without printing. */ 2628 2629 static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f) 2630 { 2631 int yield = 0; 2632 if (length < 0) length = p[-1]; 2633 while (length-- > 0) 2634 { 2635 uint32_t c = *p++ & 0xffff; 2636 if (utf && c >= 0xD800 && c < 0xDC00 && length > 0) 2637 { 2638 int d = *p & 0xffff; 2639 if (d >= 0xDC00 && d <= 0xDFFF) 2640 { 2641 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000; 2642 length--; 2643 p++; 2644 } 2645 } 2646 yield += pchar(c, utf, f); 2647 } 2648 return yield; 2649 } 2650 #endif /* SUPPORT_PCRE2_16 */ 2651 2652 2653 2654 #ifdef SUPPORT_PCRE2_32 2655 /************************************************* 2656 * Print 32-bit character string * 2657 *************************************************/ 2658 2659 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed. 2660 For printing *MARK strings, a negative length is given.If handed a NULL file, 2661 just counts chars without printing. 
*/ 2662 2663 static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f) 2664 { 2665 int yield = 0; 2666 (void)(utf); /* Avoid compiler warning */ 2667 if (length < 0) length = p[-1]; 2668 while (length-- > 0) 2669 { 2670 uint32_t c = *p++; 2671 yield += pchar(c, utf, f); 2672 } 2673 return yield; 2674 } 2675 #endif /* SUPPORT_PCRE2_32 */ 2676 2677 2678 2679 2680 #ifdef SUPPORT_PCRE2_8 2681 /************************************************* 2682 * Convert character value to UTF-8 * 2683 *************************************************/ 2684 2685 /* This function takes an integer value in the range 0 - 0x7fffffff 2686 and encodes it as a UTF-8 character in 0 to 6 bytes. 2687 2688 Arguments: 2689 cvalue the character value 2690 utf8bytes pointer to buffer for result - at least 6 bytes long 2691 2692 Returns: number of characters placed in the buffer 2693 */ 2694 2695 static int 2696 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes) 2697 { 2698 register int i, j; 2699 if (cvalue > 0x7fffffffu) 2700 return -1; 2701 for (i = 0; i < utf8_table1_size; i++) 2702 if (cvalue <= (uint32_t)utf8_table1[i]) break; 2703 utf8bytes += i; 2704 for (j = i; j > 0; j--) 2705 { 2706 *utf8bytes-- = 0x80 | (cvalue & 0x3f); 2707 cvalue >>= 6; 2708 } 2709 *utf8bytes = utf8_table2[i] | cvalue; 2710 return i + 1; 2711 } 2712 #endif /* SUPPORT_PCRE2_8 */ 2713 2714 2715 2716 #ifdef SUPPORT_PCRE2_16 2717 /************************************************* 2718 * Convert pattern to 16-bit * 2719 *************************************************/ 2720 2721 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If 2722 all the input bytes are ASCII, the space needed for a 16-bit string is exactly 2723 double the 8-bit size. Otherwise, the size needed for a 16-bit string is no 2724 more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but 2725 possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in 2726 UTF-16. 
The result is always left in pbuffer16. Impose a minimum size to save 2727 repeated re-sizing. 2728 2729 Note that this function does not object to surrogate values. This is 2730 deliberate; it makes it possible to construct UTF-16 strings that are invalid, 2731 for the purpose of testing that they are correctly faulted. 2732 2733 Arguments: 2734 p points to a byte string 2735 utf non-zero if converting to UTF-16 2736 lenptr points to number of bytes in the string (excluding trailing zero) 2737 2738 Returns: 0 on success, with the length updated to the number of 16-bit 2739 data items used (excluding the trailing zero) 2740 OR -1 if a UTF-8 string is malformed 2741 OR -2 if a value > 0x10ffff is encountered in UTF mode 2742 OR -3 if a value > 0xffff is encountered when not in UTF mode 2743 */ 2744 2745 static PCRE2_SIZE 2746 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr) 2747 { 2748 uint16_t *pp; 2749 PCRE2_SIZE len = *lenptr; 2750 2751 if (pbuffer16_size < 2*len + 2) 2752 { 2753 if (pbuffer16 != NULL) free(pbuffer16); 2754 pbuffer16_size = 2*len + 2; 2755 if (pbuffer16_size < 256) pbuffer16_size = 256; 2756 pbuffer16 = (uint16_t *)malloc(pbuffer16_size); 2757 if (pbuffer16 == NULL) 2758 { 2759 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n", 2760 (unsigned long int)pbuffer16_size); 2761 exit(1); 2762 } 2763 } 2764 2765 pp = pbuffer16; 2766 if (!utf) 2767 { 2768 for (; len > 0; len--) *pp++ = *p++; 2769 } 2770 else while (len > 0) 2771 { 2772 uint32_t c; 2773 int chlen = utf82ord(p, &c); 2774 if (chlen <= 0) return -1; 2775 if (c > 0x10ffff) return -2; 2776 p += chlen; 2777 len -= chlen; 2778 if (c < 0x10000) *pp++ = c; else 2779 { 2780 if (!utf) return -3; 2781 c -= 0x10000; 2782 *pp++ = 0xD800 | (c >> 10); 2783 *pp++ = 0xDC00 | (c & 0x3ff); 2784 } 2785 } 2786 2787 *pp = 0; 2788 *lenptr = pp - pbuffer16; 2789 return 0; 2790 } 2791 #endif 2792 2793 2794 2795 #ifdef SUPPORT_PCRE2_32 2796 /************************************************* 2797 * 
Convert pattern to 32-bit * 2798 *************************************************/ 2799 2800 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If 2801 all the input bytes are ASCII, the space needed for a 32-bit string is exactly 2802 four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no 2803 more than four times, because the number of characters must be less than the 2804 number of bytes. The result is always left in pbuffer32. Impose a minimum size 2805 to save repeated re-sizing. 2806 2807 Note that this function does not object to surrogate values. This is 2808 deliberate; it makes it possible to construct UTF-32 strings that are invalid, 2809 for the purpose of testing that they are correctly faulted. 2810 2811 Arguments: 2812 p points to a byte string 2813 utf true if UTF-8 (to be converted to UTF-32) 2814 lenptr points to number of bytes in the string (excluding trailing zero) 2815 2816 Returns: 0 on success, with the length updated to the number of 32-bit 2817 data items used (excluding the trailing zero) 2818 OR -1 if a UTF-8 string is malformed 2819 OR -2 if a value > 0x10ffff is encountered in UTF mode 2820 */ 2821 2822 static PCRE2_SIZE 2823 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr) 2824 { 2825 uint32_t *pp; 2826 PCRE2_SIZE len = *lenptr; 2827 2828 if (pbuffer32_size < 4*len + 4) 2829 { 2830 if (pbuffer32 != NULL) free(pbuffer32); 2831 pbuffer32_size = 4*len + 4; 2832 if (pbuffer32_size < 256) pbuffer32_size = 256; 2833 pbuffer32 = (uint32_t *)malloc(pbuffer32_size); 2834 if (pbuffer32 == NULL) 2835 { 2836 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n", 2837 (unsigned long int)pbuffer32_size); 2838 exit(1); 2839 } 2840 } 2841 2842 pp = pbuffer32; 2843 if (!utf) 2844 { 2845 for (; len > 0; len--) *pp++ = *p++; 2846 } 2847 else while (len > 0) 2848 { 2849 uint32_t c; 2850 int chlen = utf82ord(p, &c); 2851 if (chlen <= 0) return -1; 2852 if (utf && c > 0x10ffff) return -2; 2853 p += 
chlen; 2854 len -= chlen; 2855 *pp++ = c; 2856 } 2857 2858 *pp = 0; 2859 *lenptr = pp - pbuffer32; 2860 return 0; 2861 } 2862 #endif /* SUPPORT_PCRE2_32 */ 2863 2864 2865 2866 /************************************************* 2867 * Move back by so many characters * 2868 *************************************************/ 2869 2870 /* Given a code unit offset in a subject string, move backwards by a number of 2871 characters, and return the resulting offset. 2872 2873 Arguments: 2874 subject pointer to the string 2875 offset start offset 2876 count count to move back by 2877 utf TRUE if in UTF mode 2878 2879 Returns: a possibly changed offset 2880 */ 2881 2882 static PCRE2_SIZE 2883 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf) 2884 { 2885 if (!utf || test_mode == PCRE32_MODE) 2886 return (count >= offset)? 0 : (offset - count); 2887 2888 else if (test_mode == PCRE8_MODE) 2889 { 2890 PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset; 2891 for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--) 2892 { 2893 pp--; 2894 while ((*pp & 0xc0) == 0x80) pp--; 2895 } 2896 return pp - (PCRE2_SPTR8)subject; 2897 } 2898 2899 else /* 16-bit mode */ 2900 { 2901 PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset; 2902 for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--) 2903 { 2904 pp--; 2905 if ((*pp & 0xfc00) == 0xdc00) pp--; 2906 } 2907 return pp - (PCRE2_SPTR16)subject; 2908 } 2909 } 2910 2911 2912 2913 /************************************************* 2914 * Expand input buffers * 2915 *************************************************/ 2916 2917 /* This function doubles the size of the input buffer and the buffer for 2918 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to 2919 the new ones. 
2920 2921 Arguments: none 2922 Returns: nothing (aborts if malloc() fails) 2923 */ 2924 2925 static void 2926 expand_input_buffers(void) 2927 { 2928 int new_pbuffer8_size = 2*pbuffer8_size; 2929 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size); 2930 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size); 2931 2932 if (new_buffer == NULL || new_pbuffer8 == NULL) 2933 { 2934 fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size); 2935 exit(1); 2936 } 2937 2938 memcpy(new_buffer, buffer, pbuffer8_size); 2939 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size); 2940 2941 pbuffer8_size = new_pbuffer8_size; 2942 2943 free(buffer); 2944 free(pbuffer8); 2945 2946 buffer = new_buffer; 2947 pbuffer8 = new_pbuffer8; 2948 } 2949 2950 2951 2952 /************************************************* 2953 * Read or extend an input line * 2954 *************************************************/ 2955 2956 /* Input lines are read into buffer, but both patterns and data lines can be 2957 continued over multiple input lines. In addition, if the buffer fills up, we 2958 want to automatically expand it so as to be able to handle extremely large 2959 lines that are needed for certain stress tests, although this is less likely 2960 now that there are repetition features for both patterns and data. When the 2961 input buffer is expanded, the other two buffers must also be expanded likewise, 2962 and the contents of pbuffer, which are a copy of the input for callouts, must 2963 be preserved (for when expansion happens for a data line). This is not the most 2964 optimal way of handling this, but hey, this is just a test program! 
2965 2966 Arguments: 2967 f the file to read 2968 start where in buffer to start (this *must* be within buffer) 2969 prompt for stdin or readline() 2970 2971 Returns: pointer to the start of new data 2972 could be a copy of start, or could be moved 2973 NULL if no data read and EOF reached 2974 */ 2975 2976 static uint8_t * 2977 extend_inputline(FILE *f, uint8_t *start, const char *prompt) 2978 { 2979 uint8_t *here = start; 2980 2981 for (;;) 2982 { 2983 size_t rlen = (size_t)(pbuffer8_size - (here - buffer)); 2984 2985 if (rlen > 1000) 2986 { 2987 size_t dlen; 2988 2989 /* If libreadline or libedit support is required, use readline() to read a 2990 line if the input is a terminal. Note that readline() removes the trailing 2991 newline, so we must put it back again, to be compatible with fgets(). */ 2992 2993 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 2994 if (INTERACTIVE(f)) 2995 { 2996 size_t len; 2997 char *s = readline(prompt); 2998 if (s == NULL) return (here == start)? NULL : start; 2999 len = strlen(s); 3000 if (len > 0) add_history(s); 3001 if (len > rlen - 1) len = rlen - 1; 3002 memcpy(here, s, len); 3003 here[len] = '\n'; 3004 here[len+1] = 0; 3005 free(s); 3006 } 3007 else 3008 #endif 3009 3010 /* Read the next line by normal means, prompting if the file is a tty. */ 3011 3012 { 3013 if (INTERACTIVE(f)) printf("%s", prompt); 3014 if (fgets((char *)here, rlen, f) == NULL) 3015 return (here == start)? NULL : start; 3016 } 3017 3018 dlen = strlen((char *)here); 3019 here += dlen; 3020 3021 /* Check for end of line reached. Take care not to read data from before 3022 start (dlen will be zero for a file starting with a binary zero). */ 3023 3024 if (here > start && here[-1] == '\n') return start; 3025 3026 /* If we have not read a newline when reading a file, we have either filled 3027 the buffer or reached the end of the file. We can detect the former by 3028 checking that the string fills the buffer, and the latter by feof(). 
If 3029 neither of these is true, it means we read a binary zero which has caused 3030 strlen() to give a short length. This is a hard error because pcre2test 3031 expects to work with C strings. */ 3032 3033 if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f)) 3034 { 3035 fprintf(outfile, "** Binary zero encountered in input\n"); 3036 fprintf(outfile, "** pcre2test run abandoned\n"); 3037 exit(1); 3038 } 3039 } 3040 3041 else 3042 { 3043 size_t start_offset = start - buffer; 3044 size_t here_offset = here - buffer; 3045 expand_input_buffers(); 3046 start = buffer + start_offset; 3047 here = buffer + here_offset; 3048 } 3049 } 3050 3051 /* Control never gets here */ 3052 } 3053 3054 3055 3056 /************************************************* 3057 * Case-independent strncmp() function * 3058 *************************************************/ 3059 3060 /* 3061 Arguments: 3062 s first string 3063 t second string 3064 n number of characters to compare 3065 3066 Returns: < 0, = 0, or > 0, according to the comparison 3067 */ 3068 3069 static int 3070 strncmpic(const uint8_t *s, const uint8_t *t, int n) 3071 { 3072 while (n--) 3073 { 3074 int c = tolower(*s++) - tolower(*t++); 3075 if (c) return c; 3076 } 3077 return 0; 3078 } 3079 3080 3081 3082 /************************************************* 3083 * Scan the main modifier list * 3084 *************************************************/ 3085 3086 /* This function searches the modifier list for a long modifier name. 3087 3088 Argument: 3089 p start of the name 3090 lenp length of the name 3091 3092 Returns: an index in the modifier list, or -1 on failure 3093 */ 3094 3095 static int 3096 scan_modifiers(const uint8_t *p, unsigned int len) 3097 { 3098 int bot = 0; 3099 int top = MODLISTCOUNT; 3100 3101 while (top > bot) 3102 { 3103 int mid = (bot + top)/2; 3104 unsigned int mlen = strlen(modlist[mid].name); 3105 int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? 
len : mlen); 3106 if (c == 0) 3107 { 3108 if (len == mlen) return mid; 3109 c = (int)len - (int)mlen; 3110 } 3111 if (c > 0) bot = mid + 1; else top = mid; 3112 } 3113 3114 return -1; 3115 3116 } 3117 3118 3119 3120 /************************************************* 3121 * Check a modifer and find its field * 3122 *************************************************/ 3123 3124 /* This function is called when a modifier has been identified. We check that 3125 it is allowed here and find the field that is to be changed. 3126 3127 Arguments: 3128 m the modifier list entry 3129 ctx CTX_PAT => pattern context 3130 CTX_POPPAT => pattern context for popped pattern 3131 CTX_DEFPAT => default pattern context 3132 CTX_DAT => data context 3133 CTX_DEFDAT => default data context 3134 pctl point to pattern control block 3135 dctl point to data control block 3136 c a single character or 0 3137 3138 Returns: a field pointer or NULL 3139 */ 3140 3141 static void * 3142 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c) 3143 { 3144 void *field = NULL; 3145 PCRE2_SIZE offset = m->offset; 3146 3147 if (restrict_for_perl_test) switch(m->which) 3148 { 3149 case MOD_PNDP: 3150 case MOD_PATP: 3151 case MOD_PDP: 3152 break; 3153 3154 default: 3155 fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n", 3156 m->name); 3157 return NULL; 3158 } 3159 3160 switch (m->which) 3161 { 3162 case MOD_CTC: /* Compile context modifier */ 3163 if (ctx == CTX_DEFPAT) field = PTR(default_pat_context); 3164 else if (ctx == CTX_PAT) field = PTR(pat_context); 3165 break; 3166 3167 case MOD_CTM: /* Match context modifier */ 3168 if (ctx == CTX_DEFDAT) field = PTR(default_dat_context); 3169 else if (ctx == CTX_DAT) field = PTR(dat_context); 3170 break; 3171 3172 case MOD_DAT: /* Data line modifier */ 3173 if (dctl != NULL) field = dctl; 3174 break; 3175 3176 case MOD_PAT: /* Pattern modifier */ 3177 case MOD_PATP: /* Allowed for Perl test */ 3178 if (pctl != NULL) field 
= pctl; 3179 break; 3180 3181 case MOD_PD: /* Pattern or data line modifier */ 3182 case MOD_PDP: /* Ditto, allowed for Perl test */ 3183 case MOD_PND: /* Ditto, but not default pattern */ 3184 case MOD_PNDP: /* Ditto, allowed for Perl test */ 3185 if (dctl != NULL) field = dctl; 3186 else if (pctl != NULL && (m->which == MOD_PD || ctx != CTX_DEFPAT)) 3187 field = pctl; 3188 break; 3189 } 3190 3191 if (field == NULL) 3192 { 3193 if (c == 0) 3194 fprintf(outfile, "** '%s' is not valid here\n", m->name); 3195 else 3196 fprintf(outfile, "** /%c is not valid here\n", c); 3197 return NULL; 3198 } 3199 3200 return (char *)field + offset; 3201 } 3202 3203 3204 3205 /************************************************* 3206 * Decode a modifier list * 3207 *************************************************/ 3208 3209 /* A pointer to a control block is NULL when called in cases when that block is 3210 not relevant. They are never all relevant in one call. At least one of patctl 3211 and datctl is NULL. The second argument specifies which context to use for 3212 modifiers that apply to contexts. 3213 3214 Arguments: 3215 p point to modifier string 3216 ctx CTX_PAT => pattern context 3217 CTX_POPPAT => pattern context for popped pattern 3218 CTX_DEFPAT => default pattern context 3219 CTX_DAT => data context 3220 CTX_DEFDAT => default data context 3221 pctl point to pattern control block 3222 dctl point to data control block 3223 3224 Returns: TRUE if successful decode, FALSE otherwise 3225 */ 3226 3227 static BOOL 3228 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl) 3229 { 3230 uint8_t *ep, *pp; 3231 long li; 3232 unsigned long uli; 3233 BOOL first = TRUE; 3234 3235 for (;;) 3236 { 3237 void *field; 3238 modstruct *m; 3239 BOOL off = FALSE; 3240 unsigned int i, len; 3241 int index; 3242 char *endptr; 3243 3244 /* Skip white space and commas. 
*/ 3245 3246 while (isspace(*p) || *p == ',') p++; 3247 if (*p == 0) break; 3248 3249 /* Find the end of the item; lose trailing whitespace at end of line. */ 3250 3251 for (ep = p; *ep != 0 && *ep != ','; ep++); 3252 if (*ep == 0) 3253 { 3254 while (ep > p && isspace(ep[-1])) ep--; 3255 *ep = 0; 3256 } 3257 3258 /* Remember if the first character is '-'. */ 3259 3260 if (*p == '-') 3261 { 3262 off = TRUE; 3263 p++; 3264 } 3265 3266 /* Find the length of a full-length modifier name, and scan for it. */ 3267 3268 pp = p; 3269 while (pp < ep && *pp != '=') pp++; 3270 index = scan_modifiers(p, pp - p); 3271 3272 /* If the first modifier is unrecognized, try to interpret it as a sequence 3273 of single-character abbreviated modifiers. None of these modifiers have any 3274 associated data. They just set options or control bits. */ 3275 3276 if (index < 0) 3277 { 3278 uint32_t cc; 3279 uint8_t *mp = p; 3280 3281 if (!first) 3282 { 3283 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p); 3284 if (ep - p == 1) 3285 fprintf(outfile, "** Single-character modifiers must come first\n"); 3286 return FALSE; 3287 } 3288 3289 for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p)) 3290 { 3291 for (i = 0; i < C1MODLISTCOUNT; i++) 3292 if (cc == c1modlist[i].onechar) break; 3293 3294 if (i >= C1MODLISTCOUNT) 3295 { 3296 fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n", 3297 *p, (int)(ep-mp), mp); 3298 return FALSE; 3299 } 3300 3301 if (c1modlist[i].index >= 0) 3302 { 3303 index = c1modlist[i].index; 3304 } 3305 3306 else 3307 { 3308 index = scan_modifiers((uint8_t *)(c1modlist[i].fullname), 3309 strlen(c1modlist[i].fullname)); 3310 if (index < 0) 3311 { 3312 fprintf(outfile, "** Internal error: single-character equivalent " 3313 "modifier '%s' not found\n", c1modlist[i].fullname); 3314 return FALSE; 3315 } 3316 c1modlist[i].index = index; /* Cache for next time */ 3317 } 3318 3319 field = check_modifier(modlist + index, ctx, pctl, dctl, *p); 
3320 if (field == NULL) return FALSE; 3321 *((uint32_t *)field) |= modlist[index].value; 3322 } 3323 3324 continue; /* With tne next (fullname) modifier */ 3325 } 3326 3327 /* We have a match on a full-name modifier. Check for the existence of data 3328 when needed. */ 3329 3330 m = modlist + index; /* Save typing */ 3331 if (m->type != MOD_CTL && m->type != MOD_OPT && 3332 (m->type != MOD_IND || *pp == '=')) 3333 { 3334 if (*pp++ != '=') 3335 { 3336 fprintf(outfile, "** '=' expected after '%s'\n", m->name); 3337 return FALSE; 3338 } 3339 if (off) 3340 { 3341 fprintf(outfile, "** '-' is not valid for '%s'\n", m->name); 3342 return FALSE; 3343 } 3344 } 3345 3346 /* These on/off types have no data. */ 3347 3348 else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0) 3349 { 3350 fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p); 3351 return FALSE; 3352 } 3353 3354 /* Set the data length for those types that have data. Then find the field 3355 that is to be set. If check_modifier() returns NULL, it has already output an 3356 error message. */ 3357 3358 len = ep - pp; 3359 field = check_modifier(m, ctx, pctl, dctl, 0); 3360 if (field == NULL) return FALSE; 3361 3362 /* Process according to data type. 
*/ 3363 3364 switch (m->type) 3365 { 3366 case MOD_CTL: 3367 case MOD_OPT: 3368 if (off) *((uint32_t *)field) &= ~m->value; 3369 else *((uint32_t *)field) |= m->value; 3370 break; 3371 3372 case MOD_BSR: 3373 if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0) 3374 { 3375 #ifdef BSR_ANYCRLF 3376 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF; 3377 #else 3378 *((uint16_t *)field) = PCRE2_BSR_UNICODE; 3379 #endif 3380 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET; 3381 else dctl->control &= ~CTL_BSR_SET; 3382 } 3383 else 3384 { 3385 if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0) 3386 *((uint16_t *)field) = PCRE2_BSR_ANYCRLF; 3387 else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0) 3388 *((uint16_t *)field) = PCRE2_BSR_UNICODE; 3389 else goto INVALID_VALUE; 3390 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET; 3391 else dctl->control |= CTL_BSR_SET; 3392 } 3393 pp = ep; 3394 break; 3395 3396 case MOD_IN2: /* One or two unsigned integers */ 3397 if (!isdigit(*pp)) goto INVALID_VALUE; 3398 uli = strtoul((const char *)pp, &endptr, 10); 3399 if (U32OVERFLOW(uli)) goto INVALID_VALUE; 3400 ((uint32_t *)field)[0] = (uint32_t)uli; 3401 if (*endptr == ':') 3402 { 3403 uli = strtoul((const char *)endptr+1, &endptr, 10); 3404 if (U32OVERFLOW(uli)) goto INVALID_VALUE; 3405 ((uint32_t *)field)[1] = (uint32_t)uli; 3406 } 3407 else ((uint32_t *)field)[1] = 0; 3408 pp = (uint8_t *)endptr; 3409 break; 3410 3411 /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or 3412 less than ULONG_MAX. So first test for overflowing the long int, and then 3413 test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. 
*/ 3414 3415 case MOD_SIZ: /* PCRE2_SIZE value */ 3416 if (!isdigit(*pp)) goto INVALID_VALUE; 3417 uli = strtoul((const char *)pp, &endptr, 10); 3418 if (uli == ULONG_MAX) goto INVALID_VALUE; 3419 #if ULONG_MAX > PCRE2_SIZE_MAX 3420 if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE; 3421 #endif 3422 *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli; 3423 pp = (uint8_t *)endptr; 3424 break; 3425 3426 case MOD_IND: /* Unsigned integer with default */ 3427 if (len == 0) 3428 { 3429 *((uint32_t *)field) = (uint32_t)(m->value); 3430 break; 3431 } 3432 /* Fall through */ 3433 3434 case MOD_INT: /* Unsigned integer */ 3435 if (!isdigit(*pp)) goto INVALID_VALUE; 3436 uli = strtoul((const char *)pp, &endptr, 10); 3437 if (U32OVERFLOW(uli)) goto INVALID_VALUE; 3438 *((uint32_t *)field) = (uint32_t)uli; 3439 pp = (uint8_t *)endptr; 3440 break; 3441 3442 case MOD_INS: /* Signed integer */ 3443 if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE; 3444 li = strtol((const char *)pp, &endptr, 10); 3445 if (S32OVERFLOW(li)) goto INVALID_VALUE; 3446 *((int32_t *)field) = (int32_t)li; 3447 pp = (uint8_t *)endptr; 3448 break; 3449 3450 case MOD_NL: 3451 for (i = 0; i < sizeof(newlines)/sizeof(char *); i++) 3452 if (len == strlen(newlines[i]) && 3453 strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break; 3454 if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE; 3455 if (i == 0) 3456 { 3457 *((uint16_t *)field) = NEWLINE_DEFAULT; 3458 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET; 3459 else dctl->control &= ~CTL_NL_SET; 3460 } 3461 else 3462 { 3463 *((uint16_t *)field) = i; 3464 if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET; 3465 else dctl->control |= CTL_NL_SET; 3466 } 3467 pp = ep; 3468 break; 3469 3470 case MOD_NN: /* Name or (signed) number; may be several */ 3471 if (isdigit(*pp) || *pp == '-') 3472 { 3473 int ct = MAXCPYGET - 1; 3474 int32_t value; 3475 li = strtol((const char *)pp, &endptr, 10); 3476 if (S32OVERFLOW(li)) goto 
INVALID_VALUE; 3477 value = (int32_t)li; 3478 field = (char *)field - m->offset + m->value; /* Adjust field ptr */ 3479 if (value >= 0) /* Add new number */ 3480 { 3481 while (*((int32_t *)field) >= 0 && ct-- > 0) /* Skip previous */ 3482 field = (char *)field + sizeof(int32_t); 3483 if (ct <= 0) 3484 { 3485 fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name); 3486 return FALSE; 3487 } 3488 } 3489 *((int32_t *)field) = value; 3490 if (ct > 0) ((int32_t *)field)[1] = -1; 3491 pp = (uint8_t *)endptr; 3492 } 3493 3494 /* Multiple strings are put end to end. */ 3495 3496 else 3497 { 3498 char *nn = (char *)field; 3499 if (len > 0) /* Add new name */ 3500 { 3501 while (*nn != 0) nn += strlen(nn) + 1; 3502 if (nn + len + 1 - (char *)field > LENCPYGET) 3503 { 3504 fprintf(outfile, "** Too many named '%s' modifiers\n", m->name); 3505 return FALSE; 3506 } 3507 memcpy(nn, pp, len); 3508 } 3509 nn[len] = 0 ; 3510 nn[len+1] = 0; 3511 pp = ep; 3512 } 3513 break; 3514 3515 case MOD_STR: 3516 if (len + 1 > m->value) 3517 { 3518 fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n", 3519 m->name, m->value - 1); 3520 return FALSE; 3521 } 3522 memcpy(field, pp, len); 3523 ((uint8_t *)field)[len] = 0; 3524 pp = ep; 3525 break; 3526 } 3527 3528 if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0) 3529 { 3530 fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name); 3531 return FALSE; 3532 } 3533 3534 p = pp; 3535 first = FALSE; 3536 3537 if (ctx == CTX_POPPAT && 3538 (pctl->options != 0 || 3539 pctl->tables_id != 0 || 3540 pctl->locale[0] != 0 || 3541 (pctl->control & NOTPOP_CONTROLS) != 0)) 3542 { 3543 fprintf(outfile, "** '%s' is not valid here\n", m->name); 3544 return FALSE; 3545 } 3546 } 3547 3548 return TRUE; 3549 3550 INVALID_VALUE: 3551 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p); 3552 return FALSE; 3553 } 3554 3555 3556 /************************************************* 3557 * Get info from a pattern * 3558 
*************************************************/ 3559 3560 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled 3561 pattern. 3562 3563 Arguments: 3564 what code for the required information 3565 where where to put the answer 3566 unsetok PCRE2_ERROR_UNSET is an "expected" result 3567 3568 Returns: the return from pcre2_pattern_info() 3569 */ 3570 3571 static int 3572 pattern_info(int what, void *where, BOOL unsetok) 3573 { 3574 int rc; 3575 PCRE2_PATTERN_INFO(rc, compiled_code, what, where); 3576 if (rc >= 0) return 0; 3577 if (rc != PCRE2_ERROR_UNSET || !unsetok) 3578 { 3579 fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode, 3580 what); 3581 if (rc == PCRE2_ERROR_BADMODE) 3582 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " 3583 "%d-bit mode\n", test_mode, 3584 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK)); 3585 } 3586 return rc; 3587 } 3588 3589 3590 3591 #ifdef SUPPORT_PCRE2_8 3592 /************************************************* 3593 * Show something in a list * 3594 *************************************************/ 3595 3596 /* This function just helps to keep the code that uses it tidier. It's used for 3597 various lists of things where there needs to be introductory text before the 3598 first item. As these calls are all in the POSIX-support code, they happen only 3599 when 8-bit mode is supported. */ 3600 3601 static void 3602 prmsg(const char **msg, const char *s) 3603 { 3604 fprintf(outfile, "%s %s", *msg, s); 3605 *msg = ""; 3606 } 3607 #endif /* SUPPORT_PCRE2_8 */ 3608 3609 3610 3611 /************************************************* 3612 * Show control bits * 3613 *************************************************/ 3614 3615 /* Called for mutually exclusive controls and for unsupported POSIX controls. 3616 Because the bits are unique, this can be used for both pattern and data control 3617 words. 
3618 3619 Arguments: 3620 controls control bits 3621 controls2 more control bits 3622 before text to print before 3623 3624 Returns: nothing 3625 */ 3626 3627 static void 3628 show_controls(uint32_t controls, uint32_t controls2, const char *before) 3629 { 3630 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 3631 before, 3632 ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", 3633 ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", 3634 ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "", 3635 ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "", 3636 ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "", 3637 ((controls & CTL_BINCODE) != 0)? " bincode" : "", 3638 ((controls & CTL_BSR_SET) != 0)? " bsr" : "", 3639 ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "", 3640 ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "", 3641 ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "", 3642 ((controls & CTL_DFA) != 0)? " dfa" : "", 3643 ((controls & CTL_EXPAND) != 0)? " expand" : "", 3644 ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "", 3645 ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "", 3646 ((controls & CTL_GETALL) != 0)? " getall" : "", 3647 ((controls & CTL_GLOBAL) != 0)? " global" : "", 3648 ((controls & CTL_HEXPAT) != 0)? " hex" : "", 3649 ((controls & CTL_INFO) != 0)? " info" : "", 3650 ((controls & CTL_JITFAST) != 0)? " jitfast" : "", 3651 ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", 3652 ((controls & CTL_MARK) != 0)? " mark" : "", 3653 ((controls & CTL_MEMORY) != 0)? " memory" : "", 3654 ((controls & CTL_NL_SET) != 0)? " newline" : "", 3655 ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "", 3656 ((controls & CTL_POSIX) != 0)? " posix" : "", 3657 ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "", 3658 ((controls & CTL_PUSH) != 0)? " push" : "", 3659 ((controls & CTL_PUSHCOPY) != 0)? 
" pushcopy" : "", 3660 ((controls & CTL_STARTCHAR) != 0)? " startchar" : "", 3661 ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "", 3662 ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "", 3663 ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "", 3664 ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "", 3665 ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : ""); 3666 } 3667 3668 3669 3670 /************************************************* 3671 * Show compile options * 3672 *************************************************/ 3673 3674 /* Called from show_pattern_info() and for unsupported POSIX options. 3675 3676 Arguments: 3677 options an options word 3678 before text to print before 3679 after text to print after 3680 3681 Returns: nothing 3682 */ 3683 3684 static void 3685 show_compile_options(uint32_t options, const char *before, const char *after) 3686 { 3687 if (options == 0) fprintf(outfile, "%s <none>%s", before, after); 3688 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 3689 before, 3690 ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", 3691 ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", 3692 ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "", 3693 ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "", 3694 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", 3695 ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "", 3696 ((options & PCRE2_CASELESS) != 0)? " caseless" : "", 3697 ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", 3698 ((options & PCRE2_DOTALL) != 0)? " dotall" : "", 3699 ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "", 3700 ((options & PCRE2_EXTENDED) != 0)? " extended" : "", 3701 ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "", 3702 ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? 
" match_unset_backref" : "", 3703 ((options & PCRE2_MULTILINE) != 0)? " multiline" : "", 3704 ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "", 3705 ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "", 3706 ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "", 3707 ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "", 3708 ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "", 3709 ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "", 3710 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "", 3711 ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", 3712 ((options & PCRE2_UCP) != 0)? " ucp" : "", 3713 ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "", 3714 ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "", 3715 ((options & PCRE2_UTF) != 0)? " utf" : "", 3716 after); 3717 } 3718 3719 3720 3721 #ifdef SUPPORT_PCRE2_8 3722 /************************************************* 3723 * Show match options * 3724 *************************************************/ 3725 3726 /* Called for unsupported POSIX options. */ 3727 3728 static void 3729 show_match_options(uint32_t options) 3730 { 3731 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s", 3732 ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", 3733 ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "", 3734 ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "", 3735 ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "", 3736 ((options & PCRE2_NOTBOL) != 0)? " notbol" : "", 3737 ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "", 3738 ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "", 3739 ((options & PCRE2_NOTEOL) != 0)? " noteol" : "", 3740 ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "", 3741 ((options & PCRE2_PARTIAL_SOFT) != 0)? 
" partial_soft" : ""); 3742 } 3743 #endif /* SUPPORT_PCRE2_8 */ 3744 3745 3746 3747 /************************************************* 3748 * Show memory usage info for a pattern * 3749 *************************************************/ 3750 3751 static void 3752 show_memory_info(void) 3753 { 3754 uint32_t name_count, name_entry_size; 3755 size_t size, cblock_size; 3756 3757 /* One of the test_mode values will always be true, but to stop a compiler 3758 warning we must initialize cblock_size. */ 3759 3760 cblock_size = 0; 3761 #ifdef SUPPORT_PCRE2_8 3762 if (test_mode == 8) cblock_size = sizeof(pcre2_real_code_8); 3763 #endif 3764 #ifdef SUPPORT_PCRE2_16 3765 if (test_mode == 16) cblock_size = sizeof(pcre2_real_code_16); 3766 #endif 3767 #ifdef SUPPORT_PCRE2_32 3768 if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32); 3769 #endif 3770 3771 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE); 3772 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE); 3773 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE); 3774 fprintf(outfile, "Memory allocation (code space): %d\n", 3775 (int)(size - name_count*name_entry_size*code_unit_size - cblock_size)); 3776 if (pat_patctl.jit != 0) 3777 { 3778 (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE); 3779 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size); 3780 } 3781 } 3782 3783 3784 3785 /************************************************* 3786 * Callback function for callout enumeration * 3787 *************************************************/ 3788 3789 /* The only differences in the callout emumeration block for different code 3790 unit widths are that the pointers to the subject, the most recent MARK, and a 3791 callout argument string point to strings of the appropriate width. Casts can be 3792 used to deal with this. 
3793 3794 Argument: 3795 cb pointer to enumerate block 3796 callout_data user data 3797 3798 Returns: 0 3799 */ 3800 3801 static int callout_callback(pcre2_callout_enumerate_block_8 *cb, 3802 void *callout_data) 3803 { 3804 uint32_t i; 3805 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; 3806 3807 (void)callout_data; /* Not currently displayed */ 3808 3809 fprintf(outfile, "Callout "); 3810 if (cb->callout_string != NULL) 3811 { 3812 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); 3813 fprintf(outfile, "%c", delimiter); 3814 PCHARSV(cb->callout_string, 0, 3815 cb->callout_string_length, utf, outfile); 3816 for (i = 0; callout_start_delims[i] != 0; i++) 3817 if (delimiter == callout_start_delims[i]) 3818 { 3819 delimiter = callout_end_delims[i]; 3820 break; 3821 } 3822 fprintf(outfile, "%c ", delimiter); 3823 } 3824 else fprintf(outfile, "%d ", cb->callout_number); 3825 3826 fprintf(outfile, "%.*s\n", 3827 (int)((cb->next_item_length == 0)? 1 : cb->next_item_length), 3828 pbuffer8 + cb->pattern_position); 3829 3830 return 0; 3831 } 3832 3833 3834 3835 /************************************************* 3836 * Show information about a pattern * 3837 *************************************************/ 3838 3839 /* This function is called after a pattern has been compiled if any of the 3840 information-requesting controls have been set. 
3841 3842 Arguments: none 3843 3844 Returns: PR_OK continue processing next line 3845 PR_SKIP skip to a blank line 3846 PR_ABEND abort the pcre2test run 3847 */ 3848 3849 static int 3850 show_pattern_info(void) 3851 { 3852 uint32_t compile_options, overall_options; 3853 3854 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0) 3855 { 3856 fprintf(outfile, "------------------------------------------------------------------\n"); 3857 PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0); 3858 } 3859 3860 if ((pat_patctl.control & CTL_INFO) != 0) 3861 { 3862 void *nametable; 3863 uint8_t *start_bits; 3864 BOOL match_limit_set, recursion_limit_set; 3865 uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, 3866 hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, 3867 match_limit, minlength, nameentrysize, namecount, newline_convention, 3868 recursion_limit; 3869 3870 /* These info requests may return PCRE2_ERROR_UNSET. */ 3871 3872 switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE)) 3873 { 3874 case 0: 3875 match_limit_set = TRUE; 3876 break; 3877 3878 case PCRE2_ERROR_UNSET: 3879 match_limit_set = FALSE; 3880 break; 3881 3882 default: 3883 return PR_ABEND; 3884 } 3885 3886 switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE)) 3887 { 3888 case 0: 3889 recursion_limit_set = TRUE; 3890 break; 3891 3892 case PCRE2_ERROR_UNSET: 3893 recursion_limit_set = FALSE; 3894 break; 3895 3896 default: 3897 return PR_ABEND; 3898 } 3899 3900 /* These info requests should always succeed. 
*/ 3901 3902 if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) + 3903 pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) + 3904 pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) + 3905 pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) + 3906 pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) + 3907 pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) + 3908 pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) + 3909 pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) + 3910 pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) + 3911 pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) + 3912 pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) + 3913 pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) + 3914 pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) + 3915 pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) + 3916 pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) + 3917 pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) + 3918 pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE) 3919 != 0) 3920 return PR_ABEND; 3921 3922 fprintf(outfile, "Capturing subpattern count = %d\n", capture_count); 3923 3924 if (backrefmax > 0) 3925 fprintf(outfile, "Max back reference = %d\n", backrefmax); 3926 3927 if (maxlookbehind > 0) 3928 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); 3929 3930 if (match_limit_set) 3931 fprintf(outfile, "Match limit = %u\n", match_limit); 3932 3933 if (recursion_limit_set) 3934 fprintf(outfile, "Recursion limit = %u\n", recursion_limit); 3935 3936 if (namecount > 0) 3937 { 3938 fprintf(outfile, "Named capturing subpatterns:\n"); 3939 for (; namecount > 0; namecount--) 3940 { 3941 int imm2_size = test_mode == PCRE8_MODE ? 
2 : 1; 3942 uint32_t length = (uint32_t)STRLEN(nametable + imm2_size); 3943 fprintf(outfile, " "); 3944 PCHARSV(nametable, imm2_size, length, FALSE, outfile); 3945 while (length++ < nameentrysize - imm2_size) putc(' ', outfile); 3946 #ifdef SUPPORT_PCRE2_32 3947 if (test_mode == PCRE32_MODE) 3948 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0])); 3949 #endif 3950 #ifdef SUPPORT_PCRE2_16 3951 if (test_mode == PCRE16_MODE) 3952 fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0])); 3953 #endif 3954 #ifdef SUPPORT_PCRE2_8 3955 if (test_mode == PCRE8_MODE) 3956 fprintf(outfile, "%3d\n", (int)( 3957 ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1])); 3958 #endif 3959 nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size); 3960 } 3961 } 3962 3963 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); 3964 if (hasbackslashc) fprintf(outfile, "Contains \\C\n"); 3965 if (match_empty) fprintf(outfile, "May match empty string\n"); 3966 3967 pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE); 3968 pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); 3969 3970 /* Remove UTF/UCP if they were there only because of forbid_utf. This saves 3971 cluttering up the verification output of non-UTF test files. 
*/ 3972 3973 if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0) 3974 { 3975 compile_options &= ~PCRE2_NEVER_UTF; 3976 overall_options &= ~PCRE2_NEVER_UTF; 3977 } 3978 3979 if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0) 3980 { 3981 compile_options &= ~PCRE2_NEVER_UCP; 3982 overall_options &= ~PCRE2_NEVER_UCP; 3983 } 3984 3985 if ((compile_options|overall_options) != 0) 3986 { 3987 if (compile_options == overall_options) 3988 show_compile_options(compile_options, "Options:", "\n"); 3989 else 3990 { 3991 show_compile_options(compile_options, "Compile options:", "\n"); 3992 show_compile_options(overall_options, "Overall options:", "\n"); 3993 } 3994 } 3995 3996 if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); 3997 3998 if ((pat_patctl.control & CTL_BSR_SET) != 0 || 3999 (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0) 4000 fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? 4001 "any Unicode newline" : "CR, LF, or CRLF"); 4002 4003 if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0) 4004 { 4005 switch (newline_convention) 4006 { 4007 case PCRE2_NEWLINE_CR: 4008 fprintf(outfile, "Forced newline is CR\n"); 4009 break; 4010 4011 case PCRE2_NEWLINE_LF: 4012 fprintf(outfile, "Forced newline is LF\n"); 4013 break; 4014 4015 case PCRE2_NEWLINE_CRLF: 4016 fprintf(outfile, "Forced newline is CRLF\n"); 4017 break; 4018 4019 case PCRE2_NEWLINE_ANYCRLF: 4020 fprintf(outfile, "Forced newline is CR, LF, or CRLF\n"); 4021 break; 4022 4023 case PCRE2_NEWLINE_ANY: 4024 fprintf(outfile, "Forced newline is any Unicode newline\n"); 4025 break; 4026 4027 default: 4028 break; 4029 } 4030 } 4031 4032 if (first_ctype == 2) 4033 { 4034 fprintf(outfile, "First code unit at start or follows newline\n"); 4035 } 4036 else if (first_ctype == 1) 4037 { 4038 const char *caseless = 4039 ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)? 
4040 "" : " (caseless)"; 4041 if (PRINTOK(first_cunit)) 4042 fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless); 4043 else 4044 { 4045 fprintf(outfile, "First code unit = "); 4046 pchar(first_cunit, FALSE, outfile); 4047 fprintf(outfile, "%s\n", caseless); 4048 } 4049 } 4050 else if (start_bits != NULL) 4051 { 4052 int i; 4053 int c = 24; 4054 fprintf(outfile, "Starting code units: "); 4055 for (i = 0; i < 256; i++) 4056 { 4057 if ((start_bits[i/8] & (1<<(i&7))) != 0) 4058 { 4059 if (c > 75) 4060 { 4061 fprintf(outfile, "\n "); 4062 c = 2; 4063 } 4064 if (PRINTOK(i) && i != ' ') 4065 { 4066 fprintf(outfile, "%c ", i); 4067 c += 2; 4068 } 4069 else 4070 { 4071 fprintf(outfile, "\\x%02x ", i); 4072 c += 5; 4073 } 4074 } 4075 } 4076 fprintf(outfile, "\n"); 4077 } 4078 4079 if (last_ctype != 0) 4080 { 4081 const char *caseless = 4082 ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)? 4083 "" : " (caseless)"; 4084 if (PRINTOK(last_cunit)) 4085 fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless); 4086 else 4087 { 4088 fprintf(outfile, "Last code unit = "); 4089 pchar(last_cunit, FALSE, outfile); 4090 fprintf(outfile, "%s\n", caseless); 4091 } 4092 } 4093 4094 fprintf(outfile, "Subject length lower bound = %d\n", minlength); 4095 4096 if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) 4097 { 4098 if (FLD(compiled_code, executable_jit) != NULL) 4099 fprintf(outfile, "JIT compilation was successful\n"); 4100 else 4101 { 4102 #ifdef SUPPORT_JIT 4103 int len; 4104 fprintf(outfile, "JIT compilation was not successful"); 4105 if (jitrc != 0) 4106 { 4107 fprintf(outfile, " ("); 4108 PCRE2_GET_ERROR_MESSAGE(len, jitrc, pbuffer); 4109 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile); 4110 fprintf(outfile, ")"); 4111 } 4112 fprintf(outfile, "\n"); 4113 #else 4114 fprintf(outfile, "JIT support is not available in this version of PCRE2\n"); 4115 #endif 4116 } 4117 } 4118 } 4119 4120 if ((pat_patctl.control & 
CTL_CALLOUT_INFO) != 0) 4121 { 4122 int errorcode; 4123 PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0); 4124 if (errorcode != 0) 4125 { 4126 int len; 4127 fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode); 4128 if (errorcode < 0) 4129 { 4130 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer); 4131 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile); 4132 } 4133 fprintf(outfile, "\n"); 4134 return PR_SKIP; 4135 } 4136 } 4137 4138 return PR_OK; 4139 } 4140 4141 4142 4143 /************************************************* 4144 * Handle serialization error * 4145 *************************************************/ 4146 4147 /* Print an error message after a serialization failure. 4148 4149 Arguments: 4150 rc the error code 4151 msg an initial message for what failed 4152 4153 Returns: nothing 4154 */ 4155 4156 static void 4157 serial_error(int rc, const char *msg) 4158 { 4159 fprintf(outfile, "%s failed: error %d: ", msg, rc); 4160 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 4161 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 4162 fprintf(outfile, "\n"); 4163 } 4164 4165 4166 4167 /************************************************* 4168 * Open file for save/load commands * 4169 *************************************************/ 4170 4171 /* This function decodes the file name and opens the file. 
4172 4173 Arguments: 4174 buffptr point after the #command 4175 mode open mode 4176 fptr points to the FILE variable 4177 4178 Returns: PR_OK or PR_ABEND 4179 */ 4180 4181 static int 4182 open_file(uint8_t *buffptr, const char *mode, FILE **fptr) 4183 { 4184 char *endf; 4185 char *filename = (char *)buffptr; 4186 while (isspace(*filename)) filename++; 4187 endf = filename + strlen8(filename); 4188 while (endf > filename && isspace(endf[-1])) endf--; 4189 4190 if (endf == filename) 4191 { 4192 fprintf(outfile, "** File name expected after #save\n"); 4193 return PR_ABEND; 4194 } 4195 4196 *endf = 0; 4197 *fptr = fopen((const char *)filename, mode); 4198 if (*fptr == NULL) 4199 { 4200 fprintf(outfile, "** Failed to open '%s'\n", filename); 4201 return PR_ABEND; 4202 } 4203 4204 return PR_OK; 4205 } 4206 4207 4208 4209 /************************************************* 4210 * Process command line * 4211 *************************************************/ 4212 4213 /* This function is called for lines beginning with # and a character that is 4214 not ! or whitespace, when encountered between tests, which means that there is 4215 no compiled pattern (compiled_code is NULL). The line is in buffer. 
4216 4217 Arguments: none 4218 4219 Returns: PR_OK continue processing next line 4220 PR_SKIP skip to a blank line 4221 PR_ABEND abort the pcre2test run 4222 */ 4223 4224 static int 4225 process_command(void) 4226 { 4227 FILE *f; 4228 PCRE2_SIZE serial_size; 4229 size_t i; 4230 int rc, cmd, cmdlen; 4231 uint16_t first_listed_newline; 4232 const char *cmdname; 4233 uint8_t *argptr, *serial; 4234 4235 if (restrict_for_perl_test) 4236 { 4237 fprintf(outfile, "** #-commands are not allowed after #perltest\n"); 4238 return PR_ABEND; 4239 } 4240 4241 cmd = CMD_UNKNOWN; 4242 cmdlen = 0; 4243 4244 for (i = 0; i < cmdlistcount; i++) 4245 { 4246 cmdname = cmdlist[i].name; 4247 cmdlen = strlen(cmdname); 4248 if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 && 4249 isspace(buffer[cmdlen+1])) 4250 { 4251 cmd = cmdlist[i].value; 4252 break; 4253 } 4254 } 4255 4256 argptr = buffer + cmdlen + 1; 4257 4258 switch(cmd) 4259 { 4260 case CMD_UNKNOWN: 4261 fprintf(outfile, "** Unknown command: %s", buffer); 4262 break; 4263 4264 case CMD_FORBID_UTF: 4265 forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; 4266 break; 4267 4268 case CMD_PERLTEST: 4269 restrict_for_perl_test = TRUE; 4270 break; 4271 4272 /* Set default pattern modifiers */ 4273 4274 case CMD_PATTERN: 4275 (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL); 4276 if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0) 4277 def_patctl.jit = 7; 4278 break; 4279 4280 /* Set default subject modifiers */ 4281 4282 case CMD_SUBJECT: 4283 (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl); 4284 break; 4285 4286 /* Check the default newline, and if not one of those listed, set up the 4287 first one to be forced. An empty list unsets. 
*/ 4288 4289 case CMD_NEWLINE_DEFAULT: 4290 local_newline_default = 0; /* Unset */ 4291 first_listed_newline = 0; 4292 for (;;) 4293 { 4294 while (isspace(*argptr)) argptr++; 4295 if (*argptr == 0) break; 4296 for (i = 1; i < sizeof(newlines)/sizeof(char *); i++) 4297 { 4298 size_t nlen = strlen(newlines[i]); 4299 if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 && 4300 isspace(argptr[nlen])) 4301 { 4302 if (i == NEWLINE_DEFAULT) return PR_OK; /* Default is valid */ 4303 if (first_listed_newline == 0) first_listed_newline = i; 4304 } 4305 } 4306 while (*argptr != 0 && !isspace(*argptr)) argptr++; 4307 } 4308 local_newline_default = first_listed_newline; 4309 break; 4310 4311 /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect 4312 the compiled pattern (e.g. to give information) are permitted. The default 4313 pattern modifiers are ignored. */ 4314 4315 case CMD_POP: 4316 case CMD_POPCOPY: 4317 if (patstacknext <= 0) 4318 { 4319 fprintf(outfile, "** Can't pop off an empty stack\n"); 4320 return PR_SKIP; 4321 } 4322 memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */ 4323 if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL)) 4324 return PR_SKIP; 4325 4326 if (cmd == CMD_POP) 4327 { 4328 SET(compiled_code, patstack[--patstacknext]); 4329 } 4330 else 4331 { 4332 PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]); 4333 } 4334 4335 if (pat_patctl.jit != 0) 4336 { 4337 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); 4338 } 4339 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info(); 4340 if ((pat_patctl.control & CTL_ANYINFO) != 0) 4341 { 4342 rc = show_pattern_info(); 4343 if (rc != PR_OK) return rc; 4344 } 4345 break; 4346 4347 /* Save the stack of compiled patterns to a file, then empty the stack. 
*/ 4348 4349 case CMD_SAVE: 4350 if (patstacknext <= 0) 4351 { 4352 fprintf(outfile, "** No stacked patterns to save\n"); 4353 return PR_OK; 4354 } 4355 4356 rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f); 4357 if (rc != PR_OK) return rc; 4358 4359 PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size, 4360 general_context); 4361 if (rc < 0) 4362 { 4363 serial_error(rc, "Serialization"); 4364 break; 4365 } 4366 4367 /* Write the length at the start of the file to make it straightforward to 4368 get the right memory when re-loading. This saves having to read the file size 4369 in different operating systems. To allow for different endianness (even 4370 though reloading with the opposite endianness does not work), write the 4371 length byte-by-byte. */ 4372 4373 for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f); 4374 if (fwrite(serial, 1, serial_size, f) != serial_size) 4375 { 4376 fprintf(outfile, "** Wrong return from fwrite()\n"); 4377 return PR_ABEND; 4378 } 4379 4380 fclose(f); 4381 PCRE2_SERIALIZE_FREE(serial); 4382 while(patstacknext > 0) 4383 { 4384 SET(compiled_code, patstack[--patstacknext]); 4385 SUB1(pcre2_code_free, compiled_code); 4386 } 4387 SET(compiled_code, NULL); 4388 break; 4389 4390 /* Load a set of compiled patterns from a file onto the stack */ 4391 4392 case CMD_LOAD: 4393 rc = open_file(argptr+1, BINARY_INPUT_MODE, &f); 4394 if (rc != PR_OK) return rc; 4395 4396 serial_size = 0; 4397 for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8); 4398 4399 serial = malloc(serial_size); 4400 if (serial == NULL) 4401 { 4402 fprintf(outfile, "** Failed to get memory (size %lu) for #load\n", 4403 (unsigned long int)serial_size); 4404 return PR_ABEND; 4405 } 4406 4407 if (fread(serial, 1, serial_size, f) != serial_size) 4408 { 4409 fprintf(outfile, "** Wrong return from fread()\n"); 4410 return PR_ABEND; 4411 } 4412 fclose(f); 4413 4414 PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial); 4415 if (rc < 0) serial_error(rc, 
"Get number of codes"); else 4416 { 4417 if (rc + patstacknext > PATSTACKSIZE) 4418 { 4419 fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n", 4420 rc, (rc == 1)? "" : "s"); 4421 rc = PATSTACKSIZE - patstacknext; 4422 fprintf(outfile, "** Decoding %d pattern%s\n", rc, 4423 (rc == 1)? "" : "s"); 4424 } 4425 PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial, 4426 general_context); 4427 if (rc < 0) serial_error(rc, "Deserialization"); 4428 else patstacknext += rc; 4429 } 4430 4431 free(serial); 4432 break; 4433 } 4434 4435 return PR_OK; 4436 } 4437 4438 4439 4440 /************************************************* 4441 * Process pattern line * 4442 *************************************************/ 4443 4444 /* This function is called when the input buffer contains the start of a 4445 pattern. The first character is known to be a valid delimiter. The pattern is 4446 read, modifiers are interpreted, and a suitable local context is set up for 4447 this test. The pattern is then compiled. 4448 4449 Arguments: none 4450 4451 Returns: PR_OK continue processing next line 4452 PR_SKIP skip to a blank line 4453 PR_ABEND abort the pcre2test run 4454 */ 4455 4456 static int 4457 process_pattern(void) 4458 { 4459 BOOL utf; 4460 uint32_t k; 4461 uint8_t *p = buffer; 4462 const uint8_t *use_tables; 4463 unsigned int delimiter = *p++; 4464 int errorcode; 4465 void *use_pat_context; 4466 PCRE2_SIZE patlen; 4467 PCRE2_SIZE erroroffset; 4468 4469 /* Initialize the context and pattern/data controls for this test from the 4470 defaults. */ 4471 4472 PATCTXCPY(pat_context, default_pat_context); 4473 memcpy(&pat_patctl, &def_patctl, sizeof(patctl)); 4474 4475 /* Find the end of the pattern, reading more lines if necessary. 
*/ 4476 4477 for(;;) 4478 { 4479 while (*p != 0) 4480 { 4481 if (*p == '\\' && p[1] != 0) p++; 4482 else if (*p == delimiter) break; 4483 p++; 4484 } 4485 if (*p != 0) break; 4486 if ((p = extend_inputline(infile, p, " > ")) == NULL) 4487 { 4488 fprintf(outfile, "** Unexpected EOF\n"); 4489 return PR_ABEND; 4490 } 4491 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p); 4492 } 4493 4494 /* If the first character after the delimiter is backslash, make the pattern 4495 end with backslash. This is purely to provide a way of testing for the error 4496 message when a pattern ends with backslash. */ 4497 4498 if (p[1] == '\\') *p++ = '\\'; 4499 4500 /* Terminate the pattern at the delimiter, and compute the length. */ 4501 4502 *p++ = 0; 4503 patlen = p - buffer - 2; 4504 4505 /* Look for modifiers and options after the final delimiter. */ 4506 4507 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP; 4508 utf = (pat_patctl.options & PCRE2_UTF) != 0; 4509 4510 /* Check for mutually exclusive modifiers. At present, these are all in the 4511 first control word. */ 4512 4513 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++) 4514 { 4515 uint32_t c = pat_patctl.control & exclusive_pat_controls[k]; 4516 if (c != 0 && c != (c & (~c+1))) 4517 { 4518 show_controls(c, 0, "** Not allowed together:"); 4519 fprintf(outfile, "\n"); 4520 return PR_SKIP; 4521 } 4522 } 4523 4524 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was 4525 specified. */ 4526 4527 if (pat_patctl.jit == 0 && 4528 (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0) 4529 pat_patctl.jit = 7; 4530 4531 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting 4532 in callouts. Convert from hex if requested (literal strings in quotes may be 4533 present within the hexadecimal pairs). The result must necessarily be fewer 4534 characters so will always fit in pbuffer8. 
*/ 4535 4536 if ((pat_patctl.control & CTL_HEXPAT) != 0) 4537 { 4538 uint8_t *pp, *pt; 4539 uint32_t c, d; 4540 4541 pt = pbuffer8; 4542 for (pp = buffer + 1; *pp != 0; pp++) 4543 { 4544 if (isspace(*pp)) continue; 4545 c = *pp++; 4546 4547 /* Handle a literal substring */ 4548 4549 if (c == '\'' || c == '"') 4550 { 4551 for (;; pp++) 4552 { 4553 d = *pp; 4554 if (d == 0) 4555 { 4556 fprintf(outfile, "** Missing closing quote in hex pattern\n"); 4557 return PR_SKIP; 4558 } 4559 if (d == c) break; 4560 *pt++ = d; 4561 } 4562 } 4563 4564 /* Expect a hex pair */ 4565 4566 else 4567 { 4568 if (!isxdigit(c)) 4569 { 4570 fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: " 4571 "quote missing?\n", c); 4572 return PR_SKIP; 4573 } 4574 if (*pp == 0) 4575 { 4576 fprintf(outfile, "** Odd number of digits in hex pattern\n"); 4577 return PR_SKIP; 4578 } 4579 d = *pp; 4580 if (!isxdigit(d)) 4581 { 4582 fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: " 4583 "quote missing?\n", d); 4584 return PR_SKIP; 4585 } 4586 c = toupper(c); 4587 d = toupper(d); 4588 *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) + 4589 (isdigit(d)? (d - '0') : (d - 'A' + 10)); 4590 } 4591 } 4592 *pt = 0; 4593 patlen = pt - pbuffer8; 4594 } 4595 4596 /* If not a hex string, process for repetition expansion if requested. */ 4597 4598 else if ((pat_patctl.control & CTL_EXPAND) != 0) 4599 { 4600 uint8_t *pp, *pt; 4601 4602 pt = pbuffer8; 4603 for (pp = buffer + 1; *pp != 0; pp++) 4604 { 4605 uint8_t *pc = pp; 4606 uint32_t count = 1; 4607 size_t length = 1; 4608 4609 /* Check for replication syntax; if not found, the defaults just set will 4610 prevail and one character will be copied. 
*/ 4611 4612 if (pp[0] == '\\' && pp[1] == '[') 4613 { 4614 uint8_t *pe; 4615 for (pe = pp + 2; *pe != 0; pe++) 4616 { 4617 if (pe[0] == ']' && pe[1] == '{') 4618 { 4619 uint32_t clen = pe - pc - 2; 4620 uint32_t i = 0; 4621 unsigned long uli; 4622 char *endptr; 4623 4624 pe += 2; 4625 uli = strtoul((const char *)pe, &endptr, 10); 4626 if (U32OVERFLOW(uli)) 4627 { 4628 fprintf(outfile, "** Pattern repeat count too large\n"); 4629 return PR_SKIP; 4630 } 4631 4632 i = (uint32_t)uli; 4633 pe = (uint8_t *)endptr; 4634 if (*pe == '}') 4635 { 4636 if (i == 0) 4637 { 4638 fprintf(outfile, "** Zero repeat not allowed\n"); 4639 return PR_SKIP; 4640 } 4641 pc += 2; 4642 count = i; 4643 length = clen; 4644 pp = pe; 4645 break; 4646 } 4647 } 4648 } 4649 } 4650 4651 /* Add to output. If the buffer is too small expand it. The function for 4652 expanding buffers always keeps buffer and pbuffer8 in step as far as their 4653 size goes. */ 4654 4655 while (pt + count * length > pbuffer8 + pbuffer8_size) 4656 { 4657 size_t pc_offset = pc - buffer; 4658 size_t pp_offset = pp - buffer; 4659 size_t pt_offset = pt - pbuffer8; 4660 expand_input_buffers(); 4661 pc = buffer + pc_offset; 4662 pp = buffer + pp_offset; 4663 pt = pbuffer8 + pt_offset; 4664 } 4665 4666 for (; count > 0; count--) 4667 { 4668 memcpy(pt, pc, length); 4669 pt += length; 4670 } 4671 } 4672 4673 *pt = 0; 4674 patlen = pt - pbuffer8; 4675 4676 if ((pat_patctl.control & CTL_INFO) != 0) 4677 fprintf(outfile, "Expanded: %s\n", pbuffer8); 4678 } 4679 4680 /* Neither hex nor expanded, just copy the input verbatim. 
*/ 4681 4682 else 4683 { 4684 strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1); 4685 } 4686 4687 /* Sort out character tables */ 4688 4689 if (pat_patctl.locale[0] != 0) 4690 { 4691 if (pat_patctl.tables_id != 0) 4692 { 4693 fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n"); 4694 return PR_SKIP; 4695 } 4696 if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL) 4697 { 4698 fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale); 4699 return PR_SKIP; 4700 } 4701 if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0) 4702 { 4703 strcpy((char *)locale_name, (char *)pat_patctl.locale); 4704 if (locale_tables != NULL) free((void *)locale_tables); 4705 PCRE2_MAKETABLES(locale_tables); 4706 } 4707 use_tables = locale_tables; 4708 } 4709 4710 else switch (pat_patctl.tables_id) 4711 { 4712 case 0: use_tables = NULL; break; 4713 case 1: use_tables = tables1; break; 4714 case 2: use_tables = tables2; break; 4715 default: 4716 fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n"); 4717 return PR_SKIP; 4718 } 4719 4720 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables); 4721 4722 /* Set up for the stackguard test. */ 4723 4724 if (pat_patctl.stackguard_test != 0) 4725 { 4726 PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL); 4727 } 4728 4729 /* Handle compiling via the POSIX interface, which doesn't support the 4730 timing, showing, or debugging options, nor the ability to pass over 4731 local character tables. Neither does it have 16-bit or 32-bit support. 
*/ 4732 4733 if ((pat_patctl.control & CTL_POSIX) != 0) 4734 { 4735 #ifdef SUPPORT_PCRE2_8 4736 int rc; 4737 int cflags = 0; 4738 const char *msg = "** Ignored with POSIX interface:"; 4739 #endif 4740 4741 if (test_mode != 8) 4742 { 4743 fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n"); 4744 return PR_SKIP; 4745 } 4746 4747 #ifdef SUPPORT_PCRE2_8 4748 /* Check for features that the POSIX interface does not support. */ 4749 4750 if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale"); 4751 if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace"); 4752 if (pat_patctl.tables_id != 0) prmsg(&msg, "tables"); 4753 if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard"); 4754 if (timeit > 0) prmsg(&msg, "timing"); 4755 if (pat_patctl.jit != 0) prmsg(&msg, "JIT"); 4756 4757 if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0) 4758 { 4759 show_compile_options( 4760 pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, ""); 4761 msg = ""; 4762 } 4763 if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 || 4764 (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0) 4765 { 4766 show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, 4767 pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg); 4768 msg = ""; 4769 } 4770 4771 if (local_newline_default != 0) prmsg(&msg, "#newline_default"); 4772 4773 if (msg[0] == 0) fprintf(outfile, "\n"); 4774 4775 /* Translate PCRE2 options to POSIX options and then compile. 
*/ 4776 4777 if (utf) cflags |= REG_UTF; 4778 if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB; 4779 if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP; 4780 if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE; 4781 if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE; 4782 if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL; 4783 if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY; 4784 4785 rc = regcomp(&preg, (char *)pbuffer8, cflags); 4786 4787 /* Compiling failed */ 4788 4789 if (rc != 0) 4790 { 4791 size_t bsize, usize; 4792 int psize; 4793 4794 preg.re_pcre2_code = NULL; /* In case something was left in there */ 4795 preg.re_match_data = NULL; 4796 4797 bsize = (pat_patctl.regerror_buffsize != 0)? 4798 pat_patctl.regerror_buffsize : pbuffer8_size; 4799 if (bsize + 8 < pbuffer8_size) 4800 memcpy(pbuffer8 + bsize, "DEADBEEF", 8); 4801 usize = regerror(rc, &preg, (char *)pbuffer8, bsize); 4802 4803 /* Inside regerror(), snprintf() is used. If the buffer is too small, some 4804 versions of snprintf() put a zero byte at the end, but others do not. 4805 Therefore, we print a maximum of one less than the size of the buffer. */ 4806 4807 psize = (int)bsize - 1; 4808 fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8); 4809 if (usize > bsize) 4810 { 4811 fprintf(outfile, "** regerror() message truncated\n"); 4812 if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0) 4813 fprintf(outfile, "** regerror() buffer overflow\n"); 4814 } 4815 return PR_SKIP; 4816 } 4817 4818 /* Compiling succeeded. Check that the values in the preg block are sensible. 4819 It can happen that pcre2test is accidentally linked with a different POSIX 4820 library which succeeds, but of course puts different things into preg. 
In 4821 this situation, calling regfree() may cause a segfault (or invalid free() in 4822 valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the 4823 calling of regfree() on exit. */ 4824 4825 if (preg.re_pcre2_code == NULL || 4826 ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER || 4827 ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub || 4828 preg.re_match_data == NULL || 4829 preg.re_cflags != cflags) 4830 { 4831 fprintf(outfile, 4832 "** The regcomp() function returned zero (success), but the values set\n" 4833 "** in the preg block are not valid for PCRE2. Check that pcre2test is\n" 4834 "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n" 4835 "** some other POSIX regex library.\n**\n"); 4836 preg.re_pcre2_code = NULL; 4837 return PR_ABEND; 4838 } 4839 4840 return PR_OK; 4841 #endif /* SUPPORT_PCRE2_8 */ 4842 } 4843 4844 /* Handle compiling via the native interface. Controls that act later are 4845 ignored with "push". Replacements are locked out. 
*/ 4846 4847 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY)) != 0) 4848 { 4849 if (pat_patctl.replacement[0] != 0) 4850 { 4851 fprintf(outfile, "** Replacement text is not supported with 'push'.\n"); 4852 return PR_OK; 4853 } 4854 if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 || 4855 (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0) 4856 { 4857 show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS, 4858 pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2, 4859 "** Ignored when compiled pattern is stacked with 'push':"); 4860 fprintf(outfile, "\n"); 4861 } 4862 if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 || 4863 (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0) 4864 { 4865 show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS, 4866 pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2, 4867 "** Applies only to compile when pattern is stacked with 'push':"); 4868 fprintf(outfile, "\n"); 4869 } 4870 } 4871 4872 /* Convert the input in non-8-bit modes. */ 4873 4874 errorcode = 0; 4875 4876 #ifdef SUPPORT_PCRE2_16 4877 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen); 4878 #endif 4879 4880 #ifdef SUPPORT_PCRE2_32 4881 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen); 4882 #endif 4883 4884 switch(errorcode) 4885 { 4886 case -1: 4887 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be " 4888 "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32); 4889 return PR_SKIP; 4890 4891 case -2: 4892 fprintf(outfile, "** Failed: character value greater than 0x10ffff " 4893 "cannot be converted to UTF\n"); 4894 return PR_SKIP; 4895 4896 case -3: 4897 fprintf(outfile, "** Failed: character value greater than 0xffff " 4898 "cannot be converted to 16-bit in non-UTF mode\n"); 4899 return PR_SKIP; 4900 4901 default: 4902 break; 4903 } 4904 4905 /* The pattern is now in pbuffer[8|16|32], with the length in patlen. 
By 4906 default, however, we pass a zero-terminated pattern. The length is passed only 4907 if we had a hex pattern. */ 4908 4909 if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED; 4910 4911 /* If #newline_default has been used and the library was not compiled with an 4912 appropriate default newline setting, local_newline_default will be non-zero. We 4913 use this if there is no explicit newline modifier. */ 4914 4915 if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0) 4916 { 4917 SETFLD(pat_context, newline_convention, local_newline_default); 4918 } 4919 4920 /* The nullcontext modifier is used to test calling pcre2_compile() with a NULL 4921 context. */ 4922 4923 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)? 4924 NULL : PTR(pat_context); 4925 4926 /* Compile many times when timing. */ 4927 4928 if (timeit > 0) 4929 { 4930 register int i; 4931 clock_t time_taken = 0; 4932 for (i = 0; i < timeit; i++) 4933 { 4934 clock_t start_time = clock(); 4935 PCRE2_COMPILE(compiled_code, pbuffer, patlen, 4936 pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context); 4937 time_taken += clock() - start_time; 4938 if (TEST(compiled_code, !=, NULL)) 4939 { SUB1(pcre2_code_free, compiled_code); } 4940 } 4941 total_compile_time += time_taken; 4942 fprintf(outfile, "Compile time %.4f milliseconds\n", 4943 (((double)time_taken * 1000.0) / (double)timeit) / 4944 (double)CLOCKS_PER_SEC); 4945 } 4946 4947 /* A final compile that is used "for real". */ 4948 4949 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf, 4950 &errorcode, &erroroffset, use_pat_context); 4951 4952 /* Compilation failed; go back for another re, skipping to blank line 4953 if non-interactive. 
*/ 4954 4955 if (TEST(compiled_code, ==, NULL)) 4956 { 4957 int len; 4958 fprintf(outfile, "Failed: error %d at offset %d: ", errorcode, 4959 (int)erroroffset); 4960 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer); 4961 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile); 4962 fprintf(outfile, "\n"); 4963 return PR_SKIP; 4964 } 4965 4966 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are 4967 locked out at compile time, but we must also check for occurrences of \P, \p, 4968 and \X, which are only supported when Unicode is supported. */ 4969 4970 if (forbid_utf != 0) 4971 { 4972 if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0) 4973 { 4974 fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the " 4975 "#forbid_utf command\n"); 4976 return PR_SKIP; 4977 } 4978 } 4979 4980 /* Remember the maximum lookbehind, for partial matching. */ 4981 4982 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0) 4983 return PR_ABEND; 4984 4985 /* Call the JIT compiler if requested. When timing, we must free and recompile 4986 the pattern each time because that is the only way to free the JIT compiled 4987 code. We know that compilation will always succeed. 
*/ 4988 4989 if (pat_patctl.jit != 0) 4990 { 4991 if (timeit > 0) 4992 { 4993 register int i; 4994 clock_t time_taken = 0; 4995 for (i = 0; i < timeit; i++) 4996 { 4997 clock_t start_time; 4998 SUB1(pcre2_code_free, compiled_code); 4999 PCRE2_COMPILE(compiled_code, pbuffer, patlen, 5000 pat_patctl.options|forbid_utf, &errorcode, &erroroffset, 5001 use_pat_context); 5002 start_time = clock(); 5003 PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit); 5004 time_taken += clock() - start_time; 5005 } 5006 total_jit_compile_time += time_taken; 5007 fprintf(outfile, "JIT compile %.4f milliseconds\n", 5008 (((double)time_taken * 1000.0) / (double)timeit) / 5009 (double)CLOCKS_PER_SEC); 5010 } 5011 else 5012 { 5013 PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit); 5014 } 5015 } 5016 5017 /* If an explicit newline modifier was given, set the information flag in the 5018 pattern so that it is preserved over push/pop. */ 5019 5020 if ((pat_patctl.control & CTL_NL_SET) != 0) 5021 { 5022 SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET); 5023 } 5024 5025 /* Output code size and other information if requested. */ 5026 5027 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info(); 5028 if ((pat_patctl.control & CTL_ANYINFO) != 0) 5029 { 5030 int rc = show_pattern_info(); 5031 if (rc != PR_OK) return rc; 5032 } 5033 5034 /* The "push" control requests that the compiled pattern be remembered on a 5035 stack. This is mainly for testing the serialization functionality. */ 5036 5037 if ((pat_patctl.control & CTL_PUSH) != 0) 5038 { 5039 if (patstacknext >= PATSTACKSIZE) 5040 { 5041 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); 5042 return PR_ABEND; 5043 } 5044 patstack[patstacknext++] = PTR(compiled_code); 5045 SET(compiled_code, NULL); 5046 } 5047 5048 /* The "pushcopy" control is similar, but pushes a copy of the pattern. This 5049 tests the pcre2_code_copy() function. 
*/ 5050 5051 if ((pat_patctl.control & CTL_PUSHCOPY) != 0) 5052 { 5053 if (patstacknext >= PATSTACKSIZE) 5054 { 5055 fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); 5056 return PR_ABEND; 5057 } 5058 PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code); 5059 } 5060 5061 return PR_OK; 5062 } 5063 5064 5065 5066 /************************************************* 5067 * Check match or recursion limit * 5068 *************************************************/ 5069 5070 static int 5071 check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg) 5072 { 5073 int capcount; 5074 uint32_t min = 0; 5075 uint32_t mid = 64; 5076 uint32_t max = UINT32_MAX; 5077 5078 PCRE2_SET_MATCH_LIMIT(dat_context, max); 5079 PCRE2_SET_RECURSION_LIMIT(dat_context, max); 5080 5081 for (;;) 5082 { 5083 if (errnumber == PCRE2_ERROR_MATCHLIMIT) 5084 { 5085 PCRE2_SET_MATCH_LIMIT(dat_context, mid); 5086 } 5087 else 5088 { 5089 PCRE2_SET_RECURSION_LIMIT(dat_context, mid); 5090 } 5091 5092 if ((pat_patctl.control & CTL_JITFAST) != 0) 5093 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, 5094 dat_datctl.options, match_data, PTR(dat_context)); 5095 else 5096 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, 5097 dat_datctl.options, match_data, PTR(dat_context)); 5098 5099 if (capcount == errnumber) 5100 { 5101 min = mid; 5102 mid = (mid == max - 1)? max : (max != UINT32_MAX)? 
(min + max)/2 : mid*2; 5103 } 5104 else if (capcount >= 0 || 5105 capcount == PCRE2_ERROR_NOMATCH || 5106 capcount == PCRE2_ERROR_PARTIAL) 5107 { 5108 if (mid == min + 1) 5109 { 5110 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid); 5111 break; 5112 } 5113 max = mid; 5114 mid = (min + mid)/2; 5115 } 5116 else break; /* Some other error */ 5117 } 5118 5119 return capcount; 5120 } 5121 5122 5123 5124 /************************************************* 5125 * Callout function * 5126 *************************************************/ 5127 5128 /* Called from a PCRE2 library as a result of the (?C) item. We print out where 5129 we are in the match. Yield zero unless more callouts than the fail count, or 5130 the callout data is not zero. The only differences in the callout block for 5131 different code unit widths are that the pointers to the subject, the most 5132 recent MARK, and a callout argument string point to strings of the appropriate 5133 width. Casts can be used to deal with this. 5134 5135 Argument: a pointer to a callout block 5136 Return: 5137 */ 5138 5139 static int 5140 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr) 5141 { 5142 uint32_t i, pre_start, post_start, subject_length; 5143 PCRE2_SIZE current_position; 5144 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; 5145 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0; 5146 5147 /* This FILE is used for echoing the subject. This is done only once in simple 5148 cases. */ 5149 5150 FILE *f = (first_callout || callout_capture || cb->callout_string != NULL)? 5151 outfile : NULL; 5152 5153 /* For a callout with a string argument, show the string first because there 5154 isn't a tidy way to fit it in the rest of the data. 
*/ 5155 5156 if (cb->callout_string != NULL) 5157 { 5158 uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); 5159 fprintf(outfile, "Callout (%lu): %c", 5160 (unsigned long int)cb->callout_string_offset, delimiter); 5161 PCHARSV(cb->callout_string, 0, 5162 cb->callout_string_length, utf, outfile); 5163 for (i = 0; callout_start_delims[i] != 0; i++) 5164 if (delimiter == callout_start_delims[i]) 5165 { 5166 delimiter = callout_end_delims[i]; 5167 break; 5168 } 5169 fprintf(outfile, "%c", delimiter); 5170 if (!callout_capture) fprintf(outfile, "\n"); 5171 } 5172 5173 /* Show captured strings if required */ 5174 5175 if (callout_capture) 5176 { 5177 if (cb->callout_string == NULL) 5178 fprintf(outfile, "Callout %d:", cb->callout_number); 5179 fprintf(outfile, " last capture = %d\n", cb->capture_last); 5180 for (i = 0; i < cb->capture_top * 2; i += 2) 5181 { 5182 fprintf(outfile, "%2d: ", i/2); 5183 if (cb->offset_vector[i] == PCRE2_UNSET) 5184 fprintf(outfile, "<unset>"); 5185 else 5186 { 5187 PCHARSV(cb->subject, cb->offset_vector[i], 5188 cb->offset_vector[i+1] - cb->offset_vector[i], utf, f); 5189 } 5190 fprintf(outfile, "\n"); 5191 } 5192 } 5193 5194 /* Re-print the subject in canonical form (with escapes for non-printing 5195 characters), the first time, or if giving full details. On subsequent calls in 5196 the same match, we use PCHARS() just to find the printed lengths of the 5197 substrings. */ 5198 5199 if (f != NULL) fprintf(f, "--->"); 5200 5201 /* The subject before the match start. */ 5202 5203 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f); 5204 5205 /* If a lookbehind is involved, the current position may be earlier than the 5206 match start. If so, use the match start instead. */ 5207 5208 current_position = (cb->current_position >= cb->start_match)? 5209 cb->current_position : cb->start_match; 5210 5211 /* The subject between the match start and the current position. 
*/ 5212 5213 PCHARS(post_start, cb->subject, cb->start_match, 5214 current_position - cb->start_match, utf, f); 5215 5216 /* Print from the current position to the end. */ 5217 5218 PCHARSV(cb->subject, current_position, cb->subject_length - current_position, 5219 utf, f); 5220 5221 /* Calculate the total subject printed length (no print). */ 5222 5223 PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); 5224 5225 if (f != NULL) fprintf(f, "\n"); 5226 5227 /* For automatic callouts, show the pattern offset. Otherwise, for a numerical 5228 callout whose number has not already been shown with captured strings, show the 5229 number here. A callout with a string argument has been displayed above. */ 5230 5231 if (cb->callout_number == 255) 5232 { 5233 fprintf(outfile, "%+3d ", (int)cb->pattern_position); 5234 if (cb->pattern_position > 99) fprintf(outfile, "\n "); 5235 } 5236 else 5237 { 5238 if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " "); 5239 else fprintf(outfile, "%3d ", cb->callout_number); 5240 } 5241 5242 /* Now show position indicators */ 5243 5244 for (i = 0; i < pre_start; i++) fprintf(outfile, " "); 5245 fprintf(outfile, "^"); 5246 5247 if (post_start > 0) 5248 { 5249 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); 5250 fprintf(outfile, "^"); 5251 } 5252 5253 for (i = 0; i < subject_length - pre_start - post_start + 4; i++) 5254 fprintf(outfile, " "); 5255 5256 fprintf(outfile, "%.*s", 5257 (int)((cb->next_item_length == 0)? 
1 : cb->next_item_length), 5258 pbuffer8 + cb->pattern_position); 5259 5260 fprintf(outfile, "\n"); 5261 first_callout = FALSE; 5262 5263 if (cb->mark != last_callout_mark) 5264 { 5265 if (cb->mark == NULL) 5266 fprintf(outfile, "Latest Mark: <unset>\n"); 5267 else 5268 { 5269 fprintf(outfile, "Latest Mark: "); 5270 PCHARSV(cb->mark, 0, -1, utf, outfile); 5271 putc('\n', outfile); 5272 } 5273 last_callout_mark = cb->mark; 5274 } 5275 5276 if (callout_data_ptr != NULL) 5277 { 5278 int callout_data = *((int32_t *)callout_data_ptr); 5279 if (callout_data != 0) 5280 { 5281 fprintf(outfile, "Callout data = %d\n", callout_data); 5282 return callout_data; 5283 } 5284 } 5285 5286 return (cb->callout_number != dat_datctl.cfail[0])? 0 : 5287 (++callout_count >= dat_datctl.cfail[1])? 1 : 0; 5288 } 5289 5290 5291 5292 /************************************************* 5293 * Handle *MARK and copy/get tests * 5294 *************************************************/ 5295 5296 /* This function is called after complete and partial matches. It runs the 5297 tests for substring extraction. 
5298 5299 Arguments: 5300 utf TRUE for utf 5301 capcount return from pcre2_match() 5302 5303 Returns: nothing 5304 */ 5305 5306 static void 5307 copy_and_get(BOOL utf, int capcount) 5308 { 5309 int i; 5310 uint8_t *nptr; 5311 5312 /* Test copy strings by number */ 5313 5314 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++) 5315 { 5316 int rc; 5317 PCRE2_SIZE length, length2; 5318 uint32_t copybuffer[256]; 5319 uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]); 5320 length = sizeof(copybuffer)/code_unit_size; 5321 PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length); 5322 if (rc < 0) 5323 { 5324 fprintf(outfile, "Copy substring %d failed (%d): ", n, rc); 5325 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 5326 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 5327 fprintf(outfile, "\n"); 5328 } 5329 else 5330 { 5331 PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2); 5332 if (rc < 0) 5333 { 5334 fprintf(outfile, "Get substring %d length failed (%d): ", n, rc); 5335 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 5336 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 5337 fprintf(outfile, "\n"); 5338 } 5339 else if (length2 != length) 5340 { 5341 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n", 5342 (unsigned long int)length, (unsigned long int)length2); 5343 } 5344 fprintf(outfile, "%2dC ", n); 5345 PCHARSV(copybuffer, 0, length, utf, outfile); 5346 fprintf(outfile, " (%lu)\n", (unsigned long)length); 5347 } 5348 } 5349 5350 /* Test copy strings by name */ 5351 5352 nptr = dat_datctl.copy_names; 5353 for (;;) 5354 { 5355 int rc; 5356 int groupnumber; 5357 PCRE2_SIZE length, length2; 5358 uint32_t copybuffer[256]; 5359 int namelen = strlen((const char *)nptr); 5360 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 5361 PCRE2_SIZE cnl = namelen; 5362 #endif 5363 if (namelen == 0) break; 5364 5365 #ifdef SUPPORT_PCRE2_8 5366 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr); 5367 
#endif 5368 #ifdef SUPPORT_PCRE2_16 5369 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl); 5370 #endif 5371 #ifdef SUPPORT_PCRE2_32 5372 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl); 5373 #endif 5374 5375 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); 5376 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) 5377 fprintf(outfile, "Number not found for group '%s'\n", nptr); 5378 5379 length = sizeof(copybuffer)/code_unit_size; 5380 PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length); 5381 if (rc < 0) 5382 { 5383 fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc); 5384 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 5385 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 5386 fprintf(outfile, "\n"); 5387 } 5388 else 5389 { 5390 PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2); 5391 if (rc < 0) 5392 { 5393 fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc); 5394 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 5395 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 5396 fprintf(outfile, "\n"); 5397 } 5398 else if (length2 != length) 5399 { 5400 fprintf(outfile, "Mismatched substring lengths: %lu %lu\n", 5401 (unsigned long int)length, (unsigned long int)length2); 5402 } 5403 fprintf(outfile, " C "); 5404 PCHARSV(copybuffer, 0, length, utf, outfile); 5405 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr); 5406 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); 5407 else fprintf(outfile, " (non-unique)\n"); 5408 } 5409 nptr += namelen + 1; 5410 } 5411 5412 /* Test get strings by number */ 5413 5414 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++) 5415 { 5416 int rc; 5417 PCRE2_SIZE length; 5418 void *gotbuffer; 5419 uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]); 5420 PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length); 5421 if (rc < 0) 5422 { 5423 fprintf(outfile, "Get 
substring %d failed (%d): ", n, rc); 5424 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 5425 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 5426 fprintf(outfile, "\n"); 5427 } 5428 else 5429 { 5430 fprintf(outfile, "%2dG ", n); 5431 PCHARSV(gotbuffer, 0, length, utf, outfile); 5432 fprintf(outfile, " (%lu)\n", (unsigned long)length); 5433 PCRE2_SUBSTRING_FREE(gotbuffer); 5434 } 5435 } 5436 5437 /* Test get strings by name */ 5438 5439 nptr = dat_datctl.get_names; 5440 for (;;) 5441 { 5442 PCRE2_SIZE length; 5443 void *gotbuffer; 5444 int rc; 5445 int groupnumber; 5446 int namelen = strlen((const char *)nptr); 5447 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 5448 PCRE2_SIZE cnl = namelen; 5449 #endif 5450 if (namelen == 0) break; 5451 5452 #ifdef SUPPORT_PCRE2_8 5453 if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr); 5454 #endif 5455 #ifdef SUPPORT_PCRE2_16 5456 if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl); 5457 #endif 5458 #ifdef SUPPORT_PCRE2_32 5459 if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl); 5460 #endif 5461 5462 PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); 5463 if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) 5464 fprintf(outfile, "Number not found for group '%s'\n", nptr); 5465 5466 PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length); 5467 if (rc < 0) 5468 { 5469 fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc); 5470 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 5471 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 5472 fprintf(outfile, "\n"); 5473 } 5474 else 5475 { 5476 fprintf(outfile, " G "); 5477 PCHARSV(gotbuffer, 0, length, utf, outfile); 5478 fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr); 5479 if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); 5480 else fprintf(outfile, " (non-unique)\n"); 5481 PCRE2_SUBSTRING_FREE(gotbuffer); 5482 } 5483 nptr += namelen + 1; 5484 } 5485 
5486 /* Test getting the complete list of captured strings. */ 5487 5488 if ((dat_datctl.control & CTL_GETALL) != 0) 5489 { 5490 int rc; 5491 void **stringlist; 5492 PCRE2_SIZE *lengths; 5493 PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths); 5494 if (rc < 0) 5495 { 5496 fprintf(outfile, "get substring list failed (%d): ", rc); 5497 PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer); 5498 PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile); 5499 fprintf(outfile, "\n"); 5500 } 5501 else 5502 { 5503 for (i = 0; i < capcount; i++) 5504 { 5505 fprintf(outfile, "%2dL ", i); 5506 PCHARSV(stringlist[i], 0, lengths[i], utf, outfile); 5507 putc('\n', outfile); 5508 } 5509 if (stringlist[i] != NULL) 5510 fprintf(outfile, "string list not terminated by NULL\n"); 5511 PCRE2_SUBSTRING_LIST_FREE(stringlist); 5512 } 5513 } 5514 } 5515 5516 5517 5518 /************************************************* 5519 * Process a data line * 5520 *************************************************/ 5521 5522 /* The line is in buffer; it will not be empty. 5523 5524 Arguments: none 5525 5526 Returns: PR_OK continue processing next line 5527 PR_SKIP skip to a blank line 5528 PR_ABEND abort the pcre2test run 5529 */ 5530 5531 static int 5532 process_data(void) 5533 { 5534 PCRE2_SIZE len, ulen; 5535 uint32_t gmatched; 5536 uint32_t c, k; 5537 uint32_t g_notempty = 0; 5538 uint8_t *p, *pp, *start_rep; 5539 size_t needlen; 5540 void *use_dat_context; 5541 BOOL utf; 5542 5543 #ifdef SUPPORT_PCRE2_8 5544 uint8_t *q8 = NULL; 5545 #endif 5546 #ifdef SUPPORT_PCRE2_16 5547 uint16_t *q16 = NULL; 5548 #endif 5549 #ifdef SUPPORT_PCRE2_32 5550 uint32_t *q32 = NULL; 5551 #endif 5552 5553 /* Copy the default context and data control blocks to the active ones. Then 5554 copy from the pattern the controls that can be set in either the pattern or the 5555 data. This allows them to be overridden in the data line. 
We do not do this for 5556 options because those that are common apply separately to compiling and 5557 matching. */ 5558 5559 DATCTXCPY(dat_context, default_dat_context); 5560 memcpy(&dat_datctl, &def_datctl, sizeof(datctl)); 5561 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD); 5562 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD); 5563 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement); 5564 5565 /* Initialize for scanning the data line. */ 5566 5567 #ifdef SUPPORT_PCRE2_8 5568 utf = ((((pat_patctl.control & CTL_POSIX) != 0)? 5569 ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options : 5570 FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0; 5571 #else 5572 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; 5573 #endif 5574 5575 start_rep = NULL; 5576 len = strlen((const char *)buffer); 5577 while (len > 0 && isspace(buffer[len-1])) len--; 5578 buffer[len] = 0; 5579 p = buffer; 5580 while (isspace(*p)) p++; 5581 5582 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create 5583 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */ 5584 5585 if (utf) 5586 { 5587 uint8_t *q; 5588 uint32_t cc; 5589 int n = 1; 5590 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc); 5591 if (n <= 0) 5592 { 5593 fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input " 5594 "in UTF mode\n"); 5595 return PR_OK; 5596 } 5597 } 5598 5599 #ifdef SUPPORT_VALGRIND 5600 /* Mark the dbuffer as addressable but undefined again. */ 5601 if (dbuffer != NULL) 5602 { 5603 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size); 5604 } 5605 #endif 5606 5607 /* Allocate a buffer to hold the data line; len+1 is an upper bound on 5608 the number of code units that will be needed (though the buffer may have to be 5609 extended if replication is involved). 
*/ 5610 5611 needlen = (size_t)((len+1) * code_unit_size); 5612 if (dbuffer == NULL || needlen >= dbuffer_size) 5613 { 5614 while (needlen >= dbuffer_size) dbuffer_size *= 2; 5615 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size); 5616 if (dbuffer == NULL) 5617 { 5618 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size); 5619 exit(1); 5620 } 5621 } 5622 SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */ 5623 5624 /* Scan the data line, interpreting data escapes, and put the result into a 5625 buffer of the appropriate width. In UTF mode, input can be UTF-8. */ 5626 5627 while ((c = *p++) != 0) 5628 { 5629 int32_t i = 0; 5630 size_t replen; 5631 5632 /* ] may mark the end of a replicated sequence */ 5633 5634 if (c == ']' && start_rep != NULL) 5635 { 5636 long li; 5637 char *endptr; 5638 size_t qoffset = CAST8VAR(q) - dbuffer; 5639 size_t rep_offset = start_rep - dbuffer; 5640 5641 if (*p++ != '{') 5642 { 5643 fprintf(outfile, "** Expected '{' after \\[....]\n"); 5644 return PR_OK; 5645 } 5646 5647 li = strtol((const char *)p, &endptr, 10); 5648 if (S32OVERFLOW(li)) 5649 { 5650 fprintf(outfile, "** Repeat count too large\n"); 5651 return PR_OK; 5652 } 5653 5654 p = (uint8_t *)endptr; 5655 if (*p++ != '}') 5656 { 5657 fprintf(outfile, "** Expected '}' after \\[...]{...\n"); 5658 return PR_OK; 5659 } 5660 5661 i = (int32_t)li; 5662 if (i-- == 0) 5663 { 5664 fprintf(outfile, "** Zero repeat not allowed\n"); 5665 return PR_OK; 5666 } 5667 5668 replen = CAST8VAR(q) - start_rep; 5669 needlen += replen * i; 5670 5671 if (needlen >= dbuffer_size) 5672 { 5673 while (needlen >= dbuffer_size) dbuffer_size *= 2; 5674 dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size); 5675 if (dbuffer == NULL) 5676 { 5677 fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size); 5678 exit(1); 5679 } 5680 SETCASTPTR(q, dbuffer + qoffset); 5681 start_rep = dbuffer + rep_offset; 5682 } 5683 5684 while (i-- > 0) 5685 { 5686 memcpy(CAST8VAR(q), 
start_rep, replen); 5687 SETPLUS(q, replen/code_unit_size); 5688 } 5689 5690 start_rep = NULL; 5691 continue; 5692 } 5693 5694 /* Handle a non-escaped character */ 5695 5696 if (c != '\\') 5697 { 5698 if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); } 5699 } 5700 5701 /* Handle backslash escapes */ 5702 5703 else switch ((c = *p++)) 5704 { 5705 case '\\': break; 5706 case 'a': c = CHAR_BEL; break; 5707 case 'b': c = '\b'; break; 5708 case 'e': c = CHAR_ESC; break; 5709 case 'f': c = '\f'; break; 5710 case 'n': c = '\n'; break; 5711 case 'r': c = '\r'; break; 5712 case 't': c = '\t'; break; 5713 case 'v': c = '\v'; break; 5714 5715 case '0': case '1': case '2': case '3': 5716 case '4': case '5': case '6': case '7': 5717 c -= '0'; 5718 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') 5719 c = c * 8 + *p++ - '0'; 5720 break; 5721 5722 case 'o': 5723 if (*p == '{') 5724 { 5725 uint8_t *pt = p; 5726 c = 0; 5727 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++) 5728 { 5729 if (++i == 12) 5730 fprintf(outfile, "** Too many octal digits in \\o{...} item; " 5731 "using only the first twelve.\n"); 5732 else c = c * 8 + *pt - '0'; 5733 } 5734 if (*pt == '}') p = pt + 1; 5735 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n"); 5736 } 5737 break; 5738 5739 case 'x': 5740 if (*p == '{') 5741 { 5742 uint8_t *pt = p; 5743 c = 0; 5744 5745 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails 5746 when isxdigit() is a macro that refers to its argument more than 5747 once. This is banned by the C Standard, but apparently happens in at 5748 least one MacOS environment. */ 5749 5750 for (pt++; isxdigit(*pt); pt++) 5751 { 5752 if (++i == 9) 5753 fprintf(outfile, "** Too many hex digits in \\x{...} item; " 5754 "using only the first eight.\n"); 5755 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? 
'0' : 'a' - 10); 5756 } 5757 if (*pt == '}') 5758 { 5759 p = pt + 1; 5760 break; 5761 } 5762 /* Not correct form for \x{...}; fall through */ 5763 } 5764 5765 /* \x without {} always defines just one byte in 8-bit mode. This 5766 allows UTF-8 characters to be constructed byte by byte, and also allows 5767 invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode. 5768 Otherwise, pass it down as data. */ 5769 5770 c = 0; 5771 while (i++ < 2 && isxdigit(*p)) 5772 { 5773 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10); 5774 p++; 5775 } 5776 #if defined SUPPORT_PCRE2_8 5777 if (utf && (test_mode == PCRE8_MODE)) 5778 { 5779 *q8++ = c; 5780 continue; 5781 } 5782 #endif 5783 break; 5784 5785 case 0: /* \ followed by EOF allows for an empty line */ 5786 p--; 5787 continue; 5788 5789 case '=': /* \= terminates the data, starts modifiers */ 5790 goto ENDSTRING; 5791 5792 case '[': /* \[ introduces a replicated character sequence */ 5793 if (start_rep != NULL) 5794 { 5795 fprintf(outfile, "** Nested replication is not supported\n"); 5796 return PR_OK; 5797 } 5798 start_rep = CAST8VAR(q); 5799 continue; 5800 5801 default: 5802 if (isalnum(c)) 5803 { 5804 fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c); 5805 return PR_OK; 5806 } 5807 } 5808 5809 /* We now have a character value in c that may be greater than 255. 5810 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater 5811 than 127 in UTF mode must have come from \x{...} or octal constructs 5812 because values from \x.. get this far only in non-UTF mode. 
*/ 5813 5814 #ifdef SUPPORT_PCRE2_8 5815 if (test_mode == PCRE8_MODE) 5816 { 5817 if (utf) 5818 { 5819 if (c > 0x7fffffff) 5820 { 5821 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff " 5822 "and so cannot be converted to UTF-8\n", c); 5823 return PR_OK; 5824 } 5825 q8 += ord2utf8(c, q8); 5826 } 5827 else 5828 { 5829 if (c > 0xffu) 5830 { 5831 fprintf(outfile, "** Character \\x{%x} is greater than 255 " 5832 "and UTF-8 mode is not enabled.\n", c); 5833 fprintf(outfile, "** Truncation will probably give the wrong " 5834 "result.\n"); 5835 } 5836 *q8++ = c; 5837 } 5838 } 5839 #endif 5840 #ifdef SUPPORT_PCRE2_16 5841 if (test_mode == PCRE16_MODE) 5842 { 5843 if (utf) 5844 { 5845 if (c > 0x10ffffu) 5846 { 5847 fprintf(outfile, "** Failed: character \\x{%x} is greater than " 5848 "0x10ffff and so cannot be converted to UTF-16\n", c); 5849 return PR_OK; 5850 } 5851 else if (c >= 0x10000u) 5852 { 5853 c-= 0x10000u; 5854 *q16++ = 0xD800 | (c >> 10); 5855 *q16++ = 0xDC00 | (c & 0x3ff); 5856 } 5857 else 5858 *q16++ = c; 5859 } 5860 else 5861 { 5862 if (c > 0xffffu) 5863 { 5864 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff " 5865 "and UTF-16 mode is not enabled.\n", c); 5866 fprintf(outfile, "** Truncation will probably give the wrong " 5867 "result.\n"); 5868 } 5869 5870 *q16++ = c; 5871 } 5872 } 5873 #endif 5874 #ifdef SUPPORT_PCRE2_32 5875 if (test_mode == PCRE32_MODE) 5876 { 5877 *q32++ = c; 5878 } 5879 #endif 5880 } 5881 5882 ENDSTRING: 5883 SET(*q, 0); 5884 len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */ 5885 ulen = len/code_unit_size; /* Length in code units */ 5886 5887 /* If the string was terminated by \= we must now interpret modifiers. */ 5888 5889 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl)) 5890 return PR_OK; 5891 5892 /* Check for mutually exclusive modifiers. At present, these are all in the 5893 first control word. 
*/ 5894 5895 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++) 5896 { 5897 c = dat_datctl.control & exclusive_dat_controls[k]; 5898 if (c != 0 && c != (c & (~c+1))) 5899 { 5900 show_controls(c, 0, "** Not allowed together:"); 5901 fprintf(outfile, "\n"); 5902 return PR_OK; 5903 } 5904 } 5905 5906 if (pat_patctl.replacement[0] != 0 && 5907 (dat_datctl.control & CTL_NULLCONTEXT) != 0) 5908 { 5909 fprintf(outfile, "** Replacement text is not supported with null_context.\n"); 5910 return PR_OK; 5911 } 5912 5913 /* We now have the subject in dbuffer, with len containing the byte length, and 5914 ulen containing the code unit length. Move the data to the end of the buffer so 5915 that a read over the end can be caught by valgrind or other means. If we have 5916 explicit valgrind support, mark the unused start of the buffer unaddressable. 5917 If we are using the POSIX interface, or testing zero-termination, we must 5918 include the terminating zero in the usable data. */ 5919 5920 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) + 5921 (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0); 5922 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c); 5923 #ifdef SUPPORT_VALGRIND 5924 VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c)); 5925 #endif 5926 5927 /* Now pp points to the subject string. POSIX matching is only possible in 5928 8-bit mode, and it does not support timing or other fancy features. Some were 5929 checked at compile time, but we need to check the match-time settings here. 
*/ 5930 5931 #ifdef SUPPORT_PCRE2_8 5932 if ((pat_patctl.control & CTL_POSIX) != 0) 5933 { 5934 int rc; 5935 int eflags = 0; 5936 regmatch_t *pmatch = NULL; 5937 const char *msg = "** Ignored with POSIX interface:"; 5938 5939 if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET) 5940 prmsg(&msg, "callout_fail"); 5941 if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0) 5942 prmsg(&msg, "copy"); 5943 if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0) 5944 prmsg(&msg, "get"); 5945 if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack"); 5946 5947 if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0) 5948 { 5949 fprintf(outfile, "%s", msg); 5950 show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS); 5951 msg = ""; 5952 } 5953 if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 || 5954 (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0) 5955 { 5956 show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, 5957 dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg); 5958 msg = ""; 5959 } 5960 5961 if (msg[0] == 0) fprintf(outfile, "\n"); 5962 5963 if (dat_datctl.oveccount > 0) 5964 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount); 5965 if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL; 5966 if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL; 5967 if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; 5968 5969 rc = regexec(&preg, (const char *)pp + dat_datctl.offset, 5970 dat_datctl.oveccount, pmatch, eflags); 5971 if (rc != 0) 5972 { 5973 (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size); 5974 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8); 5975 } 5976 else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) 5977 fprintf(outfile, "Matched with REG_NOSUB\n"); 5978 else if (dat_datctl.oveccount == 0) 5979 fprintf(outfile, "Matched without capture\n"); 
5980 else 5981 { 5982 size_t i; 5983 for (i = 0; i < (size_t)dat_datctl.oveccount; i++) 5984 { 5985 if (pmatch[i].rm_so >= 0) 5986 { 5987 fprintf(outfile, "%2d: ", (int)i); 5988 PCHARSV(pp, pmatch[i].rm_so, 5989 pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile); 5990 fprintf(outfile, "\n"); 5991 if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) || 5992 (dat_datctl.control & CTL_ALLAFTERTEXT) != 0) 5993 { 5994 fprintf(outfile, "%2d+ ", (int)i); 5995 PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, 5996 utf, outfile); 5997 fprintf(outfile, "\n"); 5998 } 5999 } 6000 } 6001 } 6002 free(pmatch); 6003 return PR_OK; 6004 } 6005 #endif /* SUPPORT_PCRE2_8 */ 6006 6007 /* Handle matching via the native interface. Check for consistency of 6008 modifiers. */ 6009 6010 if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS)) 6011 { 6012 fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n"); 6013 dat_datctl.control &= ~CTL_FINDLIMITS; 6014 } 6015 6016 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA 6017 matching, even if the JIT compiler was used. */ 6018 6019 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT && 6020 FLD(compiled_code, executable_jit) != NULL) 6021 { 6022 fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n"); 6023 dat_datctl.control &= ~CTL_ALLUSEDTEXT; 6024 } 6025 6026 /* Handle passing the subject as zero-terminated. */ 6027 6028 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) 6029 ulen = PCRE2_ZERO_TERMINATED; 6030 6031 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a 6032 NULL context. */ 6033 6034 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)? 6035 NULL : PTR(dat_context); 6036 6037 /* Enable display of malloc/free if wanted. */ 6038 6039 show_memory = (dat_datctl.control & CTL_MEMORY) != 0; 6040 6041 /* Create and assign a JIT stack if requested. 
*/ 6042 6043 if (dat_datctl.jitstack != 0) 6044 { 6045 if (dat_datctl.jitstack != jit_stack_size) 6046 { 6047 PCRE2_JIT_STACK_FREE(jit_stack); 6048 PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL); 6049 jit_stack_size = dat_datctl.jitstack; 6050 } 6051 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack); 6052 } 6053 6054 /* Or de-assign */ 6055 6056 else if (jit_stack != NULL) 6057 { 6058 PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL); 6059 PCRE2_JIT_STACK_FREE(jit_stack); 6060 jit_stack = NULL; 6061 jit_stack_size = 0; 6062 } 6063 6064 /* When no JIT stack is assigned, we must ensure that there is a JIT callback 6065 if we want to verify that JIT was actually used. */ 6066 6067 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL) 6068 { 6069 PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL); 6070 } 6071 6072 /* Adjust match_data according to size of offsets required. A size of zero 6073 causes a new match data block to be obtained that exactly fits the pattern. */ 6074 6075 if (dat_datctl.oveccount == 0) 6076 { 6077 PCRE2_MATCH_DATA_FREE(match_data); 6078 PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL); 6079 PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data); 6080 } 6081 else if (dat_datctl.oveccount <= max_oveccount) 6082 { 6083 SETFLD(match_data, oveccount, dat_datctl.oveccount); 6084 } 6085 else 6086 { 6087 max_oveccount = dat_datctl.oveccount; 6088 PCRE2_MATCH_DATA_FREE(match_data); 6089 PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL); 6090 } 6091 6092 /* Replacement processing is ignored for DFA matching. */ 6093 6094 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0) 6095 { 6096 fprintf(outfile, "** Ignored for DFA matching: replace\n"); 6097 dat_datctl.replacement[0] = 0; 6098 } 6099 6100 /* If a replacement string is provided, call pcre2_substitute() instead of one 6101 of the matching functions. 
First we have to convert the replacement string to 6102 the appropriate width. */ 6103 6104 if (dat_datctl.replacement[0] != 0) 6105 { 6106 int rc; 6107 uint8_t *pr; 6108 uint8_t rbuffer[REPLACE_BUFFSIZE]; 6109 uint8_t nbuffer[REPLACE_BUFFSIZE]; 6110 uint32_t xoptions; 6111 PCRE2_SIZE rlen, nsize, erroroffset; 6112 BOOL badutf = FALSE; 6113 6114 #ifdef SUPPORT_PCRE2_8 6115 uint8_t *r8 = NULL; 6116 #endif 6117 #ifdef SUPPORT_PCRE2_16 6118 uint16_t *r16 = NULL; 6119 #endif 6120 #ifdef SUPPORT_PCRE2_32 6121 uint32_t *r32 = NULL; 6122 #endif 6123 6124 if (timeitm) 6125 fprintf(outfile, "** Timing is not supported with replace: ignored\n"); 6126 6127 xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 : 6128 PCRE2_SUBSTITUTE_GLOBAL) | 6129 (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 : 6130 PCRE2_SUBSTITUTE_EXTENDED) | 6131 (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 : 6132 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) | 6133 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 : 6134 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) | 6135 (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 : 6136 PCRE2_SUBSTITUTE_UNSET_EMPTY); 6137 6138 SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */ 6139 pr = dat_datctl.replacement; 6140 6141 /* If the replacement starts with '[<number>]' we interpret that as length 6142 value for the replacement buffer. 
*/ 6143 6144 nsize = REPLACE_BUFFSIZE/code_unit_size; 6145 if (*pr == '[') 6146 { 6147 PCRE2_SIZE n = 0; 6148 while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0; 6149 if (*pr++ != ']') 6150 { 6151 fprintf(outfile, "Bad buffer size in replacement string\n"); 6152 return PR_OK; 6153 } 6154 if (n > nsize) 6155 { 6156 fprintf(outfile, "Replacement buffer setting (%lu) is too large " 6157 "(max %lu)\n", (unsigned long int)n, (unsigned long int)nsize); 6158 return PR_OK; 6159 } 6160 nsize = n; 6161 } 6162 6163 /* Now copy the replacement string to a buffer of the appropriate width. No 6164 escape processing is done for replacements. In UTF mode, check for an invalid 6165 UTF-8 input string, and if it is invalid, just copy its code units without 6166 UTF interpretation. This provides a means of checking that an invalid string 6167 is detected. Otherwise, UTF-8 can be used to include wide characters in a 6168 replacement. */ 6169 6170 if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset); 6171 6172 /* Not UTF or invalid UTF-8: just copy the code units. 
*/ 6173 6174 if (!utf || badutf) 6175 { 6176 while ((c = *pr++) != 0) 6177 { 6178 #ifdef SUPPORT_PCRE2_8 6179 if (test_mode == PCRE8_MODE) *r8++ = c; 6180 #endif 6181 #ifdef SUPPORT_PCRE2_16 6182 if (test_mode == PCRE16_MODE) *r16++ = c; 6183 #endif 6184 #ifdef SUPPORT_PCRE2_32 6185 if (test_mode == PCRE32_MODE) *r32++ = c; 6186 #endif 6187 } 6188 } 6189 6190 /* Valid UTF-8 replacement string */ 6191 6192 else while ((c = *pr++) != 0) 6193 { 6194 if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); } 6195 6196 #ifdef SUPPORT_PCRE2_8 6197 if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8); 6198 #endif 6199 6200 #ifdef SUPPORT_PCRE2_16 6201 if (test_mode == PCRE16_MODE) 6202 { 6203 if (c >= 0x10000u) 6204 { 6205 c-= 0x10000u; 6206 *r16++ = 0xD800 | (c >> 10); 6207 *r16++ = 0xDC00 | (c & 0x3ff); 6208 } 6209 else *r16++ = c; 6210 } 6211 #endif 6212 6213 #ifdef SUPPORT_PCRE2_32 6214 if (test_mode == PCRE32_MODE) *r32++ = c; 6215 #endif 6216 } 6217 6218 SET(*r, 0); 6219 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) 6220 rlen = PCRE2_ZERO_TERMINATED; 6221 else 6222 rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size; 6223 PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset, 6224 dat_datctl.options|xoptions, match_data, dat_context, 6225 rbuffer, rlen, nbuffer, &nsize); 6226 6227 if (rc < 0) 6228 { 6229 PCRE2_SIZE msize; 6230 fprintf(outfile, "Failed: error %d", rc); 6231 if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET) 6232 fprintf(outfile, " at offset %ld in replacement", (long int)nsize); 6233 fprintf(outfile, ": "); 6234 PCRE2_GET_ERROR_MESSAGE(msize, rc, pbuffer); 6235 PCHARSV(CASTVAR(void *, pbuffer), 0, msize, FALSE, outfile); 6236 if (rc == PCRE2_ERROR_NOMEMORY && 6237 (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0) 6238 fprintf(outfile, ": %ld code units are needed", (long int)nsize); 6239 } 6240 else 6241 { 6242 fprintf(outfile, "%2d: ", rc); 6243 PCHARSV(nbuffer, 0, nsize, utf, outfile); 6244 } 6245 6246 fprintf(outfile, "\n"); 6247 } /* 
End of substitution handling */ 6248 6249 /* When a replacement string is not provided, run a loop for global matching 6250 with one of the basic matching functions. */ 6251 6252 else for (gmatched = 0;; gmatched++) 6253 { 6254 PCRE2_SIZE j; 6255 int capcount; 6256 PCRE2_SIZE *ovector; 6257 PCRE2_SIZE ovecsave[2]; 6258 6259 ovector = FLD(match_data, ovector); 6260 6261 /* After the first time round a global loop, for a normal global (/g) 6262 iteration, save the current ovector[0,1] so that we can check that they do 6263 change each time. Otherwise a matching bug that returns the same string 6264 causes an infinite loop. It has happened! */ 6265 6266 if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0) 6267 { 6268 ovecsave[0] = ovector[0]; 6269 ovecsave[1] = ovector[1]; 6270 } 6271 6272 /* For altglobal (or first time round the loop), set an "unset" value. */ 6273 6274 else ovecsave[0] = ovecsave[1] = PCRE2_UNSET; 6275 6276 /* Fill the ovector with junk to detect elements that do not get set 6277 when they should be. */ 6278 6279 for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET; 6280 6281 /* When matching is via pcre2_match(), we will detect the use of JIT via the 6282 stack callback function. */ 6283 6284 jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0; 6285 6286 /* Do timing if required. 
*/ 6287 6288 if (timeitm > 0) 6289 { 6290 register int i; 6291 clock_t start_time, time_taken; 6292 6293 if ((dat_datctl.control & CTL_DFA) != 0) 6294 { 6295 if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0) 6296 { 6297 fprintf(outfile, "Timing DFA restarts is not supported\n"); 6298 return PR_OK; 6299 } 6300 if (dfa_workspace == NULL) 6301 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 6302 start_time = clock(); 6303 for (i = 0; i < timeitm; i++) 6304 { 6305 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, 6306 dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 6307 use_dat_context, dfa_workspace, DFA_WS_DIMENSION); 6308 } 6309 } 6310 6311 else if ((pat_patctl.control & CTL_JITFAST) != 0) 6312 { 6313 start_time = clock(); 6314 for (i = 0; i < timeitm; i++) 6315 { 6316 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, 6317 dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 6318 use_dat_context); 6319 } 6320 } 6321 6322 else 6323 { 6324 start_time = clock(); 6325 for (i = 0; i < timeitm; i++) 6326 { 6327 PCRE2_MATCH(capcount, compiled_code, pp, ulen, 6328 dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 6329 use_dat_context); 6330 } 6331 } 6332 total_match_time += (time_taken = clock() - start_time); 6333 fprintf(outfile, "Match time %.4f milliseconds\n", 6334 (((double)time_taken * 1000.0) / (double)timeitm) / 6335 (double)CLOCKS_PER_SEC); 6336 } 6337 6338 /* Find the match and recursion limits if requested. The recursion limit 6339 is not relevant for JIT. */ 6340 6341 if ((dat_datctl.control & CTL_FINDLIMITS) != 0) 6342 { 6343 capcount = check_match_limit(pp, ulen, PCRE2_ERROR_MATCHLIMIT, "match"); 6344 if (FLD(compiled_code, executable_jit) == NULL) 6345 (void)check_match_limit(pp, ulen, PCRE2_ERROR_RECURSIONLIMIT, 6346 "recursion"); 6347 } 6348 6349 /* Otherwise just run a single match, setting up a callout if required (the 6350 default). 
*/ 6351 6352 else 6353 { 6354 if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0) 6355 { 6356 PCRE2_SET_CALLOUT(dat_context, callout_function, 6357 (void *)(&dat_datctl.callout_data)); 6358 first_callout = TRUE; 6359 last_callout_mark = NULL; 6360 callout_count = 0; 6361 } 6362 else 6363 { 6364 PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */ 6365 } 6366 6367 /* Run a single DFA or NFA match. */ 6368 6369 if ((dat_datctl.control & CTL_DFA) != 0) 6370 { 6371 if (dfa_workspace == NULL) 6372 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 6373 if (dfa_matched++ == 0) 6374 dfa_workspace[0] = -1; /* To catch bad restart */ 6375 PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, 6376 dat_datctl.offset, dat_datctl.options | g_notempty, match_data, 6377 use_dat_context, dfa_workspace, DFA_WS_DIMENSION); 6378 if (capcount == 0) 6379 { 6380 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); 6381 capcount = dat_datctl.oveccount; 6382 } 6383 } 6384 else 6385 { 6386 if ((pat_patctl.control & CTL_JITFAST) != 0) 6387 PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, 6388 dat_datctl.options | g_notempty, match_data, use_dat_context); 6389 else 6390 PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset, 6391 dat_datctl.options | g_notempty, match_data, use_dat_context); 6392 if (capcount == 0) 6393 { 6394 fprintf(outfile, "Matched, but too many substrings\n"); 6395 capcount = dat_datctl.oveccount; 6396 } 6397 } 6398 } 6399 6400 /* The result of the match is now in capcount. First handle a successful 6401 match. */ 6402 6403 if (capcount >= 0) 6404 { 6405 int i; 6406 uint32_t oveccount; 6407 6408 /* This is a check against a lunatic return value. 
*/ 6409 6410 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data); 6411 if (capcount > (int)oveccount) 6412 { 6413 fprintf(outfile, 6414 "** PCRE2 error: returned count %d is too big for ovector count %d\n", 6415 capcount, oveccount); 6416 capcount = oveccount; 6417 if ((dat_datctl.control & CTL_ANYGLOB) != 0) 6418 { 6419 fprintf(outfile, "** Global loop abandoned\n"); 6420 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */ 6421 } 6422 } 6423 6424 /* If this is not the first time round a global loop, check that the 6425 returned string has changed. If not, there is a bug somewhere and we must 6426 break the loop because it will go on for ever. We know that there are 6427 always at least two elements in the ovector. */ 6428 6429 if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) 6430 { 6431 fprintf(outfile, 6432 "** PCRE2 error: global repeat returned the same string as previous\n"); 6433 fprintf(outfile, "** Global loop abandoned\n"); 6434 dat_datctl.control &= ~CTL_ANYGLOB; /* Break g/G loop */ 6435 } 6436 6437 /* "allcaptures" requests showing of all captures in the pattern, to check 6438 unset ones at the end. It may be set on the pattern or the data. Implement 6439 by setting capcount to the maximum. This is not relevant for DFA matching, 6440 so ignore it. */ 6441 6442 if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) 6443 { 6444 uint32_t maxcapcount; 6445 if ((dat_datctl.control & CTL_DFA) != 0) 6446 { 6447 fprintf(outfile, "** Ignored after DFA matching: allcaptures\n"); 6448 } 6449 else 6450 { 6451 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0) 6452 return PR_SKIP; 6453 capcount = maxcapcount + 1; /* Allow for full match */ 6454 if (capcount > (int)oveccount) capcount = oveccount; 6455 } 6456 } 6457 6458 /* Output the captured substrings. Note that, for the matched string, 6459 the use of \K in an assertion can make the start later than the end. 
*/ 6460 6461 for (i = 0; i < 2*capcount; i += 2) 6462 { 6463 PCRE2_SIZE lleft, lmiddle, lright; 6464 PCRE2_SIZE start = ovector[i]; 6465 PCRE2_SIZE end = ovector[i+1]; 6466 6467 if (start > end) 6468 { 6469 start = ovector[i+1]; 6470 end = ovector[i]; 6471 fprintf(outfile, "Start of matched string is beyond its end - " 6472 "displaying from end to start.\n"); 6473 } 6474 6475 fprintf(outfile, "%2d: ", i/2); 6476 6477 /* Check for an unset group */ 6478 6479 if (start == PCRE2_UNSET) 6480 { 6481 fprintf(outfile, "<unset>\n"); 6482 continue; 6483 } 6484 6485 /* Check for silly offsets, in particular, values that have not been 6486 set when they should have been. */ 6487 6488 if (start > ulen || end > ulen) 6489 { 6490 fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n", 6491 (unsigned long int)start, (unsigned long int)end); 6492 continue; 6493 } 6494 6495 /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with 6496 JIT, it is disabled above, with a comment.) When the match is done by the 6497 interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is 6498 set, and if the leftmost consulted character is before the start of the 6499 match or the rightmost consulted character is past the end of the match, 6500 we want to show all consulted characters for the main matched string, and 6501 indicate which were lookarounds. 
*/ 6502 6503 if (i == 0) 6504 { 6505 BOOL showallused; 6506 PCRE2_SIZE leftchar, rightchar; 6507 6508 if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0) 6509 { 6510 leftchar = FLD(match_data, leftchar); 6511 rightchar = FLD(match_data, rightchar); 6512 showallused = i == 0 && (leftchar < start || rightchar > end); 6513 } 6514 else showallused = FALSE; 6515 6516 if (showallused) 6517 { 6518 PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile); 6519 PCHARS(lmiddle, pp, start, end - start, utf, outfile); 6520 PCHARS(lright, pp, end, rightchar - end, utf, outfile); 6521 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 6522 fprintf(outfile, " (JIT)"); 6523 fprintf(outfile, "\n "); 6524 for (j = 0; j < lleft; j++) fprintf(outfile, "<"); 6525 for (j = 0; j < lmiddle; j++) fprintf(outfile, " "); 6526 for (j = 0; j < lright; j++) fprintf(outfile, ">"); 6527 } 6528 6529 /* When a pattern contains \K, the start of match position may be 6530 different to the start of the matched string. When this is the case, 6531 show it when requested. */ 6532 6533 else if ((dat_datctl.control & CTL_STARTCHAR) != 0) 6534 { 6535 PCRE2_SIZE startchar; 6536 PCRE2_GET_STARTCHAR(startchar, match_data); 6537 PCHARS(lleft, pp, startchar, start - startchar, utf, outfile); 6538 PCHARSV(pp, start, end - start, utf, outfile); 6539 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 6540 fprintf(outfile, " (JIT)"); 6541 if (startchar != start) 6542 { 6543 fprintf(outfile, "\n "); 6544 for (j = 0; j < lleft; j++) fprintf(outfile, "^"); 6545 } 6546 } 6547 6548 /* Otherwise, just show the matched string. */ 6549 6550 else 6551 { 6552 PCHARSV(pp, start, end - start, utf, outfile); 6553 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 6554 fprintf(outfile, " (JIT)"); 6555 } 6556 } 6557 6558 /* Not the main matched string. Just show it unadorned. 
*/ 6559 6560 else 6561 { 6562 PCHARSV(pp, start, end - start, utf, outfile); 6563 } 6564 6565 fprintf(outfile, "\n"); 6566 6567 /* Note: don't use the start/end variables here because we want to 6568 show the text from what is reported as the end. */ 6569 6570 if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 || 6571 (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0)) 6572 { 6573 fprintf(outfile, "%2d+ ", i/2); 6574 PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile); 6575 fprintf(outfile, "\n"); 6576 } 6577 } 6578 6579 /* Output (*MARK) data if requested */ 6580 6581 if ((dat_datctl.control & CTL_MARK) != 0 && 6582 TESTFLD(match_data, mark, !=, NULL)) 6583 { 6584 fprintf(outfile, "MK: "); 6585 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile); 6586 fprintf(outfile, "\n"); 6587 } 6588 6589 /* Process copy/get strings */ 6590 6591 copy_and_get(utf, capcount); 6592 6593 } /* End of handling a successful match */ 6594 6595 /* There was a partial match. The value of ovector[0] is the bumpalong point, 6596 that is, startchar, not any \K point that might have been passed. 
*/ 6597 6598 else if (capcount == PCRE2_ERROR_PARTIAL) 6599 { 6600 PCRE2_SIZE poffset; 6601 int backlength; 6602 int rubriclength = 0; 6603 6604 fprintf(outfile, "Partial match"); 6605 if ((dat_datctl.control & CTL_MARK) != 0 && 6606 TESTFLD(match_data, mark, !=, NULL)) 6607 { 6608 fprintf(outfile, ", mark="); 6609 PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf, 6610 outfile); 6611 rubriclength += 7; 6612 } 6613 fprintf(outfile, ": "); 6614 rubriclength += 15; 6615 6616 poffset = backchars(pp, ovector[0], maxlookbehind, utf); 6617 PCHARS(backlength, pp, poffset, ovector[0] - poffset, utf, outfile); 6618 PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile); 6619 6620 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 6621 fprintf(outfile, " (JIT)"); 6622 fprintf(outfile, "\n"); 6623 6624 if (backlength != 0) 6625 { 6626 int i; 6627 for (i = 0; i < rubriclength; i++) fprintf(outfile, " "); 6628 for (i = 0; i < backlength; i++) fprintf(outfile, "<"); 6629 fprintf(outfile, "\n"); 6630 } 6631 6632 /* Process copy/get strings */ 6633 6634 copy_and_get(utf, 1); 6635 6636 break; /* Out of the /g loop */ 6637 } /* End of handling partial match */ 6638 6639 /* Failed to match. If this is a /g or /G loop, we might previously have 6640 set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match. 6641 If that is the case, this is not necessarily the end. We want to advance the 6642 start offset, and continue. We won't be at the end of the string - that was 6643 checked before setting g_notempty. We achieve the effect by pretending that a 6644 single character was matched. 6645 6646 Complication arises in the case when the newline convention is "any", "crlf", 6647 or "anycrlf". If the previous match was at the end of a line terminated by 6648 CRLF, an advance of one character just passes the CR, whereas we should 6649 prefer the longer newline sequence, as does the code in pcre2_match(). 
6650 6651 Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one 6652 character, not one byte. */ 6653 6654 else if (g_notempty != 0) /* There was a previous null match */ 6655 { 6656 uint16_t nl = FLD(compiled_code, newline_convention); 6657 PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */ 6658 PCRE2_SIZE end_offset = start_offset + 1; 6659 6660 if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY || 6661 nl == PCRE2_NEWLINE_ANYCRLF) && 6662 start_offset < ulen - 1 && 6663 CODE_UNIT(pp, start_offset) == '\r' && 6664 CODE_UNIT(pp, end_offset) == '\n') 6665 end_offset++; 6666 6667 else if (utf && test_mode != PCRE32_MODE) 6668 { 6669 if (test_mode == PCRE8_MODE) 6670 { 6671 for (; end_offset < ulen; end_offset++) 6672 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break; 6673 } 6674 else /* 16-bit mode */ 6675 { 6676 for (; end_offset < ulen; end_offset++) 6677 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break; 6678 } 6679 } 6680 6681 SETFLDVEC(match_data, ovector, 0, start_offset); 6682 SETFLDVEC(match_data, ovector, 1, end_offset); 6683 } /* End of handling null match in a global loop */ 6684 6685 /* A "normal" match failure. There will be a negative error number in 6686 capcount. 
*/ 6687 6688 else 6689 { 6690 int mlen; 6691 6692 switch(capcount) 6693 { 6694 case PCRE2_ERROR_NOMATCH: 6695 if (gmatched == 0) 6696 { 6697 fprintf(outfile, "No match"); 6698 if ((dat_datctl.control & CTL_MARK) != 0 && 6699 TESTFLD(match_data, mark, !=, NULL)) 6700 { 6701 fprintf(outfile, ", mark = "); 6702 PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile); 6703 } 6704 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) 6705 fprintf(outfile, " (JIT)"); 6706 fprintf(outfile, "\n"); 6707 } 6708 break; 6709 6710 case PCRE2_ERROR_BADUTFOFFSET: 6711 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode); 6712 break; 6713 6714 default: 6715 fprintf(outfile, "Failed: error %d: ", capcount); 6716 PCRE2_GET_ERROR_MESSAGE(mlen, capcount, pbuffer); 6717 PCHARSV(CASTVAR(void *, pbuffer), 0, mlen, FALSE, outfile); 6718 if (capcount <= PCRE2_ERROR_UTF8_ERR1 && 6719 capcount >= PCRE2_ERROR_UTF32_ERR2) 6720 { 6721 PCRE2_SIZE startchar; 6722 PCRE2_GET_STARTCHAR(startchar, match_data); 6723 fprintf(outfile, " at offset %lu", (unsigned long int)startchar); 6724 } 6725 fprintf(outfile, "\n"); 6726 break; 6727 } 6728 6729 break; /* Out of the /g loop */ 6730 } /* End of failed match handling */ 6731 6732 /* Control reaches here in two circumstances: (a) after a match, and (b) 6733 after a non-match that immediately followed a match on an empty string when 6734 doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and 6735 PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match 6736 of one character. So effectively we get here only after a match. If we 6737 are not doing a global search, we are done. */ 6738 6739 if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else 6740 { 6741 PCRE2_SIZE end_offset = FLD(match_data, ovector)[1]; 6742 6743 /* We must now set up for the next iteration of a global search. If we have 6744 matched an empty string, first check to see if we are at the end of the 6745 subject. 
If so, the loop is over. Otherwise, mimic what Perl's /g option 6746 does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again 6747 at the same point. If this fails it will be picked up above, where a fake 6748 match is set up so that at this point we advance to the next character. */ 6749 6750 if (FLD(match_data, ovector)[0] == end_offset) 6751 { 6752 if (end_offset == ulen) break; /* End of subject */ 6753 g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; 6754 } 6755 6756 /* However, even after matching a non-empty string, there is still one 6757 tricky case. If a pattern contains \K within a lookbehind assertion at the 6758 start, the end of the matched string can be at the offset where the match 6759 started. In the case of a normal /g iteration without special action, this 6760 leads to a loop that keeps on returning the same substring. The loop would 6761 be caught above, but we really want to move on to the next match. */ 6762 6763 else 6764 { 6765 g_notempty = 0; /* Set for a "normal" repeat */ 6766 if ((dat_datctl.control & CTL_GLOBAL) != 0) 6767 { 6768 PCRE2_SIZE startchar; 6769 PCRE2_GET_STARTCHAR(startchar, match_data); 6770 if (end_offset <= startchar) 6771 { 6772 if (startchar >= ulen) break; /* End of subject */ 6773 end_offset = startchar + 1; 6774 if (utf && test_mode != PCRE32_MODE) 6775 { 6776 if (test_mode == PCRE8_MODE) 6777 { 6778 for (; end_offset < ulen; end_offset++) 6779 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break; 6780 } 6781 else /* 16-bit mode */ 6782 { 6783 for (; end_offset < ulen; end_offset++) 6784 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break; 6785 } 6786 } 6787 } 6788 } 6789 } 6790 6791 /* For /g (global), update the start offset, leaving the rest alone. */ 6792 6793 if ((dat_datctl.control & CTL_GLOBAL) != 0) 6794 dat_datctl.offset = end_offset; 6795 6796 /* For altglobal, just update the pointer and length. 
*/ 6797 6798 else 6799 { 6800 pp += end_offset * code_unit_size; 6801 len -= end_offset * code_unit_size; 6802 ulen -= end_offset; 6803 } 6804 } 6805 } /* End of global loop */ 6806 6807 show_memory = FALSE; 6808 return PR_OK; 6809 } 6810 6811 6812 6813 6814 /************************************************* 6815 * Print PCRE2 version * 6816 *************************************************/ 6817 6818 static void 6819 print_version(FILE *f) 6820 { 6821 VERSION_TYPE *vp; 6822 fprintf(f, "PCRE2 version "); 6823 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp); 6824 fprintf(f, "\n"); 6825 } 6826 6827 6828 6829 /************************************************* 6830 * Print Unicode version * 6831 *************************************************/ 6832 6833 static void 6834 print_unicode_version(FILE *f) 6835 { 6836 VERSION_TYPE *vp; 6837 fprintf(f, "Unicode version "); 6838 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp); 6839 } 6840 6841 6842 6843 /************************************************* 6844 * Print JIT target * 6845 *************************************************/ 6846 6847 static void 6848 print_jit_target(FILE *f) 6849 { 6850 VERSION_TYPE *vp; 6851 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp); 6852 } 6853 6854 6855 6856 /************************************************* 6857 * Print newline configuration * 6858 *************************************************/ 6859 6860 /* Output is always to stdout. 
6861 6862 Arguments: 6863 rc the return code from PCRE2_CONFIG_NEWLINE 6864 isc TRUE if called from "-C newline" 6865 Returns: nothing 6866 */ 6867 6868 static void 6869 print_newline_config(uint32_t optval, BOOL isc) 6870 { 6871 if (!isc) printf(" Newline sequence is "); 6872 if (optval < sizeof(newlines)/sizeof(char *)) 6873 printf("%s\n", newlines[optval]); 6874 else 6875 printf("a non-standard value: %d\n", optval); 6876 } 6877 6878 6879 6880 /************************************************* 6881 * Usage function * 6882 *************************************************/ 6883 6884 static void 6885 usage(void) 6886 { 6887 printf("Usage: pcre2test [options] [<input file> [<output file>]]\n\n"); 6888 printf("Input and output default to stdin and stdout.\n"); 6889 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 6890 printf("If input is a terminal, readline() is used to read from it.\n"); 6891 #else 6892 printf("This version of pcre2test is not linked with readline().\n"); 6893 #endif 6894 printf("\nOptions:\n"); 6895 #ifdef SUPPORT_PCRE2_8 6896 printf(" -8 use the 8-bit library\n"); 6897 #endif 6898 #ifdef SUPPORT_PCRE2_16 6899 printf(" -16 use the 16-bit library\n"); 6900 #endif 6901 #ifdef SUPPORT_PCRE2_32 6902 printf(" -32 use the 32-bit library\n"); 6903 #endif 6904 printf(" -b set default pattern control 'fullbincode'\n"); 6905 printf(" -C show PCRE2 compile-time options and exit\n"); 6906 printf(" -C arg show a specific compile-time option and exit with its\n"); 6907 printf(" value if numeric (else 0). 
The arg can be:\n"); 6908 printf(" backslash-C use of \\C is enabled [0, 1]\n"); 6909 printf(" bsr \\R type [ANYCRLF, ANY]\n"); 6910 printf(" ebcdic compiled for EBCDIC character code [0,1]\n"); 6911 printf(" ebcdic-nl NL code if compiled for EBCDIC\n"); 6912 printf(" jit just-in-time compiler supported [0, 1]\n"); 6913 printf(" linksize internal link size [2, 3, 4]\n"); 6914 printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n"); 6915 printf(" pcre2-8 8 bit library support enabled [0, 1]\n"); 6916 printf(" pcre2-16 16 bit library support enabled [0, 1]\n"); 6917 printf(" pcre2-32 32 bit library support enabled [0, 1]\n"); 6918 printf(" unicode Unicode and UTF support enabled [0, 1]\n"); 6919 printf(" -d set default pattern control 'debug'\n"); 6920 printf(" -dfa set default subject control 'dfa'\n"); 6921 printf(" -error <n,m,..> show messages for error numbers, then exit\n"); 6922 printf(" -help show usage information\n"); 6923 printf(" -i set default pattern control 'info'\n"); 6924 printf(" -jit set default pattern control 'jit'\n"); 6925 printf(" -q quiet: do not output PCRE2 version number at start\n"); 6926 printf(" -pattern <s> set default pattern control fields\n"); 6927 printf(" -subject <s> set default subject control fields\n"); 6928 printf(" -S <n> set stack size to <n> megabytes\n"); 6929 printf(" -t [<n>] time compilation and execution, repeating <n> times\n"); 6930 printf(" -tm [<n>] time execution (matching) only, repeating <n> times\n"); 6931 printf(" -T same as -t, but show total times at the end\n"); 6932 printf(" -TM same as -tm, but show total time at the end\n"); 6933 printf(" -version show PCRE2 version and exit\n"); 6934 } 6935 6936 6937 6938 /************************************************* 6939 * Handle -C option * 6940 *************************************************/ 6941 6942 /* This option outputs configuration options and sets an appropriate return 6943 code when asked for a single option. 
The code is abstracted into a separate 6944 function because of its size. Use whichever pcre2_config() function is 6945 available. 6946 6947 Argument: an option name or NULL 6948 Returns: the return code 6949 */ 6950 6951 static int 6952 c_option(const char *arg) 6953 { 6954 uint32_t optval; 6955 int yield = 0; 6956 6957 if (arg != NULL) 6958 { 6959 unsigned int i; 6960 6961 for (i = 0; i < COPTLISTCOUNT; i++) 6962 if (strcmp(arg, coptlist[i].name) == 0) break; 6963 6964 if (i >= COPTLISTCOUNT) 6965 { 6966 fprintf(stderr, "** Unknown -C option '%s'\n", arg); 6967 return -1; 6968 } 6969 6970 switch (coptlist[i].type) 6971 { 6972 case CONF_BSR: 6973 (void)PCRE2_CONFIG(coptlist[i].value, &optval); 6974 printf("%s\n", optval? "ANYCRLF" : "ANY"); 6975 break; 6976 6977 case CONF_FIX: 6978 yield = coptlist[i].value; 6979 printf("%d\n", yield); 6980 break; 6981 6982 case CONF_FIZ: 6983 optval = coptlist[i].value; 6984 printf("%d\n", optval); 6985 break; 6986 6987 case CONF_INT: 6988 (void)PCRE2_CONFIG(coptlist[i].value, &yield); 6989 printf("%d\n", yield); 6990 break; 6991 6992 case CONF_NL: 6993 (void)PCRE2_CONFIG(coptlist[i].value, &optval); 6994 print_newline_config(optval, TRUE); 6995 break; 6996 } 6997 6998 /* For VMS, return the value by setting a symbol, for certain values only. */ 6999 7000 #ifdef __VMS 7001 if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT) 7002 { 7003 char ucname[16]; 7004 strcpy(ucname, coptlist[i].name); 7005 for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]; 7006 vms_setsymbol(ucname, 0, optval); 7007 } 7008 #endif 7009 7010 return yield; 7011 } 7012 7013 /* No argument for -C: output all configuration information. 
*/ 7014 7015 print_version(stdout); 7016 printf("Compiled with\n"); 7017 7018 #ifdef EBCDIC 7019 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF); 7020 #if defined NATIVE_ZOS 7021 printf(" EBCDIC code page %s or similar\n", pcrz_cpversion()); 7022 #endif 7023 #endif 7024 7025 #ifdef SUPPORT_PCRE2_8 7026 printf(" 8-bit support\n"); 7027 #endif 7028 #ifdef SUPPORT_PCRE2_16 7029 printf(" 16-bit support\n"); 7030 #endif 7031 #ifdef SUPPORT_PCRE2_32 7032 printf(" 32-bit support\n"); 7033 #endif 7034 7035 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval); 7036 if (optval != 0) 7037 { 7038 printf(" UTF and UCP support ("); 7039 print_unicode_version(stdout); 7040 printf(")\n"); 7041 } 7042 else printf(" No Unicode support\n"); 7043 7044 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval); 7045 if (optval != 0) 7046 { 7047 printf(" Just-in-time compiler support: "); 7048 print_jit_target(stdout); 7049 printf("\n"); 7050 } 7051 else 7052 { 7053 printf(" No just-in-time compiler support\n"); 7054 } 7055 7056 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval); 7057 print_newline_config(optval, FALSE); 7058 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval); 7059 printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" : 7060 "all Unicode newlines"); 7061 #ifdef NEVER_BACKSLASH_C 7062 printf(" \\C is not supported\n"); 7063 #else 7064 printf(" \\C is supported\n"); 7065 #endif 7066 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval); 7067 printf(" Internal link size = %d\n", optval); 7068 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval); 7069 printf(" Parentheses nest limit = %d\n", optval); 7070 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval); 7071 printf(" Default match limit = %d\n", optval); 7072 (void)PCRE2_CONFIG(PCRE2_CONFIG_RECURSIONLIMIT, &optval); 7073 printf(" Default recursion depth limit = %d\n", optval); 7074 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &optval); 7075 printf(" Match recursion uses %s", optval? 
"stack" : "heap"); 7076 7077 printf("\n"); 7078 return 0; 7079 } 7080 7081 7082 7083 /************************************************* 7084 * Main Program * 7085 *************************************************/ 7086 7087 int 7088 main(int argc, char **argv) 7089 { 7090 uint32_t yield = 0; 7091 uint32_t op = 1; 7092 uint32_t stack_size; 7093 BOOL notdone = TRUE; 7094 BOOL quiet = FALSE; 7095 BOOL showtotaltimes = FALSE; 7096 BOOL skipping = FALSE; 7097 char *arg_subject = NULL; 7098 char *arg_pattern = NULL; 7099 char *arg_error = NULL; 7100 7101 /* The offsets to the options and control bits fields of the pattern and data 7102 control blocks must be the same so that common options and controls such as 7103 "anchored" or "memory" can work for either of them from a single table entry. 7104 We cannot test this till runtime because "offsetof" does not work in the 7105 preprocessor. */ 7106 7107 if (PO(options) != DO(options) || PO(control) != DO(control) || 7108 PO(control2) != DO(control2)) 7109 { 7110 fprintf(stderr, "** Coding error: " 7111 "options and control offsets for pattern and data must be the same.\n"); 7112 return 1; 7113 } 7114 7115 /* Get the PCRE2 and Unicode version number and JIT target information, at the 7116 same time checking that a request for the length gives the same answer. Also 7117 check lengths for non-string items. 
*/ 7118 7119 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != 7120 PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) || 7121 7122 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) != 7123 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) || 7124 7125 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) != 7126 PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) || 7127 7128 PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) || 7129 PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t)) 7130 { 7131 fprintf(stderr, "** Error in pcre2_config(): bad length\n"); 7132 return 1; 7133 } 7134 7135 /* Get buffers from malloc() so that valgrind will check their misuse when 7136 debugging. They grow automatically when very long lines are read. The 16- 7137 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */ 7138 7139 buffer = (uint8_t *)malloc(pbuffer8_size); 7140 pbuffer8 = (uint8_t *)malloc(pbuffer8_size); 7141 7142 /* The following _setmode() stuff is some Windows magic that tells its runtime 7143 library to translate CRLF into a single LF character. At least, that's what 7144 I've been told: never having used Windows I take this all on trust. Originally 7145 it set 0x8000, but then I was advised that _O_BINARY was better. */ 7146 7147 #if defined(_WIN32) || defined(WIN32) 7148 _setmode( _fileno( stdout ), _O_BINARY ); 7149 #endif 7150 7151 /* Initialization that does not depend on the running mode. */ 7152 7153 locale_name[0] = 0; 7154 memset(&def_patctl, 0, sizeof(patctl)); 7155 memset(&def_datctl, 0, sizeof(datctl)); 7156 def_datctl.oveccount = DEFAULT_OVECCOUNT; 7157 def_datctl.copy_numbers[0] = -1; 7158 def_datctl.get_numbers[0] = -1; 7159 def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET; 7160 7161 /* Scan command line options. 
*/ 7162 7163 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) 7164 { 7165 char *endptr; 7166 char *arg = argv[op]; 7167 unsigned long uli; 7168 7169 /* Display and/or set return code for configuration options. */ 7170 7171 if (strcmp(arg, "-C") == 0) 7172 { 7173 yield = c_option(argv[op + 1]); 7174 goto EXIT; 7175 } 7176 7177 /* Select operating mode */ 7178 7179 if (strcmp(arg, "-8") == 0) 7180 { 7181 #ifdef SUPPORT_PCRE2_8 7182 test_mode = PCRE8_MODE; 7183 #else 7184 fprintf(stderr, 7185 "** This version of PCRE2 was built without 8-bit support\n"); 7186 exit(1); 7187 #endif 7188 } 7189 else if (strcmp(arg, "-16") == 0) 7190 { 7191 #ifdef SUPPORT_PCRE2_16 7192 test_mode = PCRE16_MODE; 7193 #else 7194 fprintf(stderr, 7195 "** This version of PCRE2 was built without 16-bit support\n"); 7196 exit(1); 7197 #endif 7198 } 7199 else if (strcmp(arg, "-32") == 0) 7200 { 7201 #ifdef SUPPORT_PCRE2_32 7202 test_mode = PCRE32_MODE; 7203 #else 7204 fprintf(stderr, 7205 "** This version of PCRE2 was built without 32-bit support\n"); 7206 exit(1); 7207 #endif 7208 } 7209 7210 /* Set quiet (no version verification) */ 7211 7212 else if (strcmp(arg, "-q") == 0) quiet = TRUE; 7213 7214 /* Set system stack size */ 7215 7216 else if (strcmp(arg, "-S") == 0 && argc > 2 && 7217 ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) 7218 { 7219 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) 7220 fprintf(stderr, "pcre2test: -S is not supported on this OS\n"); 7221 exit(1); 7222 #else 7223 int rc; 7224 struct rlimit rlim; 7225 if (U32OVERFLOW(uli)) 7226 { 7227 fprintf(stderr, "** Argument for -S is too big\n"); 7228 exit(1); 7229 } 7230 stack_size = (uint32_t)uli; 7231 getrlimit(RLIMIT_STACK, &rlim); 7232 rlim.rlim_cur = stack_size * 1024 * 1024; 7233 if (rlim.rlim_cur > rlim.rlim_max) 7234 { 7235 fprintf(stderr, 7236 "pcre2test: requested stack size %luM is greater than hard limit %lu\n", 7237 (unsigned long int)stack_size, 
7238 (unsigned long int)(rlim.rlim_max)); 7239 exit(1); 7240 } 7241 rc = setrlimit(RLIMIT_STACK, &rlim); 7242 if (rc != 0) 7243 { 7244 fprintf(stderr, "pcre2test: setting stack size %luM failed: %s\n", 7245 (unsigned long int)stack_size, strerror(errno)); 7246 exit(1); 7247 } 7248 op++; 7249 argc--; 7250 #endif 7251 } 7252 7253 /* Set some common pattern and subject controls */ 7254 7255 else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA; 7256 else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE; 7257 else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG; 7258 else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO; 7259 else if (strcmp(arg, "-jit") == 0) 7260 { 7261 def_patctl.jit = 7; /* full & partial */ 7262 #ifndef SUPPORT_JIT 7263 fprintf(stderr, "** Warning: JIT support is not available: " 7264 "-jit calls functions that do nothing.\n"); 7265 #endif 7266 } 7267 7268 /* Set timing parameters */ 7269 7270 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 || 7271 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0) 7272 { 7273 int both = arg[2] == 0; 7274 showtotaltimes = arg[1] == 'T'; 7275 if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0)) 7276 { 7277 if (U32OVERFLOW(uli)) 7278 { 7279 fprintf(stderr, "** Argument for %s is too big\n", arg); 7280 exit(1); 7281 } 7282 timeitm = (int)uli; 7283 op++; 7284 argc--; 7285 } 7286 else timeitm = LOOPREPEAT; 7287 if (both) timeit = timeitm; 7288 } 7289 7290 /* Give help */ 7291 7292 else if (strcmp(arg, "-help") == 0 || 7293 strcmp(arg, "--help") == 0) 7294 { 7295 usage(); 7296 goto EXIT; 7297 } 7298 7299 /* Show version */ 7300 7301 else if (strcmp(arg, "-version") == 0 || 7302 strcmp(arg, "--version") == 0) 7303 { 7304 print_version(stdout); 7305 goto EXIT; 7306 } 7307 7308 /* The following options save their data for processing once we know what 7309 the running mode is. 
*/ 7310 7311 else if (strcmp(arg, "-error") == 0) 7312 { 7313 arg_error = argv[op+1]; 7314 goto CHECK_VALUE_EXISTS; 7315 } 7316 7317 else if (strcmp(arg, "-subject") == 0) 7318 { 7319 arg_subject = argv[op+1]; 7320 goto CHECK_VALUE_EXISTS; 7321 } 7322 7323 else if (strcmp(arg, "-pattern") == 0) 7324 { 7325 arg_pattern = argv[op+1]; 7326 CHECK_VALUE_EXISTS: 7327 if (argc <= 2) 7328 { 7329 fprintf(stderr, "** Missing value for %s\n", arg); 7330 yield = 1; 7331 goto EXIT; 7332 } 7333 op++; 7334 argc--; 7335 } 7336 7337 /* Unrecognized option */ 7338 7339 else 7340 { 7341 fprintf(stderr, "** Unknown or malformed option '%s'\n", arg); 7342 usage(); 7343 yield = 1; 7344 goto EXIT; 7345 } 7346 op++; 7347 argc--; 7348 } 7349 7350 /* If -error was present, get the error numbers, show the messages, and exit. 7351 We wait to do this until we know which mode we are in. */ 7352 7353 if (arg_error != NULL) 7354 { 7355 int len; 7356 int errcode; 7357 char *endptr; 7358 7359 /* Ensure the relevant non-8-bit buffer is available. */ 7360 7361 #ifdef SUPPORT_PCRE2_16 7362 if (test_mode == PCRE16_MODE) 7363 { 7364 pbuffer16_size = 256; 7365 pbuffer16 = (uint16_t *)malloc(pbuffer16_size); 7366 if (pbuffer16 == NULL) 7367 { 7368 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n", 7369 (unsigned long int)pbuffer16_size); 7370 yield = 1; 7371 goto EXIT; 7372 } 7373 } 7374 #endif 7375 7376 #ifdef SUPPORT_PCRE2_32 7377 if (test_mode == PCRE32_MODE) 7378 { 7379 pbuffer32_size = 256; 7380 pbuffer32 = (uint32_t *)malloc(pbuffer32_size); 7381 if (pbuffer32 == NULL) 7382 { 7383 fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n", 7384 (unsigned long int)pbuffer32_size); 7385 yield = 1; 7386 goto EXIT; 7387 } 7388 } 7389 #endif 7390 7391 /* Loop along a list of error numbers. 
*/ 7392 7393 for (;;) 7394 { 7395 errcode = strtol(arg_error, &endptr, 10); 7396 if (*endptr != 0 && *endptr != CHAR_COMMA) 7397 { 7398 fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error); 7399 yield = 1; 7400 goto EXIT; 7401 } 7402 printf("Error %d: ", errcode); 7403 PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer); 7404 if (len < 0) 7405 { 7406 switch (len) 7407 { 7408 case PCRE2_ERROR_BADDATA: 7409 printf("PCRE2_ERROR_BADDATA (unknown error number)"); 7410 break; 7411 7412 case PCRE2_ERROR_NOMEMORY: 7413 printf("PCRE2_ERROR_NOMEMORY (buffer too small)"); 7414 break; 7415 7416 default: 7417 printf("Unexpected return (%d) from pcre2_get_error_message()", len); 7418 break; 7419 } 7420 } 7421 else 7422 { 7423 PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout); 7424 } 7425 printf("\n"); 7426 if (*endptr == 0) goto EXIT; 7427 arg_error = endptr + 1; 7428 } 7429 /* Control never reaches here */ 7430 } /* End of -error handling */ 7431 7432 /* Initialize things that cannot be done until we know which test mode we are 7433 running in. When HEAP_MATCH_RECURSE is undefined, calling pcre2_set_recursion_ 7434 memory_management() is a no-op, but we call it in order to exercise it. Also 7435 exercise the general context copying function, which is not otherwise used. */ 7436 7437 code_unit_size = test_mode/8; 7438 max_oveccount = DEFAULT_OVECCOUNT; 7439 7440 /* Use macros to save a lot of duplication. 
*/ 7441 7442 #define CREATECONTEXTS \ 7443 G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \ 7444 G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \ 7445 G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \ 7446 G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \ 7447 G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \ 7448 G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \ 7449 G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS)) 7450 7451 #ifdef HEAP_MATCH_RECURSE 7452 #define SETRECURSEMEMMAN \ 7453 (void)G(pcre2_set_recursion_memory_management_,BITS) \ 7454 (G(default_dat_context,BITS), \ 7455 &my_stack_malloc, &my_stack_free, NULL) 7456 #else 7457 #define SETRECURSEMEMMAN \ 7458 (void)G(pcre2_set_recursion_memory_management_,BITS)(NULL, NULL, NULL, NULL) 7459 #endif 7460 7461 /* Call the appropriate functions for the current mode. */ 7462 7463 #ifdef SUPPORT_PCRE2_8 7464 #undef BITS 7465 #define BITS 8 7466 if (test_mode == PCRE8_MODE) 7467 { 7468 CREATECONTEXTS; 7469 SETRECURSEMEMMAN; 7470 } 7471 #endif 7472 7473 #ifdef SUPPORT_PCRE2_16 7474 #undef BITS 7475 #define BITS 16 7476 if (test_mode == PCRE16_MODE) 7477 { 7478 CREATECONTEXTS; 7479 SETRECURSEMEMMAN; 7480 } 7481 #endif 7482 7483 #ifdef SUPPORT_PCRE2_32 7484 #undef BITS 7485 #define BITS 32 7486 if (test_mode == PCRE32_MODE) 7487 { 7488 CREATECONTEXTS; 7489 SETRECURSEMEMMAN; 7490 } 7491 #endif 7492 7493 /* Set a default parentheses nest limit that is large enough to run the 7494 standard tests (this also exercises the function). */ 7495 7496 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, 220); 7497 7498 /* Handle command line modifier settings, sending any error messages to 7499 stderr. 
We need to know the mode before modifying the context, and it is tidier 7500 to do them all in the same way. */ 7501 7502 outfile = stderr; 7503 if ((arg_pattern != NULL && 7504 !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) || 7505 (arg_subject != NULL && 7506 !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl))) 7507 { 7508 yield = 1; 7509 goto EXIT; 7510 } 7511 7512 /* Sort out the input and output files, defaulting to stdin/stdout. */ 7513 7514 infile = stdin; 7515 outfile = stdout; 7516 7517 if (argc > 1 && strcmp(argv[op], "-") != 0) 7518 { 7519 infile = fopen(argv[op], INPUT_MODE); 7520 if (infile == NULL) 7521 { 7522 printf("** Failed to open '%s'\n", argv[op]); 7523 yield = 1; 7524 goto EXIT; 7525 } 7526 } 7527 7528 if (argc > 2) 7529 { 7530 outfile = fopen(argv[op+1], OUTPUT_MODE); 7531 if (outfile == NULL) 7532 { 7533 printf("** Failed to open '%s'\n", argv[op+1]); 7534 yield = 1; 7535 goto EXIT; 7536 } 7537 } 7538 7539 /* Output a heading line unless quiet, then process input lines. */ 7540 7541 if (!quiet) print_version(outfile); 7542 7543 SET(compiled_code, NULL); 7544 7545 #ifdef SUPPORT_PCRE2_8 7546 preg.re_pcre2_code = NULL; 7547 preg.re_match_data = NULL; 7548 #endif 7549 7550 while (notdone) 7551 { 7552 uint8_t *p; 7553 int rc = PR_OK; 7554 BOOL expectdata = TEST(compiled_code, !=, NULL); 7555 #ifdef SUPPORT_PCRE2_8 7556 expectdata |= preg.re_pcre2_code != NULL; 7557 #endif 7558 7559 if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL) 7560 break; 7561 if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer); 7562 fflush(outfile); 7563 p = buffer; 7564 7565 /* If we have a pattern set up for testing, or we are skipping after a 7566 compile failure, a blank line terminates this test; otherwise process the 7567 line as a data line. 
*/ 7568 7569 if (expectdata || skipping) 7570 { 7571 while (isspace(*p)) p++; 7572 if (*p == 0) 7573 { 7574 #ifdef SUPPORT_PCRE2_8 7575 if (preg.re_pcre2_code != NULL) 7576 { 7577 regfree(&preg); 7578 preg.re_pcre2_code = NULL; 7579 preg.re_match_data = NULL; 7580 } 7581 #endif /* SUPPORT_PCRE2_8 */ 7582 if (TEST(compiled_code, !=, NULL)) 7583 { 7584 SUB1(pcre2_code_free, compiled_code); 7585 SET(compiled_code, NULL); 7586 } 7587 skipping = FALSE; 7588 setlocale(LC_CTYPE, "C"); 7589 } 7590 else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2]))) 7591 rc = process_data(); 7592 } 7593 7594 /* We do not have a pattern set up for testing. Lines starting with # are 7595 either comments or special commands. Blank lines are ignored. Otherwise, the 7596 line must start with a valid delimiter. It is then processed as a pattern 7597 line. */ 7598 7599 else if (*p == '#') 7600 { 7601 if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue; 7602 rc = process_command(); 7603 } 7604 7605 else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL) 7606 { 7607 rc = process_pattern(); 7608 dfa_matched = 0; 7609 } 7610 7611 else 7612 { 7613 while (isspace(*p)) p++; 7614 if (*p != 0) 7615 { 7616 fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer, 7617 *buffer); 7618 rc = PR_SKIP; 7619 } 7620 } 7621 7622 if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE; 7623 else if (rc == PR_ABEND) 7624 { 7625 fprintf(outfile, "** pcre2test run abandoned\n"); 7626 yield = 1; 7627 goto EXIT; 7628 } 7629 } 7630 7631 /* Finish off a normal run. 
*/ 7632 7633 if (INTERACTIVE(infile)) fprintf(outfile, "\n"); 7634 7635 if (showtotaltimes) 7636 { 7637 const char *pad = ""; 7638 fprintf(outfile, "--------------------------------------\n"); 7639 if (timeit > 0) 7640 { 7641 fprintf(outfile, "Total compile time %.4f milliseconds\n", 7642 (((double)total_compile_time * 1000.0) / (double)timeit) / 7643 (double)CLOCKS_PER_SEC); 7644 if (total_jit_compile_time > 0) 7645 fprintf(outfile, "Total JIT compile %.4f milliseconds\n", 7646 (((double)total_jit_compile_time * 1000.0) / (double)timeit) / 7647 (double)CLOCKS_PER_SEC); 7648 pad = " "; 7649 } 7650 fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad, 7651 (((double)total_match_time * 1000.0) / (double)timeitm) / 7652 (double)CLOCKS_PER_SEC); 7653 } 7654 7655 7656 EXIT: 7657 7658 if (infile != NULL && infile != stdin) fclose(infile); 7659 if (outfile != NULL && outfile != stdout) fclose(outfile); 7660 7661 free(buffer); 7662 free(dbuffer); 7663 free(pbuffer8); 7664 free(dfa_workspace); 7665 free((void *)locale_tables); 7666 PCRE2_MATCH_DATA_FREE(match_data); 7667 SUB1(pcre2_code_free, compiled_code); 7668 7669 while(patstacknext-- > 0) 7670 { 7671 SET(compiled_code, patstack[patstacknext]); 7672 SUB1(pcre2_code_free, compiled_code); 7673 } 7674 7675 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context); 7676 if (jit_stack != NULL) 7677 { 7678 PCRE2_JIT_STACK_FREE(jit_stack); 7679 } 7680 7681 #define FREECONTEXTS \ 7682 G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \ 7683 G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \ 7684 G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \ 7685 G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \ 7686 G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \ 7687 G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)) 7688 7689 #ifdef SUPPORT_PCRE2_8 7690 #undef BITS 7691 #define BITS 8 7692 if (preg.re_pcre2_code != NULL) regfree(&preg); 7693 FREECONTEXTS; 7694 
#endif 7695 7696 #ifdef SUPPORT_PCRE2_16 7697 #undef BITS 7698 #define BITS 16 7699 free(pbuffer16); 7700 FREECONTEXTS; 7701 #endif 7702 7703 #ifdef SUPPORT_PCRE2_32 7704 #undef BITS 7705 #define BITS 32 7706 free(pbuffer32); 7707 FREECONTEXTS; 7708 #endif 7709 7710 #if defined(__VMS) 7711 yield = SS$_NORMAL; /* Return values via DCL symbols */ 7712 #endif 7713 7714 return yield; 7715 } 7716 7717 /* End of pcre2test.c */ 7718