1 /************************************************* 2 * PCRE testing program * 3 *************************************************/ 4 5 /* This program was hacked up as a tester for PCRE. I really should have 6 written it more tidily in the first place. Will I ever learn? It has grown and 7 been extended and consequently is now rather, er, *very* untidy in places. The 8 addition of 16-bit support has made it even worse. :-( 9 10 ----------------------------------------------------------------------------- 11 Redistribution and use in source and binary forms, with or without 12 modification, are permitted provided that the following conditions are met: 13 14 * Redistributions of source code must retain the above copyright notice, 15 this list of conditions and the following disclaimer. 16 17 * Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in the 19 documentation and/or other materials provided with the distribution. 20 21 * Neither the name of the University of Cambridge nor the names of its 22 contributors may be used to endorse or promote products derived from 23 this software without specific prior written permission. 24 25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 POSSIBILITY OF SUCH DAMAGE. 36 ----------------------------------------------------------------------------- 37 */ 38 39 /* This program now supports the testing of all of the 8-bit, 16-bit, and 40 32-bit PCRE libraries in a single program. This is different from the modules 41 such as pcre_compile.c in the library itself, which are compiled separately for 42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled 43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not 44 make use of any of the macros from pcre_internal.h that depend on 45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of 46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only 47 supported library functions. */ 48 49 #ifdef HAVE_CONFIG_H 50 #include "config.h" 51 #endif 52 53 #include <ctype.h> 54 #include <stdio.h> 55 #include <string.h> 56 #include <stdlib.h> 57 #include <time.h> 58 #include <locale.h> 59 #include <errno.h> 60 61 /* Both libreadline and libedit are optionally supported. The user-supplied 62 original patch uses readline/readline.h for libedit, but in at least one system 63 it is installed as editline/readline.h, so the configuration code now looks for 64 that first, falling back to readline/readline.h. 
*/

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#if defined(SUPPORT_LIBREADLINE)
#include <readline/readline.h>
#include <readline/history.h>
#else
#if defined(HAVE_EDITLINE_READLINE_H)
#include <editline/readline.h>
#else
#include <readline/readline.h>
#endif
#endif
#endif

/* A number of things vary for Windows builds. Originally, pcretest opened its
input and output without "b"; then I was told that "b" was needed in some
environments, so it was added for release 5.0 to both the input and output. (It
makes no difference on Unix-like systems.) Later I was told that it is wrong
for the input on Windows. I've now abstracted the modes into two macros that
are set here, to make it easier to fiddle with them, and removed "b" from the
input mode under Windows. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to   */
                               /* be already defined, hence the #ifndefs.    */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
#endif

#ifdef __VMS
#include <ssdef.h>
void vms_setsymbol( char *, char *, int );
#endif


/* PRIV(name) expands to the bare name. NOTE(review): presumably this has to be
defined before pcre_internal.h is included so that the library's private
symbols are referenced without a prefix here - confirm against that header. */

#define PRIV(name) name

/* We have to include pcre_internal.h because we need the internal info for
displaying the results of pcre_study() and we also need to know about the
internal macros, structures, and other internal data values; pcretest has
"inside information" compared to a program that strictly follows the PCRE API.

Although pcre_internal.h does itself include pcre.h, we explicitly include it
here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
appropriately for an application, not for building PCRE. */

#include "pcre.h"
#include "pcre_internal.h"

/* The pcre_printint() function, which prints the internal form of a compiled
regex, is held in a separate file so that (a) it can be compiled in either
8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
when that is compiled in debug mode. All three prototypes take the compiled
pattern as a plain "pcre *", whatever the code unit width. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
#ifdef SUPPORT_PCRE32
void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif

/* We need access to some of the data tables that PCRE uses. So as not to have
to keep two copies, we include the source files here, changing the names of the
external symbols to prevent clashes. */

#define PCRE_INCLUDED

#include "pcre_tables.c"
#include "pcre_ucd.c"

/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
the same as in the printint.src file. We use it here in cases when the locale
has not been explicitly changed, so as to get consistent output from systems
that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) defers to isprint() when locale_set is non-zero and otherwise
falls back to the fixed PRINTABLE() range above. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))

/* Posix support is disabled in 16 or 32 bit only mode. */
#if !defined SUPPORT_PCRE8 && !defined NOPOSIX
#define NOPOSIX
#endif

/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
Makefile. */

#if !defined NOPOSIX
#include "pcreposix.h"
#endif

/* It is also possible, originally for the benefit of a version that was
imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
automatically cut out the UTF support if PCRE is built without it. */

#ifndef SUPPORT_UTF
#ifndef NOUTF
#define NOUTF
#endif
#endif

/* To make the code a bit tidier for 8/16/32-bit support, we define macros
for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
only from one place and is handled differently). I couldn't dream up any way of
using a single macro to do this in a generic way, because of the many different
argument requirements. We know that at least one of SUPPORT_PCRE8 and
SUPPORT_PCRE16 must be set.
First define macros for each individual mode; then 212 use these in the definitions of generic macros. 213 214 **** Special note about the PCHARSxxx macros: the address of the string to be 215 printed is always given as two arguments: a base address followed by an offset. 216 The base address is cast to the correct data size for 8 or 16 bit data; the 217 offset is in units of this size. If the string were given as base+offset in one 218 argument, the casting might be incorrectly applied. */ 219 220 #ifdef SUPPORT_PCRE8 221 222 #define PCHARS8(lv, p, offset, len, f) \ 223 lv = pchars((pcre_uint8 *)(p) + offset, len, f) 224 225 #define PCHARSV8(p, offset, len, f) \ 226 (void)pchars((pcre_uint8 *)(p) + offset, len, f) 227 228 #define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \ 229 p = read_capture_name8(p, cn8, re) 230 231 #define STRLEN8(p) ((int)strlen((char *)p)) 232 233 #define SET_PCRE_CALLOUT8(callout) \ 234 pcre_callout = callout 235 236 #define SET_PCRE_STACK_GUARD8(stack_guard) \ 237 pcre_stack_guard = stack_guard 238 239 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \ 240 pcre_assign_jit_stack(extra, callback, userdata) 241 242 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \ 243 re = pcre_compile((char *)pat, options, error, erroffset, tables) 244 245 #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 246 namesptr, cbuffer, size) \ 247 rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \ 248 (char *)namesptr, cbuffer, size) 249 250 #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \ 251 rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size) 252 253 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \ 254 offsets, size_offsets, workspace, size_workspace) \ 255 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \ 256 offsets, size_offsets, workspace, size_workspace) 257 258 #define PCRE_EXEC8(count, re, extra, 
bptr, len, start_offset, options, \ 259 offsets, size_offsets) \ 260 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \ 261 offsets, size_offsets) 262 263 #define PCRE_FREE_STUDY8(extra) \ 264 pcre_free_study(extra) 265 266 #define PCRE_FREE_SUBSTRING8(substring) \ 267 pcre_free_substring(substring) 268 269 #define PCRE_FREE_SUBSTRING_LIST8(listptr) \ 270 pcre_free_substring_list(listptr) 271 272 #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \ 273 getnamesptr, subsptr) \ 274 rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \ 275 (char *)getnamesptr, subsptr) 276 277 #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \ 278 n = pcre_get_stringnumber(re, (char *)ptr) 279 280 #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \ 281 rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr) 282 283 #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \ 284 rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr) 285 286 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \ 287 rc = pcre_pattern_to_host_byte_order(re, extra, tables) 288 289 #define PCRE_PRINTINT8(re, outfile, debug_lengths) \ 290 pcre_printint(re, outfile, debug_lengths) 291 292 #define PCRE_STUDY8(extra, re, options, error) \ 293 extra = pcre_study(re, options, error) 294 295 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \ 296 pcre_jit_stack_alloc(startsize, maxsize) 297 298 #define PCRE_JIT_STACK_FREE8(stack) \ 299 pcre_jit_stack_free(stack) 300 301 #define pcre8_maketables pcre_maketables 302 303 #endif /* SUPPORT_PCRE8 */ 304 305 /* -----------------------------------------------------------*/ 306 307 #ifdef SUPPORT_PCRE16 308 309 #define PCHARS16(lv, p, offset, len, f) \ 310 lv = pchars16((PCRE_SPTR16)(p) + offset, len, f) 311 312 #define PCHARSV16(p, offset, len, f) \ 313 (void)pchars16((PCRE_SPTR16)(p) + offset, len, f) 314 315 #define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \ 
316 p = read_capture_name16(p, cn16, re) 317 318 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p)) 319 320 #define SET_PCRE_CALLOUT16(callout) \ 321 pcre16_callout = (int (*)(pcre16_callout_block *))callout 322 323 #define SET_PCRE_STACK_GUARD16(stack_guard) \ 324 pcre16_stack_guard = (int (*)(void))stack_guard 325 326 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \ 327 pcre16_assign_jit_stack((pcre16_extra *)extra, \ 328 (pcre16_jit_callback)callback, userdata) 329 330 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \ 331 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \ 332 tables) 333 334 #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 335 namesptr, cbuffer, size) \ 336 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ 337 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2) 338 339 #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \ 340 rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \ 341 (PCRE_UCHAR16 *)cbuffer, size/2) 342 343 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 344 offsets, size_offsets, workspace, size_workspace) \ 345 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \ 346 (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \ 347 workspace, size_workspace) 348 349 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \ 350 offsets, size_offsets) \ 351 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \ 352 len, start_offset, options, offsets, size_offsets) 353 354 #define PCRE_FREE_STUDY16(extra) \ 355 pcre16_free_study((pcre16_extra *)extra) 356 357 #define PCRE_FREE_SUBSTRING16(substring) \ 358 pcre16_free_substring((PCRE_SPTR16)substring) 359 360 #define PCRE_FREE_SUBSTRING_LIST16(listptr) \ 361 pcre16_free_substring_list((PCRE_SPTR16 *)listptr) 362 363 #define 
PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \ 364 getnamesptr, subsptr) \ 365 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \ 366 count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr) 367 368 #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \ 369 n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr) 370 371 #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \ 372 rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \ 373 (PCRE_SPTR16 *)(void*)subsptr) 374 375 #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \ 376 rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \ 377 (PCRE_SPTR16 **)(void*)listptr) 378 379 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \ 380 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \ 381 tables) 382 383 #define PCRE_PRINTINT16(re, outfile, debug_lengths) \ 384 pcre16_printint(re, outfile, debug_lengths) 385 386 #define PCRE_STUDY16(extra, re, options, error) \ 387 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error) 388 389 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \ 390 (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize) 391 392 #define PCRE_JIT_STACK_FREE16(stack) \ 393 pcre16_jit_stack_free((pcre16_jit_stack *)stack) 394 395 #endif /* SUPPORT_PCRE16 */ 396 397 /* -----------------------------------------------------------*/ 398 399 #ifdef SUPPORT_PCRE32 400 401 #define PCHARS32(lv, p, offset, len, f) \ 402 lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f) 403 404 #define PCHARSV32(p, offset, len, f) \ 405 (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f) 406 407 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \ 408 p = read_capture_name32(p, cn32, re) 409 410 #define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)p)) 411 412 #define SET_PCRE_CALLOUT32(callout) \ 413 pcre32_callout = (int (*)(pcre32_callout_block *))callout 414 415 #define 
SET_PCRE_STACK_GUARD32(stack_guard) \ 416 pcre32_stack_guard = (int (*)(void))stack_guard 417 418 #define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \ 419 pcre32_assign_jit_stack((pcre32_extra *)extra, \ 420 (pcre32_jit_callback)callback, userdata) 421 422 #define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \ 423 re = (pcre *)pcre32_compile((PCRE_SPTR32)pat, options, error, erroffset, \ 424 tables) 425 426 #define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \ 427 namesptr, cbuffer, size) \ 428 rc = pcre32_copy_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \ 429 count, (PCRE_SPTR32)namesptr, (PCRE_UCHAR32 *)cbuffer, size/2) 430 431 #define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \ 432 rc = pcre32_copy_substring((PCRE_SPTR32)bptr, offsets, count, i, \ 433 (PCRE_UCHAR32 *)cbuffer, size/2) 434 435 #define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \ 436 offsets, size_offsets, workspace, size_workspace) \ 437 count = pcre32_dfa_exec((pcre32 *)re, (pcre32_extra *)extra, \ 438 (PCRE_SPTR32)bptr, len, start_offset, options, offsets, size_offsets, \ 439 workspace, size_workspace) 440 441 #define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \ 442 offsets, size_offsets) \ 443 count = pcre32_exec((pcre32 *)re, (pcre32_extra *)extra, (PCRE_SPTR32)bptr, \ 444 len, start_offset, options, offsets, size_offsets) 445 446 #define PCRE_FREE_STUDY32(extra) \ 447 pcre32_free_study((pcre32_extra *)extra) 448 449 #define PCRE_FREE_SUBSTRING32(substring) \ 450 pcre32_free_substring((PCRE_SPTR32)substring) 451 452 #define PCRE_FREE_SUBSTRING_LIST32(listptr) \ 453 pcre32_free_substring_list((PCRE_SPTR32 *)listptr) 454 455 #define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \ 456 getnamesptr, subsptr) \ 457 rc = pcre32_get_named_substring((pcre32 *)re, (PCRE_SPTR32)bptr, offsets, \ 458 count, (PCRE_SPTR32)getnamesptr, (PCRE_SPTR32 *)(void*)subsptr) 459 460 
#define PCRE_GET_STRINGNUMBER32(n, rc, ptr) \ 461 n = pcre32_get_stringnumber(re, (PCRE_SPTR32)ptr) 462 463 #define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \ 464 rc = pcre32_get_substring((PCRE_SPTR32)bptr, offsets, count, i, \ 465 (PCRE_SPTR32 *)(void*)subsptr) 466 467 #define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \ 468 rc = pcre32_get_substring_list((PCRE_SPTR32)bptr, offsets, count, \ 469 (PCRE_SPTR32 **)(void*)listptr) 470 471 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \ 472 rc = pcre32_pattern_to_host_byte_order((pcre32 *)re, (pcre32_extra *)extra, \ 473 tables) 474 475 #define PCRE_PRINTINT32(re, outfile, debug_lengths) \ 476 pcre32_printint(re, outfile, debug_lengths) 477 478 #define PCRE_STUDY32(extra, re, options, error) \ 479 extra = (pcre_extra *)pcre32_study((pcre32 *)re, options, error) 480 481 #define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \ 482 (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize) 483 484 #define PCRE_JIT_STACK_FREE32(stack) \ 485 pcre32_jit_stack_free((pcre32_jit_stack *)stack) 486 487 #endif /* SUPPORT_PCRE32 */ 488 489 490 /* ----- More than one mode is supported; a runtime test is needed, except for 491 pcre_config(), and the JIT stack functions, when it doesn't matter which 492 available version is called. ----- */ 493 494 enum { 495 PCRE8_MODE, 496 PCRE16_MODE, 497 PCRE32_MODE 498 }; 499 500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \ 501 defined (SUPPORT_PCRE32)) >= 2 502 503 #define CHAR_SIZE (1 << pcre_mode) 504 505 /* There doesn't seem to be an easy way of writing these macros that can cope 506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the 507 cases separately. 
*/

/* ----- All three modes supported ----- */

/* Each statement-form macro in this section expands to a bare
if / else if / else chain that tests pcre_mode and forwards its arguments
unchanged to the 32-, 16- or 8-bit variant. The expansion carries no trailing
semicolon; the caller supplies it. STRLEN, PCRE_JIT_STACK_ALLOC and
PCRE_MAKETABLES are fully parenthesized conditional expressions instead, so
they yield a value. PCRE_CONFIG is simply pcre_config. */

#if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)

#define PCHARS(lv, p, offset, len, f) \
  if (pcre_mode == PCRE32_MODE) \
    PCHARS32(lv, p, offset, len, f); \
  else if (pcre_mode == PCRE16_MODE) \
    PCHARS16(lv, p, offset, len, f); \
  else \
    PCHARS8(lv, p, offset, len, f)

#define PCHARSV(p, offset, len, f) \
  if (pcre_mode == PCRE32_MODE) \
    PCHARSV32(p, offset, len, f); \
  else if (pcre_mode == PCRE16_MODE) \
    PCHARSV16(p, offset, len, f); \
  else \
    PCHARSV8(p, offset, len, f)

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  if (pcre_mode == PCRE32_MODE) \
    READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re); \
  else if (pcre_mode == PCRE16_MODE) \
    READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re); \
  else \
    READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re)

#define SET_PCRE_CALLOUT(callout) \
  if (pcre_mode == PCRE32_MODE) \
    SET_PCRE_CALLOUT32(callout); \
  else if (pcre_mode == PCRE16_MODE) \
    SET_PCRE_CALLOUT16(callout); \
  else \
    SET_PCRE_CALLOUT8(callout)

#define SET_PCRE_STACK_GUARD(stack_guard) \
  if (pcre_mode == PCRE32_MODE) \
    SET_PCRE_STACK_GUARD32(stack_guard); \
  else if (pcre_mode == PCRE16_MODE) \
    SET_PCRE_STACK_GUARD16(stack_guard); \
  else \
    SET_PCRE_STACK_GUARD8(stack_guard)

#define STRLEN(p) (pcre_mode == PCRE32_MODE ? STRLEN32(p) : pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
  else \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_COMPILE32(re, pat, options, error, erroffset, tables); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size); \
  else \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
  else \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace); \
  else \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_STUDY32(extra); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_STUDY16(extra); \
  else \
    PCRE_FREE_STUDY8(extra)

#define PCRE_FREE_SUBSTRING(substring) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_SUBSTRING32(substring); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_SUBSTRING16(substring); \
  else \
    PCRE_FREE_SUBSTRING8(substring)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_FREE_SUBSTRING_LIST32(listptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_FREE_SUBSTRING_LIST16(listptr); \
  else \
    PCRE_FREE_SUBSTRING_LIST8(listptr)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr); \
  else \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_STRINGNUMBER32(n, rc, ptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
  else \
    PCRE_GET_STRINGNUMBER8(n, rc, ptr)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
  else \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
  else \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE32_MODE ? \
     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
    : pcre_mode == PCRE16_MODE ? \
      PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
      : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_JIT_STACK_FREE32(stack); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_JIT_STACK_FREE16(stack); \
  else \
    PCRE_JIT_STACK_FREE8(stack)

#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_PRINTINT32(re, outfile, debug_lengths); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_PRINTINT16(re, outfile, debug_lengths); \
  else \
    PCRE_PRINTINT8(re, outfile, debug_lengths)

#define PCRE_STUDY(extra, re, options, error) \
  if (pcre_mode == PCRE32_MODE) \
    PCRE_STUDY32(extra, re, options, error); \
  else if (pcre_mode == PCRE16_MODE) \
    PCRE_STUDY16(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)


/* ----- Two out of three modes are supported ----- */

#else

/* We can use some macro trickery to make a single set of definitions work in
the three different cases.
*/ 726 727 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */ 728 729 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16) 730 #define BITONE 32 731 #define BITTWO 16 732 733 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */ 734 735 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8) 736 #define BITONE 32 737 #define BITTWO 8 738 739 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */ 740 741 #else 742 #define BITONE 16 743 #define BITTWO 8 744 #endif 745 746 #define glue(a,b) a##b 747 #define G(a,b) glue(a,b) 748 749 750 /* ----- Common macros for two-mode cases ----- */ 751 752 #define PCHARS(lv, p, offset, len, f) \ 753 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 754 G(PCHARS,BITONE)(lv, p, offset, len, f); \ 755 else \ 756 G(PCHARS,BITTWO)(lv, p, offset, len, f) 757 758 #define PCHARSV(p, offset, len, f) \ 759 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 760 G(PCHARSV,BITONE)(p, offset, len, f); \ 761 else \ 762 G(PCHARSV,BITTWO)(p, offset, len, f) 763 764 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \ 765 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 766 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \ 767 else \ 768 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re) 769 770 #define SET_PCRE_CALLOUT(callout) \ 771 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 772 G(SET_PCRE_CALLOUT,BITONE)(callout); \ 773 else \ 774 G(SET_PCRE_CALLOUT,BITTWO)(callout) 775 776 #define SET_PCRE_STACK_GUARD(stack_guard) \ 777 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 778 G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \ 779 else \ 780 G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard) 781 782 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? 
\ 783 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p)) 784 785 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \ 786 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 787 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \ 788 else \ 789 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata) 790 791 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \ 792 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 793 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \ 794 else \ 795 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables) 796 797 #define PCRE_CONFIG G(G(pcre,BITONE),_config) 798 799 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ 800 namesptr, cbuffer, size) \ 801 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 802 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \ 803 namesptr, cbuffer, size); \ 804 else \ 805 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \ 806 namesptr, cbuffer, size) 807 808 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \ 809 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 810 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \ 811 else \ 812 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size) 813 814 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \ 815 offsets, size_offsets, workspace, size_workspace) \ 816 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 817 G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \ 818 offsets, size_offsets, workspace, size_workspace); \ 819 else \ 820 G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \ 821 offsets, size_offsets, workspace, size_workspace) 822 823 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \ 824 offsets, size_offsets) \ 825 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 826 G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, 
start_offset, options, \ 827 offsets, size_offsets); \ 828 else \ 829 G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \ 830 offsets, size_offsets) 831 832 #define PCRE_FREE_STUDY(extra) \ 833 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 834 G(PCRE_FREE_STUDY,BITONE)(extra); \ 835 else \ 836 G(PCRE_FREE_STUDY,BITTWO)(extra) 837 838 #define PCRE_FREE_SUBSTRING(substring) \ 839 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 840 G(PCRE_FREE_SUBSTRING,BITONE)(substring); \ 841 else \ 842 G(PCRE_FREE_SUBSTRING,BITTWO)(substring) 843 844 #define PCRE_FREE_SUBSTRING_LIST(listptr) \ 845 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 846 G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \ 847 else \ 848 G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr) 849 850 #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \ 851 getnamesptr, subsptr) \ 852 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 853 G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \ 854 getnamesptr, subsptr); \ 855 else \ 856 G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \ 857 getnamesptr, subsptr) 858 859 #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \ 860 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 861 G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \ 862 else \ 863 G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr) 864 865 #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \ 866 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 867 G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \ 868 else \ 869 G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr) 870 871 #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \ 872 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 873 G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \ 874 else \ 875 G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr) 876 877 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \ 878 (pcre_mode == 
G(G(PCRE,BITONE),_MODE)) ? \ 879 G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \ 880 : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize) 881 882 #define PCRE_JIT_STACK_FREE(stack) \ 883 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 884 G(PCRE_JIT_STACK_FREE,BITONE)(stack); \ 885 else \ 886 G(PCRE_JIT_STACK_FREE,BITTWO)(stack) 887 888 #define PCRE_MAKETABLES \ 889 (pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \ 890 G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)() 891 892 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \ 893 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 894 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \ 895 else \ 896 G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables) 897 898 #define PCRE_PRINTINT(re, outfile, debug_lengths) \ 899 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 900 G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \ 901 else \ 902 G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths) 903 904 #define PCRE_STUDY(extra, re, options, error) \ 905 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \ 906 G(PCRE_STUDY,BITONE)(extra, re, options, error); \ 907 else \ 908 G(PCRE_STUDY,BITTWO)(extra, re, options, error) 909 910 #endif /* Two out of three modes */ 911 912 /* ----- End of cases where more than one mode is supported ----- */ 913 914 915 /* ----- Only 8-bit mode is supported ----- */ 916 917 #elif defined SUPPORT_PCRE8 918 #define CHAR_SIZE 1 919 #define PCHARS PCHARS8 920 #define PCHARSV PCHARSV8 921 #define READ_CAPTURE_NAME READ_CAPTURE_NAME8 922 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8 923 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8 924 #define STRLEN STRLEN8 925 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8 926 #define PCRE_COMPILE PCRE_COMPILE8 927 #define PCRE_CONFIG pcre_config 928 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8 929 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8 930 #define PCRE_DFA_EXEC PCRE_DFA_EXEC8 931 #define PCRE_EXEC 
PCRE_EXEC8 932 #define PCRE_FREE_STUDY PCRE_FREE_STUDY8 933 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8 934 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8 935 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8 936 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8 937 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8 938 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8 939 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8 940 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8 941 #define PCRE_MAKETABLES pcre_maketables() 942 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8 943 #define PCRE_PRINTINT PCRE_PRINTINT8 944 #define PCRE_STUDY PCRE_STUDY8 945 946 /* ----- Only 16-bit mode is supported ----- */ 947 948 #elif defined SUPPORT_PCRE16 949 #define CHAR_SIZE 2 950 #define PCHARS PCHARS16 951 #define PCHARSV PCHARSV16 952 #define READ_CAPTURE_NAME READ_CAPTURE_NAME16 953 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16 954 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16 955 #define STRLEN STRLEN16 956 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16 957 #define PCRE_COMPILE PCRE_COMPILE16 958 #define PCRE_CONFIG pcre16_config 959 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16 960 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16 961 #define PCRE_DFA_EXEC PCRE_DFA_EXEC16 962 #define PCRE_EXEC PCRE_EXEC16 963 #define PCRE_FREE_STUDY PCRE_FREE_STUDY16 964 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16 965 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16 966 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16 967 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16 968 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16 969 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16 970 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16 971 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16 972 #define PCRE_MAKETABLES pcre16_maketables() 973 #define 
PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16 974 #define PCRE_PRINTINT PCRE_PRINTINT16 975 #define PCRE_STUDY PCRE_STUDY16 976 977 /* ----- Only 32-bit mode is supported ----- */ 978 979 #elif defined SUPPORT_PCRE32 980 #define CHAR_SIZE 4 981 #define PCHARS PCHARS32 982 #define PCHARSV PCHARSV32 983 #define READ_CAPTURE_NAME READ_CAPTURE_NAME32 984 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32 985 #define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32 986 #define STRLEN STRLEN32 987 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32 988 #define PCRE_COMPILE PCRE_COMPILE32 989 #define PCRE_CONFIG pcre32_config 990 #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32 991 #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32 992 #define PCRE_DFA_EXEC PCRE_DFA_EXEC32 993 #define PCRE_EXEC PCRE_EXEC32 994 #define PCRE_FREE_STUDY PCRE_FREE_STUDY32 995 #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32 996 #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32 997 #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32 998 #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32 999 #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32 1000 #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32 1001 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32 1002 #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32 1003 #define PCRE_MAKETABLES pcre32_maketables() 1004 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32 1005 #define PCRE_PRINTINT PCRE_PRINTINT32 1006 #define PCRE_STUDY PCRE_STUDY32 1007 1008 #endif 1009 1010 /* ----- End of mode-specific function call macros ----- */ 1011 1012 1013 /* Other parameters */ 1014 1015 #ifndef CLOCKS_PER_SEC 1016 #ifdef CLK_TCK 1017 #define CLOCKS_PER_SEC CLK_TCK 1018 #else 1019 #define CLOCKS_PER_SEC 100 1020 #endif 1021 #endif 1022 1023 #if !defined NODFA 1024 #define DFA_WS_DIMENSION 1000 1025 #endif 1026 1027 /* This is the default loop count for timing. 
*/ 1028 1029 #define LOOPREPEAT 500000 1030 1031 /* Static variables */ 1032 1033 static FILE *outfile; 1034 static int log_store = 0; 1035 static int callout_count; 1036 static int callout_extra; 1037 static int callout_fail_count; 1038 static int callout_fail_id; 1039 static int debug_lengths; 1040 static int first_callout; 1041 static int jit_was_used; 1042 static int locale_set = 0; 1043 static int show_malloc; 1044 static int stack_guard_return; 1045 static int use_utf; 1046 static const unsigned char *last_callout_mark = NULL; 1047 1048 /* The buffers grow automatically if very long input lines are encountered. */ 1049 1050 static int buffer_size = 50000; 1051 static pcre_uint8 *buffer = NULL; 1052 static pcre_uint8 *pbuffer = NULL; 1053 1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */ 1055 1056 #ifdef COMPILE_PCRE16 1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c 1058 #endif 1059 1060 #ifdef COMPILE_PCRE32 1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c 1062 #endif 1063 1064 /* We need buffers for building 16/32-bit strings, and the tables of operator 1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a 1066 pattern for saving/reloading testing. Luckily, the data for these tables is 1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which 1068 are used in the tables) are adjusted appropriately for the 16/32-bit world. 1069 LINK_SIZE is also used later in this program. 
*/ 1070 1071 #ifdef SUPPORT_PCRE16 1072 #undef IMM2_SIZE 1073 #define IMM2_SIZE 1 1074 1075 #if LINK_SIZE == 2 1076 #undef LINK_SIZE 1077 #define LINK_SIZE 1 1078 #elif LINK_SIZE == 3 || LINK_SIZE == 4 1079 #undef LINK_SIZE 1080 #define LINK_SIZE 2 1081 #else 1082 #error LINK_SIZE must be either 2, 3, or 4 1083 #endif 1084 1085 static int buffer16_size = 0; 1086 static pcre_uint16 *buffer16 = NULL; 1087 static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS }; 1088 #endif /* SUPPORT_PCRE16 */ 1089 1090 #ifdef SUPPORT_PCRE32 1091 #undef IMM2_SIZE 1092 #define IMM2_SIZE 1 1093 #undef LINK_SIZE 1094 #define LINK_SIZE 1 1095 1096 static int buffer32_size = 0; 1097 static pcre_uint32 *buffer32 = NULL; 1098 static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS }; 1099 #endif /* SUPPORT_PCRE32 */ 1100 1101 /* If we have 8-bit support, default to it; if there is also 16-or 32-bit 1102 support, it can be changed by an option. If there is no 8-bit support, there 1103 must be 16-or 32-bit support, so default it to 1. */ 1104 1105 #if defined SUPPORT_PCRE8 1106 static int pcre_mode = PCRE8_MODE; 1107 #elif defined SUPPORT_PCRE16 1108 static int pcre_mode = PCRE16_MODE; 1109 #elif defined SUPPORT_PCRE32 1110 static int pcre_mode = PCRE32_MODE; 1111 #endif 1112 1113 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. 
*/ 1114 1115 static int jit_study_bits[] = 1116 { 1117 PCRE_STUDY_JIT_COMPILE, 1118 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, 1119 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE, 1120 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 1121 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 1122 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 1123 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + 1124 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 1125 }; 1126 1127 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \ 1128 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) 1129 1130 /* Textual explanations for runtime error codes */ 1131 1132 static const char *errtexts[] = { 1133 NULL, /* 0 is no error */ 1134 NULL, /* NOMATCH is handled specially */ 1135 "NULL argument passed", 1136 "bad option value", 1137 "magic number missing", 1138 "unknown opcode - pattern overwritten?", 1139 "no more memory", 1140 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */ 1141 "match limit exceeded", 1142 "callout error code", 1143 NULL, /* BADUTF8/16 is handled specially */ 1144 NULL, /* BADUTF8/16 offset is handled specially */ 1145 NULL, /* PARTIAL is handled specially */ 1146 "not used - internal error", 1147 "internal error - pattern overwritten?", 1148 "bad count value", 1149 "item unsupported for DFA matching", 1150 "backreference condition or recursion test not supported for DFA matching", 1151 "match limit not supported for DFA matching", 1152 "workspace size exceeded in DFA matching", 1153 "too much recursion for DFA matching", 1154 "recursion limit exceeded", 1155 "not used - internal error", 1156 "invalid combination of newline options", 1157 "bad offset value", 1158 NULL, /* SHORTUTF8/16 is handled specially */ 1159 "nested recursion at the same subject position", 1160 "JIT stack limit reached", 1161 "pattern compiled in wrong mode: 8-bit/16-bit error", 1162 "pattern compiled with other endianness", 
1163 "invalid data in workspace for DFA restart", 1164 "bad JIT option", 1165 "bad length" 1166 }; 1167 1168 1169 /************************************************* 1170 * Alternate character tables * 1171 *************************************************/ 1172 1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby 1174 using the default tables of the library. However, the T option can be used to 1175 select alternate sets of tables, for different kinds of testing. Note also that 1176 the L (locale) option also adjusts the tables. */ 1177 1178 /* This is the set of tables distributed as default with PCRE. It recognizes 1179 only ASCII characters. */ 1180 1181 static const pcre_uint8 tables0[] = { 1182 1183 /* This table is a lower casing table. */ 1184 1185 0, 1, 2, 3, 4, 5, 6, 7, 1186 8, 9, 10, 11, 12, 13, 14, 15, 1187 16, 17, 18, 19, 20, 21, 22, 23, 1188 24, 25, 26, 27, 28, 29, 30, 31, 1189 32, 33, 34, 35, 36, 37, 38, 39, 1190 40, 41, 42, 43, 44, 45, 46, 47, 1191 48, 49, 50, 51, 52, 53, 54, 55, 1192 56, 57, 58, 59, 60, 61, 62, 63, 1193 64, 97, 98, 99,100,101,102,103, 1194 104,105,106,107,108,109,110,111, 1195 112,113,114,115,116,117,118,119, 1196 120,121,122, 91, 92, 93, 94, 95, 1197 96, 97, 98, 99,100,101,102,103, 1198 104,105,106,107,108,109,110,111, 1199 112,113,114,115,116,117,118,119, 1200 120,121,122,123,124,125,126,127, 1201 128,129,130,131,132,133,134,135, 1202 136,137,138,139,140,141,142,143, 1203 144,145,146,147,148,149,150,151, 1204 152,153,154,155,156,157,158,159, 1205 160,161,162,163,164,165,166,167, 1206 168,169,170,171,172,173,174,175, 1207 176,177,178,179,180,181,182,183, 1208 184,185,186,187,188,189,190,191, 1209 192,193,194,195,196,197,198,199, 1210 200,201,202,203,204,205,206,207, 1211 208,209,210,211,212,213,214,215, 1212 216,217,218,219,220,221,222,223, 1213 224,225,226,227,228,229,230,231, 1214 232,233,234,235,236,237,238,239, 1215 240,241,242,243,244,245,246,247, 1216 248,249,250,251,252,253,254,255, 1217 1218 /* 
This table is a case flipping table. */ 1219 1220 0, 1, 2, 3, 4, 5, 6, 7, 1221 8, 9, 10, 11, 12, 13, 14, 15, 1222 16, 17, 18, 19, 20, 21, 22, 23, 1223 24, 25, 26, 27, 28, 29, 30, 31, 1224 32, 33, 34, 35, 36, 37, 38, 39, 1225 40, 41, 42, 43, 44, 45, 46, 47, 1226 48, 49, 50, 51, 52, 53, 54, 55, 1227 56, 57, 58, 59, 60, 61, 62, 63, 1228 64, 97, 98, 99,100,101,102,103, 1229 104,105,106,107,108,109,110,111, 1230 112,113,114,115,116,117,118,119, 1231 120,121,122, 91, 92, 93, 94, 95, 1232 96, 65, 66, 67, 68, 69, 70, 71, 1233 72, 73, 74, 75, 76, 77, 78, 79, 1234 80, 81, 82, 83, 84, 85, 86, 87, 1235 88, 89, 90,123,124,125,126,127, 1236 128,129,130,131,132,133,134,135, 1237 136,137,138,139,140,141,142,143, 1238 144,145,146,147,148,149,150,151, 1239 152,153,154,155,156,157,158,159, 1240 160,161,162,163,164,165,166,167, 1241 168,169,170,171,172,173,174,175, 1242 176,177,178,179,180,181,182,183, 1243 184,185,186,187,188,189,190,191, 1244 192,193,194,195,196,197,198,199, 1245 200,201,202,203,204,205,206,207, 1246 208,209,210,211,212,213,214,215, 1247 216,217,218,219,220,221,222,223, 1248 224,225,226,227,228,229,230,231, 1249 232,233,234,235,236,237,238,239, 1250 240,241,242,243,244,245,246,247, 1251 248,249,250,251,252,253,254,255, 1252 1253 /* This table contains bit maps for various character classes. Each map is 32 1254 bytes long and the bits run from the least significant end of each byte. The 1255 classes that have their own maps are: space, xdigit, digit, upper, lower, word, 1256 graph, print, punct, and cntrl. Other classes are built from combinations. 
*/ 1257 1258 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 1259 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1260 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1261 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1262 1263 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 1264 0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, 1265 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1266 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1267 1268 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 1269 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1270 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1271 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1272 1273 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1274 0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00, 1275 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1276 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1277 1278 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1279 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07, 1280 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1281 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1282 1283 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, 1284 0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07, 1285 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1286 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1287 1288 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, 1289 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 1290 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1291 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1292 1293 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, 1294 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f, 1295 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1296 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1297 1298 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, 1299 0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78, 1300 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1301 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1302 1303 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, 1304 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80, 1305 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1306 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 1307 1308 /* This table identifies various classes of character by individual bits: 1309 0x01 white space 
character 1310 0x02 letter 1311 0x04 decimal digit 1312 0x08 hexadecimal digit 1313 0x10 alphanumeric or '_' 1314 0x80 regular expression metacharacter or binary zero 1315 */ 1316 1317 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 1318 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ 1319 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 1320 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 1321 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ 1322 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ 1323 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 1324 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ 1325 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 1326 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 1327 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ 1328 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ 1329 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 1330 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 1331 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ 1332 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ 1333 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 1334 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 1335 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 1336 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ 1337 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ 1338 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ 1339 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ 1340 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 1341 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ 1342 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ 1343 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ 1344 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ 1345 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ 1346 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ 1347 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 
240-247 */ 1348 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ 1349 1350 /* This is a set of tables that came originally from a Windows user. It seems 1351 to be at least an approximation of ISO 8859. In particular, there are 1352 characters greater than 128 that are marked as spaces, letters, etc. */ 1353 1354 static const pcre_uint8 tables1[] = { 1355 0,1,2,3,4,5,6,7, 1356 8,9,10,11,12,13,14,15, 1357 16,17,18,19,20,21,22,23, 1358 24,25,26,27,28,29,30,31, 1359 32,33,34,35,36,37,38,39, 1360 40,41,42,43,44,45,46,47, 1361 48,49,50,51,52,53,54,55, 1362 56,57,58,59,60,61,62,63, 1363 64,97,98,99,100,101,102,103, 1364 104,105,106,107,108,109,110,111, 1365 112,113,114,115,116,117,118,119, 1366 120,121,122,91,92,93,94,95, 1367 96,97,98,99,100,101,102,103, 1368 104,105,106,107,108,109,110,111, 1369 112,113,114,115,116,117,118,119, 1370 120,121,122,123,124,125,126,127, 1371 128,129,130,131,132,133,134,135, 1372 136,137,138,139,140,141,142,143, 1373 144,145,146,147,148,149,150,151, 1374 152,153,154,155,156,157,158,159, 1375 160,161,162,163,164,165,166,167, 1376 168,169,170,171,172,173,174,175, 1377 176,177,178,179,180,181,182,183, 1378 184,185,186,187,188,189,190,191, 1379 224,225,226,227,228,229,230,231, 1380 232,233,234,235,236,237,238,239, 1381 240,241,242,243,244,245,246,215, 1382 248,249,250,251,252,253,254,223, 1383 224,225,226,227,228,229,230,231, 1384 232,233,234,235,236,237,238,239, 1385 240,241,242,243,244,245,246,247, 1386 248,249,250,251,252,253,254,255, 1387 0,1,2,3,4,5,6,7, 1388 8,9,10,11,12,13,14,15, 1389 16,17,18,19,20,21,22,23, 1390 24,25,26,27,28,29,30,31, 1391 32,33,34,35,36,37,38,39, 1392 40,41,42,43,44,45,46,47, 1393 48,49,50,51,52,53,54,55, 1394 56,57,58,59,60,61,62,63, 1395 64,97,98,99,100,101,102,103, 1396 104,105,106,107,108,109,110,111, 1397 112,113,114,115,116,117,118,119, 1398 120,121,122,91,92,93,94,95, 1399 96,65,66,67,68,69,70,71, 1400 72,73,74,75,76,77,78,79, 1401 80,81,82,83,84,85,86,87, 1402 88,89,90,123,124,125,126,127, 1403 
128,129,130,131,132,133,134,135, 1404 136,137,138,139,140,141,142,143, 1405 144,145,146,147,148,149,150,151, 1406 152,153,154,155,156,157,158,159, 1407 160,161,162,163,164,165,166,167, 1408 168,169,170,171,172,173,174,175, 1409 176,177,178,179,180,181,182,183, 1410 184,185,186,187,188,189,190,191, 1411 224,225,226,227,228,229,230,231, 1412 232,233,234,235,236,237,238,239, 1413 240,241,242,243,244,245,246,215, 1414 248,249,250,251,252,253,254,223, 1415 192,193,194,195,196,197,198,199, 1416 200,201,202,203,204,205,206,207, 1417 208,209,210,211,212,213,214,247, 1418 216,217,218,219,220,221,222,255, 1419 0,62,0,0,1,0,0,0, 1420 0,0,0,0,0,0,0,0, 1421 32,0,0,0,1,0,0,0, 1422 0,0,0,0,0,0,0,0, 1423 0,0,0,0,0,0,255,3, 1424 126,0,0,0,126,0,0,0, 1425 0,0,0,0,0,0,0,0, 1426 0,0,0,0,0,0,0,0, 1427 0,0,0,0,0,0,255,3, 1428 0,0,0,0,0,0,0,0, 1429 0,0,0,0,0,0,12,2, 1430 0,0,0,0,0,0,0,0, 1431 0,0,0,0,0,0,0,0, 1432 254,255,255,7,0,0,0,0, 1433 0,0,0,0,0,0,0,0, 1434 255,255,127,127,0,0,0,0, 1435 0,0,0,0,0,0,0,0, 1436 0,0,0,0,254,255,255,7, 1437 0,0,0,0,0,4,32,4, 1438 0,0,0,128,255,255,127,255, 1439 0,0,0,0,0,0,255,3, 1440 254,255,255,135,254,255,255,7, 1441 0,0,0,0,0,4,44,6, 1442 255,255,127,255,255,255,127,255, 1443 0,0,0,0,254,255,255,255, 1444 255,255,255,255,255,255,255,127, 1445 0,0,0,0,254,255,255,255, 1446 255,255,255,255,255,255,255,255, 1447 0,2,0,0,255,255,255,255, 1448 255,255,255,255,255,255,255,127, 1449 0,0,0,0,255,255,255,255, 1450 255,255,255,255,255,255,255,255, 1451 0,0,0,0,254,255,0,252, 1452 1,0,0,248,1,0,0,120, 1453 0,0,0,0,254,255,255,255, 1454 0,0,128,0,0,0,128,0, 1455 255,255,255,255,0,0,0,0, 1456 0,0,0,0,0,0,0,128, 1457 255,255,255,255,0,0,0,0, 1458 0,0,0,0,0,0,0,0, 1459 128,0,0,0,0,0,0,0, 1460 0,1,1,0,1,1,0,0, 1461 0,0,0,0,0,0,0,0, 1462 0,0,0,0,0,0,0,0, 1463 1,0,0,0,128,0,0,0, 1464 128,128,128,128,0,0,128,0, 1465 28,28,28,28,28,28,28,28, 1466 28,28,0,0,0,0,0,128, 1467 0,26,26,26,26,26,26,18, 1468 18,18,18,18,18,18,18,18, 1469 18,18,18,18,18,18,18,18, 1470 
18,18,18,128,128,0,128,16, 1471 0,26,26,26,26,26,26,18, 1472 18,18,18,18,18,18,18,18, 1473 18,18,18,18,18,18,18,18, 1474 18,18,18,128,128,0,0,0, 1475 0,0,0,0,0,1,0,0, 1476 0,0,0,0,0,0,0,0, 1477 0,0,0,0,0,0,0,0, 1478 0,0,0,0,0,0,0,0, 1479 1,0,0,0,0,0,0,0, 1480 0,0,18,0,0,0,0,0, 1481 0,0,20,20,0,18,0,0, 1482 0,20,18,0,0,0,0,0, 1483 18,18,18,18,18,18,18,18, 1484 18,18,18,18,18,18,18,18, 1485 18,18,18,18,18,18,18,0, 1486 18,18,18,18,18,18,18,18, 1487 18,18,18,18,18,18,18,18, 1488 18,18,18,18,18,18,18,18, 1489 18,18,18,18,18,18,18,0, 1490 18,18,18,18,18,18,18,18 1491 }; 1492 1493 1494 1495 1496 #ifndef HAVE_STRERROR 1497 /************************************************* 1498 * Provide strerror() for non-ANSI libraries * 1499 *************************************************/ 1500 1501 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 1502 in their libraries, but can provide the same facility by this simple 1503 alternative function. */ 1504 1505 extern int sys_nerr; 1506 extern char *sys_errlist[]; 1507 1508 char * 1509 strerror(int n) 1510 { 1511 if (n < 0 || n >= sys_nerr) return "unknown error number"; 1512 return sys_errlist[n]; 1513 } 1514 #endif /* HAVE_STRERROR */ 1515 1516 1517 1518 /************************************************* 1519 * Print newline configuration * 1520 *************************************************/ 1521 1522 /* 1523 Arguments: 1524 rc the return code from PCRE_CONFIG_NEWLINE 1525 isc TRUE if called from "-C newline" 1526 Returns: nothing 1527 */ 1528 1529 static void 1530 print_newline_config(int rc, BOOL isc) 1531 { 1532 const char *s = NULL; 1533 if (!isc) printf(" Newline sequence is "); 1534 switch(rc) 1535 { 1536 case CHAR_CR: s = "CR"; break; 1537 case CHAR_LF: s = "LF"; break; 1538 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break; 1539 case -1: s = "ANY"; break; 1540 case -2: s = "ANYCRLF"; break; 1541 1542 default: 1543 printf("a non-standard value: 0x%04x\n", rc); 1544 return; 1545 } 1546 1547 
printf("%s\n", s); 1548 } 1549 1550 1551 1552 /************************************************* 1553 * JIT memory callback * 1554 *************************************************/ 1555 1556 static pcre_jit_stack* jit_callback(void *arg) 1557 { 1558 jit_was_used = TRUE; 1559 return (pcre_jit_stack *)arg; 1560 } 1561 1562 1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32 1564 /************************************************* 1565 * Convert UTF-8 string to value * 1566 *************************************************/ 1567 1568 /* This function takes one or more bytes that represents a UTF-8 character, 1569 and returns the value of the character. 1570 1571 Argument: 1572 utf8bytes a pointer to the byte vector 1573 vptr a pointer to an int to receive the value 1574 1575 Returns: > 0 => the number of bytes consumed 1576 -6 to 0 => malformed UTF-8 character at offset = (-return) 1577 */ 1578 1579 static int 1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr) 1581 { 1582 pcre_uint32 c = *utf8bytes++; 1583 pcre_uint32 d = c; 1584 int i, j, s; 1585 1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */ 1587 { 1588 if ((d & 0x80) == 0) break; 1589 d <<= 1; 1590 } 1591 1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */ 1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */ 1594 1595 /* i now has a value in the range 1-5 */ 1596 1597 s = 6*i; 1598 d = (c & utf8_table3[i]) << s; 1599 1600 for (j = 0; j < i; j++) 1601 { 1602 c = *utf8bytes++; 1603 if ((c & 0xc0) != 0x80) return -(j+1); 1604 s -= 6; 1605 d |= (c & 0x3f) << s; 1606 } 1607 1608 /* Check that encoding was the correct unique one */ 1609 1610 for (j = 0; j < utf8_table1_size; j++) 1611 if (d <= (pcre_uint32)utf8_table1[j]) break; 1612 if (j != i) return -(i+1); 1613 1614 /* Valid value */ 1615 1616 *vptr = d; 1617 return i+1; 1618 } 1619 #endif /* NOUTF || SUPPORT_PCRE16 */ 1620 1621 1622 1623 #if defined SUPPORT_PCRE8 && !defined NOUTF 1624 
/************************************************* 1625 * Convert character value to UTF-8 * 1626 *************************************************/ 1627 1628 /* This function takes an integer value in the range 0 - 0x7fffffff 1629 and encodes it as a UTF-8 character in 0 to 6 bytes. 1630 1631 Arguments: 1632 cvalue the character value 1633 utf8bytes pointer to buffer for result - at least 6 bytes long 1634 1635 Returns: number of characters placed in the buffer 1636 */ 1637 1638 static int 1639 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes) 1640 { 1641 register int i, j; 1642 if (cvalue > 0x7fffffffu) 1643 return -1; 1644 for (i = 0; i < utf8_table1_size; i++) 1645 if (cvalue <= (pcre_uint32)utf8_table1[i]) break; 1646 utf8bytes += i; 1647 for (j = i; j > 0; j--) 1648 { 1649 *utf8bytes-- = 0x80 | (cvalue & 0x3f); 1650 cvalue >>= 6; 1651 } 1652 *utf8bytes = utf8_table2[i] | cvalue; 1653 return i + 1; 1654 } 1655 #endif 1656 1657 1658 #ifdef SUPPORT_PCRE16 1659 /************************************************* 1660 * Convert a string to 16-bit * 1661 *************************************************/ 1662 1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the 1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than 1665 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 1666 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The 1667 result is always left in buffer16. 1668 1669 Note that this function does not object to surrogate values. This is 1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid, 1671 for the purpose of testing that they are correctly faulted. 1672 1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always 1674 in UTF-8 so that values greater than 255 can be handled. 
1675 1676 Arguments: 1677 data TRUE if converting a data line; FALSE for a regex 1678 p points to a byte string 1679 utf true if UTF-8 (to be converted to UTF-16) 1680 len number of bytes in the string (excluding trailing zero) 1681 1682 Returns: number of 16-bit data items used (excluding trailing zero) 1683 OR -1 if a UTF-8 string is malformed 1684 OR -2 if a value > 0x10ffff is encountered 1685 OR -3 if a value > 0xffff is encountered when not in UTF mode 1686 */ 1687 1688 static int 1689 to16(int data, pcre_uint8 *p, int utf, int len) 1690 { 1691 pcre_uint16 *pp; 1692 1693 if (buffer16_size < 2*len + 2) 1694 { 1695 if (buffer16 != NULL) free(buffer16); 1696 buffer16_size = 2*len + 2; 1697 buffer16 = (pcre_uint16 *)malloc(buffer16_size); 1698 if (buffer16 == NULL) 1699 { 1700 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size); 1701 exit(1); 1702 } 1703 } 1704 1705 pp = buffer16; 1706 1707 if (!utf && !data) 1708 { 1709 while (len-- > 0) *pp++ = *p++; 1710 } 1711 1712 else 1713 { 1714 pcre_uint32 c = 0; 1715 while (len > 0) 1716 { 1717 int chlen = utf82ord(p, &c); 1718 if (chlen <= 0) return -1; 1719 if (c > 0x10ffff) return -2; 1720 p += chlen; 1721 len -= chlen; 1722 if (c < 0x10000) *pp++ = c; else 1723 { 1724 if (!utf) return -3; 1725 c -= 0x10000; 1726 *pp++ = 0xD800 | (c >> 10); 1727 *pp++ = 0xDC00 | (c & 0x3ff); 1728 } 1729 } 1730 } 1731 1732 *pp = 0; 1733 return pp - buffer16; 1734 } 1735 #endif 1736 1737 #ifdef SUPPORT_PCRE32 1738 /************************************************* 1739 * Convert a string to 32-bit * 1740 *************************************************/ 1741 1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the 1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four 1744 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4 1745 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. 
The 1746 result is always left in buffer32. 1747 1748 Note that this function does not object to surrogate values. This is 1749 deliberate; it makes it possible to construct UTF-32 strings that are invalid, 1750 for the purpose of testing that they are correctly faulted. 1751 1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always 1753 in UTF-8 so that values greater than 255 can be handled. 1754 1755 Arguments: 1756 data TRUE if converting a data line; FALSE for a regex 1757 p points to a byte string 1758 utf true if UTF-8 (to be converted to UTF-32) 1759 len number of bytes in the string (excluding trailing zero) 1760 1761 Returns: number of 32-bit data items used (excluding trailing zero) 1762 OR -1 if a UTF-8 string is malformed 1763 OR -2 if a value > 0x10ffff is encountered 1764 OR -3 if an ill-formed value is encountered (i.e. a surrogate) 1765 */ 1766 1767 static int 1768 to32(int data, pcre_uint8 *p, int utf, int len) 1769 { 1770 pcre_uint32 *pp; 1771 1772 if (buffer32_size < 4*len + 4) 1773 { 1774 if (buffer32 != NULL) free(buffer32); 1775 buffer32_size = 4*len + 4; 1776 buffer32 = (pcre_uint32 *)malloc(buffer32_size); 1777 if (buffer32 == NULL) 1778 { 1779 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size); 1780 exit(1); 1781 } 1782 } 1783 1784 pp = buffer32; 1785 1786 if (!utf && !data) 1787 { 1788 while (len-- > 0) *pp++ = *p++; 1789 } 1790 1791 else 1792 { 1793 pcre_uint32 c = 0; 1794 while (len > 0) 1795 { 1796 int chlen = utf82ord(p, &c); 1797 if (chlen <= 0) return -1; 1798 if (utf) 1799 { 1800 if (c > 0x10ffff) return -2; 1801 if (!data && (c & 0xfffff800u) == 0xd800u) return -3; 1802 } 1803 1804 p += chlen; 1805 len -= chlen; 1806 *pp++ = c; 1807 } 1808 } 1809 1810 *pp = 0; 1811 return pp - buffer32; 1812 } 1813 1814 /* Check that a 32-bit character string is valid UTF-32. 
1815 1816 Arguments: 1817 string points to the string 1818 length length of string, or -1 if the string is zero-terminated 1819 1820 Returns: TRUE if the string is a valid UTF-32 string 1821 FALSE otherwise 1822 */ 1823 1824 #ifdef NEVER /* Not used */ 1825 #ifdef SUPPORT_UTF 1826 static BOOL 1827 valid_utf32(pcre_uint32 *string, int length) 1828 { 1829 register pcre_uint32 *p; 1830 register pcre_uint32 c; 1831 1832 for (p = string; length-- > 0; p++) 1833 { 1834 c = *p; 1835 if (c > 0x10ffffu) return FALSE; /* Too big */ 1836 if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */ 1837 } 1838 1839 return TRUE; 1840 } 1841 #endif /* SUPPORT_UTF */ 1842 #endif /* NEVER */ 1843 #endif /* SUPPORT_PCRE32 */ 1844 1845 1846 /************************************************* 1847 * Read or extend an input line * 1848 *************************************************/ 1849 1850 /* Input lines are read into buffer, but both patterns and data lines can be 1851 continued over multiple input lines. In addition, if the buffer fills up, we 1852 want to automatically expand it so as to be able to handle extremely large 1853 lines that are needed for certain stress tests. When the input buffer is 1854 expanded, the other two buffers must also be expanded likewise, and the 1855 contents of pbuffer, which are a copy of the input for callouts, must be 1856 preserved (for when expansion happens for a data line). This is not the most 1857 optimal way of handling this, but hey, this is just a test program! 
1858 1859 Arguments: 1860 f the file to read 1861 start where in buffer to start (this *must* be within buffer) 1862 prompt for stdin or readline() 1863 1864 Returns: pointer to the start of new data 1865 could be a copy of start, or could be moved 1866 NULL if no data read and EOF reached 1867 */ 1868 1869 static pcre_uint8 * 1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt) 1871 { 1872 pcre_uint8 *here = start; 1873 1874 for (;;) 1875 { 1876 size_t rlen = (size_t)(buffer_size - (here - buffer)); 1877 1878 if (rlen > 1000) 1879 { 1880 int dlen; 1881 1882 /* If libreadline or libedit support is required, use readline() to read a 1883 line if the input is a terminal. Note that readline() removes the trailing 1884 newline, so we must put it back again, to be compatible with fgets(). */ 1885 1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 1887 if (isatty(fileno(f))) 1888 { 1889 size_t len; 1890 char *s = readline(prompt); 1891 if (s == NULL) return (here == start)? NULL : start; 1892 len = strlen(s); 1893 if (len > 0) add_history(s); 1894 if (len > rlen - 1) len = rlen - 1; 1895 memcpy(here, s, len); 1896 here[len] = '\n'; 1897 here[len+1] = 0; 1898 free(s); 1899 } 1900 else 1901 #endif 1902 1903 /* Read the next line by normal means, prompting if the file is stdin. */ 1904 1905 { 1906 if (f == stdin) printf("%s", prompt); 1907 if (fgets((char *)here, rlen, f) == NULL) 1908 return (here == start)? 
NULL : start; 1909 } 1910 1911 dlen = (int)strlen((char *)here); 1912 if (dlen > 0 && here[dlen - 1] == '\n') return start; 1913 here += dlen; 1914 } 1915 1916 else 1917 { 1918 int new_buffer_size = 2*buffer_size; 1919 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size); 1920 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size); 1921 1922 if (new_buffer == NULL || new_pbuffer == NULL) 1923 { 1924 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size); 1925 exit(1); 1926 } 1927 1928 memcpy(new_buffer, buffer, buffer_size); 1929 memcpy(new_pbuffer, pbuffer, buffer_size); 1930 1931 buffer_size = new_buffer_size; 1932 1933 start = new_buffer + (start - buffer); 1934 here = new_buffer + (here - buffer); 1935 1936 free(buffer); 1937 free(pbuffer); 1938 1939 buffer = new_buffer; 1940 pbuffer = new_pbuffer; 1941 } 1942 } 1943 1944 /* Control never gets here */ 1945 } 1946 1947 1948 1949 /************************************************* 1950 * Read number from string * 1951 *************************************************/ 1952 1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess 1954 around with conditional compilation, just do the job by hand. It is only used 1955 for unpicking arguments, so just keep it simple. 1956 1957 Arguments: 1958 str string to be converted 1959 endptr where to put the end pointer 1960 1961 Returns: the unsigned long 1962 */ 1963 1964 static int 1965 get_value(pcre_uint8 *str, pcre_uint8 **endptr) 1966 { 1967 int result = 0; 1968 while(*str != 0 && isspace(*str)) str++; 1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0'); 1970 *endptr = str; 1971 return(result); 1972 } 1973 1974 1975 1976 /************************************************* 1977 * Print one character * 1978 *************************************************/ 1979 1980 /* Print a single character either literally, or as a hex escape. 
*/ 1981 1982 static int pchar(pcre_uint32 c, FILE *f) 1983 { 1984 int n = 0; 1985 if (PRINTOK(c)) 1986 { 1987 if (f != NULL) fprintf(f, "%c", c); 1988 return 1; 1989 } 1990 1991 if (c < 0x100) 1992 { 1993 if (use_utf) 1994 { 1995 if (f != NULL) fprintf(f, "\\x{%02x}", c); 1996 return 6; 1997 } 1998 else 1999 { 2000 if (f != NULL) fprintf(f, "\\x%02x", c); 2001 return 4; 2002 } 2003 } 2004 2005 if (f != NULL) n = fprintf(f, "\\x{%02x}", c); 2006 return n >= 0 ? n : 0; 2007 } 2008 2009 2010 2011 #ifdef SUPPORT_PCRE8 2012 /************************************************* 2013 * Print 8-bit character string * 2014 *************************************************/ 2015 2016 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. 2017 If handed a NULL file, just counts chars without printing. */ 2018 2019 static int pchars(pcre_uint8 *p, int length, FILE *f) 2020 { 2021 pcre_uint32 c = 0; 2022 int yield = 0; 2023 2024 if (length < 0) 2025 length = strlen((char *)p); 2026 2027 while (length-- > 0) 2028 { 2029 #if !defined NOUTF 2030 if (use_utf) 2031 { 2032 int rc = utf82ord(p, &c); 2033 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */ 2034 { 2035 length -= rc - 1; 2036 p += rc; 2037 yield += pchar(c, f); 2038 continue; 2039 } 2040 } 2041 #endif 2042 c = *p++; 2043 yield += pchar(c, f); 2044 } 2045 2046 return yield; 2047 } 2048 #endif 2049 2050 2051 2052 #ifdef SUPPORT_PCRE16 2053 /************************************************* 2054 * Find length of 0-terminated 16-bit string * 2055 *************************************************/ 2056 2057 static int strlen16(PCRE_SPTR16 p) 2058 { 2059 PCRE_SPTR16 pp = p; 2060 while (*pp != 0) pp++; 2061 return (int)(pp - p); 2062 } 2063 #endif /* SUPPORT_PCRE16 */ 2064 2065 2066 2067 #ifdef SUPPORT_PCRE32 2068 /************************************************* 2069 * Find length of 0-terminated 32-bit string * 2070 *************************************************/ 2071 2072 static int 
strlen32(PCRE_SPTR32 p) 2073 { 2074 PCRE_SPTR32 pp = p; 2075 while (*pp != 0) pp++; 2076 return (int)(pp - p); 2077 } 2078 #endif /* SUPPORT_PCRE32 */ 2079 2080 2081 2082 #ifdef SUPPORT_PCRE16 2083 /************************************************* 2084 * Print 16-bit character string * 2085 *************************************************/ 2086 2087 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed. 2088 If handed a NULL file, just counts chars without printing. */ 2089 2090 static int pchars16(PCRE_SPTR16 p, int length, FILE *f) 2091 { 2092 int yield = 0; 2093 2094 if (length < 0) 2095 length = strlen16(p); 2096 2097 while (length-- > 0) 2098 { 2099 pcre_uint32 c = *p++ & 0xffff; 2100 #if !defined NOUTF 2101 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0) 2102 { 2103 int d = *p & 0xffff; 2104 if (d >= 0xDC00 && d <= 0xDFFF) 2105 { 2106 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000; 2107 length--; 2108 p++; 2109 } 2110 } 2111 #endif 2112 yield += pchar(c, f); 2113 } 2114 2115 return yield; 2116 } 2117 #endif /* SUPPORT_PCRE16 */ 2118 2119 2120 2121 #ifdef SUPPORT_PCRE32 2122 /************************************************* 2123 * Print 32-bit character string * 2124 *************************************************/ 2125 2126 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed. 2127 If handed a NULL file, just counts chars without printing. 
*/ 2128 2129 static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f) 2130 { 2131 int yield = 0; 2132 2133 (void)(utf); /* Avoid compiler warning */ 2134 2135 if (length < 0) 2136 length = strlen32(p); 2137 2138 while (length-- > 0) 2139 { 2140 pcre_uint32 c = *p++; 2141 yield += pchar(c, f); 2142 } 2143 2144 return yield; 2145 } 2146 #endif /* SUPPORT_PCRE32 */ 2147 2148 2149 2150 #ifdef SUPPORT_PCRE8 2151 /************************************************* 2152 * Read a capture name (8-bit) and check it * 2153 *************************************************/ 2154 2155 static pcre_uint8 * 2156 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re) 2157 { 2158 pcre_uint8 *npp = *pp; 2159 while (isalnum(*p)) *npp++ = *p++; 2160 *npp++ = 0; 2161 *npp = 0; 2162 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0) 2163 { 2164 fprintf(outfile, "no parentheses with name \""); 2165 PCHARSV(*pp, 0, -1, outfile); 2166 fprintf(outfile, "\"\n"); 2167 } 2168 2169 *pp = npp; 2170 return p; 2171 } 2172 #endif /* SUPPORT_PCRE8 */ 2173 2174 2175 2176 #ifdef SUPPORT_PCRE16 2177 /************************************************* 2178 * Read a capture name (16-bit) and check it * 2179 *************************************************/ 2180 2181 /* Note that the text being read is 8-bit. 
*/

/* Each alphanumeric input byte is widened into the 16-bit output area, which
is then terminated by two zeros. A message is written to outfile if
pcre16_get_stringnumber() does not find the name. On return, *pp points to the
second of the two zeros; the yield is the first input byte not copied. */

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *npp = *pp;
while (isalnum(*p)) *npp++ = *p++;
*npp++ = 0;
*npp = 0;
if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }
*pp = npp;
return p;
}
#endif  /* SUPPORT_PCRE16 */



#ifdef SUPPORT_PCRE32
/*************************************************
*     Read a capture name (32-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric input byte is
widened into the 32-bit output area, which is then terminated by two zeros. A
message is written to outfile if pcre32_get_stringnumber() does not find the
name. On return, *pp points to the second of the two zeros; the yield is the
first input byte not copied. */

static pcre_uint8 *
read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
{
pcre_uint32 *npp = *pp;
while (isalnum(*p)) *npp++ = *p++;
*npp++ = 0;
*npp = 0;
if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }
*pp = npp;
return p;
}
#endif  /* SUPPORT_PCRE32 */



/*************************************************
*            Stack guard function                *
*************************************************/

/* Called from PCRE when set in pcre_stack_guard. It just yields the current
value of stack_guard_return, which is set elsewhere; a non-zero value is an
error return. */

static int stack_guard(void)
{
return stack_guard_return;
}

/*************************************************
*              Callout function                  *
*************************************************/

/* Called from PCRE as a result of the (?C) item. We print out where we are in
the match. Yield zero unless more callouts than the fail count, or the callout
data is not zero.

Argument:   cb   the callout block supplied by PCRE
Returns:    the callout data value if it is set and not zero; otherwise 1 if
              this callout has the number callout_fail_id and the count of
              such callouts has reached callout_fail_count; otherwise 0
*/

static int callout(pcre_callout_block *cb)
{
/* The subject is printed only on the first callout of a match, or on every
callout when extra information was requested; otherwise f is NULL, which makes
the PCHARS calls below just compute lengths. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, pre_start, post_start, subject_length;

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  if (cb->offset_vector != NULL)
    {
    for (i = 0; i < cb->capture_top * 2; i += 2)
      {
      if (cb->offset_vector[i] < 0)
        fprintf(f, "%2d: <unset>\n", i/2);
      else
        {
        fprintf(f, "%2d: ", i/2);
        PCHARSV(cb->subject, cb->offset_vector[i],
          cb->offset_vector[i+1] - cb->offset_vector[i], f);
        fprintf(f, "\n");
        }
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

if (f != NULL) fprintf(f, "--->");

PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
  cb->current_position - cb->start_match, f);

PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);

PCHARSV(cb->subject, cb->current_position,
  cb->subject_length - cb->current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
  else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* Put a circumflex under the start of the match and, if the current position
is beyond that, another one under the current position. */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad to beyond the printed end of the subject, then show the next pattern
item, taken from the copy of the pattern in pbuffer. */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Show the mark only when it differs from the one seen at the last callout. */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, 0, -1, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* Non-zero callout data is printed and becomes the yield. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

return (cb->callout_number != callout_fail_id)? 0 :
       (++callout_count >= callout_fail_count)? 1 : 0;
}


/*************************************************
*            Local malloc functions              *
*************************************************/

/* Alternative malloc function, to test functionality and save the size of a
compiled re, which is the first store request that pcre_compile() makes. The
show_malloc variable is set only during matching.
*/ 2361 2362 static void *new_malloc(size_t size) 2363 { 2364 void *block = malloc(size); 2365 if (show_malloc) 2366 fprintf(outfile, "malloc %3d %p\n", (int)size, block); 2367 return block; 2368 } 2369 2370 static void new_free(void *block) 2371 { 2372 if (show_malloc) 2373 fprintf(outfile, "free %p\n", block); 2374 free(block); 2375 } 2376 2377 /* For recursion malloc/free, to test stacking calls */ 2378 2379 static void *stack_malloc(size_t size) 2380 { 2381 void *block = malloc(size); 2382 if (show_malloc) 2383 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block); 2384 return block; 2385 } 2386 2387 static void stack_free(void *block) 2388 { 2389 if (show_malloc) 2390 fprintf(outfile, "stack_free %p\n", block); 2391 free(block); 2392 } 2393 2394 2395 /************************************************* 2396 * Call pcre_fullinfo() * 2397 *************************************************/ 2398 2399 /* Get one piece of information from the pcre_fullinfo() function. When only 2400 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct 2401 value, but the code is defensive. 
2402 2403 Arguments: 2404 re compiled regex 2405 study study data 2406 option PCRE_INFO_xxx option 2407 ptr where to put the data 2408 2409 Returns: 0 when OK, < 0 on error 2410 */ 2411 2412 static int 2413 new_info(pcre *re, pcre_extra *study, int option, void *ptr) 2414 { 2415 int rc; 2416 2417 if (pcre_mode == PCRE32_MODE) 2418 #ifdef SUPPORT_PCRE32 2419 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr); 2420 #else 2421 rc = PCRE_ERROR_BADMODE; 2422 #endif 2423 else if (pcre_mode == PCRE16_MODE) 2424 #ifdef SUPPORT_PCRE16 2425 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr); 2426 #else 2427 rc = PCRE_ERROR_BADMODE; 2428 #endif 2429 else 2430 #ifdef SUPPORT_PCRE8 2431 rc = pcre_fullinfo(re, study, option, ptr); 2432 #else 2433 rc = PCRE_ERROR_BADMODE; 2434 #endif 2435 2436 if (rc < 0 && rc != PCRE_ERROR_UNSET) 2437 { 2438 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, 2439 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option); 2440 if (rc == PCRE_ERROR_BADMODE) 2441 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " 2442 "%d-bit mode\n", 8 * CHAR_SIZE, 2443 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK)); 2444 } 2445 2446 return rc; 2447 } 2448 2449 2450 2451 /************************************************* 2452 * Swap byte functions * 2453 *************************************************/ 2454 2455 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32 2456 value, respectively. 
2457 2458 Arguments: 2459 value any number 2460 2461 Returns: the byte swapped value 2462 */ 2463 2464 static pcre_uint32 2465 swap_uint32(pcre_uint32 value) 2466 { 2467 return ((value & 0x000000ff) << 24) | 2468 ((value & 0x0000ff00) << 8) | 2469 ((value & 0x00ff0000) >> 8) | 2470 (value >> 24); 2471 } 2472 2473 static pcre_uint16 2474 swap_uint16(pcre_uint16 value) 2475 { 2476 return (value >> 8) | (value << 8); 2477 } 2478 2479 2480 2481 /************************************************* 2482 * Flip bytes in a compiled pattern * 2483 *************************************************/ 2484 2485 /* This function is called if the 'F' option was present on a pattern that is 2486 to be written to a file. We flip the bytes of all the integer fields in the 2487 regex data block and the study block. In 16-bit mode this also flips relevant 2488 bytes in the pattern itself. This is to make it possible to test PCRE's 2489 ability to reload byte-flipped patterns, e.g. those compiled on a different 2490 architecture. */ 2491 2492 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16 2493 static void 2494 regexflip8_or_16(pcre *ere, pcre_extra *extra) 2495 { 2496 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere; 2497 #ifdef SUPPORT_PCRE16 2498 int op; 2499 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset; 2500 int length = re->name_count * re->name_entry_size; 2501 #ifdef SUPPORT_UTF 2502 BOOL utf = (re->options & PCRE_UTF16) != 0; 2503 BOOL utf16_char = FALSE; 2504 #endif /* SUPPORT_UTF */ 2505 #endif /* SUPPORT_PCRE16 */ 2506 2507 /* Always flip the bytes in the main data block and study blocks. 
*/ 2508 2509 re->magic_number = REVERSED_MAGIC_NUMBER; 2510 re->size = swap_uint32(re->size); 2511 re->options = swap_uint32(re->options); 2512 re->flags = swap_uint32(re->flags); 2513 re->limit_match = swap_uint32(re->limit_match); 2514 re->limit_recursion = swap_uint32(re->limit_recursion); 2515 re->first_char = swap_uint16(re->first_char); 2516 re->req_char = swap_uint16(re->req_char); 2517 re->max_lookbehind = swap_uint16(re->max_lookbehind); 2518 re->top_bracket = swap_uint16(re->top_bracket); 2519 re->top_backref = swap_uint16(re->top_backref); 2520 re->name_table_offset = swap_uint16(re->name_table_offset); 2521 re->name_entry_size = swap_uint16(re->name_entry_size); 2522 re->name_count = swap_uint16(re->name_count); 2523 re->ref_count = swap_uint16(re->ref_count); 2524 2525 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0) 2526 { 2527 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); 2528 rsd->size = swap_uint32(rsd->size); 2529 rsd->flags = swap_uint32(rsd->flags); 2530 rsd->minlength = swap_uint32(rsd->minlength); 2531 } 2532 2533 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes 2534 in the name table, if present, and then in the pattern itself. */ 2535 2536 #ifdef SUPPORT_PCRE16 2537 if (pcre_mode != PCRE16_MODE) return; 2538 2539 while(TRUE) 2540 { 2541 /* Swap previous characters. */ 2542 while (length-- > 0) 2543 { 2544 *ptr = swap_uint16(*ptr); 2545 ptr++; 2546 } 2547 #ifdef SUPPORT_UTF 2548 if (utf16_char) 2549 { 2550 if ((ptr[-1] & 0xfc00) == 0xd800) 2551 { 2552 /* We know that there is only one extra character in UTF-16. */ 2553 *ptr = swap_uint16(*ptr); 2554 ptr++; 2555 } 2556 } 2557 utf16_char = FALSE; 2558 #endif /* SUPPORT_UTF */ 2559 2560 /* Get next opcode. 
*/ 2561 2562 length = 0; 2563 op = *ptr; 2564 *ptr++ = swap_uint16(op); 2565 2566 switch (op) 2567 { 2568 case OP_END: 2569 return; 2570 2571 #ifdef SUPPORT_UTF 2572 case OP_CHAR: 2573 case OP_CHARI: 2574 case OP_NOT: 2575 case OP_NOTI: 2576 case OP_STAR: 2577 case OP_MINSTAR: 2578 case OP_PLUS: 2579 case OP_MINPLUS: 2580 case OP_QUERY: 2581 case OP_MINQUERY: 2582 case OP_UPTO: 2583 case OP_MINUPTO: 2584 case OP_EXACT: 2585 case OP_POSSTAR: 2586 case OP_POSPLUS: 2587 case OP_POSQUERY: 2588 case OP_POSUPTO: 2589 case OP_STARI: 2590 case OP_MINSTARI: 2591 case OP_PLUSI: 2592 case OP_MINPLUSI: 2593 case OP_QUERYI: 2594 case OP_MINQUERYI: 2595 case OP_UPTOI: 2596 case OP_MINUPTOI: 2597 case OP_EXACTI: 2598 case OP_POSSTARI: 2599 case OP_POSPLUSI: 2600 case OP_POSQUERYI: 2601 case OP_POSUPTOI: 2602 case OP_NOTSTAR: 2603 case OP_NOTMINSTAR: 2604 case OP_NOTPLUS: 2605 case OP_NOTMINPLUS: 2606 case OP_NOTQUERY: 2607 case OP_NOTMINQUERY: 2608 case OP_NOTUPTO: 2609 case OP_NOTMINUPTO: 2610 case OP_NOTEXACT: 2611 case OP_NOTPOSSTAR: 2612 case OP_NOTPOSPLUS: 2613 case OP_NOTPOSQUERY: 2614 case OP_NOTPOSUPTO: 2615 case OP_NOTSTARI: 2616 case OP_NOTMINSTARI: 2617 case OP_NOTPLUSI: 2618 case OP_NOTMINPLUSI: 2619 case OP_NOTQUERYI: 2620 case OP_NOTMINQUERYI: 2621 case OP_NOTUPTOI: 2622 case OP_NOTMINUPTOI: 2623 case OP_NOTEXACTI: 2624 case OP_NOTPOSSTARI: 2625 case OP_NOTPOSPLUSI: 2626 case OP_NOTPOSQUERYI: 2627 case OP_NOTPOSUPTOI: 2628 if (utf) utf16_char = TRUE; 2629 #endif 2630 /* Fall through. */ 2631 2632 default: 2633 length = OP_lengths16[op] - 1; 2634 break; 2635 2636 case OP_CLASS: 2637 case OP_NCLASS: 2638 /* Skip the character bit map. */ 2639 ptr += 32/sizeof(pcre_uint16); 2640 length = 0; 2641 break; 2642 2643 case OP_XCLASS: 2644 /* LINK_SIZE can be 1 or 2 in 16 bit mode. 
*/ 2645 if (LINK_SIZE > 1) 2646 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1])) 2647 - (1 + LINK_SIZE + 1)); 2648 else 2649 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1)); 2650 2651 /* Reverse the size of the XCLASS instance. */ 2652 *ptr = swap_uint16(*ptr); 2653 ptr++; 2654 if (LINK_SIZE > 1) 2655 { 2656 *ptr = swap_uint16(*ptr); 2657 ptr++; 2658 } 2659 2660 op = *ptr; 2661 *ptr = swap_uint16(op); 2662 ptr++; 2663 if ((op & XCL_MAP) != 0) 2664 { 2665 /* Skip the character bit map. */ 2666 ptr += 32/sizeof(pcre_uint16); 2667 length -= 32/sizeof(pcre_uint16); 2668 } 2669 break; 2670 } 2671 } 2672 /* Control should never reach here in 16 bit mode. */ 2673 #endif /* SUPPORT_PCRE16 */ 2674 } 2675 #endif /* SUPPORT_PCRE[8|16] */ 2676 2677 2678 2679 #if defined SUPPORT_PCRE32 2680 static void 2681 regexflip_32(pcre *ere, pcre_extra *extra) 2682 { 2683 real_pcre32 *re = (real_pcre32 *)ere; 2684 int op; 2685 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset; 2686 int length = re->name_count * re->name_entry_size; 2687 2688 /* Always flip the bytes in the main data block and study blocks. 
*/ 2689 2690 re->magic_number = REVERSED_MAGIC_NUMBER; 2691 re->size = swap_uint32(re->size); 2692 re->options = swap_uint32(re->options); 2693 re->flags = swap_uint32(re->flags); 2694 re->limit_match = swap_uint32(re->limit_match); 2695 re->limit_recursion = swap_uint32(re->limit_recursion); 2696 re->first_char = swap_uint32(re->first_char); 2697 re->req_char = swap_uint32(re->req_char); 2698 re->max_lookbehind = swap_uint16(re->max_lookbehind); 2699 re->top_bracket = swap_uint16(re->top_bracket); 2700 re->top_backref = swap_uint16(re->top_backref); 2701 re->name_table_offset = swap_uint16(re->name_table_offset); 2702 re->name_entry_size = swap_uint16(re->name_entry_size); 2703 re->name_count = swap_uint16(re->name_count); 2704 re->ref_count = swap_uint16(re->ref_count); 2705 2706 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0) 2707 { 2708 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); 2709 rsd->size = swap_uint32(rsd->size); 2710 rsd->flags = swap_uint32(rsd->flags); 2711 rsd->minlength = swap_uint32(rsd->minlength); 2712 } 2713 2714 /* In 32-bit mode we must swap bytes in the name table, if present, and then in 2715 the pattern itself. */ 2716 2717 while(TRUE) 2718 { 2719 /* Swap previous characters. */ 2720 while (length-- > 0) 2721 { 2722 *ptr = swap_uint32(*ptr); 2723 ptr++; 2724 } 2725 2726 /* Get next opcode. */ 2727 2728 length = 0; 2729 op = *ptr; 2730 *ptr++ = swap_uint32(op); 2731 2732 switch (op) 2733 { 2734 case OP_END: 2735 return; 2736 2737 default: 2738 length = OP_lengths32[op] - 1; 2739 break; 2740 2741 case OP_CLASS: 2742 case OP_NCLASS: 2743 /* Skip the character bit map. */ 2744 ptr += 32/sizeof(pcre_uint32); 2745 length = 0; 2746 break; 2747 2748 case OP_XCLASS: 2749 /* LINK_SIZE can only be 1 in 32-bit mode. */ 2750 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1)); 2751 2752 /* Reverse the size of the XCLASS instance. 
*/ 2753 *ptr = swap_uint32(*ptr); 2754 ptr++; 2755 2756 op = *ptr; 2757 *ptr = swap_uint32(op); 2758 ptr++; 2759 if ((op & XCL_MAP) != 0) 2760 { 2761 /* Skip the character bit map. */ 2762 ptr += 32/sizeof(pcre_uint32); 2763 length -= 32/sizeof(pcre_uint32); 2764 } 2765 break; 2766 } 2767 } 2768 /* Control should never reach here in 32 bit mode. */ 2769 } 2770 2771 #endif /* SUPPORT_PCRE32 */ 2772 2773 2774 2775 static void 2776 regexflip(pcre *ere, pcre_extra *extra) 2777 { 2778 #if defined SUPPORT_PCRE32 2779 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32) 2780 regexflip_32(ere, extra); 2781 #endif 2782 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16 2783 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16)) 2784 regexflip8_or_16(ere, extra); 2785 #endif 2786 } 2787 2788 2789 2790 /************************************************* 2791 * Check match or recursion limit * 2792 *************************************************/ 2793 2794 static int 2795 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len, 2796 int start_offset, int options, int *use_offsets, int use_size_offsets, 2797 int flag, unsigned long int *limit, int errnumber, const char *msg) 2798 { 2799 int count; 2800 int min = 0; 2801 int mid = 64; 2802 int max = -1; 2803 2804 extra->flags |= flag; 2805 2806 for (;;) 2807 { 2808 *limit = mid; 2809 2810 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, 2811 use_offsets, use_size_offsets); 2812 2813 if (count == errnumber) 2814 { 2815 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */ 2816 min = mid; 2817 mid = (mid == max - 1)? max : (max > 0)? 
(min + max)/2 : mid*2; 2818 } 2819 2820 else if (count >= 0 || count == PCRE_ERROR_NOMATCH || 2821 count == PCRE_ERROR_PARTIAL) 2822 { 2823 if (mid == min + 1) 2824 { 2825 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid); 2826 break; 2827 } 2828 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */ 2829 max = mid; 2830 mid = (min + mid)/2; 2831 } 2832 else break; /* Some other error */ 2833 } 2834 2835 extra->flags &= ~flag; 2836 return count; 2837 } 2838 2839 2840 2841 /************************************************* 2842 * Case-independent strncmp() function * 2843 *************************************************/ 2844 2845 /* 2846 Arguments: 2847 s first string 2848 t second string 2849 n number of characters to compare 2850 2851 Returns: < 0, = 0, or > 0, according to the comparison 2852 */ 2853 2854 static int 2855 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n) 2856 { 2857 while (n--) 2858 { 2859 int c = tolower(*s++) - tolower(*t++); 2860 if (c) return c; 2861 } 2862 return 0; 2863 } 2864 2865 2866 2867 /************************************************* 2868 * Check multicharacter option * 2869 *************************************************/ 2870 2871 /* This is used both at compile and run-time to check for <xxx> escapes. Print 2872 a message and return 0 if there is no match. 
2873 2874 Arguments: 2875 p points after the leading '<' 2876 f file for error message 2877 nl TRUE to check only for newline settings 2878 stype "modifier" or "escape sequence" 2879 2880 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0 2881 */ 2882 2883 static int 2884 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype) 2885 { 2886 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR; 2887 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF; 2888 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF; 2889 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF; 2890 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY; 2891 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF; 2892 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE; 2893 2894 if (!nl) 2895 { 2896 if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT; 2897 } 2898 2899 fprintf(f, "Unknown %s at: <%s\n", stype, p); 2900 return 0; 2901 } 2902 2903 2904 2905 /************************************************* 2906 * Usage function * 2907 *************************************************/ 2908 2909 static void 2910 usage(void) 2911 { 2912 printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n"); 2913 printf("Input and output default to stdin and stdout.\n"); 2914 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) 2915 printf("If input is a terminal, readline() is used to read from it.\n"); 2916 #else 2917 printf("This version of pcretest is not linked with readline().\n"); 2918 #endif 2919 printf("\nOptions:\n"); 2920 #ifdef SUPPORT_PCRE16 2921 printf(" -16 use the 16-bit library\n"); 2922 #endif 2923 #ifdef SUPPORT_PCRE32 2924 printf(" -32 use the 32-bit library\n"); 2925 #endif 2926 printf(" -b show compiled code\n"); 2927 printf(" -C show PCRE compile-time options and exit\n"); 2928 
printf(" -C arg show a specific compile-time option and exit\n"); 2929 printf(" with its value if numeric (else 0). The arg can be:\n"); 2930 printf(" linksize internal link size [2, 3, 4]\n"); 2931 printf(" pcre8 8 bit library support enabled [0, 1]\n"); 2932 printf(" pcre16 16 bit library support enabled [0, 1]\n"); 2933 printf(" pcre32 32 bit library support enabled [0, 1]\n"); 2934 printf(" utf Unicode Transformation Format supported [0, 1]\n"); 2935 printf(" ucp Unicode Properties supported [0, 1]\n"); 2936 printf(" jit Just-in-time compiler supported [0, 1]\n"); 2937 printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n"); 2938 printf(" bsr \\R type [ANYCRLF, ANY]\n"); 2939 printf(" -d debug: show compiled code and information (-b and -i)\n"); 2940 #if !defined NODFA 2941 printf(" -dfa force DFA matching for all subjects\n"); 2942 #endif 2943 printf(" -help show usage information\n"); 2944 printf(" -i show information about compiled patterns\n" 2945 " -M find MATCH_LIMIT minimum for each subject\n" 2946 " -m output memory used information\n" 2947 " -O set PCRE_NO_AUTO_POSSESS on each pattern\n" 2948 " -o <n> set size of offsets vector to <n>\n"); 2949 #if !defined NOPOSIX 2950 printf(" -p use POSIX interface\n"); 2951 #endif 2952 printf(" -q quiet: do not output PCRE version number at start\n"); 2953 printf(" -S <n> set stack size to <n> megabytes\n"); 2954 printf(" -s force each pattern to be studied at basic level\n" 2955 " -s+ force each pattern to be studied, using JIT if available\n" 2956 " -s++ ditto, verifying when JIT was actually used\n" 2957 " -s+n force each pattern to be studied, using JIT if available,\n" 2958 " where 1 <= n <= 7 selects JIT options\n" 2959 " -s++n ditto, verifying when JIT was actually used\n" 2960 " -t time compilation and execution\n"); 2961 printf(" -t <n> time compilation and execution, repeating <n> times\n"); 2962 printf(" -tm time execution (matching) only\n"); 2963 printf(" -tm <n> time execution (matching) only, 
repeating <n> times\n"); 2964 printf(" -T same as -t, but show total times at the end\n"); 2965 printf(" -TM same as -tm, but show total time at the end\n"); 2966 } 2967 2968 2969 2970 /************************************************* 2971 * Main Program * 2972 *************************************************/ 2973 2974 /* Read lines from named file or stdin and write to named file or stdout; lines 2975 consist of a regular expression, in delimiters and optionally followed by 2976 options, followed by a set of test data, terminated by an empty line. */ 2977 2978 int main(int argc, char **argv) 2979 { 2980 FILE *infile = stdin; 2981 const char *version; 2982 int options = 0; 2983 int study_options = 0; 2984 int default_find_match_limit = FALSE; 2985 pcre_uint32 default_options = 0; 2986 int op = 1; 2987 int timeit = 0; 2988 int timeitm = 0; 2989 int showtotaltimes = 0; 2990 int showinfo = 0; 2991 int showstore = 0; 2992 int force_study = -1; 2993 int force_study_options = 0; 2994 int quiet = 0; 2995 int size_offsets = 45; 2996 int size_offsets_max; 2997 int *offsets = NULL; 2998 int debug = 0; 2999 int done = 0; 3000 int all_use_dfa = 0; 3001 int verify_jit = 0; 3002 int yield = 0; 3003 int stack_size; 3004 pcre_uint8 *dbuffer = NULL; 3005 pcre_uint8 lockout[24] = { 0 }; 3006 size_t dbuffer_size = 1u << 14; 3007 clock_t total_compile_time = 0; 3008 clock_t total_study_time = 0; 3009 clock_t total_match_time = 0; 3010 3011 #if !defined NOPOSIX 3012 int posix = 0; 3013 #endif 3014 #if !defined NODFA 3015 int *dfa_workspace = NULL; 3016 #endif 3017 3018 pcre_jit_stack *jit_stack = NULL; 3019 3020 /* These vectors store, end-to-end, a list of zero-terminated captured 3021 substring names, each list itself being terminated by an empty name. Assume 3022 that 1024 is plenty long enough for the few names we'll be testing. It is 3023 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version 3024 for the actual memory, to ensure alignment. 
*/ 3025 3026 pcre_uint32 copynames[1024]; 3027 pcre_uint32 getnames[1024]; 3028 3029 #ifdef SUPPORT_PCRE32 3030 pcre_uint32 *cn32ptr; 3031 pcre_uint32 *gn32ptr; 3032 #endif 3033 3034 #ifdef SUPPORT_PCRE16 3035 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames; 3036 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames; 3037 pcre_uint16 *cn16ptr; 3038 pcre_uint16 *gn16ptr; 3039 #endif 3040 3041 #ifdef SUPPORT_PCRE8 3042 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames; 3043 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames; 3044 pcre_uint8 *cn8ptr; 3045 pcre_uint8 *gn8ptr; 3046 #endif 3047 3048 /* Get buffers from malloc() so that valgrind will check their misuse when 3049 debugging. They grow automatically when very long lines are read. The 16- 3050 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */ 3051 3052 buffer = (pcre_uint8 *)malloc(buffer_size); 3053 pbuffer = (pcre_uint8 *)malloc(buffer_size); 3054 3055 /* The outfile variable is static so that new_malloc can use it. */ 3056 3057 outfile = stdout; 3058 3059 /* The following _setmode() stuff is some Windows magic that tells its runtime 3060 library to translate CRLF into a single LF character. At least, that's what 3061 I've been told: never having used Windows I take this all on trust. Originally 3062 it set 0x8000, but then I was advised that _O_BINARY was better. */ 3063 3064 #if defined(_WIN32) || defined(WIN32) 3065 _setmode( _fileno( stdout ), _O_BINARY ); 3066 #endif 3067 3068 /* Get the version number: both pcre_version() and pcre16_version() give the 3069 same answer. We just need to ensure that we call one that is available. 
*/ 3070 3071 #if defined SUPPORT_PCRE8 3072 version = pcre_version(); 3073 #elif defined SUPPORT_PCRE16 3074 version = pcre16_version(); 3075 #elif defined SUPPORT_PCRE32 3076 version = pcre32_version(); 3077 #endif 3078 3079 /* Scan options */ 3080 3081 while (argc > 1 && argv[op][0] == '-') 3082 { 3083 pcre_uint8 *endptr; 3084 char *arg = argv[op]; 3085 3086 if (strcmp(arg, "-m") == 0) showstore = 1; 3087 else if (strcmp(arg, "-s") == 0) force_study = 0; 3088 3089 else if (strncmp(arg, "-s+", 3) == 0) 3090 { 3091 arg += 3; 3092 if (*arg == '+') { arg++; verify_jit = TRUE; } 3093 force_study = 1; 3094 if (*arg == 0) 3095 force_study_options = jit_study_bits[6]; 3096 else if (*arg >= '1' && *arg <= '7') 3097 force_study_options = jit_study_bits[*arg - '1']; 3098 else goto BAD_ARG; 3099 } 3100 else if (strcmp(arg, "-8") == 0) 3101 { 3102 #ifdef SUPPORT_PCRE8 3103 pcre_mode = PCRE8_MODE; 3104 #else 3105 printf("** This version of PCRE was built without 8-bit support\n"); 3106 exit(1); 3107 #endif 3108 } 3109 else if (strcmp(arg, "-16") == 0) 3110 { 3111 #ifdef SUPPORT_PCRE16 3112 pcre_mode = PCRE16_MODE; 3113 #else 3114 printf("** This version of PCRE was built without 16-bit support\n"); 3115 exit(1); 3116 #endif 3117 } 3118 else if (strcmp(arg, "-32") == 0) 3119 { 3120 #ifdef SUPPORT_PCRE32 3121 pcre_mode = PCRE32_MODE; 3122 #else 3123 printf("** This version of PCRE was built without 32-bit support\n"); 3124 exit(1); 3125 #endif 3126 } 3127 else if (strcmp(arg, "-q") == 0) quiet = 1; 3128 else if (strcmp(arg, "-b") == 0) debug = 1; 3129 else if (strcmp(arg, "-i") == 0) showinfo = 1; 3130 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1; 3131 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE; 3132 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS; 3133 #if !defined NODFA 3134 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1; 3135 #endif 3136 else if (strcmp(arg, "-o") == 0 && argc > 2 && 3137 ((size_offsets = 
get_value((pcre_uint8 *)argv[op+1], &endptr)), 3138 *endptr == 0)) 3139 { 3140 op++; 3141 argc--; 3142 } 3143 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 || 3144 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0) 3145 { 3146 int temp; 3147 int both = arg[2] == 0; 3148 showtotaltimes = arg[1] == 'T'; 3149 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr), 3150 *endptr == 0)) 3151 { 3152 timeitm = temp; 3153 op++; 3154 argc--; 3155 } 3156 else timeitm = LOOPREPEAT; 3157 if (both) timeit = timeitm; 3158 } 3159 else if (strcmp(arg, "-S") == 0 && argc > 2 && 3160 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)), 3161 *endptr == 0)) 3162 { 3163 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) 3164 printf("PCRE: -S not supported on this OS\n"); 3165 exit(1); 3166 #else 3167 int rc; 3168 struct rlimit rlim; 3169 getrlimit(RLIMIT_STACK, &rlim); 3170 rlim.rlim_cur = stack_size * 1024 * 1024; 3171 rc = setrlimit(RLIMIT_STACK, &rlim); 3172 if (rc != 0) 3173 { 3174 printf("PCRE: setrlimit() failed with error %d\n", rc); 3175 exit(1); 3176 } 3177 op++; 3178 argc--; 3179 #endif 3180 } 3181 #if !defined NOPOSIX 3182 else if (strcmp(arg, "-p") == 0) posix = 1; 3183 #endif 3184 else if (strcmp(arg, "-C") == 0) 3185 { 3186 int rc; 3187 unsigned long int lrc; 3188 3189 if (argc > 2) 3190 { 3191 if (strcmp(argv[op + 1], "linksize") == 0) 3192 { 3193 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); 3194 printf("%d\n", rc); 3195 yield = rc; 3196 3197 #ifdef __VMS 3198 vms_setsymbol("LINKSIZE",0,yield ); 3199 #endif 3200 } 3201 else if (strcmp(argv[op + 1], "pcre8") == 0) 3202 { 3203 #ifdef SUPPORT_PCRE8 3204 printf("1\n"); 3205 yield = 1; 3206 #else 3207 printf("0\n"); 3208 yield = 0; 3209 #endif 3210 #ifdef __VMS 3211 vms_setsymbol("PCRE8",0,yield ); 3212 #endif 3213 } 3214 else if (strcmp(argv[op + 1], "pcre16") == 0) 3215 { 3216 #ifdef SUPPORT_PCRE16 3217 printf("1\n"); 3218 yield = 1; 3219 
#else 3220 printf("0\n"); 3221 yield = 0; 3222 #endif 3223 #ifdef __VMS 3224 vms_setsymbol("PCRE16",0,yield ); 3225 #endif 3226 } 3227 else if (strcmp(argv[op + 1], "pcre32") == 0) 3228 { 3229 #ifdef SUPPORT_PCRE32 3230 printf("1\n"); 3231 yield = 1; 3232 #else 3233 printf("0\n"); 3234 yield = 0; 3235 #endif 3236 #ifdef __VMS 3237 vms_setsymbol("PCRE32",0,yield ); 3238 #endif 3239 } 3240 else if (strcmp(argv[op + 1], "utf") == 0) 3241 { 3242 #ifdef SUPPORT_PCRE8 3243 if (pcre_mode == PCRE8_MODE) 3244 (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 3245 #endif 3246 #ifdef SUPPORT_PCRE16 3247 if (pcre_mode == PCRE16_MODE) 3248 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc); 3249 #endif 3250 #ifdef SUPPORT_PCRE32 3251 if (pcre_mode == PCRE32_MODE) 3252 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc); 3253 #endif 3254 printf("%d\n", rc); 3255 yield = rc; 3256 #ifdef __VMS 3257 vms_setsymbol("UTF",0,yield ); 3258 #endif 3259 } 3260 else if (strcmp(argv[op + 1], "ucp") == 0) 3261 { 3262 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); 3263 printf("%d\n", rc); 3264 yield = rc; 3265 } 3266 else if (strcmp(argv[op + 1], "jit") == 0) 3267 { 3268 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); 3269 printf("%d\n", rc); 3270 yield = rc; 3271 } 3272 else if (strcmp(argv[op + 1], "newline") == 0) 3273 { 3274 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); 3275 print_newline_config(rc, TRUE); 3276 } 3277 else if (strcmp(argv[op + 1], "bsr") == 0) 3278 { 3279 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc); 3280 printf("%s\n", rc? 
"ANYCRLF" : "ANY"); 3281 } 3282 else if (strcmp(argv[op + 1], "ebcdic") == 0) 3283 { 3284 #ifdef EBCDIC 3285 printf("1\n"); 3286 yield = 1; 3287 #else 3288 printf("0\n"); 3289 #endif 3290 } 3291 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0) 3292 { 3293 #ifdef EBCDIC 3294 printf("0x%02x\n", CHAR_LF); 3295 #else 3296 printf("0\n"); 3297 #endif 3298 } 3299 else 3300 { 3301 printf("Unknown -C option: %s\n", argv[op + 1]); 3302 } 3303 goto EXIT; 3304 } 3305 3306 /* No argument for -C: output all configuration information. */ 3307 3308 printf("PCRE version %s\n", version); 3309 printf("Compiled with\n"); 3310 3311 #ifdef EBCDIC 3312 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF); 3313 #endif 3314 3315 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both 3316 are set, either both UTFs are supported or both are not supported. */ 3317 3318 #ifdef SUPPORT_PCRE8 3319 printf(" 8-bit support\n"); 3320 (void)pcre_config(PCRE_CONFIG_UTF8, &rc); 3321 printf (" %sUTF-8 support\n", rc ? "" : "No "); 3322 #endif 3323 #ifdef SUPPORT_PCRE16 3324 printf(" 16-bit support\n"); 3325 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc); 3326 printf (" %sUTF-16 support\n", rc ? "" : "No "); 3327 #endif 3328 #ifdef SUPPORT_PCRE32 3329 printf(" 32-bit support\n"); 3330 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc); 3331 printf (" %sUTF-32 support\n", rc ? "" : "No "); 3332 #endif 3333 3334 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc); 3335 printf(" %sUnicode properties support\n", rc? "" : "No "); 3336 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc); 3337 if (rc) 3338 { 3339 const char *arch; 3340 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch)); 3341 printf(" Just-in-time compiler support: %s\n", arch); 3342 } 3343 else 3344 printf(" No just-in-time compiler support\n"); 3345 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc); 3346 print_newline_config(rc, FALSE); 3347 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc); 3348 printf(" \\R matches %s\n", rc? 
"CR, LF, or CRLF only" : 3349 "all Unicode newlines"); 3350 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc); 3351 printf(" Internal link size = %d\n", rc); 3352 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc); 3353 printf(" POSIX malloc threshold = %d\n", rc); 3354 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc); 3355 printf(" Parentheses nest limit = %ld\n", lrc); 3356 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc); 3357 printf(" Default match limit = %ld\n", lrc); 3358 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc); 3359 printf(" Default recursion depth limit = %ld\n", lrc); 3360 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc); 3361 printf(" Match recursion uses %s", rc? "stack" : "heap"); 3362 if (showstore) 3363 { 3364 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0); 3365 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size); 3366 } 3367 printf("\n"); 3368 goto EXIT; 3369 } 3370 else if (strcmp(arg, "-help") == 0 || 3371 strcmp(arg, "--help") == 0) 3372 { 3373 usage(); 3374 goto EXIT; 3375 } 3376 else 3377 { 3378 BAD_ARG: 3379 printf("** Unknown or malformed option %s\n", arg); 3380 usage(); 3381 yield = 1; 3382 goto EXIT; 3383 } 3384 op++; 3385 argc--; 3386 } 3387 3388 /* Get the store for the offsets vector, and remember what it was */ 3389 3390 size_offsets_max = size_offsets; 3391 offsets = (int *)malloc(size_offsets_max * sizeof(int)); 3392 if (offsets == NULL) 3393 { 3394 printf("** Failed to get %d bytes of memory for offsets vector\n", 3395 (int)(size_offsets_max * sizeof(int))); 3396 yield = 1; 3397 goto EXIT; 3398 } 3399 3400 /* Sort out the input and output files */ 3401 3402 if (argc > 1) 3403 { 3404 infile = fopen(argv[op], INPUT_MODE); 3405 if (infile == NULL) 3406 { 3407 printf("** Failed to open %s\n", argv[op]); 3408 yield = 1; 3409 goto EXIT; 3410 } 3411 } 3412 3413 if (argc > 2) 3414 { 3415 outfile = fopen(argv[op+1], OUTPUT_MODE); 3416 if (outfile == NULL) 3417 { 3418 printf("** 
Failed to open %s\n", argv[op+1]); 3419 yield = 1; 3420 goto EXIT; 3421 } 3422 } 3423 3424 /* Set alternative malloc function */ 3425 3426 #ifdef SUPPORT_PCRE8 3427 pcre_malloc = new_malloc; 3428 pcre_free = new_free; 3429 pcre_stack_malloc = stack_malloc; 3430 pcre_stack_free = stack_free; 3431 #endif 3432 3433 #ifdef SUPPORT_PCRE16 3434 pcre16_malloc = new_malloc; 3435 pcre16_free = new_free; 3436 pcre16_stack_malloc = stack_malloc; 3437 pcre16_stack_free = stack_free; 3438 #endif 3439 3440 #ifdef SUPPORT_PCRE32 3441 pcre32_malloc = new_malloc; 3442 pcre32_free = new_free; 3443 pcre32_stack_malloc = stack_malloc; 3444 pcre32_stack_free = stack_free; 3445 #endif 3446 3447 /* Heading line unless quiet */ 3448 3449 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version); 3450 3451 /* Main loop */ 3452 3453 while (!done) 3454 { 3455 pcre *re = NULL; 3456 pcre_extra *extra = NULL; 3457 3458 #if !defined NOPOSIX /* There are still compilers that require no indent */ 3459 regex_t preg = { NULL, 0, 0} ; 3460 int do_posix = 0; 3461 #endif 3462 3463 const char *error; 3464 pcre_uint8 *markptr; 3465 pcre_uint8 *p, *pp, *ppp; 3466 pcre_uint8 *to_file = NULL; 3467 const pcre_uint8 *tables = NULL; 3468 unsigned long int get_options; 3469 unsigned long int true_size, true_study_size = 0; 3470 size_t size; 3471 int do_allcaps = 0; 3472 int do_mark = 0; 3473 int do_study = 0; 3474 int no_force_study = 0; 3475 int do_debug = debug; 3476 int do_G = 0; 3477 int do_g = 0; 3478 int do_showinfo = showinfo; 3479 int do_showrest = 0; 3480 int do_showcaprest = 0; 3481 int do_flip = 0; 3482 int erroroffset, len, delimiter, poffset; 3483 3484 #if !defined NODFA 3485 int dfa_matched = 0; 3486 #endif 3487 3488 use_utf = 0; 3489 debug_lengths = 1; 3490 SET_PCRE_STACK_GUARD(NULL); 3491 3492 if (extend_inputline(infile, buffer, " re> ") == NULL) break; 3493 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); 3494 fflush(outfile); 3495 3496 p = buffer; 3497 while (isspace(*p)) p++; 
3498 if (*p == 0) continue; 3499 3500 /* Handle option lock-out setting */ 3501 3502 if (*p == '<' && p[1] == ' ') 3503 { 3504 p += 2; 3505 while (isspace(*p)) p++; 3506 if (strncmp((char *)p, "forbid ", 7) == 0) 3507 { 3508 p += 7; 3509 while (isspace(*p)) p++; 3510 pp = lockout; 3511 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1) 3512 *pp++ = *p++; 3513 *pp = 0; 3514 } 3515 else 3516 { 3517 printf("** Unrecognized special command '%s'\n", p); 3518 yield = 1; 3519 goto EXIT; 3520 } 3521 continue; 3522 } 3523 3524 /* See if the pattern is to be loaded pre-compiled from a file. */ 3525 3526 if (*p == '<' && strchr((char *)(p+1), '<') == NULL) 3527 { 3528 pcre_uint32 magic; 3529 pcre_uint8 sbuf[8]; 3530 FILE *f; 3531 3532 p++; 3533 if (*p == '!') 3534 { 3535 do_debug = TRUE; 3536 do_showinfo = TRUE; 3537 p++; 3538 } 3539 3540 pp = p + (int)strlen((char *)p); 3541 while (isspace(pp[-1])) pp--; 3542 *pp = 0; 3543 3544 f = fopen((char *)p, "rb"); 3545 if (f == NULL) 3546 { 3547 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno)); 3548 continue; 3549 } 3550 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ; 3551 3552 true_size = 3553 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3]; 3554 true_study_size = 3555 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7]; 3556 3557 re = (pcre *)new_malloc(true_size); 3558 if (re == NULL) 3559 { 3560 printf("** Failed to get %d bytes of memory for pcre object\n", 3561 (int)true_size); 3562 yield = 1; 3563 goto EXIT; 3564 } 3565 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ; 3566 3567 magic = REAL_PCRE_MAGIC(re); 3568 if (magic != MAGIC_NUMBER) 3569 { 3570 if (swap_uint32(magic) == MAGIC_NUMBER) 3571 { 3572 do_flip = 1; 3573 } 3574 else 3575 { 3576 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p); 3577 new_free(re); 3578 fclose(f); 3579 continue; 3580 } 3581 } 3582 3583 /* We hide the byte-invert info for little and big endian tests. 
*/ 3584 fprintf(outfile, "Compiled pattern%s loaded from %s\n", 3585 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p); 3586 3587 /* Now see if there is any following study data. */ 3588 3589 if (true_study_size != 0) 3590 { 3591 pcre_study_data *psd; 3592 3593 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size); 3594 extra->flags = PCRE_EXTRA_STUDY_DATA; 3595 3596 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra)); 3597 extra->study_data = psd; 3598 3599 if (fread(psd, 1, true_study_size, f) != true_study_size) 3600 { 3601 FAIL_READ: 3602 fprintf(outfile, "Failed to read data from %s\n", p); 3603 if (extra != NULL) 3604 { 3605 PCRE_FREE_STUDY(extra); 3606 } 3607 new_free(re); 3608 fclose(f); 3609 continue; 3610 } 3611 fprintf(outfile, "Study data loaded from %s\n", p); 3612 do_study = 1; /* To get the data output if requested */ 3613 } 3614 else fprintf(outfile, "No study data\n"); 3615 3616 /* Flip the necessary bytes. */ 3617 if (do_flip) 3618 { 3619 int rc; 3620 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL); 3621 if (rc == PCRE_ERROR_BADMODE) 3622 { 3623 pcre_uint32 flags_in_host_byte_order; 3624 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER) 3625 flags_in_host_byte_order = REAL_PCRE_FLAGS(re); 3626 else 3627 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re)); 3628 /* Simulate the result of the function call below. */ 3629 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc, 3630 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", 3631 PCRE_INFO_OPTIONS); 3632 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " 3633 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK)); 3634 new_free(re); 3635 fclose(f); 3636 continue; 3637 } 3638 } 3639 3640 /* Need to know if UTF-8 for printing data strings. 
*/ 3641 3642 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) 3643 { 3644 new_free(re); 3645 fclose(f); 3646 continue; 3647 } 3648 use_utf = (get_options & PCRE_UTF8) != 0; 3649 3650 fclose(f); 3651 goto SHOW_INFO; 3652 } 3653 3654 /* In-line pattern (the usual case). Get the delimiter and seek the end of 3655 the pattern; if it isn't complete, read more. */ 3656 3657 delimiter = *p++; 3658 3659 if (isalnum(delimiter) || delimiter == '\\') 3660 { 3661 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n"); 3662 goto SKIP_DATA; 3663 } 3664 3665 pp = p; 3666 poffset = (int)(p - buffer); 3667 3668 for(;;) 3669 { 3670 while (*pp != 0) 3671 { 3672 if (*pp == '\\' && pp[1] != 0) pp++; 3673 else if (*pp == delimiter) break; 3674 pp++; 3675 } 3676 if (*pp != 0) break; 3677 if ((pp = extend_inputline(infile, pp, " > ")) == NULL) 3678 { 3679 fprintf(outfile, "** Unexpected EOF\n"); 3680 done = 1; 3681 goto CONTINUE; 3682 } 3683 if (infile != stdin) fprintf(outfile, "%s", (char *)pp); 3684 } 3685 3686 /* The buffer may have moved while being extended; reset the start of data 3687 pointer to the correct relative point in the buffer. */ 3688 3689 p = buffer + poffset; 3690 3691 /* If the first character after the delimiter is backslash, make 3692 the pattern end with backslash. This is purely to provide a way 3693 of testing for the error message when a pattern ends with backslash. */ 3694 3695 if (pp[1] == '\\') *pp++ = '\\'; 3696 3697 /* Terminate the pattern at the delimiter, and save a copy of the pattern 3698 for callouts. */ 3699 3700 *pp++ = 0; 3701 strcpy((char *)pbuffer, (char *)p); 3702 3703 /* Look for modifiers and options after the final delimiter. */ 3704 3705 options = default_options; 3706 study_options = force_study_options; 3707 log_store = showstore; /* default from command line */ 3708 3709 while (*pp != 0) 3710 { 3711 /* Check to see whether this modifier has been locked out for this file. 
3712 This is complicated for the multi-character options that begin with '<'. 3713 If there is no '>' in the lockout string, all multi-character modifiers are 3714 locked out. */ 3715 3716 if (strchr((char *)lockout, *pp) != NULL) 3717 { 3718 if (*pp == '<' && strchr((char *)lockout, '>') != NULL) 3719 { 3720 int x = check_mc_option(pp+1, outfile, FALSE, "modifier"); 3721 if (x == 0) goto SKIP_DATA; 3722 3723 for (ppp = lockout; *ppp != 0; ppp++) 3724 { 3725 if (*ppp == '<') 3726 { 3727 int y = check_mc_option(ppp+1, outfile, FALSE, "modifier"); 3728 if (y == 0) 3729 { 3730 printf("** Error in modifier forbid data - giving up.\n"); 3731 yield = 1; 3732 goto EXIT; 3733 } 3734 if (x == y) 3735 { 3736 ppp = pp; 3737 while (*ppp != '>') ppp++; 3738 printf("** The %.*s modifier is locked out - giving up.\n", 3739 (int)(ppp - pp + 1), pp); 3740 yield = 1; 3741 goto EXIT; 3742 } 3743 } 3744 } 3745 } 3746 3747 /* The single-character modifiers are straightforward. */ 3748 3749 else 3750 { 3751 printf("** The /%c modifier is locked out - giving up.\n", *pp); 3752 yield = 1; 3753 goto EXIT; 3754 } 3755 } 3756 3757 /* The modifier is not locked out; handle it. 
*/ 3758 3759 switch (*pp++) 3760 { 3761 case 'f': options |= PCRE_FIRSTLINE; break; 3762 case 'g': do_g = 1; break; 3763 case 'i': options |= PCRE_CASELESS; break; 3764 case 'm': options |= PCRE_MULTILINE; break; 3765 case 's': options |= PCRE_DOTALL; break; 3766 case 'x': options |= PCRE_EXTENDED; break; 3767 3768 case '+': 3769 if (do_showrest) do_showcaprest = 1; else do_showrest = 1; 3770 break; 3771 3772 case '=': do_allcaps = 1; break; 3773 case 'A': options |= PCRE_ANCHORED; break; 3774 case 'B': do_debug = 1; break; 3775 case 'C': options |= PCRE_AUTO_CALLOUT; break; 3776 case 'D': do_debug = do_showinfo = 1; break; 3777 case 'E': options |= PCRE_DOLLAR_ENDONLY; break; 3778 case 'F': do_flip = 1; break; 3779 case 'G': do_G = 1; break; 3780 case 'I': do_showinfo = 1; break; 3781 case 'J': options |= PCRE_DUPNAMES; break; 3782 case 'K': do_mark = 1; break; 3783 case 'M': log_store = 1; break; 3784 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; 3785 case 'O': options |= PCRE_NO_AUTO_POSSESS; break; 3786 3787 #if !defined NOPOSIX 3788 case 'P': do_posix = 1; break; 3789 #endif 3790 3791 case 'Q': 3792 switch (*pp) 3793 { 3794 case '0': 3795 case '1': 3796 stack_guard_return = *pp++ - '0'; 3797 break; 3798 3799 default: 3800 fprintf(outfile, "** Missing 0 or 1 after /Q\n"); 3801 goto SKIP_DATA; 3802 } 3803 SET_PCRE_STACK_GUARD(stack_guard); 3804 break; 3805 3806 case 'S': 3807 do_study = 1; 3808 for (;;) 3809 { 3810 switch (*pp++) 3811 { 3812 case 'S': 3813 do_study = 0; 3814 no_force_study = 1; 3815 break; 3816 3817 case '!': 3818 study_options |= PCRE_STUDY_EXTRA_NEEDED; 3819 break; 3820 3821 case '+': 3822 if (*pp == '+') 3823 { 3824 verify_jit = TRUE; 3825 pp++; 3826 } 3827 if (*pp >= '1' && *pp <= '7') 3828 study_options |= jit_study_bits[*pp++ - '1']; 3829 else 3830 study_options |= jit_study_bits[6]; 3831 break; 3832 3833 case '-': 3834 study_options &= ~PCRE_STUDY_ALLJIT; 3835 break; 3836 3837 default: 3838 pp--; 3839 goto ENDLOOP; 3840 } 3841 } 3842 
ENDLOOP: 3843 break; 3844 3845 case 'U': options |= PCRE_UNGREEDY; break; 3846 case 'W': options |= PCRE_UCP; break; 3847 case 'X': options |= PCRE_EXTRA; break; 3848 case 'Y': options |= PCRE_NO_START_OPTIMISE; break; 3849 case 'Z': debug_lengths = 0; break; 3850 case '8': options |= PCRE_UTF8; use_utf = 1; break; 3851 case '9': options |= PCRE_NEVER_UTF; break; 3852 case '?': options |= PCRE_NO_UTF8_CHECK; break; 3853 3854 case 'T': 3855 switch (*pp++) 3856 { 3857 case '0': tables = tables0; break; 3858 case '1': tables = tables1; break; 3859 3860 case '\r': 3861 case '\n': 3862 case ' ': 3863 case 0: 3864 fprintf(outfile, "** Missing table number after /T\n"); 3865 goto SKIP_DATA; 3866 3867 default: 3868 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]); 3869 goto SKIP_DATA; 3870 } 3871 break; 3872 3873 case 'L': 3874 ppp = pp; 3875 /* The '\r' test here is so that it works on Windows. */ 3876 /* The '0' test is just in case this is an unterminated line. */ 3877 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++; 3878 *ppp = 0; 3879 if (setlocale(LC_CTYPE, (const char *)pp) == NULL) 3880 { 3881 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp); 3882 goto SKIP_DATA; 3883 } 3884 locale_set = 1; 3885 tables = PCRE_MAKETABLES; 3886 pp = ppp; 3887 break; 3888 3889 case '>': 3890 to_file = pp; 3891 while (*pp != 0) pp++; 3892 while (isspace(pp[-1])) pp--; 3893 *pp = 0; 3894 break; 3895 3896 case '<': 3897 { 3898 int x = check_mc_option(pp, outfile, FALSE, "modifier"); 3899 if (x == 0) goto SKIP_DATA; 3900 options |= x; 3901 while (*pp++ != '>'); 3902 } 3903 break; 3904 3905 case '\r': /* So that it works in Windows */ 3906 case '\n': 3907 case ' ': 3908 break; 3909 3910 default: 3911 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]); 3912 goto SKIP_DATA; 3913 } 3914 } 3915 3916 /* Handle compiling via the POSIX interface, which doesn't support the 3917 timing, showing, or debugging options, nor the ability to pass over 
3918 local character tables. Neither does it have 16-bit support. */ 3919 3920 #if !defined NOPOSIX 3921 if (posix || do_posix) 3922 { 3923 int rc; 3924 int cflags = 0; 3925 3926 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE; 3927 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE; 3928 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL; 3929 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; 3930 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8; 3931 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP; 3932 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY; 3933 3934 rc = regcomp(&preg, (char *)p, cflags); 3935 3936 /* Compilation failed; go back for another re, skipping to blank line 3937 if non-interactive. */ 3938 3939 if (rc != 0) 3940 { 3941 (void)regerror(rc, &preg, (char *)buffer, buffer_size); 3942 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer); 3943 goto SKIP_DATA; 3944 } 3945 } 3946 3947 /* Handle compiling via the native interface */ 3948 3949 else 3950 #endif /* !defined NOPOSIX */ 3951 3952 { 3953 /* In 16- or 32-bit mode, convert the input. 
*/ 3954 3955 #ifdef SUPPORT_PCRE16 3956 if (pcre_mode == PCRE16_MODE) 3957 { 3958 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p))) 3959 { 3960 case -1: 3961 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " 3962 "converted to UTF-16\n"); 3963 goto SKIP_DATA; 3964 3965 case -2: 3966 fprintf(outfile, "**Failed: character value greater than 0x10ffff " 3967 "cannot be converted to UTF-16\n"); 3968 goto SKIP_DATA; 3969 3970 case -3: /* "Impossible error" when to16 is called arg1 FALSE */ 3971 fprintf(outfile, "**Failed: character value greater than 0xffff " 3972 "cannot be converted to 16-bit in non-UTF mode\n"); 3973 goto SKIP_DATA; 3974 3975 default: 3976 break; 3977 } 3978 p = (pcre_uint8 *)buffer16; 3979 } 3980 #endif 3981 3982 #ifdef SUPPORT_PCRE32 3983 if (pcre_mode == PCRE32_MODE) 3984 { 3985 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p))) 3986 { 3987 case -1: 3988 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be " 3989 "converted to UTF-32\n"); 3990 goto SKIP_DATA; 3991 3992 case -2: 3993 fprintf(outfile, "**Failed: character value greater than 0x10ffff " 3994 "cannot be converted to UTF-32\n"); 3995 goto SKIP_DATA; 3996 3997 case -3: 3998 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n"); 3999 goto SKIP_DATA; 4000 4001 default: 4002 break; 4003 } 4004 p = (pcre_uint8 *)buffer32; 4005 } 4006 #endif 4007 4008 /* Compile many times when timing */ 4009 4010 if (timeit > 0) 4011 { 4012 register int i; 4013 clock_t time_taken; 4014 clock_t start_time = clock(); 4015 for (i = 0; i < timeit; i++) 4016 { 4017 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); 4018 if (re != NULL) free(re); 4019 } 4020 total_compile_time += (time_taken = clock() - start_time); 4021 fprintf(outfile, "Compile time %.4f milliseconds\n", 4022 (((double)time_taken * 1000.0) / (double)timeit) / 4023 (double)CLOCKS_PER_SEC); 4024 } 4025 4026 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables); 4027 
4028 /* Compilation failed; go back for another re, skipping to blank line 4029 if non-interactive. */ 4030 4031 if (re == NULL) 4032 { 4033 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset); 4034 SKIP_DATA: 4035 if (infile != stdin) 4036 { 4037 for (;;) 4038 { 4039 if (extend_inputline(infile, buffer, NULL) == NULL) 4040 { 4041 done = 1; 4042 goto CONTINUE; 4043 } 4044 len = (int)strlen((char *)buffer); 4045 while (len > 0 && isspace(buffer[len-1])) len--; 4046 if (len == 0) break; 4047 } 4048 fprintf(outfile, "\n"); 4049 } 4050 goto CONTINUE; 4051 } 4052 4053 /* Compilation succeeded. It is now possible to set the UTF-8 option from 4054 within the regex; check for this so that we know how to process the data 4055 lines. */ 4056 4057 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) 4058 goto SKIP_DATA; 4059 if ((get_options & PCRE_UTF8) != 0) use_utf = 1; 4060 4061 /* Extract the size for possible writing before possibly flipping it, 4062 and remember the store that was got. 
*/ 4063 4064 true_size = REAL_PCRE_SIZE(re); 4065 4066 /* Output code size information if requested */ 4067 4068 if (log_store) 4069 { 4070 int name_count, name_entry_size, real_pcre_size; 4071 4072 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count); 4073 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); 4074 real_pcre_size = 0; 4075 #ifdef SUPPORT_PCRE8 4076 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8) 4077 real_pcre_size = sizeof(real_pcre); 4078 #endif 4079 #ifdef SUPPORT_PCRE16 4080 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16) 4081 real_pcre_size = sizeof(real_pcre16); 4082 #endif 4083 #ifdef SUPPORT_PCRE32 4084 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32) 4085 real_pcre_size = sizeof(real_pcre32); 4086 #endif 4087 new_info(re, NULL, PCRE_INFO_SIZE, &size); 4088 fprintf(outfile, "Memory allocation (code space): %d\n", 4089 (int)(size - real_pcre_size - name_count * name_entry_size)); 4090 } 4091 4092 /* If -s or /S was present, study the regex to generate additional info to 4093 help with the matching, unless the pattern has the SS option, which 4094 suppresses the effect of /S (used for a few test patterns where studying is 4095 never sensible). 
*/ 4096 4097 if (do_study || (force_study >= 0 && !no_force_study)) 4098 { 4099 if (timeit > 0) 4100 { 4101 register int i; 4102 clock_t time_taken; 4103 clock_t start_time = clock(); 4104 for (i = 0; i < timeit; i++) 4105 { 4106 PCRE_STUDY(extra, re, study_options, &error); 4107 } 4108 total_study_time = (time_taken = clock() - start_time); 4109 if (extra != NULL) 4110 { 4111 PCRE_FREE_STUDY(extra); 4112 } 4113 fprintf(outfile, " Study time %.4f milliseconds\n", 4114 (((double)time_taken * 1000.0) / (double)timeit) / 4115 (double)CLOCKS_PER_SEC); 4116 } 4117 PCRE_STUDY(extra, re, study_options, &error); 4118 if (error != NULL) 4119 fprintf(outfile, "Failed to study: %s\n", error); 4120 else if (extra != NULL) 4121 { 4122 true_study_size = ((pcre_study_data *)(extra->study_data))->size; 4123 if (log_store) 4124 { 4125 size_t jitsize; 4126 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 && 4127 jitsize != 0) 4128 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize); 4129 } 4130 } 4131 } 4132 4133 /* If /K was present, we set up for handling MARK data. */ 4134 4135 if (do_mark) 4136 { 4137 if (extra == NULL) 4138 { 4139 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 4140 extra->flags = 0; 4141 } 4142 extra->mark = &markptr; 4143 extra->flags |= PCRE_EXTRA_MARK; 4144 } 4145 4146 /* Extract and display information from the compiled data if required. 
*/ 4147 4148 SHOW_INFO: 4149 4150 if (do_debug) 4151 { 4152 fprintf(outfile, "------------------------------------------------------------------\n"); 4153 PCRE_PRINTINT(re, outfile, debug_lengths); 4154 } 4155 4156 /* We already have the options in get_options (see above) */ 4157 4158 if (do_showinfo) 4159 { 4160 unsigned long int all_options; 4161 pcre_uint32 first_char, need_char; 4162 pcre_uint32 match_limit, recursion_limit; 4163 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged, 4164 hascrorlf, maxlookbehind, match_empty; 4165 int nameentrysize, namecount; 4166 const pcre_uint8 *nametable; 4167 4168 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) + 4169 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) + 4170 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) + 4171 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) + 4172 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) + 4173 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) + 4174 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) + 4175 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) + 4176 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) + 4177 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) + 4178 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) + 4179 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) + 4180 new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) + 4181 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind) 4182 != 0) 4183 goto SKIP_DATA; 4184 4185 fprintf(outfile, "Capturing subpattern count = %d\n", count); 4186 4187 if (backrefmax > 0) 4188 fprintf(outfile, "Max back reference = %d\n", backrefmax); 4189 4190 if (maxlookbehind > 0) 4191 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); 4192 4193 if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0) 4194 fprintf(outfile, "Match limit = %u\n", match_limit); 4195 4196 if (new_info(re, NULL, 
PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0) 4197 fprintf(outfile, "Recursion limit = %u\n", recursion_limit); 4198 4199 if (namecount > 0) 4200 { 4201 fprintf(outfile, "Named capturing subpatterns:\n"); 4202 while (namecount-- > 0) 4203 { 4204 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1; 4205 int length = (int)STRLEN(nametable + imm2_size); 4206 fprintf(outfile, " "); 4207 PCHARSV(nametable, imm2_size, length, outfile); 4208 while (length++ < nameentrysize - imm2_size) putc(' ', outfile); 4209 #ifdef SUPPORT_PCRE32 4210 if (pcre_mode == PCRE32_MODE) 4211 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0])); 4212 #endif 4213 #ifdef SUPPORT_PCRE16 4214 if (pcre_mode == PCRE16_MODE) 4215 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0])); 4216 #endif 4217 #ifdef SUPPORT_PCRE8 4218 if (pcre_mode == PCRE8_MODE) 4219 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]); 4220 #endif 4221 nametable += nameentrysize * CHAR_SIZE; 4222 } 4223 } 4224 4225 if (!okpartial) fprintf(outfile, "Partial matching not supported\n"); 4226 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); 4227 if (match_empty) fprintf(outfile, "May match empty string\n"); 4228 4229 all_options = REAL_PCRE_OPTIONS(re); 4230 if (do_flip) all_options = swap_uint32(all_options); 4231 4232 if (get_options == 0) fprintf(outfile, "No options\n"); 4233 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", 4234 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "", 4235 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "", 4236 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "", 4237 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "", 4238 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "", 4239 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "", 4240 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "", 4241 ((get_options & PCRE_BSR_UNICODE) != 0)? 
" bsr_unicode" : "", 4242 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", 4243 ((get_options & PCRE_EXTRA) != 0)? " extra" : "", 4244 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "", 4245 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "", 4246 ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "", 4247 ((get_options & PCRE_UTF8) != 0)? " utf" : "", 4248 ((get_options & PCRE_UCP) != 0)? " ucp" : "", 4249 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "", 4250 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "", 4251 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "", 4252 ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : ""); 4253 4254 if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); 4255 4256 switch (get_options & PCRE_NEWLINE_BITS) 4257 { 4258 case PCRE_NEWLINE_CR: 4259 fprintf(outfile, "Forced newline sequence: CR\n"); 4260 break; 4261 4262 case PCRE_NEWLINE_LF: 4263 fprintf(outfile, "Forced newline sequence: LF\n"); 4264 break; 4265 4266 case PCRE_NEWLINE_CRLF: 4267 fprintf(outfile, "Forced newline sequence: CRLF\n"); 4268 break; 4269 4270 case PCRE_NEWLINE_ANYCRLF: 4271 fprintf(outfile, "Forced newline sequence: ANYCRLF\n"); 4272 break; 4273 4274 case PCRE_NEWLINE_ANY: 4275 fprintf(outfile, "Forced newline sequence: ANY\n"); 4276 break; 4277 4278 default: 4279 break; 4280 } 4281 4282 if (first_char_set == 2) 4283 { 4284 fprintf(outfile, "First char at start or follows newline\n"); 4285 } 4286 else if (first_char_set == 1) 4287 { 4288 const char *caseless = 4289 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)? 
4290 "" : " (caseless)"; 4291 4292 if (PRINTOK(first_char)) 4293 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless); 4294 else 4295 { 4296 fprintf(outfile, "First char = "); 4297 pchar(first_char, outfile); 4298 fprintf(outfile, "%s\n", caseless); 4299 } 4300 } 4301 else 4302 { 4303 fprintf(outfile, "No first char\n"); 4304 } 4305 4306 if (need_char_set == 0) 4307 { 4308 fprintf(outfile, "No need char\n"); 4309 } 4310 else 4311 { 4312 const char *caseless = 4313 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)? 4314 "" : " (caseless)"; 4315 4316 if (PRINTOK(need_char)) 4317 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless); 4318 else 4319 { 4320 fprintf(outfile, "Need char = "); 4321 pchar(need_char, outfile); 4322 fprintf(outfile, "%s\n", caseless); 4323 } 4324 } 4325 4326 /* Don't output study size; at present it is in any case a fixed 4327 value, but it varies, depending on the computer architecture, and 4328 so messes up the test suite. (And with the /F option, it might be 4329 flipped.) If study was forced by an external -s, don't show this 4330 information unless -i or -d was also present. This means that, except 4331 when auto-callouts are involved, the output from runs with and without 4332 -s should be identical. 
*/ 4333 4334 if (do_study || (force_study >= 0 && showinfo && !no_force_study)) 4335 { 4336 if (extra == NULL) 4337 fprintf(outfile, "Study returned NULL\n"); 4338 else 4339 { 4340 pcre_uint8 *start_bits = NULL; 4341 int minlength; 4342 4343 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0) 4344 fprintf(outfile, "Subject length lower bound = %d\n", minlength); 4345 4346 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0) 4347 { 4348 if (start_bits == NULL) 4349 fprintf(outfile, "No starting char list\n"); 4350 else 4351 { 4352 int i; 4353 int c = 24; 4354 fprintf(outfile, "Starting chars: "); 4355 for (i = 0; i < 256; i++) 4356 { 4357 if ((start_bits[i/8] & (1<<(i&7))) != 0) 4358 { 4359 if (c > 75) 4360 { 4361 fprintf(outfile, "\n "); 4362 c = 2; 4363 } 4364 if (PRINTOK(i) && i != ' ') 4365 { 4366 fprintf(outfile, "%c ", i); 4367 c += 2; 4368 } 4369 else 4370 { 4371 fprintf(outfile, "\\x%02x ", i); 4372 c += 5; 4373 } 4374 } 4375 } 4376 fprintf(outfile, "\n"); 4377 } 4378 } 4379 } 4380 4381 /* Show this only if the JIT was set by /S, not by -s. */ 4382 4383 if ((study_options & PCRE_STUDY_ALLJIT) != 0 && 4384 (force_study_options & PCRE_STUDY_ALLJIT) == 0) 4385 { 4386 int jit; 4387 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0) 4388 { 4389 if (jit) 4390 fprintf(outfile, "JIT study was successful\n"); 4391 else 4392 #ifdef SUPPORT_JIT 4393 fprintf(outfile, "JIT study was not successful\n"); 4394 #else 4395 fprintf(outfile, "JIT support is not available in this version of PCRE\n"); 4396 #endif 4397 } 4398 } 4399 } 4400 } 4401 4402 /* If the '>' option was present, we write out the regex to a file, and 4403 that is all. The first 8 bytes of the file are the regex length and then 4404 the study length, in big-endian order. 
*/ 4405 4406 if (to_file != NULL) 4407 { 4408 FILE *f = fopen((char *)to_file, "wb"); 4409 if (f == NULL) 4410 { 4411 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno)); 4412 } 4413 else 4414 { 4415 pcre_uint8 sbuf[8]; 4416 4417 if (do_flip) regexflip(re, extra); 4418 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255); 4419 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255); 4420 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255); 4421 sbuf[3] = (pcre_uint8)((true_size) & 255); 4422 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255); 4423 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255); 4424 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255); 4425 sbuf[7] = (pcre_uint8)((true_study_size) & 255); 4426 4427 if (fwrite(sbuf, 1, 8, f) < 8 || 4428 fwrite(re, 1, true_size, f) < true_size) 4429 { 4430 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno)); 4431 } 4432 else 4433 { 4434 fprintf(outfile, "Compiled pattern written to %s\n", to_file); 4435 4436 /* If there is study data, write it. 
*/ 4437 4438 if (extra != NULL) 4439 { 4440 if (fwrite(extra->study_data, 1, true_study_size, f) < 4441 true_study_size) 4442 { 4443 fprintf(outfile, "Write error on %s: %s\n", to_file, 4444 strerror(errno)); 4445 } 4446 else fprintf(outfile, "Study data written to %s\n", to_file); 4447 } 4448 } 4449 fclose(f); 4450 } 4451 4452 new_free(re); 4453 if (extra != NULL) 4454 { 4455 PCRE_FREE_STUDY(extra); 4456 } 4457 if (locale_set) 4458 { 4459 new_free((void *)tables); 4460 setlocale(LC_CTYPE, "C"); 4461 locale_set = 0; 4462 } 4463 continue; /* With next regex */ 4464 } 4465 } /* End of non-POSIX compile */ 4466 4467 /* Read data lines and test them */ 4468 4469 for (;;) 4470 { 4471 #ifdef SUPPORT_PCRE8 4472 pcre_uint8 *q8; 4473 #endif 4474 #ifdef SUPPORT_PCRE16 4475 pcre_uint16 *q16; 4476 #endif 4477 #ifdef SUPPORT_PCRE32 4478 pcre_uint32 *q32; 4479 #endif 4480 pcre_uint8 *bptr; 4481 int *use_offsets = offsets; 4482 int use_size_offsets = size_offsets; 4483 int callout_data = 0; 4484 int callout_data_set = 0; 4485 int count; 4486 pcre_uint32 c; 4487 int copystrings = 0; 4488 int find_match_limit = default_find_match_limit; 4489 int getstrings = 0; 4490 int getlist = 0; 4491 int gmatched = 0; 4492 int start_offset = 0; 4493 int start_offset_sign = 1; 4494 int g_notempty = 0; 4495 int use_dfa = 0; 4496 4497 *copynames = 0; 4498 *getnames = 0; 4499 4500 #ifdef SUPPORT_PCRE32 4501 cn32ptr = copynames; 4502 gn32ptr = getnames; 4503 #endif 4504 #ifdef SUPPORT_PCRE16 4505 cn16ptr = copynames16; 4506 gn16ptr = getnames16; 4507 #endif 4508 #ifdef SUPPORT_PCRE8 4509 cn8ptr = copynames8; 4510 gn8ptr = getnames8; 4511 #endif 4512 4513 SET_PCRE_CALLOUT(callout); 4514 first_callout = 1; 4515 last_callout_mark = NULL; 4516 callout_extra = 0; 4517 callout_count = 0; 4518 callout_fail_count = 999999; 4519 callout_fail_id = -1; 4520 show_malloc = 0; 4521 options = 0; 4522 4523 if (extra != NULL) extra->flags &= 4524 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION); 4525 4526 
len = 0; 4527 for (;;) 4528 { 4529 if (extend_inputline(infile, buffer + len, "data> ") == NULL) 4530 { 4531 if (len > 0) /* Reached EOF without hitting a newline */ 4532 { 4533 fprintf(outfile, "\n"); 4534 break; 4535 } 4536 done = 1; 4537 goto CONTINUE; 4538 } 4539 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); 4540 len = (int)strlen((char *)buffer); 4541 if (buffer[len-1] == '\n') break; 4542 } 4543 4544 while (len > 0 && isspace(buffer[len-1])) len--; 4545 buffer[len] = 0; 4546 if (len == 0) break; 4547 4548 p = buffer; 4549 while (isspace(*p)) p++; 4550 4551 #ifndef NOUTF 4552 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create 4553 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */ 4554 4555 if (use_utf) 4556 { 4557 pcre_uint8 *q; 4558 pcre_uint32 cc; 4559 int n = 1; 4560 4561 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc); 4562 if (n <= 0) 4563 { 4564 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n"); 4565 goto NEXT_DATA; 4566 } 4567 } 4568 #endif 4569 4570 #ifdef SUPPORT_VALGRIND 4571 /* Mark the dbuffer as addressable but undefined again. */ 4572 4573 if (dbuffer != NULL) 4574 { 4575 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE); 4576 } 4577 #endif 4578 4579 /* Allocate a buffer to hold the data line; len+1 is an upper bound on 4580 the number of pcre_uchar units that will be needed. 
*/ 4581 4582 while (dbuffer == NULL || (size_t)len >= dbuffer_size) 4583 { 4584 dbuffer_size *= 2; 4585 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE); 4586 if (dbuffer == NULL) 4587 { 4588 fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size); 4589 exit(1); 4590 } 4591 } 4592 4593 #ifdef SUPPORT_PCRE8 4594 q8 = (pcre_uint8 *) dbuffer; 4595 #endif 4596 #ifdef SUPPORT_PCRE16 4597 q16 = (pcre_uint16 *) dbuffer; 4598 #endif 4599 #ifdef SUPPORT_PCRE32 4600 q32 = (pcre_uint32 *) dbuffer; 4601 #endif 4602 4603 while ((c = *p++) != 0) 4604 { 4605 int i = 0; 4606 int n = 0; 4607 4608 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes. 4609 In non-UTF mode, allow the value of the byte to fall through to later, 4610 where values greater than 127 are turned into UTF-8 when running in 4611 16-bit or 32-bit mode. */ 4612 4613 if (c != '\\') 4614 { 4615 #ifndef NOUTF 4616 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); } 4617 #endif 4618 } 4619 4620 /* Handle backslash escapes */ 4621 4622 else switch ((c = *p++)) 4623 { 4624 case 'a': c = CHAR_BEL; break; 4625 case 'b': c = '\b'; break; 4626 case 'e': c = CHAR_ESC; break; 4627 case 'f': c = '\f'; break; 4628 case 'n': c = '\n'; break; 4629 case 'r': c = '\r'; break; 4630 case 't': c = '\t'; break; 4631 case 'v': c = '\v'; break; 4632 4633 case '0': case '1': case '2': case '3': 4634 case '4': case '5': case '6': case '7': 4635 c -= '0'; 4636 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') 4637 c = c * 8 + *p++ - '0'; 4638 break; 4639 4640 case 'o': 4641 if (*p == '{') 4642 { 4643 pcre_uint8 *pt = p; 4644 c = 0; 4645 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++) 4646 { 4647 if (++i == 12) 4648 fprintf(outfile, "** Too many octal digits in \\o{...} item; " 4649 "using only the first twelve.\n"); 4650 else c = c * 8 + *pt - '0'; 4651 } 4652 if (*pt == '}') p = pt + 1; 4653 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n"); 4654 } 4655 
break; 4656 4657 case 'x': 4658 if (*p == '{') 4659 { 4660 pcre_uint8 *pt = p; 4661 c = 0; 4662 4663 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails 4664 when isxdigit() is a macro that refers to its argument more than 4665 once. This is banned by the C Standard, but apparently happens in at 4666 least one MacOS environment. */ 4667 4668 for (pt++; isxdigit(*pt); pt++) 4669 { 4670 if (++i == 9) 4671 fprintf(outfile, "** Too many hex digits in \\x{...} item; " 4672 "using only the first eight.\n"); 4673 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10); 4674 } 4675 if (*pt == '}') 4676 { 4677 p = pt + 1; 4678 break; 4679 } 4680 /* Not correct form for \x{...}; fall through */ 4681 } 4682 4683 /* \x without {} always defines just one byte in 8-bit mode. This 4684 allows UTF-8 characters to be constructed byte by byte, and also allows 4685 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode. 4686 Otherwise, pass it down to later code so that it can be turned into 4687 UTF-8 when running in 16/32-bit mode. */ 4688 4689 c = 0; 4690 while (i++ < 2 && isxdigit(*p)) 4691 { 4692 c = c * 16 + tolower(*p) - ((isdigit(*p))? 
'0' : 'a' - 10); 4693 p++; 4694 } 4695 #if !defined NOUTF && defined SUPPORT_PCRE8 4696 if (use_utf && (pcre_mode == PCRE8_MODE)) 4697 { 4698 *q8++ = c; 4699 continue; 4700 } 4701 #endif 4702 break; 4703 4704 case 0: /* \ followed by EOF allows for an empty line */ 4705 p--; 4706 continue; 4707 4708 case '>': 4709 if (*p == '-') 4710 { 4711 start_offset_sign = -1; 4712 p++; 4713 } 4714 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0'; 4715 start_offset *= start_offset_sign; 4716 continue; 4717 4718 case 'A': /* Option setting */ 4719 options |= PCRE_ANCHORED; 4720 continue; 4721 4722 case 'B': 4723 options |= PCRE_NOTBOL; 4724 continue; 4725 4726 case 'C': 4727 if (isdigit(*p)) /* Set copy string */ 4728 { 4729 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 4730 copystrings |= 1 << n; 4731 } 4732 else if (isalnum(*p)) 4733 { 4734 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re); 4735 } 4736 else if (*p == '+') 4737 { 4738 callout_extra = 1; 4739 p++; 4740 } 4741 else if (*p == '-') 4742 { 4743 SET_PCRE_CALLOUT(NULL); 4744 p++; 4745 } 4746 else if (*p == '!') 4747 { 4748 callout_fail_id = 0; 4749 p++; 4750 while(isdigit(*p)) 4751 callout_fail_id = callout_fail_id * 10 + *p++ - '0'; 4752 callout_fail_count = 0; 4753 if (*p == '!') 4754 { 4755 p++; 4756 while(isdigit(*p)) 4757 callout_fail_count = callout_fail_count * 10 + *p++ - '0'; 4758 } 4759 } 4760 else if (*p == '*') 4761 { 4762 int sign = 1; 4763 callout_data = 0; 4764 if (*(++p) == '-') { sign = -1; p++; } 4765 while(isdigit(*p)) 4766 callout_data = callout_data * 10 + *p++ - '0'; 4767 callout_data *= sign; 4768 callout_data_set = 1; 4769 } 4770 continue; 4771 4772 #if !defined NODFA 4773 case 'D': 4774 #if !defined NOPOSIX 4775 if (posix || do_posix) 4776 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n"); 4777 else 4778 #endif 4779 use_dfa = 1; 4780 continue; 4781 #endif 4782 4783 #if !defined NODFA 4784 case 'F': 4785 options |= PCRE_DFA_SHORTEST; 4786 continue; 4787 #endif 
4788 4789 case 'G': 4790 if (isdigit(*p)) 4791 { 4792 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 4793 getstrings |= 1 << n; 4794 } 4795 else if (isalnum(*p)) 4796 { 4797 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re); 4798 } 4799 continue; 4800 4801 case 'J': 4802 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 4803 if (extra != NULL 4804 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 4805 && extra->executable_jit != NULL) 4806 { 4807 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); } 4808 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024); 4809 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); 4810 } 4811 continue; 4812 4813 case 'L': 4814 getlist = 1; 4815 continue; 4816 4817 case 'M': 4818 find_match_limit = 1; 4819 continue; 4820 4821 case 'N': 4822 if ((options & PCRE_NOTEMPTY) != 0) 4823 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART; 4824 else 4825 options |= PCRE_NOTEMPTY; 4826 continue; 4827 4828 case 'O': 4829 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 4830 if (n > size_offsets_max) 4831 { 4832 size_offsets_max = n; 4833 free(offsets); 4834 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int)); 4835 if (offsets == NULL) 4836 { 4837 printf("** Failed to get %d bytes of memory for offsets vector\n", 4838 (int)(size_offsets_max * sizeof(int))); 4839 yield = 1; 4840 goto EXIT; 4841 } 4842 } 4843 use_size_offsets = n; 4844 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */ 4845 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */ 4846 continue; 4847 4848 case 'P': 4849 options |= ((options & PCRE_PARTIAL_SOFT) == 0)? 
4850 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD; 4851 continue; 4852 4853 case 'Q': 4854 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 4855 if (extra == NULL) 4856 { 4857 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 4858 extra->flags = 0; 4859 } 4860 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 4861 extra->match_limit_recursion = n; 4862 continue; 4863 4864 case 'q': 4865 while(isdigit(*p)) n = n * 10 + *p++ - '0'; 4866 if (extra == NULL) 4867 { 4868 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 4869 extra->flags = 0; 4870 } 4871 extra->flags |= PCRE_EXTRA_MATCH_LIMIT; 4872 extra->match_limit = n; 4873 continue; 4874 4875 #if !defined NODFA 4876 case 'R': 4877 options |= PCRE_DFA_RESTART; 4878 continue; 4879 #endif 4880 4881 case 'S': 4882 show_malloc = 1; 4883 continue; 4884 4885 case 'Y': 4886 options |= PCRE_NO_START_OPTIMIZE; 4887 continue; 4888 4889 case 'Z': 4890 options |= PCRE_NOTEOL; 4891 continue; 4892 4893 case '?': 4894 options |= PCRE_NO_UTF8_CHECK; 4895 continue; 4896 4897 case '<': 4898 { 4899 int x = check_mc_option(p, outfile, TRUE, "escape sequence"); 4900 if (x == 0) goto NEXT_DATA; 4901 options |= x; 4902 while (*p++ != '>'); 4903 } 4904 continue; 4905 } 4906 4907 /* We now have a character value in c that may be greater than 255. 4908 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater 4909 than 127 in UTF mode must have come from \x{...} or octal constructs 4910 because values from \x.. get this far only in non-UTF mode. 
*/ 4911 4912 #ifdef SUPPORT_PCRE8 4913 if (pcre_mode == PCRE8_MODE) 4914 { 4915 #ifndef NOUTF 4916 if (use_utf) 4917 { 4918 if (c > 0x7fffffff) 4919 { 4920 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff " 4921 "and so cannot be converted to UTF-8\n", c); 4922 goto NEXT_DATA; 4923 } 4924 q8 += ord2utf8(c, q8); 4925 } 4926 else 4927 #endif 4928 { 4929 if (c > 0xffu) 4930 { 4931 fprintf(outfile, "** Character \\x{%x} is greater than 255 " 4932 "and UTF-8 mode is not enabled.\n", c); 4933 fprintf(outfile, "** Truncation will probably give the wrong " 4934 "result.\n"); 4935 } 4936 *q8++ = c; 4937 } 4938 } 4939 #endif 4940 #ifdef SUPPORT_PCRE16 4941 if (pcre_mode == PCRE16_MODE) 4942 { 4943 #ifndef NOUTF 4944 if (use_utf) 4945 { 4946 if (c > 0x10ffffu) 4947 { 4948 fprintf(outfile, "** Failed: character \\x{%x} is greater than " 4949 "0x10ffff and so cannot be converted to UTF-16\n", c); 4950 goto NEXT_DATA; 4951 } 4952 else if (c >= 0x10000u) 4953 { 4954 c-= 0x10000u; 4955 *q16++ = 0xD800 | (c >> 10); 4956 *q16++ = 0xDC00 | (c & 0x3ff); 4957 } 4958 else 4959 *q16++ = c; 4960 } 4961 else 4962 #endif 4963 { 4964 if (c > 0xffffu) 4965 { 4966 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff " 4967 "and UTF-16 mode is not enabled.\n", c); 4968 fprintf(outfile, "** Truncation will probably give the wrong " 4969 "result.\n"); 4970 } 4971 4972 *q16++ = c; 4973 } 4974 } 4975 #endif 4976 #ifdef SUPPORT_PCRE32 4977 if (pcre_mode == PCRE32_MODE) 4978 { 4979 *q32++ = c; 4980 } 4981 #endif 4982 4983 } 4984 4985 /* Reached end of subject string */ 4986 4987 #ifdef SUPPORT_PCRE8 4988 if (pcre_mode == PCRE8_MODE) 4989 { 4990 *q8 = 0; 4991 len = (int)(q8 - (pcre_uint8 *)dbuffer); 4992 } 4993 #endif 4994 #ifdef SUPPORT_PCRE16 4995 if (pcre_mode == PCRE16_MODE) 4996 { 4997 *q16 = 0; 4998 len = (int)(q16 - (pcre_uint16 *)dbuffer); 4999 } 5000 #endif 5001 #ifdef SUPPORT_PCRE32 5002 if (pcre_mode == PCRE32_MODE) 5003 { 5004 *q32 = 0; 5005 len = (int)(q32 - 
(pcre_uint32 *)dbuffer); 5006 } 5007 #endif 5008 5009 /* If we're compiling with explicit valgrind support, Mark the data from after 5010 its end to the end of the buffer as unaddressable, so that a read over the end 5011 of the buffer will be seen by valgrind, even if it doesn't cause a crash. 5012 If we're not building with valgrind support, at least move the data to the end 5013 of the buffer so that it might at least cause a crash. 5014 If we are using the POSIX interface, we must include the terminating zero. */ 5015 5016 bptr = dbuffer; 5017 5018 #if !defined NOPOSIX 5019 if (posix || do_posix) 5020 { 5021 #ifdef SUPPORT_VALGRIND 5022 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1)); 5023 #else 5024 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1); 5025 bptr += dbuffer_size - len - 1; 5026 #endif 5027 } 5028 else 5029 #endif 5030 { 5031 #ifdef SUPPORT_VALGRIND 5032 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE); 5033 #else 5034 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE); 5035 #endif 5036 } 5037 5038 if ((all_use_dfa || use_dfa) && find_match_limit) 5039 { 5040 printf("**Match limit not relevant for DFA matching: ignored\n"); 5041 find_match_limit = 0; 5042 } 5043 5044 /* Handle matching via the POSIX interface, which does not 5045 support timing or playing with the match limit or callout data. 
*/ 5046 5047 #if !defined NOPOSIX 5048 if (posix || do_posix) 5049 { 5050 int rc; 5051 int eflags = 0; 5052 regmatch_t *pmatch = NULL; 5053 if (use_size_offsets > 0) 5054 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets); 5055 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; 5056 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; 5057 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; 5058 5059 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); 5060 5061 if (rc != 0) 5062 { 5063 (void)regerror(rc, &preg, (char *)buffer, buffer_size); 5064 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer); 5065 } 5066 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0) 5067 { 5068 fprintf(outfile, "Matched with REG_NOSUB\n"); 5069 } 5070 else 5071 { 5072 size_t i; 5073 for (i = 0; i < (size_t)use_size_offsets; i++) 5074 { 5075 if (pmatch[i].rm_so >= 0) 5076 { 5077 fprintf(outfile, "%2d: ", (int)i); 5078 PCHARSV(dbuffer, pmatch[i].rm_so, 5079 pmatch[i].rm_eo - pmatch[i].rm_so, outfile); 5080 fprintf(outfile, "\n"); 5081 if (do_showcaprest || (i == 0 && do_showrest)) 5082 { 5083 fprintf(outfile, "%2d+ ", (int)i); 5084 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo, 5085 outfile); 5086 fprintf(outfile, "\n"); 5087 } 5088 } 5089 } 5090 } 5091 free(pmatch); 5092 goto NEXT_DATA; 5093 } 5094 5095 #endif /* !defined NOPOSIX */ 5096 5097 /* Handle matching via the native interface - repeats for /g and /G */ 5098 5099 /* Ensure that there is a JIT callback if we want to verify that JIT was 5100 actually used. If jit_stack == NULL, no stack has yet been assigned. 
*/ 5101 5102 if (verify_jit && jit_stack == NULL && extra != NULL) 5103 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); } 5104 5105 for (;; gmatched++) /* Loop for /g or /G */ 5106 { 5107 markptr = NULL; 5108 jit_was_used = FALSE; 5109 5110 if (timeitm > 0) 5111 { 5112 register int i; 5113 clock_t time_taken; 5114 clock_t start_time = clock(); 5115 5116 #if !defined NODFA 5117 if (all_use_dfa || use_dfa) 5118 { 5119 if ((options & PCRE_DFA_RESTART) != 0) 5120 { 5121 fprintf(outfile, "Timing DFA restarts is not supported\n"); 5122 break; 5123 } 5124 if (dfa_workspace == NULL) 5125 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 5126 for (i = 0; i < timeitm; i++) 5127 { 5128 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, 5129 (options | g_notempty), use_offsets, use_size_offsets, 5130 dfa_workspace, DFA_WS_DIMENSION); 5131 } 5132 } 5133 else 5134 #endif 5135 5136 for (i = 0; i < timeitm; i++) 5137 { 5138 PCRE_EXEC(count, re, extra, bptr, len, start_offset, 5139 (options | g_notempty), use_offsets, use_size_offsets); 5140 } 5141 total_match_time += (time_taken = clock() - start_time); 5142 fprintf(outfile, "Execute time %.4f milliseconds\n", 5143 (((double)time_taken * 1000.0) / (double)timeitm) / 5144 (double)CLOCKS_PER_SEC); 5145 } 5146 5147 /* If find_match_limit is set, we want to do repeated matches with 5148 varying limits in order to find the minimum value for the match limit and 5149 for the recursion limit. The match limits are relevant only to the normal 5150 running of pcre_exec(), so disable the JIT optimization. This makes it 5151 possible to run the same set of tests with and without JIT externally 5152 requested. 
*/ 5153 5154 if (find_match_limit) 5155 { 5156 if (extra != NULL) { PCRE_FREE_STUDY(extra); } 5157 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 5158 extra->flags = 0; 5159 5160 (void)check_match_limit(re, extra, bptr, len, start_offset, 5161 options|g_notempty, use_offsets, use_size_offsets, 5162 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit), 5163 PCRE_ERROR_MATCHLIMIT, "match()"); 5164 5165 count = check_match_limit(re, extra, bptr, len, start_offset, 5166 options|g_notempty, use_offsets, use_size_offsets, 5167 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion), 5168 PCRE_ERROR_RECURSIONLIMIT, "match() recursion"); 5169 } 5170 5171 /* If callout_data is set, use the interface with additional data */ 5172 5173 else if (callout_data_set) 5174 { 5175 if (extra == NULL) 5176 { 5177 extra = (pcre_extra *)malloc(sizeof(pcre_extra)); 5178 extra->flags = 0; 5179 } 5180 extra->flags |= PCRE_EXTRA_CALLOUT_DATA; 5181 extra->callout_data = &callout_data; 5182 PCRE_EXEC(count, re, extra, bptr, len, start_offset, 5183 options | g_notempty, use_offsets, use_size_offsets); 5184 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA; 5185 } 5186 5187 /* The normal case is just to do the match once, with the default 5188 value of match_limit. 
*/ 5189 5190 #if !defined NODFA 5191 else if (all_use_dfa || use_dfa) 5192 { 5193 if (dfa_workspace == NULL) 5194 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int)); 5195 if (dfa_matched++ == 0) 5196 dfa_workspace[0] = -1; /* To catch bad restart */ 5197 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, 5198 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace, 5199 DFA_WS_DIMENSION); 5200 if (count == 0) 5201 { 5202 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); 5203 count = use_size_offsets/2; 5204 } 5205 } 5206 #endif 5207 5208 else 5209 { 5210 PCRE_EXEC(count, re, extra, bptr, len, start_offset, 5211 options | g_notempty, use_offsets, use_size_offsets); 5212 if (count == 0) 5213 { 5214 fprintf(outfile, "Matched, but too many substrings\n"); 5215 /* 2 is a special case; match can be returned */ 5216 count = (use_size_offsets == 2)? 1 : use_size_offsets/3; 5217 } 5218 } 5219 5220 /* Matched */ 5221 5222 if (count >= 0) 5223 { 5224 int i, maxcount; 5225 void *cnptr, *gnptr; 5226 5227 #if !defined NODFA 5228 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else 5229 #endif 5230 /* 2 is a special case; match can be returned */ 5231 maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3; 5232 5233 /* This is a check against a lunatic return value. */ 5234 5235 if (count > maxcount) 5236 { 5237 fprintf(outfile, 5238 "** PCRE error: returned count %d is too big for offset size %d\n", 5239 count, use_size_offsets); 5240 count = use_size_offsets/3; 5241 if (do_g || do_G) 5242 { 5243 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G'); 5244 do_g = do_G = FALSE; /* Break g/G loop */ 5245 } 5246 } 5247 5248 /* do_allcaps requests showing of all captures in the pattern, to check 5249 unset ones at the end. 
*/ 5250 5251 if (do_allcaps) 5252 { 5253 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0) 5254 goto SKIP_DATA; 5255 count++; /* Allow for full match */ 5256 if (count * 2 > use_size_offsets) count = use_size_offsets/2; 5257 } 5258 5259 /* Output the captured substrings. Note that, for the matched string, 5260 the use of \K in an assertion can make the start later than the end. */ 5261 5262 for (i = 0; i < count * 2; i += 2) 5263 { 5264 if (use_offsets[i] < 0) 5265 { 5266 if (use_offsets[i] != -1) 5267 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n", 5268 use_offsets[i], i); 5269 if (use_offsets[i+1] != -1) 5270 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n", 5271 use_offsets[i+1], i+1); 5272 fprintf(outfile, "%2d: <unset>\n", i/2); 5273 } 5274 else 5275 { 5276 int start = use_offsets[i]; 5277 int end = use_offsets[i+1]; 5278 5279 if (start > end) 5280 { 5281 start = use_offsets[i+1]; 5282 end = use_offsets[i]; 5283 fprintf(outfile, "Start of matched string is beyond its end - " 5284 "displaying from end to start.\n"); 5285 } 5286 5287 fprintf(outfile, "%2d: ", i/2); 5288 PCHARSV(bptr, start, end - start, outfile); 5289 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)"); 5290 fprintf(outfile, "\n"); 5291 5292 /* Note: don't use the start/end variables here because we want to 5293 show the text from what is reported as the end. 
*/ 5294 5295 if (do_showcaprest || (i == 0 && do_showrest)) 5296 { 5297 fprintf(outfile, "%2d+ ", i/2); 5298 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1], 5299 outfile); 5300 fprintf(outfile, "\n"); 5301 } 5302 } 5303 } 5304 5305 if (markptr != NULL) 5306 { 5307 fprintf(outfile, "MK: "); 5308 PCHARSV(markptr, 0, -1, outfile); 5309 fprintf(outfile, "\n"); 5310 } 5311 5312 for (i = 0; i < 32; i++) 5313 { 5314 if ((copystrings & (1 << i)) != 0) 5315 { 5316 int rc; 5317 char copybuffer[256]; 5318 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i, 5319 copybuffer, sizeof(copybuffer)); 5320 if (rc < 0) 5321 fprintf(outfile, "copy substring %d failed %d\n", i, rc); 5322 else 5323 { 5324 fprintf(outfile, "%2dC ", i); 5325 PCHARSV(copybuffer, 0, rc, outfile); 5326 fprintf(outfile, " (%d)\n", rc); 5327 } 5328 } 5329 } 5330 5331 cnptr = copynames; 5332 for (;;) 5333 { 5334 int rc; 5335 char copybuffer[256]; 5336 5337 #ifdef SUPPORT_PCRE32 5338 if (pcre_mode == PCRE32_MODE) 5339 { 5340 if (*(pcre_uint32 *)cnptr == 0) break; 5341 } 5342 #endif 5343 #ifdef SUPPORT_PCRE16 5344 if (pcre_mode == PCRE16_MODE) 5345 { 5346 if (*(pcre_uint16 *)cnptr == 0) break; 5347 } 5348 #endif 5349 #ifdef SUPPORT_PCRE8 5350 if (pcre_mode == PCRE8_MODE) 5351 { 5352 if (*(pcre_uint8 *)cnptr == 0) break; 5353 } 5354 #endif 5355 5356 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count, 5357 cnptr, copybuffer, sizeof(copybuffer)); 5358 5359 if (rc < 0) 5360 { 5361 fprintf(outfile, "copy substring "); 5362 PCHARSV(cnptr, 0, -1, outfile); 5363 fprintf(outfile, " failed %d\n", rc); 5364 } 5365 else 5366 { 5367 fprintf(outfile, " C "); 5368 PCHARSV(copybuffer, 0, rc, outfile); 5369 fprintf(outfile, " (%d) ", rc); 5370 PCHARSV(cnptr, 0, -1, outfile); 5371 putc('\n', outfile); 5372 } 5373 5374 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE; 5375 } 5376 5377 for (i = 0; i < 32; i++) 5378 { 5379 if ((getstrings & (1 << i)) != 0) 5380 { 5381 int rc; 5382 const char *substring; 5383 
PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring); 5384 if (rc < 0) 5385 fprintf(outfile, "get substring %d failed %d\n", i, rc); 5386 else 5387 { 5388 fprintf(outfile, "%2dG ", i); 5389 PCHARSV(substring, 0, rc, outfile); 5390 fprintf(outfile, " (%d)\n", rc); 5391 PCRE_FREE_SUBSTRING(substring); 5392 } 5393 } 5394 } 5395 5396 gnptr = getnames; 5397 for (;;) 5398 { 5399 int rc; 5400 const char *substring; 5401 5402 #ifdef SUPPORT_PCRE32 5403 if (pcre_mode == PCRE32_MODE) 5404 { 5405 if (*(pcre_uint32 *)gnptr == 0) break; 5406 } 5407 #endif 5408 #ifdef SUPPORT_PCRE16 5409 if (pcre_mode == PCRE16_MODE) 5410 { 5411 if (*(pcre_uint16 *)gnptr == 0) break; 5412 } 5413 #endif 5414 #ifdef SUPPORT_PCRE8 5415 if (pcre_mode == PCRE8_MODE) 5416 { 5417 if (*(pcre_uint8 *)gnptr == 0) break; 5418 } 5419 #endif 5420 5421 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count, 5422 gnptr, &substring); 5423 if (rc < 0) 5424 { 5425 fprintf(outfile, "get substring "); 5426 PCHARSV(gnptr, 0, -1, outfile); 5427 fprintf(outfile, " failed %d\n", rc); 5428 } 5429 else 5430 { 5431 fprintf(outfile, " G "); 5432 PCHARSV(substring, 0, rc, outfile); 5433 fprintf(outfile, " (%d) ", rc); 5434 PCHARSV(gnptr, 0, -1, outfile); 5435 PCRE_FREE_SUBSTRING(substring); 5436 putc('\n', outfile); 5437 } 5438 5439 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE; 5440 } 5441 5442 if (getlist) 5443 { 5444 int rc; 5445 const char **stringlist; 5446 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist); 5447 if (rc < 0) 5448 fprintf(outfile, "get substring list failed %d\n", rc); 5449 else 5450 { 5451 for (i = 0; i < count; i++) 5452 { 5453 fprintf(outfile, "%2dL ", i); 5454 PCHARSV(stringlist[i], 0, -1, outfile); 5455 putc('\n', outfile); 5456 } 5457 if (stringlist[i] != NULL) 5458 fprintf(outfile, "string list not terminated by NULL\n"); 5459 PCRE_FREE_SUBSTRING_LIST(stringlist); 5460 } 5461 } 5462 } 5463 5464 /* There was a partial match. 
If the bumpalong point is not the same as 5465 the first inspected character, show the offset explicitly. */ 5466 5467 else if (count == PCRE_ERROR_PARTIAL) 5468 { 5469 fprintf(outfile, "Partial match"); 5470 if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2]) 5471 fprintf(outfile, " at offset %d", use_offsets[2]); 5472 if (markptr != NULL) 5473 { 5474 fprintf(outfile, ", mark="); 5475 PCHARSV(markptr, 0, -1, outfile); 5476 } 5477 if (use_size_offsets > 1) 5478 { 5479 fprintf(outfile, ": "); 5480 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0], 5481 outfile); 5482 } 5483 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)"); 5484 fprintf(outfile, "\n"); 5485 break; /* Out of the /g loop */ 5486 } 5487 5488 /* Failed to match. If this is a /g or /G loop and we previously set 5489 g_notempty after a null match, this is not necessarily the end. We want 5490 to advance the start offset, and continue. We won't be at the end of the 5491 string - that was checked before setting g_notempty. 5492 5493 Complication arises in the case when the newline convention is "any", 5494 "crlf", or "anycrlf". If the previous match was at the end of a line 5495 terminated by CRLF, an advance of one character just passes the \r, 5496 whereas we should prefer the longer newline sequence, as does the code in 5497 pcre_exec(). Fudge the offset value to achieve this. We check for a 5498 newline setting in the pattern; if none was set, use PCRE_CONFIG() to 5499 find the default. 5500 5501 Otherwise, in the case of UTF-8 matching, the advance must be one 5502 character, not one byte. */ 5503 5504 else 5505 { 5506 if (g_notempty != 0) 5507 { 5508 int onechar = 1; 5509 unsigned int obits = REAL_PCRE_OPTIONS(re); 5510 use_offsets[0] = start_offset; 5511 if ((obits & PCRE_NEWLINE_BITS) == 0) 5512 { 5513 int d; 5514 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d); 5515 /* Note that these values are always the ASCII ones, even in 5516 EBCDIC environments. CR = 13, NL = 10. 
*/ 5517 obits = (d == 13)? PCRE_NEWLINE_CR : 5518 (d == 10)? PCRE_NEWLINE_LF : 5519 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF : 5520 (d == -2)? PCRE_NEWLINE_ANYCRLF : 5521 (d == -1)? PCRE_NEWLINE_ANY : 0; 5522 } 5523 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY || 5524 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF || 5525 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF) 5526 && 5527 start_offset < len - 1 && ( 5528 #ifdef SUPPORT_PCRE8 5529 (pcre_mode == PCRE8_MODE && 5530 bptr[start_offset] == '\r' && 5531 bptr[start_offset + 1] == '\n') || 5532 #endif 5533 #ifdef SUPPORT_PCRE16 5534 (pcre_mode == PCRE16_MODE && 5535 ((PCRE_SPTR16)bptr)[start_offset] == '\r' && 5536 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') || 5537 #endif 5538 #ifdef SUPPORT_PCRE32 5539 (pcre_mode == PCRE32_MODE && 5540 ((PCRE_SPTR32)bptr)[start_offset] == '\r' && 5541 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') || 5542 #endif 5543 0)) 5544 onechar++; 5545 else if (use_utf) 5546 { 5547 while (start_offset + onechar < len) 5548 { 5549 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break; 5550 onechar++; 5551 } 5552 } 5553 use_offsets[1] = start_offset + onechar; 5554 } 5555 else 5556 { 5557 switch(count) 5558 { 5559 case PCRE_ERROR_NOMATCH: 5560 if (gmatched == 0) 5561 { 5562 if (markptr == NULL) 5563 { 5564 fprintf(outfile, "No match"); 5565 } 5566 else 5567 { 5568 fprintf(outfile, "No match, mark = "); 5569 PCHARSV(markptr, 0, -1, outfile); 5570 } 5571 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)"); 5572 putc('\n', outfile); 5573 } 5574 break; 5575 5576 case PCRE_ERROR_BADUTF8: 5577 case PCRE_ERROR_SHORTUTF8: 5578 fprintf(outfile, "Error %d (%s UTF-%d string)", count, 5579 (count == PCRE_ERROR_BADUTF8)? 
"bad" : "short", 5580 8 * CHAR_SIZE); 5581 if (use_size_offsets >= 2) 5582 fprintf(outfile, " offset=%d reason=%d", use_offsets[0], 5583 use_offsets[1]); 5584 fprintf(outfile, "\n"); 5585 break; 5586 5587 case PCRE_ERROR_BADUTF8_OFFSET: 5588 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count, 5589 8 * CHAR_SIZE); 5590 break; 5591 5592 default: 5593 if (count < 0 && 5594 (-count) < (int)(sizeof(errtexts)/sizeof(const char *))) 5595 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]); 5596 else 5597 fprintf(outfile, "Error %d (Unexpected value)\n", count); 5598 break; 5599 } 5600 5601 break; /* Out of the /g loop */ 5602 } 5603 } 5604 5605 /* If not /g or /G we are done */ 5606 5607 if (!do_g && !do_G) break; 5608 5609 if (use_offsets == NULL) 5610 { 5611 fprintf(outfile, "Cannot do global matching without an ovector\n"); 5612 break; 5613 } 5614 5615 /* If we have matched an empty string, first check to see if we are at 5616 the end of the subject. If so, the /g loop is over. Otherwise, mimic what 5617 Perl's /g options does. This turns out to be rather cunning. First we set 5618 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the 5619 same point. If this fails (picked up above) we advance to the next 5620 character. */ 5621 5622 g_notempty = 0; 5623 5624 if (use_offsets[0] == use_offsets[1]) 5625 { 5626 if (use_offsets[0] == len) break; 5627 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; 5628 } 5629 5630 /* For /g, update the start offset, leaving the rest alone. There is a 5631 tricky case when \K is used in a positive lookbehind assertion. This can 5632 cause the end of the match to be less than or equal to the start offset. 5633 In this case we restart at one past the start offset. This may return the 5634 same match if the original start offset was bumped along during the 5635 match, but eventually the new start offset will hit the actual start 5636 offset. 
(In PCRE2 the true start offset is available, and this can be 5637 done better. It is not worth doing more than making sure we do not loop 5638 at this stage in the life of PCRE1.) */ 5639 5640 if (do_g) 5641 { 5642 if (g_notempty == 0 && use_offsets[1] <= start_offset) 5643 { 5644 if (start_offset >= len) break; /* End of subject */ 5645 start_offset++; 5646 if (use_utf) 5647 { 5648 while (start_offset < len) 5649 { 5650 if ((bptr[start_offset] & 0xc0) != 0x80) break; 5651 start_offset++; 5652 } 5653 } 5654 } 5655 else start_offset = use_offsets[1]; 5656 } 5657 5658 /* For /G, update the pointer and length */ 5659 5660 else 5661 { 5662 bptr += use_offsets[1] * CHAR_SIZE; 5663 len -= use_offsets[1]; 5664 } 5665 } /* End of loop for /g and /G */ 5666 5667 NEXT_DATA: continue; 5668 } /* End of loop for data lines */ 5669 5670 CONTINUE: 5671 5672 #if !defined NOPOSIX 5673 if ((posix || do_posix) && preg.re_pcre != 0) regfree(&preg); 5674 #endif 5675 5676 if (re != NULL) new_free(re); 5677 if (extra != NULL) 5678 { 5679 PCRE_FREE_STUDY(extra); 5680 } 5681 if (locale_set) 5682 { 5683 new_free((void *)tables); 5684 setlocale(LC_CTYPE, "C"); 5685 locale_set = 0; 5686 } 5687 if (jit_stack != NULL) 5688 { 5689 PCRE_JIT_STACK_FREE(jit_stack); 5690 jit_stack = NULL; 5691 } 5692 } 5693 5694 if (infile == stdin) fprintf(outfile, "\n"); 5695 5696 if (showtotaltimes) 5697 { 5698 fprintf(outfile, "--------------------------------------\n"); 5699 if (timeit > 0) 5700 { 5701 fprintf(outfile, "Total compile time %.4f milliseconds\n", 5702 (((double)total_compile_time * 1000.0) / (double)timeit) / 5703 (double)CLOCKS_PER_SEC); 5704 fprintf(outfile, "Total study time %.4f milliseconds\n", 5705 (((double)total_study_time * 1000.0) / (double)timeit) / 5706 (double)CLOCKS_PER_SEC); 5707 } 5708 fprintf(outfile, "Total execute time %.4f milliseconds\n", 5709 (((double)total_match_time * 1000.0) / (double)timeitm) / 5710 (double)CLOCKS_PER_SEC); 5711 } 5712 5713 EXIT: 5714 5715 if (infile != 
NULL && infile != stdin) fclose(infile); 5716 if (outfile != NULL && outfile != stdout) fclose(outfile); 5717 5718 free(buffer); 5719 free(dbuffer); 5720 free(pbuffer); 5721 free(offsets); 5722 5723 #ifdef SUPPORT_PCRE16 5724 if (buffer16 != NULL) free(buffer16); 5725 #endif 5726 #ifdef SUPPORT_PCRE32 5727 if (buffer32 != NULL) free(buffer32); 5728 #endif 5729 5730 #if !defined NODFA 5731 if (dfa_workspace != NULL) 5732 free(dfa_workspace); 5733 #endif 5734 5735 #if defined(__VMS) 5736 yield = SS$_NORMAL; /* Return values via DCL symbols */ 5737 #endif 5738 5739 return yield; 5740 } 5741 5742 /* End of pcretest.c */ 5743