1 /************************************************* 2 * pcre2grep program * 3 *************************************************/ 4 5 /* This is a grep program that uses the 8-bit PCRE regular expression library 6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows, 7 and native z/OS systems it can recurse into directories, and in z/OS it can 8 handle PDS files. 9 10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an 11 additional header is required. That header is not included in the main PCRE2 12 distribution because other apparatus is needed to compile pcre2grep for z/OS. 13 The header can be found in the special z/OS distribution, which is available 14 from www.zaconsultants.net or from www.cbttape.org. 15 16 Copyright (c) 1997-2018 University of Cambridge 17 18 ----------------------------------------------------------------------------- 19 Redistribution and use in source and binary forms, with or without 20 modification, are permitted provided that the following conditions are met: 21 22 * Redistributions of source code must retain the above copyright notice, 23 this list of conditions and the following disclaimer. 24 25 * Redistributions in binary form must reproduce the above copyright 26 notice, this list of conditions and the following disclaimer in the 27 documentation and/or other materials provided with the distribution. 28 29 * Neither the name of the University of Cambridge nor the names of its 30 contributors may be used to endorse or promote products derived from 31 this software without specific prior written permission. 32 33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 36 ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 43 POSSIBILITY OF SUCH DAMAGE. 44 ----------------------------------------------------------------------------- 45 */ 46 47 #ifdef HAVE_CONFIG_H 48 #include "config.h" 49 #endif 50 51 #include <ctype.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <string.h> 55 #include <stdlib.h> 56 #include <errno.h> 57 58 #include <sys/types.h> 59 #include <sys/stat.h> 60 61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \ 62 && !defined WIN32 && !defined(__CYGWIN__) 63 #define WIN32 64 #endif 65 66 /* Some cmake's define it still */ 67 #if defined(__CYGWIN__) && defined(WIN32) 68 #undef WIN32 69 #endif 70 71 #ifdef WIN32 72 #include <io.h> /* For _setmode() */ 73 #include <fcntl.h> /* For _O_BINARY */ 74 #endif 75 76 #ifdef SUPPORT_PCRE2GREP_CALLOUT 77 #ifdef WIN32 78 #include <process.h> 79 #else 80 #include <sys/wait.h> 81 #endif 82 #endif 83 84 #ifdef HAVE_UNISTD_H 85 #include <unistd.h> 86 #endif 87 88 #ifdef SUPPORT_LIBZ 89 #include <zlib.h> 90 #endif 91 92 #ifdef SUPPORT_LIBBZ2 93 #include <bzlib.h> 94 #endif 95 96 #define PCRE2_CODE_UNIT_WIDTH 8 97 #include "pcre2.h" 98 99 /* Older versions of MSVC lack snprintf(). This define allows for 100 warning/error-free compilation and testing with MSVC compilers back to at least 101 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). 
/* Older versions of MSVC lack snprintf(). This define allows for
warning/error-free compilation and testing with MSVC compilers back to at least
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails).
NOTE(review): _snprintf() is not a drop-in replacement with respect to NUL
termination on truncation - confirm that no caller relies on it. */

#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define snprintf _snprintf
#endif

/* Home-grown Boolean type used throughout this file. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

#define OFFSET_SIZE 33

/* MAXPATLEN is the larger of BUFSIZ and 8192; add_pattern() rejects any
pattern longer than this. */

#if BUFSIZ > 8192
#define MAXPATLEN BUFSIZ
#else
#define MAXPATLEN 8192
#endif

#define FNBUFSIZ 2048
#define ERRBUFSIZ 256

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Binary file options */

enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };

/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

Note that the macro expands to a complete "if" statement with an empty body, so
it must be used as a statement of its own (followed by a semicolon) and never
directly in front of an "else". */

#define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
/* Under Windows, we have to set stdout to be binary, so that it does not
convert \r\n at the ends of output lines to \r\r\n. However, that means that
any messages written to stdout must have \r\n as their line terminator. This is
handled by using STDOUT_NL as the newline string. We also use a normal double
quote for the example, as single quotes aren't usually available. */

#ifdef WIN32
#define STDOUT_NL "\r\n"
#define QUOT "\""
#else
#define STDOUT_NL "\n"
#define QUOT "'"
#endif



/*************************************************
*               Global variables                 *
*************************************************/

/* Jeffrey Friedl has some debugging requirements that are not part of the
regular code. */

#ifdef JFRIEDL_DEBUG
static int S_arg = -1;
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
static unsigned int jfriedl_XT = 0; /* replicate text this many times */
static const char *jfriedl_prefix = "";
static const char *jfriedl_postfix = "";
#endif

/* String-valued settings. Those that appear as data pointers in the option
table further down are filled in from the command line. */

static const char *colour_string = "1;31";
static const char *colour_option = NULL;
static const char *dee_option = NULL;
static const char *DEE_option = NULL;
static const char *locale = NULL;
static const char *newline_arg = NULL;
static const char *om_separator = NULL;
static const char *stdin_name = "(standard input)";
static const char *output_text = NULL;

static char *main_buffer = NULL;

/* Context line counts and buffer sizes. The initial value of bufsize is three
times the initial value of bufthird. */

static int after_context = 0;
static int before_context = 0;
static int binary_files = BIN_BINARY;
static int both_context = 0;
static int bufthird = PCRE2GREP_BUFSIZE;
static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
static int bufsize = 3*PCRE2GREP_BUFSIZE;
static int endlinetype;

static unsigned long int total_count = 0;
static unsigned long int counts_printed = 0;

/* The default action for directories differs between Windows and elsewhere. */

#ifdef WIN32
static int dee_action = dee_SKIP;
#else
static int dee_action = dee_READ;
#endif

static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;

/* JIT is on by default only when support for it has been compiled in. */

#ifdef SUPPORT_PCRE2GREP_JIT
static BOOL use_jit = TRUE;
#else
static BOOL use_jit = FALSE;
#endif

static const uint8_t *character_tables = NULL;

/* PCRE2 options and resource limits */

static uint32_t pcre2_options = 0;
static uint32_t extra_options = 0;
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
static uint32_t match_limit = 0;
static uint32_t depth_limit = 0;

/* PCRE2 contexts and match data */

static pcre2_compile_context *compile_context;
static pcre2_match_context *match_context;
static pcre2_match_data *match_data;
static PCRE2_SIZE *offsets;

/* Boolean flags */

static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
#ifdef WIN32
static BOOL do_ansi = FALSE;
#endif
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE;
static BOOL invert = FALSE;
static BOOL line_buffered = FALSE;
static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
static BOOL resource_error = FALSE;
static BOOL quiet = FALSE;
static BOOL show_total_count = FALSE;
static BOOL silent = FALSE;
static BOOL utf = FALSE;

/* Structure for list of --only-matching capturing numbers. */

typedef struct omstr {
  struct omstr *next;
  int groupnum;
} omstr;

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;
static int only_matching_count;
/* Structure for holding the two variables that describe a number chain: the
address of the head pointer and the address of the tail pointer. */

typedef struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
} omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };

/* Structure for list of file names (for -f and --{in,ex}clude-from) */

typedef struct fnstr {
  struct fnstr *next;
  char *name;
} fnstr;

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* Structure for holding the two variables that describe a file name chain. */

typedef struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
} fndatastr;

static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data = { &include_from, &include_from_last };
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };

/* Structure for pattern and its compiled form; used for matching patterns and
also for include/exclude patterns. */

typedef struct patstr {
  struct patstr *next;
  char *string;
  PCRE2_SIZE length;
  pcre2_code *compiled;
} patstr;

static patstr *patterns = NULL;
static patstr *patterns_last = NULL;
static patstr *include_patterns = NULL;
static patstr *include_patterns_last = NULL;
static patstr *exclude_patterns = NULL;
static patstr *exclude_patterns_last = NULL;
static patstr *include_dir_patterns = NULL;
static patstr *include_dir_patterns_last = NULL;
static patstr *exclude_dir_patterns = NULL;
static patstr *exclude_dir_patterns_last = NULL;

/* Structure holding the two variables that describe a pattern chain. A pointer
to such structures is used for each appropriate option. */

typedef struct patdatastr {
  patstr **anchor;
  patstr **lastptr;
} patdatastr;

static patdatastr match_patdata = { &patterns, &patterns_last };
static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };

/* The four include/exclude chains and their option names, in matching order. */

static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
                                 &include_dir_patterns, &exclude_dir_patterns };

static const char *incexname[4] = { "--include", "--exclude",
                                    "--include-dir", "--exclude-dir" };

/* Structure for options and list of them */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
       OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };

typedef struct option_item {
  int type;               /* one of the OP_xxx values above */
  int one_char;           /* option letter, or a negative N_xxx code */
  void *dataptr;          /* where the option's value lives, or NULL */
  const char *long_name;  /* long option name */
  const char *help_text;  /* description of the option */
} option_item;
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_H_LIMIT      (-13)
#define N_M_LIMIT      (-14)
#define N_M_LIMIT_DEP  (-15)
#define N_BUFSIZE      (-16)
#define N_NOJIT        (-17)
#define N_FILE_LIST    (-18)
#define N_BINARY_FILES (-19)
#define N_EXCLUDE_FROM (-20)
#define N_INCLUDE_FROM (-21)
#define N_OM_SEPARATOR (-22)
#define N_MAX_BUFSIZE  (-23)

/* The option table. Each entry gives the data type, the single-letter form (or
a negative N_xxx code), the variable that receives the value, the long name,
and the help text. The table is terminated by an entry whose one_char field is
zero; usage() relies on that terminator when it walks the table. */

static option_item optionlist[] = {
  { OP_NODATA, N_NULL, NULL, "", "terminate options" },
  { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
  { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
  { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
  { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
  { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
  { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
  { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
  { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
  { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
  { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
  { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
  { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
  { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
  { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
  { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
  { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
  { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
  { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
  { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
  { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
  { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
  { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
  { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
  { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
  { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
  { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
  { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
  { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
  { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
  { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
  { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
  { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
  { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
#ifdef SUPPORT_PCRE2GREP_JIT
  { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
#else
  { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
#endif
  { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
  { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
  { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
  { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
  { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
  { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
  { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
  { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
  { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
  { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
  { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
#ifdef JFRIEDL_DEBUG
  { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
#endif
  { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
  { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
  { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
  { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
  { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
  { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
  { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA, 0, NULL, NULL, NULL }
};

/* Table of names for newline types. Must be kept in step with the definitions
of PCRE2_NEWLINE_xx in pcre2.h. */

static const char *newlines[] = {
  "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };

/* UTF-8 tables - used only when the newline setting is "any". */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* Small portability helpers: a memmove() emulation for systems that lack the
real thing, and a case-independent string comparison. */

#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
*    Emulated memmove() for systems without it   *
*************************************************/

/* This function can make use of bcopy() if it is available. Otherwise do it by
steam, as there are some non-Unix environments that lack both memmove() and
bcopy().

Arguments:
  d         destination
  s         source
  n         number of bytes to copy; the two areas may overlap

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
size_t i;
unsigned char *dest = (unsigned char *)d;
const unsigned char *src = (const unsigned char *)s;
if (dest > src)
  {
  /* The destination is above the source: copy from the top down so that an
  overlapping tail is read before it is overwritten. */
  dest += n;
  src += n;
  for (i = 0; i < n; ++i) *(--dest) = *(--src);
  return (void *)dest;        /* n decrements have brought this back to d */
  }
else
  {
  for (i = 0; i < n; ++i) *dest++ = *src++;
  return (void *)(dest - n);
  }
#endif   /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */


/*************************************************
*      Case-independent string compare           *
*************************************************/

/* Compares two zero-terminated strings, ignoring case as defined by tolower()
in the current locale.

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal
            -1 or +1 according to the first differing character (a string that
               is a proper prefix of the other sorts first)
*/

static int
strcmpic(const char *str1, const char *str2)
{
while (*str1 != '\0' || *str2 != '\0')
  {
  /* The <ctype.h> functions require a value that is representable as an
  unsigned char (or EOF). Passing a plain char is undefined for bytes with the
  top bit set on platforms where char is signed, hence the casts. */

  int c1 = tolower((unsigned char)(*str1++));
  int c2 = tolower((unsigned char)(*str2++));
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
533 534 Argument: the string, possibly NULL 535 Returns: the value of ms or mt, or NULL if neither present 536 */ 537 538 static char * 539 parse_grep_colors(const char *gc) 540 { 541 static char seq[16]; 542 char *col; 543 uint32_t len; 544 if (gc == NULL) return NULL; 545 col = strstr(gc, "ms="); 546 if (col == NULL) col = strstr(gc, "mt="); 547 if (col == NULL) return NULL; 548 len = 0; 549 col += 3; 550 while (*col != ':' && *col != 0 && len < sizeof(seq)-1) 551 seq[len++] = *col++; 552 seq[len] = 0; 553 return seq; 554 } 555 556 557 /************************************************* 558 * Exit from the program * 559 *************************************************/ 560 561 /* If there has been a resource error, give a suitable message. 562 563 Argument: the return code 564 Returns: does not return 565 */ 566 567 static void 568 pcre2grep_exit(int rc) 569 { 570 /* VMS does exit codes differently: both exit(1) and exit(0) return with a 571 status of 1, which is not helpful. To help with this problem, define a symbol 572 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code 573 therein. 
*/ 574 575 #ifdef __VMS 576 #include descrip 577 #include lib$routines 578 char val_buf[4]; 579 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC"); 580 $DESCRIPTOR(sym_val, val_buf); 581 sprintf(val_buf, "%d", rc); 582 sym_val.dsc$w_length = strlen(val_buf); 583 lib$set_symbol(&sym_nam, &sym_val); 584 #endif 585 586 if (resource_error) 587 { 588 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource " 589 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, 590 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT); 591 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n"); 592 } 593 exit(rc); 594 } 595 596 597 /************************************************* 598 * Add item to chain of patterns * 599 *************************************************/ 600 601 /* Used to add an item onto a chain, or just return an unconnected item if the 602 "after" argument is NULL. 603 604 Arguments: 605 s pattern string to add 606 patlen length of pattern 607 after if not NULL points to item to insert after 608 609 Returns: new pattern block or NULL on error 610 */ 611 612 static patstr * 613 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after) 614 { 615 patstr *p = (patstr *)malloc(sizeof(patstr)); 616 if (p == NULL) 617 { 618 fprintf(stderr, "pcre2grep: malloc failed\n"); 619 pcre2grep_exit(2); 620 } 621 if (patlen > MAXPATLEN) 622 { 623 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n", 624 MAXPATLEN); 625 free(p); 626 return NULL; 627 } 628 p->next = NULL; 629 p->string = s; 630 p->length = patlen; 631 p->compiled = NULL; 632 633 if (after != NULL) 634 { 635 p->next = after->next; 636 after->next = p; 637 } 638 return p; 639 } 640 641 642 /************************************************* 643 * Free chain of patterns * 644 *************************************************/ 645 646 /* Used for several chains of patterns. 
647 648 Argument: pointer to start of chain 649 Returns: nothing 650 */ 651 652 static void 653 free_pattern_chain(patstr *pc) 654 { 655 while (pc != NULL) 656 { 657 patstr *p = pc; 658 pc = p->next; 659 if (p->compiled != NULL) pcre2_code_free(p->compiled); 660 free(p); 661 } 662 } 663 664 665 /************************************************* 666 * Free chain of file names * 667 *************************************************/ 668 669 /* 670 Argument: pointer to start of chain 671 Returns: nothing 672 */ 673 674 static void 675 free_file_chain(fnstr *fn) 676 { 677 while (fn != NULL) 678 { 679 fnstr *f = fn; 680 fn = f->next; 681 free(f); 682 } 683 } 684 685 686 /************************************************* 687 * OS-specific functions * 688 *************************************************/ 689 690 /* These definitions are needed in all Windows environments, even those where 691 Unix-style directory scanning can be used (see below). */ 692 693 #ifdef WIN32 694 695 #ifndef STRICT 696 # define STRICT 697 #endif 698 #ifndef WIN32_LEAN_AND_MEAN 699 # define WIN32_LEAN_AND_MEAN 700 #endif 701 702 #include <windows.h> 703 704 #define iswild(name) (strpbrk(name, "*?") != NULL) 705 706 /* Convert ANSI BGR format to RGB used by Windows */ 707 #define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 
1 : 0)) 708 709 static HANDLE hstdout; 710 static CONSOLE_SCREEN_BUFFER_INFO csbi; 711 static WORD match_colour; 712 713 static WORD 714 decode_ANSI_colour(const char *cs) 715 { 716 WORD result = csbi.wAttributes; 717 while (*cs) 718 { 719 if (isdigit(*cs)) 720 { 721 int code = atoi(cs); 722 if (code == 1) result |= 0x08; 723 else if (code == 4) result |= 0x8000; 724 else if (code == 5) result |= 0x80; 725 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30); 726 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F); 727 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4); 728 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0); 729 /* aixterm high intensity colour codes */ 730 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08; 731 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80; 732 733 while (isdigit(*cs)) cs++; 734 } 735 if (*cs) cs++; 736 } 737 return result; 738 } 739 740 741 static void 742 init_colour_output() 743 { 744 if (do_colour) 745 { 746 hstdout = GetStdHandle(STD_OUTPUT_HANDLE); 747 /* This fails when redirected to con; try again if so. */ 748 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi) 749 { 750 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE, 751 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL); 752 GetConsoleScreenBufferInfo(hcon, &csbi); 753 CloseHandle(hcon); 754 } 755 match_colour = decode_ANSI_colour(colour_string); 756 /* No valid colour found - turn off colouring */ 757 if (!match_colour) do_colour = FALSE; 758 } 759 } 760 761 #endif /* WIN32 */ 762 763 764 /* The following sets of functions are defined so that they can be made system 765 specific. At present there are versions for Unix-style environments, Windows, 766 native z/OS, and "no support". 
*/ 767 768 769 /************* Directory scanning Unix-style and z/OS ***********/ 770 771 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS 772 #include <sys/types.h> 773 #include <sys/stat.h> 774 #include <dirent.h> 775 776 #if defined NATIVE_ZOS 777 /************* Directory and PDS/E scanning for z/OS ***********/ 778 /************* z/OS looks mostly like Unix with USS ************/ 779 /* However, z/OS needs the #include statements in this header */ 780 #include "pcrzosfs.h" 781 /* That header is not included in the main PCRE distribution because 782 other apparatus is needed to compile pcre2grep for z/OS. The header 783 can be found in the special z/OS distribution, which is available 784 from www.zaconsultants.net or from www.cbttape.org. */ 785 #endif 786 787 typedef DIR directory_type; 788 #define FILESEP '/' 789 790 static int 791 isdirectory(char *filename) 792 { 793 struct stat statbuf; 794 if (stat(filename, &statbuf) < 0) 795 return 0; /* In the expectation that opening as a file will fail */ 796 return S_ISDIR(statbuf.st_mode); 797 } 798 799 static directory_type * 800 opendirectory(char *filename) 801 { 802 return opendir(filename); 803 } 804 805 static char * 806 readdirectory(directory_type *dir) 807 { 808 for (;;) 809 { 810 struct dirent *dent = readdir(dir); 811 if (dent == NULL) return NULL; 812 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) 813 return dent->d_name; 814 } 815 /* Control never reaches here */ 816 } 817 818 static void 819 closedirectory(directory_type *dir) 820 { 821 closedir(dir); 822 } 823 824 825 /************* Test for regular file, Unix-style **********/ 826 827 static int 828 isregfile(char *filename) 829 { 830 struct stat statbuf; 831 if (stat(filename, &statbuf) < 0) 832 return 1; /* In the expectation that opening as a file will fail */ 833 return S_ISREG(statbuf.st_mode); 834 } 835 836 837 #if defined NATIVE_ZOS 838 /************* Test for a 
terminal in z/OS **********/ 839 /* isatty() does not work in a TSO environment, so always give FALSE.*/ 840 841 static BOOL 842 is_stdout_tty(void) 843 { 844 return FALSE; 845 } 846 847 static BOOL 848 is_file_tty(FILE *f) 849 { 850 return FALSE; 851 } 852 853 854 /************* Test for a terminal, Unix-style **********/ 855 856 #else 857 static BOOL 858 is_stdout_tty(void) 859 { 860 return isatty(fileno(stdout)); 861 } 862 863 static BOOL 864 is_file_tty(FILE *f) 865 { 866 return isatty(fileno(f)); 867 } 868 #endif 869 870 871 /************* Print optionally coloured match Unix-style and z/OS **********/ 872 873 static void 874 print_match(const void *buf, int length) 875 { 876 if (length == 0) return; 877 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); 878 FWRITE_IGNORE(buf, 1, length, stdout); 879 if (do_colour) fprintf(stdout, "%c[0m", 0x1b); 880 } 881 882 /* End of Unix-style or native z/OS environment functions. */ 883 884 885 /************* Directory scanning in Windows ***********/ 886 887 /* I (Philip Hazel) have no means of testing this code. It was contributed by 888 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES 889 when it did not exist. David Byron added a patch that moved the #include of 890 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. 
891 */ 892 893 #elif defined WIN32 894 895 #ifndef INVALID_FILE_ATTRIBUTES 896 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF 897 #endif 898 899 typedef struct directory_type 900 { 901 HANDLE handle; 902 BOOL first; 903 WIN32_FIND_DATA data; 904 } directory_type; 905 906 #define FILESEP '/' 907 908 int 909 isdirectory(char *filename) 910 { 911 DWORD attr = GetFileAttributes(filename); 912 if (attr == INVALID_FILE_ATTRIBUTES) 913 return 0; 914 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0; 915 } 916 917 directory_type * 918 opendirectory(char *filename) 919 { 920 size_t len; 921 char *pattern; 922 directory_type *dir; 923 DWORD err; 924 len = strlen(filename); 925 pattern = (char *)malloc(len + 3); 926 dir = (directory_type *)malloc(sizeof(*dir)); 927 if ((pattern == NULL) || (dir == NULL)) 928 { 929 fprintf(stderr, "pcre2grep: malloc failed\n"); 930 pcre2grep_exit(2); 931 } 932 memcpy(pattern, filename, len); 933 if (iswild(filename)) 934 pattern[len] = 0; 935 else 936 memcpy(&(pattern[len]), "\\*", 3); 937 dir->handle = FindFirstFile(pattern, &(dir->data)); 938 if (dir->handle != INVALID_HANDLE_VALUE) 939 { 940 free(pattern); 941 dir->first = TRUE; 942 return dir; 943 } 944 err = GetLastError(); 945 free(pattern); 946 free(dir); 947 errno = (err == ERROR_ACCESS_DENIED) ? 
EACCES : ENOENT; 948 return NULL; 949 } 950 951 char * 952 readdirectory(directory_type *dir) 953 { 954 for (;;) 955 { 956 if (!dir->first) 957 { 958 if (!FindNextFile(dir->handle, &(dir->data))) 959 return NULL; 960 } 961 else 962 { 963 dir->first = FALSE; 964 } 965 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) 966 return dir->data.cFileName; 967 } 968 #ifndef _MSC_VER 969 return NULL; /* Keep compiler happy; never executed */ 970 #endif 971 } 972 973 void 974 closedirectory(directory_type *dir) 975 { 976 FindClose(dir->handle); 977 free(dir); 978 } 979 980 981 /************* Test for regular file in Windows **********/ 982 983 /* I don't know how to do this, or if it can be done; assume all paths are 984 regular if they are not directories. */ 985 986 int isregfile(char *filename) 987 { 988 return !isdirectory(filename); 989 } 990 991 992 /************* Test for a terminal in Windows **********/ 993 994 static BOOL 995 is_stdout_tty(void) 996 { 997 return _isatty(_fileno(stdout)); 998 } 999 1000 static BOOL 1001 is_file_tty(FILE *f) 1002 { 1003 return _isatty(_fileno(f)); 1004 } 1005 1006 1007 /************* Print optionally coloured match in Windows **********/ 1008 1009 static void 1010 print_match(const void *buf, int length) 1011 { 1012 if (length == 0) return; 1013 if (do_colour) 1014 { 1015 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1016 else SetConsoleTextAttribute(hstdout, match_colour); 1017 } 1018 FWRITE_IGNORE(buf, 1, length, stdout); 1019 if (do_colour) 1020 { 1021 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b); 1022 else SetConsoleTextAttribute(hstdout, csbi.wAttributes); 1023 } 1024 } 1025 1026 /* End of Windows functions */ 1027 1028 1029 /************* Directory scanning when we can't do it ***********/ 1030 1031 /* The type is void, and apart from isdirectory(), the functions do nothing. 
*/ 1032 1033 #else 1034 1035 #define FILESEP 0 1036 typedef void directory_type; 1037 1038 int isdirectory(char *filename) { return 0; } 1039 directory_type * opendirectory(char *filename) { return (directory_type*)0;} 1040 char *readdirectory(directory_type *dir) { return (char*)0;} 1041 void closedirectory(directory_type *dir) {} 1042 1043 1044 /************* Test for regular file when we can't do it **********/ 1045 1046 /* Assume all files are regular. */ 1047 1048 int isregfile(char *filename) { return 1; } 1049 1050 1051 /************* Test for a terminal when we can't do it **********/ 1052 1053 static BOOL 1054 is_stdout_tty(void) 1055 { 1056 return FALSE; 1057 } 1058 1059 static BOOL 1060 is_file_tty(FILE *f) 1061 { 1062 return FALSE; 1063 } 1064 1065 1066 /************* Print optionally coloured match when we can't do it **********/ 1067 1068 static void 1069 print_match(const void *buf, int length) 1070 { 1071 if (length == 0) return; 1072 FWRITE_IGNORE(buf, 1, length, stdout); 1073 } 1074 1075 #endif /* End of system-specific functions */ 1076 1077 1078 1079 #ifndef HAVE_STRERROR 1080 /************************************************* 1081 * Provide strerror() for non-ANSI libraries * 1082 *************************************************/ 1083 1084 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 1085 in their libraries, but can provide the same facility by this simple 1086 alternative function. 
*/ 1087 1088 extern int sys_nerr; 1089 extern char *sys_errlist[]; 1090 1091 char * 1092 strerror(int n) 1093 { 1094 if (n < 0 || n >= sys_nerr) return "unknown error number"; 1095 return sys_errlist[n]; 1096 } 1097 #endif /* HAVE_STRERROR */ 1098 1099 1100 1101 /************************************************* 1102 * Usage function * 1103 *************************************************/ 1104 1105 static int 1106 usage(int rc) 1107 { 1108 option_item *op; 1109 fprintf(stderr, "Usage: pcre2grep [-"); 1110 for (op = optionlist; op->one_char != 0; op++) 1111 { 1112 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); 1113 } 1114 fprintf(stderr, "] [long options] [pattern] [files]\n"); 1115 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long " 1116 "options.\n"); 1117 return rc; 1118 } 1119 1120 1121 1122 /************************************************* 1123 * Help function * 1124 *************************************************/ 1125 1126 static void 1127 help(void) 1128 { 1129 option_item *op; 1130 1131 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL); 1132 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL); 1133 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL); 1134 1135 #ifdef SUPPORT_PCRE2GREP_CALLOUT 1136 printf("Callout scripts in patterns are supported." STDOUT_NL); 1137 #else 1138 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL); 1139 #endif 1140 1141 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL); 1142 1143 #ifdef SUPPORT_LIBZ 1144 printf("Files whose names end in .gz are read using zlib." STDOUT_NL); 1145 #endif 1146 1147 #ifdef SUPPORT_LIBBZ2 1148 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL); 1149 #endif 1150 1151 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 1152 printf("Other files and the standard input are read as plain files." 
STDOUT_NL STDOUT_NL); 1153 #else 1154 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL); 1155 #endif 1156 1157 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL); 1158 printf("Options:" STDOUT_NL); 1159 1160 for (op = optionlist; op->one_char != 0; op++) 1161 { 1162 int n; 1163 char s[4]; 1164 1165 if (op->one_char > 0 && (op->long_name)[0] == 0) 1166 n = 31 - printf(" -%c", op->one_char); 1167 else 1168 { 1169 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); 1170 else strcpy(s, " "); 1171 n = 31 - printf(" %s --%s", s, op->long_name); 1172 } 1173 1174 if (n < 1) n = 1; 1175 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text); 1176 } 1177 1178 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL); 1179 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE); 1180 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE); 1181 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL); 1182 printf("space is removed and blank lines are ignored." STDOUT_NL); 1183 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN); 1184 1185 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL); 1186 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL); 1187 } 1188 1189 1190 1191 /************************************************* 1192 * Test exclude/includes * 1193 *************************************************/ 1194 1195 /* If any exclude pattern matches, the path is excluded. Otherwise, unless 1196 there are no includes, the path must match an include pattern. 
1197 1198 Arguments: 1199 path the path to be matched 1200 ip the chain of include patterns 1201 ep the chain of exclude patterns 1202 1203 Returns: TRUE if the path is not excluded 1204 */ 1205 1206 static BOOL 1207 test_incexc(char *path, patstr *ip, patstr *ep) 1208 { 1209 int plen = strlen((const char *)path); 1210 1211 for (; ep != NULL; ep = ep->next) 1212 { 1213 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) 1214 return FALSE; 1215 } 1216 1217 if (ip == NULL) return TRUE; 1218 1219 for (; ip != NULL; ip = ip->next) 1220 { 1221 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) 1222 return TRUE; 1223 } 1224 1225 return FALSE; 1226 } 1227 1228 1229 1230 /************************************************* 1231 * Decode integer argument value * 1232 *************************************************/ 1233 1234 /* Integer arguments can be followed by K or M. Avoid the use of strtoul() 1235 because SunOS4 doesn't have it. This is used only for unpicking arguments, so 1236 just keep it simple. 1237 1238 Arguments: 1239 option_data the option data string 1240 op the option item (for error messages) 1241 longop TRUE if option given in long form 1242 1243 Returns: a long integer 1244 */ 1245 1246 static long int 1247 decode_number(char *option_data, option_item *op, BOOL longop) 1248 { 1249 unsigned long int n = 0; 1250 char *endptr = option_data; 1251 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++; 1252 while (isdigit((unsigned char)(*endptr))) 1253 n = n * 10 + (int)(*endptr++ - '0'); 1254 if (toupper(*endptr) == 'K') 1255 { 1256 n *= 1024; 1257 endptr++; 1258 } 1259 else if (toupper(*endptr) == 'M') 1260 { 1261 n *= 1024*1024; 1262 endptr++; 1263 } 1264 1265 if (*endptr != 0) /* Error */ 1266 { 1267 if (longop) 1268 { 1269 char *equals = strchr(op->long_name, '='); 1270 int nlen = (equals == NULL)? 
(int)strlen(op->long_name) : 1271 (int)(equals - op->long_name); 1272 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n", 1273 option_data, nlen, op->long_name); 1274 } 1275 else 1276 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n", 1277 option_data, op->one_char); 1278 pcre2grep_exit(usage(2)); 1279 } 1280 1281 return n; 1282 } 1283 1284 1285 1286 /************************************************* 1287 * Add item to a chain of numbers * 1288 *************************************************/ 1289 1290 /* Used to add an item onto a chain, or just return an unconnected item if the 1291 "after" argument is NULL. 1292 1293 Arguments: 1294 n the number to add 1295 after if not NULL points to item to insert after 1296 1297 Returns: new number block 1298 */ 1299 1300 static omstr * 1301 add_number(int n, omstr *after) 1302 { 1303 omstr *om = (omstr *)malloc(sizeof(omstr)); 1304 1305 if (om == NULL) 1306 { 1307 fprintf(stderr, "pcre2grep: malloc failed\n"); 1308 pcre2grep_exit(2); 1309 } 1310 om->next = NULL; 1311 om->groupnum = n; 1312 1313 if (after != NULL) 1314 { 1315 om->next = after->next; 1316 after->next = om; 1317 } 1318 return om; 1319 } 1320 1321 1322 1323 /************************************************* 1324 * Read one line of input * 1325 *************************************************/ 1326 1327 /* Normally, input that is to be scanned is read using fread() (or gzread, or 1328 BZ2_read) into a large buffer, so many lines may be read at once. However, 1329 doing this for tty input means that no output appears until a lot of input has 1330 been typed. Instead, tty input is handled line by line. We cannot use fgets() 1331 for this, because it does not stop at a binary zero, and therefore there is no 1332 way of telling how many characters it has read, because there may be binary 1333 zeros embedded in the data. This function is also used for reading patterns 1334 from files (the -f option). 
1335 1336 Arguments: 1337 buffer the buffer to read into 1338 length the maximum number of characters to read 1339 f the file 1340 1341 Returns: the number of characters read, zero at end of file 1342 */ 1343 1344 static PCRE2_SIZE 1345 read_one_line(char *buffer, int length, FILE *f) 1346 { 1347 int c; 1348 int yield = 0; 1349 while ((c = fgetc(f)) != EOF) 1350 { 1351 buffer[yield++] = c; 1352 if (c == '\n' || yield >= length) break; 1353 } 1354 return yield; 1355 } 1356 1357 1358 1359 /************************************************* 1360 * Find end of line * 1361 *************************************************/ 1362 1363 /* The length of the endline sequence that is found is set via lenptr. This may 1364 be zero at the very end of the file if there is no line-ending sequence there. 1365 1366 Arguments: 1367 p current position in line 1368 endptr end of available data 1369 lenptr where to put the length of the eol sequence 1370 1371 Returns: pointer after the last byte of the line, 1372 including the newline byte(s) 1373 */ 1374 1375 static char * 1376 end_of_line(char *p, char *endptr, int *lenptr) 1377 { 1378 switch(endlinetype) 1379 { 1380 default: /* Just in case */ 1381 case PCRE2_NEWLINE_LF: 1382 while (p < endptr && *p != '\n') p++; 1383 if (p < endptr) 1384 { 1385 *lenptr = 1; 1386 return p + 1; 1387 } 1388 *lenptr = 0; 1389 return endptr; 1390 1391 case PCRE2_NEWLINE_CR: 1392 while (p < endptr && *p != '\r') p++; 1393 if (p < endptr) 1394 { 1395 *lenptr = 1; 1396 return p + 1; 1397 } 1398 *lenptr = 0; 1399 return endptr; 1400 1401 case PCRE2_NEWLINE_NUL: 1402 while (p < endptr && *p != '\0') p++; 1403 if (p < endptr) 1404 { 1405 *lenptr = 1; 1406 return p + 1; 1407 } 1408 *lenptr = 0; 1409 return endptr; 1410 1411 case PCRE2_NEWLINE_CRLF: 1412 for (;;) 1413 { 1414 while (p < endptr && *p != '\r') p++; 1415 if (++p >= endptr) 1416 { 1417 *lenptr = 0; 1418 return endptr; 1419 } 1420 if (*p == '\n') 1421 { 1422 *lenptr = 2; 1423 return p + 1; 1424 } 1425 
} 1426 break; 1427 1428 case PCRE2_NEWLINE_ANYCRLF: 1429 while (p < endptr) 1430 { 1431 int extra = 0; 1432 int c = *((unsigned char *)p); 1433 1434 if (utf && c >= 0xc0) 1435 { 1436 int gcii, gcss; 1437 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 1438 gcss = 6*extra; 1439 c = (c & utf8_table3[extra]) << gcss; 1440 for (gcii = 1; gcii <= extra; gcii++) 1441 { 1442 gcss -= 6; 1443 c |= (p[gcii] & 0x3f) << gcss; 1444 } 1445 } 1446 1447 p += 1 + extra; 1448 1449 switch (c) 1450 { 1451 case '\n': 1452 *lenptr = 1; 1453 return p; 1454 1455 case '\r': 1456 if (p < endptr && *p == '\n') 1457 { 1458 *lenptr = 2; 1459 p++; 1460 } 1461 else *lenptr = 1; 1462 return p; 1463 1464 default: 1465 break; 1466 } 1467 } /* End of loop for ANYCRLF case */ 1468 1469 *lenptr = 0; /* Must have hit the end */ 1470 return endptr; 1471 1472 case PCRE2_NEWLINE_ANY: 1473 while (p < endptr) 1474 { 1475 int extra = 0; 1476 int c = *((unsigned char *)p); 1477 1478 if (utf && c >= 0xc0) 1479 { 1480 int gcii, gcss; 1481 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 1482 gcss = 6*extra; 1483 c = (c & utf8_table3[extra]) << gcss; 1484 for (gcii = 1; gcii <= extra; gcii++) 1485 { 1486 gcss -= 6; 1487 c |= (p[gcii] & 0x3f) << gcss; 1488 } 1489 } 1490 1491 p += 1 + extra; 1492 1493 switch (c) 1494 { 1495 case '\n': /* LF */ 1496 case '\v': /* VT */ 1497 case '\f': /* FF */ 1498 *lenptr = 1; 1499 return p; 1500 1501 case '\r': /* CR */ 1502 if (p < endptr && *p == '\n') 1503 { 1504 *lenptr = 2; 1505 p++; 1506 } 1507 else *lenptr = 1; 1508 return p; 1509 1510 #ifndef EBCDIC 1511 case 0x85: /* Unicode NEL */ 1512 *lenptr = utf? 
2 : 1; 1513 return p; 1514 1515 case 0x2028: /* Unicode LS */ 1516 case 0x2029: /* Unicode PS */ 1517 *lenptr = 3; 1518 return p; 1519 #endif /* Not EBCDIC */ 1520 1521 default: 1522 break; 1523 } 1524 } /* End of loop for ANY case */ 1525 1526 *lenptr = 0; /* Must have hit the end */ 1527 return endptr; 1528 } /* End of overall switch */ 1529 } 1530 1531 1532 1533 /************************************************* 1534 * Find start of previous line * 1535 *************************************************/ 1536 1537 /* This is called when looking back for before lines to print. 1538 1539 Arguments: 1540 p start of the subsequent line 1541 startptr start of available data 1542 1543 Returns: pointer to the start of the previous line 1544 */ 1545 1546 static char * 1547 previous_line(char *p, char *startptr) 1548 { 1549 switch(endlinetype) 1550 { 1551 default: /* Just in case */ 1552 case PCRE2_NEWLINE_LF: 1553 p--; 1554 while (p > startptr && p[-1] != '\n') p--; 1555 return p; 1556 1557 case PCRE2_NEWLINE_CR: 1558 p--; 1559 while (p > startptr && p[-1] != '\n') p--; 1560 return p; 1561 1562 case PCRE2_NEWLINE_NUL: 1563 p--; 1564 while (p > startptr && p[-1] != '\0') p--; 1565 return p; 1566 1567 case PCRE2_NEWLINE_CRLF: 1568 for (;;) 1569 { 1570 p -= 2; 1571 while (p > startptr && p[-1] != '\n') p--; 1572 if (p <= startptr + 1 || p[-2] == '\r') return p; 1573 } 1574 /* Control can never get here */ 1575 1576 case PCRE2_NEWLINE_ANY: 1577 case PCRE2_NEWLINE_ANYCRLF: 1578 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; 1579 if (utf) while ((*p & 0xc0) == 0x80) p--; 1580 1581 while (p > startptr) 1582 { 1583 unsigned int c; 1584 char *pp = p - 1; 1585 1586 if (utf) 1587 { 1588 int extra = 0; 1589 while ((*pp & 0xc0) == 0x80) pp--; 1590 c = *((unsigned char *)pp); 1591 if (c >= 0xc0) 1592 { 1593 int gcii, gcss; 1594 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 1595 gcss = 6*extra; 1596 c = (c & utf8_table3[extra]) << gcss; 1597 for (gcii = 1; 
gcii <= extra; gcii++) 1598 { 1599 gcss -= 6; 1600 c |= (pp[gcii] & 0x3f) << gcss; 1601 } 1602 } 1603 } 1604 else c = *((unsigned char *)pp); 1605 1606 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c) 1607 { 1608 case '\n': /* LF */ 1609 case '\r': /* CR */ 1610 return p; 1611 1612 default: 1613 break; 1614 } 1615 1616 else switch (c) 1617 { 1618 case '\n': /* LF */ 1619 case '\v': /* VT */ 1620 case '\f': /* FF */ 1621 case '\r': /* CR */ 1622 #ifndef EBCDIC 1623 case 0x85: /* Unicode NEL */ 1624 case 0x2028: /* Unicode LS */ 1625 case 0x2029: /* Unicode PS */ 1626 #endif /* Not EBCDIC */ 1627 return p; 1628 1629 default: 1630 break; 1631 } 1632 1633 p = pp; /* Back one character */ 1634 } /* End of loop for ANY case */ 1635 1636 return startptr; /* Hit start of data */ 1637 } /* End of overall switch */ 1638 } 1639 1640 1641 1642 /************************************************* 1643 * Print the previous "after" lines * 1644 *************************************************/ 1645 1646 /* This is called if we are about to lose said lines because of buffer filling, 1647 and at the end of the file. The data in the line is written using fwrite() so 1648 that a binary zero does not terminate it. 
1649 1650 Arguments: 1651 lastmatchnumber the number of the last matching line, plus one 1652 lastmatchrestart where we restarted after the last match 1653 endptr end of available data 1654 printname filename for printing 1655 1656 Returns: nothing 1657 */ 1658 1659 static void 1660 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart, 1661 char *endptr, const char *printname) 1662 { 1663 if (after_context > 0 && lastmatchnumber > 0) 1664 { 1665 int count = 0; 1666 while (lastmatchrestart < endptr && count < after_context) 1667 { 1668 int ellength; 1669 char *pp = end_of_line(lastmatchrestart, endptr, &ellength); 1670 if (ellength == 0 && pp == main_buffer + bufsize) break; 1671 if (printname != NULL) fprintf(stdout, "%s-", printname); 1672 if (number) fprintf(stdout, "%lu-", lastmatchnumber++); 1673 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 1674 lastmatchrestart = pp; 1675 count++; 1676 } 1677 if (count > 0) hyphenpending = TRUE; 1678 } 1679 } 1680 1681 1682 1683 /************************************************* 1684 * Apply patterns to subject till one matches * 1685 *************************************************/ 1686 1687 /* This function is called to run through all patterns, looking for a match. It 1688 is used multiple times for the same subject when colouring is enabled, in order 1689 to find all possible matches. 
1690 1691 Arguments: 1692 matchptr the start of the subject 1693 length the length of the subject to match 1694 options options for pcre_exec 1695 startoffset where to start matching 1696 mrc address of where to put the result of pcre2_match() 1697 1698 Returns: TRUE if there was a match 1699 FALSE if there was no match 1700 invert if there was a non-fatal error 1701 */ 1702 1703 static BOOL 1704 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options, 1705 PCRE2_SIZE startoffset, int *mrc) 1706 { 1707 int i; 1708 PCRE2_SIZE slen = length; 1709 patstr *p = patterns; 1710 const char *msg = "this text:\n\n"; 1711 1712 if (slen > 200) 1713 { 1714 slen = 200; 1715 msg = "text that starts:\n\n"; 1716 } 1717 for (i = 1; p != NULL; p = p->next, i++) 1718 { 1719 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length, 1720 startoffset, options, match_data, match_context); 1721 if (*mrc >= 0) return TRUE; 1722 if (*mrc == PCRE2_ERROR_NOMATCH) continue; 1723 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc); 1724 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i); 1725 fprintf(stderr, "%s", msg); 1726 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */ 1727 fprintf(stderr, "\n\n"); 1728 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT || 1729 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT) 1730 resource_error = TRUE; 1731 if (error_count++ > 20) 1732 { 1733 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n"); 1734 pcre2grep_exit(2); 1735 } 1736 return invert; /* No more matching; don't show the line again */ 1737 } 1738 1739 return FALSE; /* No match, no errors */ 1740 } 1741 1742 1743 /************************************************* 1744 * Check output text for errors * 1745 *************************************************/ 1746 1747 static BOOL 1748 syntax_check_output_text(PCRE2_SPTR string, BOOL callout) 1749 { 1750 
PCRE2_SPTR begin = string; 1751 for (; *string != 0; string++) 1752 { 1753 if (*string == '$') 1754 { 1755 PCRE2_SIZE capture_id = 0; 1756 BOOL brace = FALSE; 1757 1758 string++; 1759 1760 /* Syntax error: a character must be present after $. */ 1761 if (*string == 0) 1762 { 1763 if (!callout) 1764 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", 1765 (int)(string - begin), "no character after $"); 1766 return FALSE; 1767 } 1768 1769 if (*string == '{') 1770 { 1771 /* Must be a decimal number in braces, e.g: {5} or {38} */ 1772 string++; 1773 1774 brace = TRUE; 1775 } 1776 1777 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0')) 1778 { 1779 do 1780 { 1781 /* Maximum capture id is 65535. */ 1782 if (capture_id <= 65535) 1783 capture_id = capture_id * 10 + (*string - '0'); 1784 1785 string++; 1786 } 1787 while (*string >= '0' && *string <= '9'); 1788 1789 if (brace) 1790 { 1791 /* Syntax error: closing brace is missing. */ 1792 if (*string != '}') 1793 { 1794 if (!callout) 1795 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", 1796 (int)(string - begin), "missing closing brace"); 1797 return FALSE; 1798 } 1799 } 1800 else 1801 { 1802 /* To negate the effect of the for. */ 1803 string--; 1804 } 1805 } 1806 else if (brace) 1807 { 1808 /* Syntax error: a decimal number required. */ 1809 if (!callout) 1810 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", 1811 (int)(string - begin), "decimal number expected"); 1812 return FALSE; 1813 } 1814 else if (*string == 'o') 1815 { 1816 string++; 1817 1818 if (*string < '0' || *string > '7') 1819 { 1820 /* Syntax error: an octal number required. 
*/ 1821 if (!callout) 1822 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", 1823 (int)(string - begin), "octal number expected"); 1824 return FALSE; 1825 } 1826 } 1827 else if (*string == 'x') 1828 { 1829 string++; 1830 1831 if (!isxdigit((unsigned char)*string)) 1832 { 1833 /* Syntax error: a hexdecimal number required. */ 1834 if (!callout) 1835 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", 1836 (int)(string - begin), "hexadecimal number expected"); 1837 return FALSE; 1838 } 1839 } 1840 } 1841 } 1842 1843 return TRUE; 1844 } 1845 1846 1847 /************************************************* 1848 * Display output text * 1849 *************************************************/ 1850 1851 /* Display the output text, which is assumed to have already been syntax 1852 checked. Output may contain escape sequences started by the dollar sign. The 1853 escape sequences are substituted as follows: 1854 1855 $<digits> or ${<digits>} is replaced by the captured substring of the given 1856 decimal number; zero will substitute the whole match. If the number is 1857 greater than the number of capturing substrings, or if the capture is unset, 1858 the replacement is empty. 1859 1860 $a is replaced by bell. 1861 $b is replaced by backspace. 1862 $e is replaced by escape. 1863 $f is replaced by form feed. 1864 $n is replaced by newline. 1865 $r is replaced by carriage return. 1866 $t is replaced by tab. 1867 $v is replaced by vertical tab. 1868 1869 $o<digits> is replaced by the character represented by the given octal 1870 number; up to three digits are processed. 1871 1872 $x<digits> is replaced by the character represented by the given hexadecimal 1873 number; up to two digits are processed. 1874 1875 Any other character is substituted by itself. E.g: $$ is replaced by a single 1876 dollar. 
1877 1878 Arguments: 1879 string: the output text 1880 callout: TRUE for the builtin callout, FALSE for --output 1881 subject the start of the subject 1882 ovector: capture offsets 1883 capture_top: number of captures 1884 1885 Returns: TRUE if something was output, other than newline 1886 FALSE if nothing was output, or newline was last output 1887 */ 1888 1889 static BOOL 1890 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject, 1891 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top) 1892 { 1893 BOOL printed = FALSE; 1894 1895 for (; *string != 0; string++) 1896 { 1897 int ch = EOF; 1898 if (*string == '$') 1899 { 1900 PCRE2_SIZE capture_id = 0; 1901 BOOL brace = FALSE; 1902 1903 string++; 1904 1905 if (*string == '{') 1906 { 1907 /* Must be a decimal number in braces, e.g: {5} or {38} */ 1908 string++; 1909 1910 brace = TRUE; 1911 } 1912 1913 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0')) 1914 { 1915 do 1916 { 1917 /* Maximum capture id is 65535. */ 1918 if (capture_id <= 65535) 1919 capture_id = capture_id * 10 + (*string - '0'); 1920 1921 string++; 1922 } 1923 while (*string >= '0' && *string <= '9'); 1924 1925 if (!brace) 1926 { 1927 /* To negate the effect of the for. 
*/ 1928 string--; 1929 } 1930 1931 if (capture_id < capture_top) 1932 { 1933 PCRE2_SIZE capturesize; 1934 capture_id *= 2; 1935 1936 capturesize = ovector[capture_id + 1] - ovector[capture_id]; 1937 if (capturesize > 0) 1938 { 1939 print_match(subject + ovector[capture_id], capturesize); 1940 printed = TRUE; 1941 } 1942 } 1943 } 1944 else if (*string == 'a') ch = '\a'; 1945 else if (*string == 'b') ch = '\b'; 1946 #ifndef EBCDIC 1947 else if (*string == 'e') ch = '\033'; 1948 #else 1949 else if (*string == 'e') ch = '\047'; 1950 #endif 1951 else if (*string == 'f') ch = '\f'; 1952 else if (*string == 'r') ch = '\r'; 1953 else if (*string == 't') ch = '\t'; 1954 else if (*string == 'v') ch = '\v'; 1955 else if (*string == 'n') 1956 { 1957 fprintf(stdout, STDOUT_NL); 1958 printed = FALSE; 1959 } 1960 else if (*string == 'o') 1961 { 1962 string++; 1963 1964 ch = *string - '0'; 1965 if (string[1] >= '0' && string[1] <= '7') 1966 { 1967 string++; 1968 ch = ch * 8 + (*string - '0'); 1969 } 1970 if (string[1] >= '0' && string[1] <= '7') 1971 { 1972 string++; 1973 ch = ch * 8 + (*string - '0'); 1974 } 1975 } 1976 else if (*string == 'x') 1977 { 1978 string++; 1979 1980 if (*string >= '0' && *string <= '9') 1981 ch = *string - '0'; 1982 else 1983 ch = (*string | 0x20) - 'a' + 10; 1984 if (isxdigit((unsigned char)string[1])) 1985 { 1986 string++; 1987 ch *= 16; 1988 if (*string >= '0' && *string <= '9') 1989 ch += *string - '0'; 1990 else 1991 ch += (*string | 0x20) - 'a' + 10; 1992 } 1993 } 1994 else 1995 { 1996 ch = *string; 1997 } 1998 } 1999 else 2000 { 2001 ch = *string; 2002 } 2003 if (ch != EOF) 2004 { 2005 fprintf(stdout, "%c", ch); 2006 printed = TRUE; 2007 } 2008 } 2009 2010 return printed; 2011 } 2012 2013 2014 #ifdef SUPPORT_PCRE2GREP_CALLOUT 2015 2016 /************************************************* 2017 * Parse and execute callout scripts * 2018 *************************************************/ 2019 2020 /* This function parses a callout string block and 
executes the
program specified by the string. The string is a list of substrings
separated by pipe characters. The first substring represents the
executable name, and the following substrings specify the arguments:

  program_name|param1|param2|...

Any substring (including the program name) can contain escape sequences
started by the dollar character. The escape sequences are substituted as
follows:

  $<digits> or ${<digits>} is replaced by the captured substring of the given
  decimal number, which must be greater than zero. If the number is greater
  than the number of capturing substrings, or if the capture is unset, the
  replacement is empty.

  Any other character is substituted by itself. E.g: $$ is replaced by a single
  dollar or $| replaced by a pipe character.

Alternatively, if string starts with pipe, the remainder is taken as an output
string, same as --output. In this case, --om-separator is used to separate each
callout, defaulting to newline.

Example:

  echo -e "abcde\n12345" | pcre2grep \
    '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -

  Output:

    Arg1: [a] [bcd] [d] Arg2: |a| ()
    abcde
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345

Arguments:
  calloutptr  the callout block
  unused      not used

Returns:      0 if there is no callout string, if the string is an output
                text (leading pipe), if it has a syntax error, or if memory
                cannot be obtained;
              otherwise 0 if the status obtained from running the program is
                zero, and 1 if it is not
*/

static int
pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
{
PCRE2_SIZE length = calloutptr->callout_string_length;
PCRE2_SPTR string = calloutptr->callout_string;
PCRE2_SPTR subject = calloutptr->subject;
PCRE2_SIZE *ovector = calloutptr->offset_vector;
PCRE2_SIZE capture_top = calloutptr->capture_top;
PCRE2_SIZE argsvectorlen = 2;   /* Program name plus the NULL terminator */
PCRE2_SIZE argslen = 1;         /* The final terminating zero */
char *args;
char *argsptr;
char **argsvector;
char **argsvectorptr;
#ifndef WIN32
pid_t pid;
#endif
int result = 0;

(void)unused;   /* Avoid compiler warning */

/* Only callout with strings are supported. */
if (string == NULL || length == 0) return 0;

/* If there's no command, output the remainder directly. */

if (*string == '|')
  {
  string++;
  if (!syntax_check_output_text(string, TRUE)) return 0;
  (void)display_output_text(string, TRUE, subject, ovector, capture_top);
  return 0;
  }

/* Checking syntax and compute the number of string fragments. Callout strings
are ignored in case of a syntax error. This first pass also adds up, in
argslen, the number of bytes that the expanded arguments will need. */

while (length > 0)
  {
  if (*string == '|')
    {
    argsvectorlen++;

    /* Maximum 10000 arguments allowed. */
    if (argsvectorlen > 10000) return 0;
    }
  else if (*string == '$')
    {
    PCRE2_SIZE capture_id = 0;

    string++;
    length--;

    /* Syntax error: a character must be present after $. */
    if (length == 0) return 0;

    if (*string >= '1' && *string <= '9')
      {
      do
        {
        /* Maximum capture id is 65535. */
        if (capture_id <= 65535)
          capture_id = capture_id * 10 + (*string - '0');

        string++;
        length--;
        }
      while (length > 0 && *string >= '0' && *string <= '9');

      /* To negate the effect of string++ below. */
      string--;
      length++;
      }
    else if (*string == '{')
      {
      /* Must be a decimal number in braces, e.g: {5} or {38} */
      string++;
      length--;

      /* Syntax error: a decimal number required. */
      if (length == 0) return 0;
      if (*string < '1' || *string > '9') return 0;

      do
        {
        /* Maximum capture id is 65535. */
        if (capture_id <= 65535)
          capture_id = capture_id * 10 + (*string - '0');

        string++;
        length--;

        /* Syntax error: no more characters */
        if (length == 0) return 0;
        }
      while (*string >= '0' && *string <= '9');

      /* Syntax error: closing brace is missing. */
      if (*string != '}') return 0;
      }

    /* A capture reference contributes the length of the captured text (or
    nothing if the number is too large) instead of one byte. */

    if (capture_id > 0)
      {
      if (capture_id < capture_top)
        {
        capture_id *= 2;
        argslen += ovector[capture_id + 1] - ovector[capture_id];
        }

      /* To negate the effect of argslen++ below. */
      argslen--;
      }
    }

  string++;
  length--;
  argslen++;
  }

args = (char*)malloc(argslen);
if (args == NULL) return 0;

argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
if (argsvector == NULL)
  {
  free(args);
  return 0;
  }

argsptr = args;
argsvectorptr = argsvector;

*argsvectorptr++ = argsptr;

/* Second pass: go through the string again from the start, this time copying
the expanded text into args and recording where each argument begins. The
syntax has already been checked, so fewer tests are needed. */

length = calloutptr->callout_string_length;
string = calloutptr->callout_string;

while (length > 0)
  {
  if (*string == '|')
    {
    /* End the current argument and start the next one. */
    *argsptr++ = '\0';
    *argsvectorptr++ = argsptr;
    }
  else if (*string == '$')
    {
    string++;
    length--;

    if ((*string >= '1' && *string <= '9') || *string == '{')
      {
      PCRE2_SIZE capture_id = 0;

      if (*string != '{')
        {
        do
          {
          /* Maximum capture id is 65535. */
          if (capture_id <= 65535)
            capture_id = capture_id * 10 + (*string - '0');

          string++;
          length--;
          }
        while (length > 0 && *string >= '0' && *string <= '9');

        /* To negate the effect of string++ below. */
        string--;
        length++;
        }
      else
        {
        string++;
        length--;

        /* The first pass has ensured that a closing brace follows the
        digits, and the string++ below steps over it. */

        do
          {
          /* Maximum capture id is 65535. */
          if (capture_id <= 65535)
            capture_id = capture_id * 10 + (*string - '0');

          string++;
          length--;
          }
        while (*string != '}');
        }

      if (capture_id < capture_top)
        {
        PCRE2_SIZE capturesize;
        capture_id *= 2;

        capturesize = ovector[capture_id + 1] - ovector[capture_id];
        memcpy(argsptr, subject + ovector[capture_id], capturesize);
        argsptr += capturesize;
        }
      }
    else
      {
      /* $ followed by any other character yields that character. */
      *argsptr++ = *string;
      }
    }
  else
    {
    *argsptr++ = *string;
    }

  string++;
  length--;
  }

*argsptr++ = '\0';
*argsvectorptr = NULL;

/* Run the program and wait for it to finish. */

#ifdef WIN32
result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
#else
pid = fork();

if (pid == 0)
  {
  (void)execv(argsvector[0], argsvector);
  /* Control gets here if there is an error, e.g. a non-existent program */
  exit(1);
  }
else if (pid > 0)
  (void)waitpid(pid, &result, 0);

/* If fork() failed (pid < 0), result is still zero. */
#endif

free(args);
free(argsvector);

/* Currently negative return values are not supported, only zero (match
continues) or non-zero (match fails). */

return result != 0;
}

#endif



/*************************************************
*     Read a portion of the file into buffer     *
*************************************************/

/* Reads up to length bytes into buffer, using gzread() or BZ2_bzread() when
support for them is compiled in and frtype selects them. Otherwise the handle
is treated as a FILE stream, which is read with read_one_line() if
input_line_buffered is set, or with fread() if not.

Arguments:
  handle               the input handle; its real type depends on frtype
  frtype               the handle type, e.g. FR_LIBZ or FR_LIBBZ2
  buffer               where to put the data
  length               the maximum number of bytes to read
  input_line_buffered  TRUE to read a FILE stream one line at a time

Returns:               the value returned by the reading function, as an int
*/

static int
fill_buffer(void *handle, int frtype, char *buffer, int length,
  BOOL input_line_buffered)
{
(void)frtype;  /* Avoid warning when not used */

#ifdef SUPPORT_LIBZ
if (frtype == FR_LIBZ)
  return gzread((gzFile)handle, buffer, length);
else
#endif

#ifdef SUPPORT_LIBBZ2
if (frtype == FR_LIBBZ2)
  return BZ2_bzread((BZFILE *)handle, buffer, length);
else
#endif

return (input_line_buffered ?
  read_one_line(buffer, length, (FILE *)handle) :
  fread(buffer, 1, length, (FILE *)handle));
}



/*************************************************
*            Grep an individual file             *
*************************************************/

/* This is called from grep_or_recurse() below. It uses a buffer that is three
times the value of bufthird. The matching point is never allowed to stray into
the top third of the buffer, thus keeping more of the file available for
context printing or for multiline scanning. For large files, the pointer will
be in the middle third most of the time, so the bottom third is available for
"before" context printing.

Arguments:
  handle       the fopened FILE stream for a normal file
               the gzFile pointer when reading is via libz
               the BZFILE pointer when reading is via libbz2
  frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
  filename     the file name or NULL (for errors)
  printname    the file name if it is to be printed for each match
               or NULL if the file name is not to be printed
               it cannot be NULL if filenames[_nomatch]_only is set

Returns:       0 if there was at least one match
               1 otherwise (no matches)
               2 if an overlong line is encountered
               3 if there is a read error on a .bz2 file
*/

static int
pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
{
int rc = 1;
int filepos = 0;
unsigned long int linenumber = 1;
unsigned long int lastmatchnumber = 0;
unsigned long int count = 0;
char *lastmatchrestart = main_buffer;
char *ptr = main_buffer;
char *endptr;
PCRE2_SIZE bufflength;
BOOL binary = FALSE;
BOOL endhyphenpending = FALSE;
BOOL input_line_buffered = line_buffered;
FILE *in = NULL;                    /* Ensure initialized */

/* Do the first read into the start of the buffer and set up the pointer to end
of what we have.
In the case of libz, a non-zipped .gz file will be read as a 2378 plain file. However, if a .bz2 file isn't actually bzipped, the first read will 2379 fail. */ 2380 2381 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2) 2382 { 2383 in = (FILE *)handle; 2384 if (is_file_tty(in)) input_line_buffered = TRUE; 2385 } 2386 else input_line_buffered = FALSE; 2387 2388 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize, 2389 input_line_buffered); 2390 2391 #ifdef SUPPORT_LIBBZ2 2392 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */ 2393 #endif 2394 2395 endptr = main_buffer + bufflength; 2396 2397 /* Unless binary-files=text, see if we have a binary file. This uses the same 2398 rule as GNU grep, namely, a search for a binary zero byte near the start of the 2399 file. However, when the newline convention is binary zero, we can't do this. */ 2400 2401 if (binary_files != BIN_TEXT) 2402 { 2403 if (endlinetype != PCRE2_NEWLINE_NUL) 2404 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) 2405 != NULL; 2406 if (binary && binary_files == BIN_NOMATCH) return 1; 2407 } 2408 2409 /* Loop while the current pointer is not at the end of the file. For large 2410 files, endptr will be at the end of the buffer when we are in the middle of the 2411 file, but ptr will never get there, because as soon as it gets over 2/3 of the 2412 way, the buffer is shifted left and re-filled. */ 2413 2414 while (ptr < endptr) 2415 { 2416 int endlinelength; 2417 int mrc = 0; 2418 unsigned int options = 0; 2419 BOOL match; 2420 char *t = ptr; 2421 PCRE2_SIZE length, linelength; 2422 PCRE2_SIZE startoffset = 0; 2423 2424 /* At this point, ptr is at the start of a line. We need to find the length 2425 of the subject string to pass to pcre2_match(). In multiline mode, it is the 2426 length remainder of the data in the buffer. Otherwise, it is the length of 2427 the next line, excluding the terminating newline. 
After matching, we always 2428 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE 2429 option is used for compiling, so that any match is constrained to be in the 2430 first line. */ 2431 2432 t = end_of_line(t, endptr, &endlinelength); 2433 linelength = t - ptr - endlinelength; 2434 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength; 2435 2436 /* Check to see if the line we are looking at extends right to the very end 2437 of the buffer without a line terminator. This means the line is too long to 2438 handle at the current buffer size. Until the buffer reaches its maximum size, 2439 try doubling it and reading more data. */ 2440 2441 if (endlinelength == 0 && t == main_buffer + bufsize) 2442 { 2443 if (bufthird < max_bufthird) 2444 { 2445 char *new_buffer; 2446 int new_bufthird = 2*bufthird; 2447 2448 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird; 2449 new_buffer = (char *)malloc(3*new_bufthird); 2450 2451 if (new_buffer == NULL) 2452 { 2453 fprintf(stderr, 2454 "pcre2grep: line %lu%s%s is too long for the internal buffer\n" 2455 "pcre2grep: not enough memory to increase the buffer size to %d\n", 2456 linenumber, 2457 (filename == NULL)? "" : " of file ", 2458 (filename == NULL)? "" : filename, 2459 new_bufthird); 2460 return 2; 2461 } 2462 2463 /* Copy the data and adjust pointers to the new buffer location. */ 2464 2465 memcpy(new_buffer, main_buffer, bufsize); 2466 bufthird = new_bufthird; 2467 bufsize = 3*bufthird; 2468 ptr = new_buffer + (ptr - main_buffer); 2469 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer); 2470 free(main_buffer); 2471 main_buffer = new_buffer; 2472 2473 /* Read more data into the buffer and then try to find the line ending 2474 again. 
*/ 2475 2476 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength, 2477 bufsize - bufflength, input_line_buffered); 2478 endptr = main_buffer + bufflength; 2479 continue; 2480 } 2481 else 2482 { 2483 fprintf(stderr, 2484 "pcre2grep: line %lu%s%s is too long for the internal buffer\n" 2485 "pcre2grep: the maximum buffer size is %d\n" 2486 "pcre2grep: use the --max-buffer-size option to change it\n", 2487 linenumber, 2488 (filename == NULL)? "" : " of file ", 2489 (filename == NULL)? "" : filename, 2490 bufthird); 2491 return 2; 2492 } 2493 } 2494 2495 /* Extra processing for Jeffrey Friedl's debugging. */ 2496 2497 #ifdef JFRIEDL_DEBUG 2498 if (jfriedl_XT || jfriedl_XR) 2499 { 2500 # include <sys/time.h> 2501 # include <time.h> 2502 struct timeval start_time, end_time; 2503 struct timezone dummy; 2504 int i; 2505 2506 if (jfriedl_XT) 2507 { 2508 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); 2509 const char *orig = ptr; 2510 ptr = malloc(newlen + 1); 2511 if (!ptr) { 2512 printf("out of memory"); 2513 pcre2grep_exit(2); 2514 } 2515 endptr = ptr; 2516 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); 2517 for (i = 0; i < jfriedl_XT; i++) { 2518 strncpy(endptr, orig, length); 2519 endptr += length; 2520 } 2521 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); 2522 length = newlen; 2523 } 2524 2525 if (gettimeofday(&start_time, &dummy) != 0) 2526 perror("bad gettimeofday"); 2527 2528 2529 for (i = 0; i < jfriedl_XR; i++) 2530 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0, 2531 PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); 2532 2533 if (gettimeofday(&end_time, &dummy) != 0) 2534 perror("bad gettimeofday"); 2535 2536 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) 2537 - 2538 (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); 2539 2540 printf("%s TIMER[%.4f]\n", match ? 
"MATCH" : "FAIL", delta); 2541 return 0; 2542 } 2543 #endif 2544 2545 /* We come back here after a match when only_matching_count is non-zero, in 2546 order to find any further matches in the same line. This applies to 2547 --only-matching, --file-offsets, and --line-offsets. */ 2548 2549 ONLY_MATCHING_RESTART: 2550 2551 /* Run through all the patterns until one matches or there is an error other 2552 than NOMATCH. This code is in a subroutine so that it can be re-used for 2553 finding subsequent matches when colouring matched lines. After finding one 2554 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in 2555 this line. */ 2556 2557 match = match_patterns(ptr, length, options, startoffset, &mrc); 2558 options = PCRE2_NOTEMPTY; 2559 2560 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use 2561 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its 2562 return code - to output data lines, so that binary zeroes are treated as just 2563 another data character. */ 2564 2565 if (match != invert) 2566 { 2567 BOOL hyphenprinted = FALSE; 2568 2569 /* We've failed if we want a file that doesn't have any matches. */ 2570 2571 if (filenames == FN_NOMATCH_ONLY) return 1; 2572 2573 /* If all we want is a yes/no answer, we can return immediately. */ 2574 2575 if (quiet) return 0; 2576 2577 /* Just count if just counting is wanted. */ 2578 2579 else if (count_only || show_total_count) count++; 2580 2581 /* When handling a binary file and binary-files==binary, the "binary" 2582 variable will be set true (it's false in all other cases). In this 2583 situation we just want to output the file name. No need to scan further. */ 2584 2585 else if (binary) 2586 { 2587 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename); 2588 return 0; 2589 } 2590 2591 /* Likewise, if all we want is a file name, there is no need to scan any 2592 more lines in the file. 
*/ 2593 2594 else if (filenames == FN_MATCH_ONLY) 2595 { 2596 fprintf(stdout, "%s" STDOUT_NL, printname); 2597 return 0; 2598 } 2599 2600 /* The --only-matching option prints just the substring that matched, 2601 and/or one or more captured portions of it, as long as these strings are 2602 not empty. The --file-offsets and --line-offsets options output offsets for 2603 the matching substring (all three set only_matching_count non-zero). None 2604 of these mutually exclusive options prints any context. Afterwards, adjust 2605 the start and then jump back to look for further matches in the same line. 2606 If we are in invert mode, however, nothing is printed and we do not restart 2607 - this could still be useful because the return code is set. */ 2608 2609 else if (only_matching_count != 0) 2610 { 2611 if (!invert) 2612 { 2613 PCRE2_SIZE oldstartoffset; 2614 2615 if (printname != NULL) fprintf(stdout, "%s:", printname); 2616 if (number) fprintf(stdout, "%lu:", linenumber); 2617 2618 /* Handle --line-offsets */ 2619 2620 if (line_offsets) 2621 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr), 2622 (int)(offsets[1] - offsets[0])); 2623 2624 /* Handle --file-offsets */ 2625 2626 else if (file_offsets) 2627 fprintf(stdout, "%d,%d" STDOUT_NL, 2628 (int)(filepos + ptr + offsets[0] - ptr), 2629 (int)(offsets[1] - offsets[0])); 2630 2631 /* Handle --output (which has already been syntax checked) */ 2632 2633 else if (output_text != NULL) 2634 { 2635 if (display_output_text((PCRE2_SPTR)output_text, FALSE, 2636 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL || 2637 number) 2638 fprintf(stdout, STDOUT_NL); 2639 } 2640 2641 /* Handle --only-matching, which may occur many times */ 2642 2643 else 2644 { 2645 BOOL printed = FALSE; 2646 omstr *om; 2647 2648 for (om = only_matching; om != NULL; om = om->next) 2649 { 2650 int n = om->groupnum; 2651 if (n < mrc) 2652 { 2653 int plen = offsets[2*n + 1] - offsets[2*n]; 2654 if (plen > 0) 2655 { 2656 if (printed && 
om_separator != NULL) 2657 fprintf(stdout, "%s", om_separator); 2658 print_match(ptr + offsets[n*2], plen); 2659 printed = TRUE; 2660 } 2661 } 2662 } 2663 2664 if (printed || printname != NULL || number) 2665 fprintf(stdout, STDOUT_NL); 2666 } 2667 2668 /* Prepare to repeat to find the next match in the line. */ 2669 2670 match = FALSE; 2671 if (line_buffered) fflush(stdout); 2672 rc = 0; /* Had some success */ 2673 2674 /* If the pattern contained a lookbehind that included \K, it is 2675 possible that the end of the match might be at or before the actual 2676 starting offset we have just used. In this case, start one character 2677 further on. */ 2678 2679 startoffset = offsets[1]; /* Restart after the match */ 2680 oldstartoffset = pcre2_get_startchar(match_data); 2681 if (startoffset <= oldstartoffset) 2682 { 2683 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */ 2684 startoffset = oldstartoffset + 1; 2685 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++; 2686 } 2687 2688 /* If the current match ended past the end of the line (only possible 2689 in multiline mode), we must move on to the line in which it did end 2690 before searching for more matches. */ 2691 2692 while (startoffset > linelength) 2693 { 2694 ptr += linelength + endlinelength; 2695 filepos += (int)(linelength + endlinelength); 2696 linenumber++; 2697 startoffset -= (int)(linelength + endlinelength); 2698 t = end_of_line(ptr, endptr, &endlinelength); 2699 linelength = t - ptr - endlinelength; 2700 length = (PCRE2_SIZE)(endptr - ptr); 2701 } 2702 2703 goto ONLY_MATCHING_RESTART; 2704 } 2705 } 2706 2707 /* This is the default case when none of the above options is set. We print 2708 the matching lines(s), possibly preceded and/or followed by other lines of 2709 context. */ 2710 2711 else 2712 { 2713 /* See if there is a requirement to print some "after" lines from a 2714 previous match. We never print any overlaps. 
*/ 2715 2716 if (after_context > 0 && lastmatchnumber > 0) 2717 { 2718 int ellength; 2719 int linecount = 0; 2720 char *p = lastmatchrestart; 2721 2722 while (p < ptr && linecount < after_context) 2723 { 2724 p = end_of_line(p, ptr, &ellength); 2725 linecount++; 2726 } 2727 2728 /* It is important to advance lastmatchrestart during this printing so 2729 that it interacts correctly with any "before" printing below. Print 2730 each line's data using fwrite() in case there are binary zeroes. */ 2731 2732 while (lastmatchrestart < p) 2733 { 2734 char *pp = lastmatchrestart; 2735 if (printname != NULL) fprintf(stdout, "%s-", printname); 2736 if (number) fprintf(stdout, "%lu-", lastmatchnumber++); 2737 pp = end_of_line(pp, endptr, &ellength); 2738 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 2739 lastmatchrestart = pp; 2740 } 2741 if (lastmatchrestart != ptr) hyphenpending = TRUE; 2742 } 2743 2744 /* If there were non-contiguous lines printed above, insert hyphens. */ 2745 2746 if (hyphenpending) 2747 { 2748 fprintf(stdout, "--" STDOUT_NL); 2749 hyphenpending = FALSE; 2750 hyphenprinted = TRUE; 2751 } 2752 2753 /* See if there is a requirement to print some "before" lines for this 2754 match. Again, don't print overlaps. 
*/ 2755 2756 if (before_context > 0) 2757 { 2758 int linecount = 0; 2759 char *p = ptr; 2760 2761 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && 2762 linecount < before_context) 2763 { 2764 linecount++; 2765 p = previous_line(p, main_buffer); 2766 } 2767 2768 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) 2769 fprintf(stdout, "--" STDOUT_NL); 2770 2771 while (p < ptr) 2772 { 2773 int ellength; 2774 char *pp = p; 2775 if (printname != NULL) fprintf(stdout, "%s-", printname); 2776 if (number) fprintf(stdout, "%lu-", linenumber - linecount--); 2777 pp = end_of_line(pp, endptr, &ellength); 2778 FWRITE_IGNORE(p, 1, pp - p, stdout); 2779 p = pp; 2780 } 2781 } 2782 2783 /* Now print the matching line(s); ensure we set hyphenpending at the end 2784 of the file if any context lines are being output. */ 2785 2786 if (after_context > 0 || before_context > 0) 2787 endhyphenpending = TRUE; 2788 2789 if (printname != NULL) fprintf(stdout, "%s:", printname); 2790 if (number) fprintf(stdout, "%lu:", linenumber); 2791 2792 /* This extra option, for Jeffrey Friedl's debugging requirements, 2793 replaces the matched string, or a specific captured string if it exists, 2794 with X. When this happens, colouring is ignored. */ 2795 2796 #ifdef JFRIEDL_DEBUG 2797 if (S_arg >= 0 && S_arg < mrc) 2798 { 2799 int first = S_arg * 2; 2800 int last = first + 1; 2801 FWRITE_IGNORE(ptr, 1, offsets[first], stdout); 2802 fprintf(stdout, "X"); 2803 FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout); 2804 } 2805 else 2806 #endif 2807 2808 /* In multiline mode, or if colouring, we have to split the line(s) up 2809 and search for further matches, but not of course if the line is a 2810 non-match. In multiline mode this is necessary in case there is another 2811 match that spans the end of the current line. When colouring we want to 2812 colour all matches. 
*/ 2813 2814 if ((multiline || do_colour) && !invert) 2815 { 2816 int plength; 2817 PCRE2_SIZE endprevious; 2818 2819 /* The use of \K may make the end offset earlier than the start. In 2820 this situation, swap them round. */ 2821 2822 if (offsets[0] > offsets[1]) 2823 { 2824 PCRE2_SIZE temp = offsets[0]; 2825 offsets[0] = offsets[1]; 2826 offsets[1] = temp; 2827 } 2828 2829 FWRITE_IGNORE(ptr, 1, offsets[0], stdout); 2830 print_match(ptr + offsets[0], offsets[1] - offsets[0]); 2831 2832 for (;;) 2833 { 2834 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data); 2835 2836 endprevious = offsets[1]; 2837 startoffset = endprevious; /* Advance after previous match. */ 2838 2839 /* If the pattern contained a lookbehind that included \K, it is 2840 possible that the end of the match might be at or before the actual 2841 starting offset we have just used. In this case, start one character 2842 further on. */ 2843 2844 if (startoffset <= oldstartoffset) 2845 { 2846 startoffset = oldstartoffset + 1; 2847 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++; 2848 } 2849 2850 /* If the current match ended past the end of the line (only possible 2851 in multiline mode), we must move on to the line in which it did end 2852 before searching for more matches. Because the PCRE2_FIRSTLINE option 2853 is set, the start of the match will always be before the first 2854 newline sequence. 
*/ 2855 2856 while (startoffset > linelength + endlinelength) 2857 { 2858 ptr += linelength + endlinelength; 2859 filepos += (int)(linelength + endlinelength); 2860 linenumber++; 2861 startoffset -= (int)(linelength + endlinelength); 2862 endprevious -= (int)(linelength + endlinelength); 2863 t = end_of_line(ptr, endptr, &endlinelength); 2864 linelength = t - ptr - endlinelength; 2865 length = (PCRE2_SIZE)(endptr - ptr); 2866 } 2867 2868 /* If startoffset is at the exact end of the line it means this 2869 complete line was the final part of the match, so there is nothing 2870 more to do. */ 2871 2872 if (startoffset == linelength + endlinelength) break; 2873 2874 /* Otherwise, run a match from within the final line, and if found, 2875 loop for any that may follow. */ 2876 2877 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break; 2878 2879 /* The use of \K may make the end offset earlier than the start. In 2880 this situation, swap them round. */ 2881 2882 if (offsets[0] > offsets[1]) 2883 { 2884 PCRE2_SIZE temp = offsets[0]; 2885 offsets[0] = offsets[1]; 2886 offsets[1] = temp; 2887 } 2888 2889 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout); 2890 print_match(ptr + offsets[0], offsets[1] - offsets[0]); 2891 } 2892 2893 /* In multiline mode, we may have already printed the complete line 2894 and its line-ending characters (if they matched the pattern), so there 2895 may be no more to print. */ 2896 2897 plength = (int)((linelength + endlinelength) - endprevious); 2898 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout); 2899 } 2900 2901 /* Not colouring or multiline; no need to search for further matches. */ 2902 2903 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout); 2904 } 2905 2906 /* End of doing what has to be done for a match. If --line-buffered was 2907 given, flush the output. 
*/ 2908 2909 if (line_buffered) fflush(stdout); 2910 rc = 0; /* Had some success */ 2911 2912 /* Remember where the last match happened for after_context. We remember 2913 where we are about to restart, and that line's number. */ 2914 2915 lastmatchrestart = ptr + linelength + endlinelength; 2916 lastmatchnumber = linenumber + 1; 2917 } 2918 2919 /* For a match in multiline inverted mode (which of course did not cause 2920 anything to be printed), we have to move on to the end of the match before 2921 proceeding. */ 2922 2923 if (multiline && invert && match) 2924 { 2925 int ellength; 2926 char *endmatch = ptr + offsets[1]; 2927 t = ptr; 2928 while (t < endmatch) 2929 { 2930 t = end_of_line(t, endptr, &ellength); 2931 if (t <= endmatch) linenumber++; else break; 2932 } 2933 endmatch = end_of_line(endmatch, endptr, &ellength); 2934 linelength = endmatch - ptr - ellength; 2935 } 2936 2937 /* Advance to after the newline and increment the line number. The file 2938 offset to the current line is maintained in filepos. */ 2939 2940 END_ONE_MATCH: 2941 ptr += linelength + endlinelength; 2942 filepos += (int)(linelength + endlinelength); 2943 linenumber++; 2944 2945 /* If input is line buffered, and the buffer is not yet full, read another 2946 line and add it into the buffer. */ 2947 2948 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize) 2949 { 2950 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in); 2951 bufflength += add; 2952 endptr += add; 2953 } 2954 2955 /* If we haven't yet reached the end of the file (the buffer is full), and 2956 the current point is in the top 1/3 of the buffer, slide the buffer down by 2957 1/3 and refill it. Before we do this, if some unprinted "after" lines are 2958 about to be lost, print them. 
*/ 2959 2960 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird) 2961 { 2962 if (after_context > 0 && 2963 lastmatchnumber > 0 && 2964 lastmatchrestart < main_buffer + bufthird) 2965 { 2966 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 2967 lastmatchnumber = 0; /* Indicates no after lines pending */ 2968 } 2969 2970 /* Now do the shuffle */ 2971 2972 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird); 2973 ptr -= bufthird; 2974 2975 bufflength = 2*bufthird + fill_buffer(handle, frtype, 2976 main_buffer + 2*bufthird, bufthird, input_line_buffered); 2977 endptr = main_buffer + bufflength; 2978 2979 /* Adjust any last match point */ 2980 2981 if (lastmatchnumber > 0) lastmatchrestart -= bufthird; 2982 } 2983 } /* Loop through the whole file */ 2984 2985 /* End of file; print final "after" lines if wanted; do_after_lines sets 2986 hyphenpending if it prints something. */ 2987 2988 if (only_matching_count == 0 && !(count_only|show_total_count)) 2989 { 2990 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 2991 hyphenpending |= endhyphenpending; 2992 } 2993 2994 /* Print the file name if we are looking for those without matches and there 2995 were none. If we found a match, we won't have got this far. 
*/ 2996 2997 if (filenames == FN_NOMATCH_ONLY) 2998 { 2999 fprintf(stdout, "%s" STDOUT_NL, printname); 3000 return 0; 3001 } 3002 3003 /* Print the match count if wanted */ 3004 3005 if (count_only && !quiet) 3006 { 3007 if (count > 0 || !omit_zero_count) 3008 { 3009 if (printname != NULL && filenames != FN_NONE) 3010 fprintf(stdout, "%s:", printname); 3011 fprintf(stdout, "%lu" STDOUT_NL, count); 3012 counts_printed++; 3013 } 3014 } 3015 3016 total_count += count; /* Can be set without count_only */ 3017 return rc; 3018 } 3019 3020 3021 3022 /************************************************* 3023 * Grep a file or recurse into a directory * 3024 *************************************************/ 3025 3026 /* Given a path name, if it's a directory, scan all the files if we are 3027 recursing; if it's a file, grep it. 3028 3029 Arguments: 3030 pathname the path to investigate 3031 dir_recurse TRUE if recursing is wanted (-r or -drecurse) 3032 only_one_at_top TRUE if the path is the only one at toplevel 3033 3034 Returns: -1 the file/directory was skipped 3035 0 if there was at least one match 3036 1 if there were no matches 3037 2 there was some kind of error 3038 3039 However, file opening failures are suppressed if "silent" is set. 
3040 */ 3041 3042 static int 3043 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) 3044 { 3045 int rc = 1; 3046 int frtype; 3047 void *handle; 3048 char *lastcomp; 3049 FILE *in = NULL; /* Ensure initialized */ 3050 3051 #ifdef SUPPORT_LIBZ 3052 gzFile ingz = NULL; 3053 #endif 3054 3055 #ifdef SUPPORT_LIBBZ2 3056 BZFILE *inbz2 = NULL; 3057 #endif 3058 3059 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 3060 int pathlen; 3061 #endif 3062 3063 #if defined NATIVE_ZOS 3064 int zos_type; 3065 FILE *zos_test_file; 3066 #endif 3067 3068 /* If the file name is "-" we scan stdin */ 3069 3070 if (strcmp(pathname, "-") == 0) 3071 { 3072 return pcre2grep(stdin, FR_PLAIN, stdin_name, 3073 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? 3074 stdin_name : NULL); 3075 } 3076 3077 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to 3078 directories, whereas --include and --exclude apply to everything else. The test 3079 is against the final component of the path. */ 3080 3081 lastcomp = strrchr(pathname, FILESEP); 3082 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1; 3083 3084 /* If the file is a directory, skip if not recursing or if explicitly excluded. 3085 Otherwise, scan the directory and recurse for each path within it. The scanning 3086 code is localized so it can be made system-specific. */ 3087 3088 3089 /* For z/OS, determine the file type. 
*/ 3090 3091 #if defined NATIVE_ZOS 3092 zos_test_file = fopen(pathname,"rb"); 3093 3094 if (zos_test_file == NULL) 3095 { 3096 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n", 3097 pathname, strerror(errno)); 3098 return -1; 3099 } 3100 zos_type = identifyzosfiletype (zos_test_file); 3101 fclose (zos_test_file); 3102 3103 /* Handle a PDS in separate code */ 3104 3105 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE) 3106 { 3107 return travelonpdsdir (pathname, only_one_at_top); 3108 } 3109 3110 /* Deal with regular files in the normal way below. These types are: 3111 zos_type == __ZOS_PDS_MEMBER 3112 zos_type == __ZOS_PS 3113 zos_type == __ZOS_VSAM_KSDS 3114 zos_type == __ZOS_VSAM_ESDS 3115 zos_type == __ZOS_VSAM_RRDS 3116 */ 3117 3118 /* Handle a z/OS directory using common code. */ 3119 3120 else if (zos_type == __ZOS_HFS) 3121 { 3122 #endif /* NATIVE_ZOS */ 3123 3124 3125 /* Handle directories: common code for all OS */ 3126 3127 if (isdirectory(pathname)) 3128 { 3129 if (dee_action == dee_SKIP || 3130 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns)) 3131 return -1; 3132 3133 if (dee_action == dee_RECURSE) 3134 { 3135 char buffer[FNBUFSIZ]; 3136 char *nextfile; 3137 directory_type *dir = opendirectory(pathname); 3138 3139 if (dir == NULL) 3140 { 3141 if (!silent) 3142 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname, 3143 strerror(errno)); 3144 return 2; 3145 } 3146 3147 while ((nextfile = readdirectory(dir)) != NULL) 3148 { 3149 int frc; 3150 int fnlength = strlen(pathname) + strlen(nextfile) + 2; 3151 if (fnlength > FNBUFSIZ) 3152 { 3153 fprintf(stderr, "pcre2grep: recursive filename is too long\n"); 3154 rc = 2; 3155 break; 3156 } 3157 sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile); 3158 frc = grep_or_recurse(buffer, dir_recurse, FALSE); 3159 if (frc > 1) rc = frc; 3160 else if (frc == 0 && rc == 1) rc = 0; 3161 } 3162 3163 closedirectory(dir); 3164 return rc; 3165 } 3166 } 3167 
3168 #ifdef WIN32 3169 if (iswild(pathname)) 3170 { 3171 char buffer[1024]; 3172 char *nextfile; 3173 char *name; 3174 directory_type *dir = opendirectory(pathname); 3175 3176 if (dir == NULL) 3177 return 0; 3178 3179 for (nextfile = name = pathname; *nextfile != 0; nextfile++) 3180 if (*nextfile == '/' || *nextfile == '\\') 3181 name = nextfile + 1; 3182 *name = 0; 3183 3184 while ((nextfile = readdirectory(dir)) != NULL) 3185 { 3186 int frc; 3187 sprintf(buffer, "%.512s%.128s", pathname, nextfile); 3188 frc = grep_or_recurse(buffer, dir_recurse, FALSE); 3189 if (frc > 1) rc = frc; 3190 else if (frc == 0 && rc == 1) rc = 0; 3191 } 3192 3193 closedirectory(dir); 3194 return rc; 3195 } 3196 #endif 3197 3198 #if defined NATIVE_ZOS 3199 } 3200 #endif 3201 3202 /* If the file is not a directory, check for a regular file, and if it is not, 3203 skip it if that's been requested. Otherwise, check for an explicit inclusion or 3204 exclusion. */ 3205 3206 else if ( 3207 #if defined NATIVE_ZOS 3208 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) || 3209 #else /* all other OS */ 3210 (!isregfile(pathname) && DEE_action == DEE_SKIP) || 3211 #endif 3212 !test_incexc(lastcomp, include_patterns, exclude_patterns)) 3213 return -1; /* File skipped */ 3214 3215 /* Control reaches here if we have a regular file, or if we have a directory 3216 and recursion or skipping was not requested, or if we have anything else and 3217 skipping was not requested. The scan proceeds. If this is the first and only 3218 argument at top level, we don't show the file name, unless we are only showing 3219 the file name, or the filename was forced (-H). */ 3220 3221 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 3222 pathlen = (int)(strlen(pathname)); 3223 #endif 3224 3225 /* Open using zlib if it is supported and the file name ends with .gz. 
*/ 3226 3227 #ifdef SUPPORT_LIBZ 3228 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) 3229 { 3230 ingz = gzopen(pathname, "rb"); 3231 if (ingz == NULL) 3232 { 3233 if (!silent) 3234 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname, 3235 strerror(errno)); 3236 return 2; 3237 } 3238 handle = (void *)ingz; 3239 frtype = FR_LIBZ; 3240 } 3241 else 3242 #endif 3243 3244 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ 3245 3246 #ifdef SUPPORT_LIBBZ2 3247 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) 3248 { 3249 inbz2 = BZ2_bzopen(pathname, "rb"); 3250 handle = (void *)inbz2; 3251 frtype = FR_LIBBZ2; 3252 } 3253 else 3254 #endif 3255 3256 /* Otherwise use plain fopen(). The label is so that we can come back here if 3257 an attempt to read a .bz2 file indicates that it really is a plain file. */ 3258 3259 #ifdef SUPPORT_LIBBZ2 3260 PLAIN_FILE: 3261 #endif 3262 { 3263 in = fopen(pathname, "rb"); 3264 handle = (void *)in; 3265 frtype = FR_PLAIN; 3266 } 3267 3268 /* All the opening methods return errno when they fail. */ 3269 3270 if (handle == NULL) 3271 { 3272 if (!silent) 3273 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname, 3274 strerror(errno)); 3275 return 2; 3276 } 3277 3278 /* Now grep the file */ 3279 3280 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT || 3281 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); 3282 3283 /* Close in an appropriate manner. */ 3284 3285 #ifdef SUPPORT_LIBZ 3286 if (frtype == FR_LIBZ) 3287 gzclose(ingz); 3288 else 3289 #endif 3290 3291 /* If it is a .bz2 file and the result is 3, it means that the first attempt to 3292 read failed. If the error indicates that the file isn't in fact bzipped, try 3293 again as a normal file. 
*/ 3294 3295 #ifdef SUPPORT_LIBBZ2 3296 if (frtype == FR_LIBBZ2) 3297 { 3298 if (rc == 3) 3299 { 3300 int errnum; 3301 const char *err = BZ2_bzerror(inbz2, &errnum); 3302 if (errnum == BZ_DATA_ERROR_MAGIC) 3303 { 3304 BZ2_bzclose(inbz2); 3305 goto PLAIN_FILE; 3306 } 3307 else if (!silent) 3308 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n", 3309 pathname, err); 3310 rc = 2; /* The normal "something went wrong" code */ 3311 } 3312 BZ2_bzclose(inbz2); 3313 } 3314 else 3315 #endif 3316 3317 /* Normal file close */ 3318 3319 fclose(in); 3320 3321 /* Pass back the yield from pcre2grep(). */ 3322 3323 return rc; 3324 } 3325 3326 3327 3328 /************************************************* 3329 * Handle a single-letter, no data option * 3330 *************************************************/ 3331 3332 static int 3333 handle_option(int letter, int options) 3334 { 3335 switch(letter) 3336 { 3337 case N_FOFFSETS: file_offsets = TRUE; break; 3338 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */ 3339 case N_LBUFFER: line_buffered = TRUE; break; 3340 case N_LOFFSETS: line_offsets = number = TRUE; break; 3341 case N_NOJIT: use_jit = FALSE; break; 3342 case 'a': binary_files = BIN_TEXT; break; 3343 case 'c': count_only = TRUE; break; 3344 case 'F': options |= PCRE2_LITERAL; break; 3345 case 'H': filenames = FN_FORCE; break; 3346 case 'I': binary_files = BIN_NOMATCH; break; 3347 case 'h': filenames = FN_NONE; break; 3348 case 'i': options |= PCRE2_CASELESS; break; 3349 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; 3350 case 'L': filenames = FN_NOMATCH_ONLY; break; 3351 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break; 3352 case 'n': number = TRUE; break; 3353 3354 case 'o': 3355 only_matching_last = add_number(0, only_matching_last); 3356 if (only_matching == NULL) only_matching = only_matching_last; 3357 break; 3358 3359 case 'q': quiet = TRUE; break; 3360 case 'r': dee_action = 
dee_RECURSE; break; 3361 case 's': silent = TRUE; break; 3362 case 't': show_total_count = TRUE; break; 3363 case 'u': options |= PCRE2_UTF; utf = TRUE; break; 3364 case 'v': invert = TRUE; break; 3365 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break; 3366 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break; 3367 3368 case 'V': 3369 { 3370 unsigned char buffer[128]; 3371 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer); 3372 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer); 3373 } 3374 pcre2grep_exit(0); 3375 break; 3376 3377 default: 3378 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter); 3379 pcre2grep_exit(usage(2)); 3380 } 3381 3382 return options; 3383 } 3384 3385 3386 3387 /************************************************* 3388 * Construct printed ordinal * 3389 *************************************************/ 3390 3391 /* This turns a number into "1st", "3rd", etc. */ 3392 3393 static char * 3394 ordin(int n) 3395 { 3396 static char buffer[14]; 3397 char *p = buffer; 3398 sprintf(p, "%d", n); 3399 while (*p != 0) p++; 3400 n %= 100; 3401 if (n >= 11 && n <= 13) n = 0; 3402 switch (n%10) 3403 { 3404 case 1: strcpy(p, "st"); break; 3405 case 2: strcpy(p, "nd"); break; 3406 case 3: strcpy(p, "rd"); break; 3407 default: strcpy(p, "th"); break; 3408 } 3409 return buffer; 3410 } 3411 3412 3413 3414 /************************************************* 3415 * Compile a single pattern * 3416 *************************************************/ 3417 3418 /* Do nothing if the pattern has already been compiled. This is the case for 3419 include/exclude patterns read from a file. 3420 3421 When the -F option has been used, each "pattern" may be a list of strings, 3422 separated by line breaks. They will be matched literally. We split such a 3423 string and compile the first substring, inserting an additional block into the 3424 pattern chain. 
3425 3426 Arguments: 3427 p points to the pattern block 3428 options the PCRE options 3429 fromfile TRUE if the pattern was read from a file 3430 fromtext file name or identifying text (e.g. "include") 3431 count 0 if this is the only command line pattern, or 3432 number of the command line pattern, or 3433 linenumber for a pattern from a file 3434 3435 Returns: TRUE on success, FALSE after an error 3436 */ 3437 3438 static BOOL 3439 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext, 3440 int count) 3441 { 3442 char *ps; 3443 int errcode; 3444 PCRE2_SIZE patlen, erroffset; 3445 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ]; 3446 3447 if (p->compiled != NULL) return TRUE; 3448 ps = p->string; 3449 patlen = p->length; 3450 3451 if ((options & PCRE2_LITERAL) != 0) 3452 { 3453 int ellength; 3454 char *eop = ps + patlen; 3455 char *pe = end_of_line(ps, eop, &ellength); 3456 3457 if (ellength != 0) 3458 { 3459 patlen = pe - ps - ellength; 3460 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE; 3461 } 3462 } 3463 3464 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode, 3465 &erroffset, compile_context); 3466 3467 /* Handle successful compile. Try JIT-compiling if supported and enabled. We 3468 ignore any JIT compiler errors, relying falling back to interpreting if 3469 anything goes wrong with JIT. 
*/ 3470 3471 if (p->compiled != NULL) 3472 { 3473 #ifdef SUPPORT_PCRE2GREP_JIT 3474 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE); 3475 #endif 3476 return TRUE; 3477 } 3478 3479 /* Handle compile errors */ 3480 3481 if (erroffset > patlen) erroffset = patlen; 3482 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer)); 3483 3484 if (fromfile) 3485 { 3486 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s " 3487 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer); 3488 } 3489 else 3490 { 3491 if (count == 0) 3492 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n", 3493 fromtext, (int)erroffset, errmessbuffer); 3494 else 3495 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n", 3496 ordin(count), fromtext, (int)erroffset, errmessbuffer); 3497 } 3498 3499 return FALSE; 3500 } 3501 3502 3503 3504 /************************************************* 3505 * Read and compile a file of patterns * 3506 *************************************************/ 3507 3508 /* This is used for --filelist, --include-from, and --exclude-from. 
3509 3510 Arguments: 3511 name the name of the file; "-" is stdin 3512 patptr pointer to the pattern chain anchor 3513 patlastptr pointer to the last pattern pointer 3514 3515 Returns: TRUE if all went well 3516 */ 3517 3518 static BOOL 3519 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr) 3520 { 3521 int linenumber = 0; 3522 PCRE2_SIZE patlen; 3523 FILE *f; 3524 const char *filename; 3525 char buffer[MAXPATLEN+20]; 3526 3527 if (strcmp(name, "-") == 0) 3528 { 3529 f = stdin; 3530 filename = stdin_name; 3531 } 3532 else 3533 { 3534 f = fopen(name, "r"); 3535 if (f == NULL) 3536 { 3537 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno)); 3538 return FALSE; 3539 } 3540 filename = name; 3541 } 3542 3543 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0) 3544 { 3545 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; 3546 linenumber++; 3547 if (patlen == 0) continue; /* Skip blank lines */ 3548 3549 /* Note: this call to add_pattern() puts a pointer to the local variable 3550 "buffer" into the pattern chain. However, that pointer is used only when 3551 compiling the pattern, which happens immediately below, so we flatten it 3552 afterwards, as a precaution against any later code trying to use it. */ 3553 3554 *patlastptr = add_pattern(buffer, patlen, *patlastptr); 3555 if (*patlastptr == NULL) 3556 { 3557 if (f != stdin) fclose(f); 3558 return FALSE; 3559 } 3560 if (*patptr == NULL) *patptr = *patlastptr; 3561 3562 /* This loop is needed because compiling a "pattern" when -F is set may add 3563 on additional literal patterns if the original contains a newline. In the 3564 common case, it never will, because read_one_line() stops at a newline. 3565 However, the -N option can be used to give pcre2grep a different newline 3566 setting. 
*/ 3567 3568 for(;;) 3569 { 3570 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename, 3571 linenumber)) 3572 { 3573 if (f != stdin) fclose(f); 3574 return FALSE; 3575 } 3576 (*patlastptr)->string = NULL; /* Insurance */ 3577 if ((*patlastptr)->next == NULL) break; 3578 *patlastptr = (*patlastptr)->next; 3579 } 3580 } 3581 3582 if (f != stdin) fclose(f); 3583 return TRUE; 3584 } 3585 3586 3587 3588 /************************************************* 3589 * Main program * 3590 *************************************************/ 3591 3592 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ 3593 3594 int 3595 main(int argc, char **argv) 3596 { 3597 int i, j; 3598 int rc = 1; 3599 BOOL only_one_at_top; 3600 patstr *cp; 3601 fnstr *fn; 3602 const char *locale_from = "--locale"; 3603 3604 #ifdef SUPPORT_PCRE2GREP_JIT 3605 pcre2_jit_stack *jit_stack = NULL; 3606 #endif 3607 3608 /* In Windows, stdout is set up as a text stream, which means that \n is 3609 converted to \r\n. This causes output lines that are copied from the input to 3610 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure 3611 that stdout is a binary stream. Note that this means all other output to stdout 3612 must use STDOUT_NL to terminate lines. */ 3613 3614 #ifdef WIN32 3615 _setmode(_fileno(stdout), _O_BINARY); 3616 #endif 3617 3618 /* Set up a default compile and match contexts and a match data block. */ 3619 3620 compile_context = pcre2_compile_context_create(NULL); 3621 match_context = pcre2_match_context_create(NULL); 3622 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL); 3623 offsets = pcre2_get_ovector_pointer(match_data); 3624 3625 /* If string (script) callouts are supported, set up the callout processing 3626 function. 
*/ 3627 3628 #ifdef SUPPORT_PCRE2GREP_CALLOUT 3629 pcre2_set_callout(match_context, pcre2grep_callout, NULL); 3630 #endif 3631 3632 /* Process the options */ 3633 3634 for (i = 1; i < argc; i++) 3635 { 3636 option_item *op = NULL; 3637 char *option_data = (char *)""; /* default to keep compiler happy */ 3638 BOOL longop; 3639 BOOL longopwasequals = FALSE; 3640 3641 if (argv[i][0] != '-') break; 3642 3643 /* If we hit an argument that is just "-", it may be a reference to STDIN, 3644 but only if we have previously had -e or -f to define the patterns. */ 3645 3646 if (argv[i][1] == 0) 3647 { 3648 if (pattern_files != NULL || patterns != NULL) break; 3649 else pcre2grep_exit(usage(2)); 3650 } 3651 3652 /* Handle a long name option, or -- to terminate the options */ 3653 3654 if (argv[i][1] == '-') 3655 { 3656 char *arg = argv[i] + 2; 3657 char *argequals = strchr(arg, '='); 3658 3659 if (*arg == 0) /* -- terminates options */ 3660 { 3661 i++; 3662 break; /* out of the options-handling loop */ 3663 } 3664 3665 longop = TRUE; 3666 3667 /* Some long options have data that follows after =, for example file=name. 3668 Some options have variations in the long name spelling: specifically, we 3669 allow "regexp" because GNU grep allows it, though I personally go along 3670 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". 3671 These options are entered in the table as "regex(p)". Options can be in 3672 both these categories. */ 3673 3674 for (op = optionlist; op->one_char != 0; op++) 3675 { 3676 char *opbra = strchr(op->long_name, '('); 3677 char *equals = strchr(op->long_name, '='); 3678 3679 /* Handle options with only one spelling of the name */ 3680 3681 if (opbra == NULL) /* Does not contain '(' */ 3682 { 3683 if (equals == NULL) /* Not thing=data case */ 3684 { 3685 if (strcmp(arg, op->long_name) == 0) break; 3686 } 3687 else /* Special case xxx=data */ 3688 { 3689 int oplen = (int)(equals - op->long_name); 3690 int arglen = (argequals == NULL)? 
3691 (int)strlen(arg) : (int)(argequals - arg); 3692 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) 3693 { 3694 option_data = arg + arglen; 3695 if (*option_data == '=') 3696 { 3697 option_data++; 3698 longopwasequals = TRUE; 3699 } 3700 break; 3701 } 3702 } 3703 } 3704 3705 /* Handle options with an alternate spelling of the name */ 3706 3707 else 3708 { 3709 char buff1[24]; 3710 char buff2[24]; 3711 int ret; 3712 3713 int baselen = (int)(opbra - op->long_name); 3714 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1); 3715 int arglen = (argequals == NULL || equals == NULL)? 3716 (int)strlen(arg) : (int)(argequals - arg); 3717 3718 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name), 3719 ret < 0 || ret > (int)sizeof(buff1)) || 3720 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1, 3721 fulllen - baselen - 2, opbra + 1), 3722 ret < 0 || ret > (int)sizeof(buff2))) 3723 { 3724 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n", 3725 op->long_name); 3726 pcre2grep_exit(2); 3727 } 3728 3729 if (strncmp(arg, buff1, arglen) == 0 || 3730 strncmp(arg, buff2, arglen) == 0) 3731 { 3732 if (equals != NULL && argequals != NULL) 3733 { 3734 option_data = argequals; 3735 if (*option_data == '=') 3736 { 3737 option_data++; 3738 longopwasequals = TRUE; 3739 } 3740 } 3741 break; 3742 } 3743 } 3744 } 3745 3746 if (op->one_char == 0) 3747 { 3748 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]); 3749 pcre2grep_exit(usage(2)); 3750 } 3751 } 3752 3753 /* Jeffrey Friedl's debugging harness uses these additional options which 3754 are not in the right form for putting in the option table because they use 3755 only one hyphen, yet are more than one character long. By putting them 3756 separately here, they will not get displayed as part of the help() output, 3757 but I don't think Jeffrey will care about that. 
*/ 3758 3759 #ifdef JFRIEDL_DEBUG 3760 else if (strcmp(argv[i], "-pre") == 0) { 3761 jfriedl_prefix = argv[++i]; 3762 continue; 3763 } else if (strcmp(argv[i], "-post") == 0) { 3764 jfriedl_postfix = argv[++i]; 3765 continue; 3766 } else if (strcmp(argv[i], "-XT") == 0) { 3767 sscanf(argv[++i], "%d", &jfriedl_XT); 3768 continue; 3769 } else if (strcmp(argv[i], "-XR") == 0) { 3770 sscanf(argv[++i], "%d", &jfriedl_XR); 3771 continue; 3772 } 3773 #endif 3774 3775 3776 /* One-char options; many that have no data may be in a single argument; we 3777 continue till we hit the last one or one that needs data. */ 3778 3779 else 3780 { 3781 char *s = argv[i] + 1; 3782 longop = FALSE; 3783 3784 while (*s != 0) 3785 { 3786 for (op = optionlist; op->one_char != 0; op++) 3787 { 3788 if (*s == op->one_char) break; 3789 } 3790 if (op->one_char == 0) 3791 { 3792 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n", 3793 *s, argv[i]); 3794 pcre2grep_exit(usage(2)); 3795 } 3796 3797 option_data = s+1; 3798 3799 /* Break out if this is the last character in the string; it's handled 3800 below like a single multi-char option. */ 3801 3802 if (*option_data == 0) break; 3803 3804 /* Check for a single-character option that has data: OP_OP_NUMBER(S) 3805 are used for ones that either have a numerical number or defaults, i.e. 3806 the data is optional. If a digit follows, there is data; if not, carry on 3807 with other single-character options in the same string. */ 3808 3809 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS) 3810 { 3811 if (isdigit((unsigned char)s[1])) break; 3812 } 3813 else /* Check for an option with data */ 3814 { 3815 if (op->type != OP_NODATA) break; 3816 } 3817 3818 /* Handle a single-character option with no data, then loop for the 3819 next character in the string. */ 3820 3821 pcre2_options = handle_option(*s++, pcre2_options); 3822 } 3823 } 3824 3825 /* At this point we should have op pointing to a matched option. 
If the type 3826 is NO_DATA, it means that there is no data, and the option might set 3827 something in the PCRE options. */ 3828 3829 if (op->type == OP_NODATA) 3830 { 3831 pcre2_options = handle_option(op->one_char, pcre2_options); 3832 continue; 3833 } 3834 3835 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that 3836 either has a value or defaults to something. It cannot have data in a 3837 separate item. At the moment, the only such options are "colo(u)r", 3838 "only-matching", and Jeffrey Friedl's special -S debugging option. */ 3839 3840 if (*option_data == 0 && 3841 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER || 3842 op->type == OP_OP_NUMBERS)) 3843 { 3844 switch (op->one_char) 3845 { 3846 case N_COLOUR: 3847 colour_option = "auto"; 3848 break; 3849 3850 case 'o': 3851 only_matching_last = add_number(0, only_matching_last); 3852 if (only_matching == NULL) only_matching = only_matching_last; 3853 break; 3854 3855 #ifdef JFRIEDL_DEBUG 3856 case 'S': 3857 S_arg = 0; 3858 break; 3859 #endif 3860 } 3861 continue; 3862 } 3863 3864 /* Otherwise, find the data string for the option. */ 3865 3866 if (*option_data == 0) 3867 { 3868 if (i >= argc - 1 || longopwasequals) 3869 { 3870 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]); 3871 pcre2grep_exit(usage(2)); 3872 } 3873 option_data = argv[++i]; 3874 } 3875 3876 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be 3877 added to a chain of numbers. */ 3878 3879 if (op->type == OP_OP_NUMBERS) 3880 { 3881 unsigned long int n = decode_number(option_data, op, longop); 3882 omdatastr *omd = (omdatastr *)op->dataptr; 3883 *(omd->lastptr) = add_number((int)n, *(omd->lastptr)); 3884 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr); 3885 } 3886 3887 /* If the option type is OP_PATLIST, it's the -e option, or one of the 3888 include/exclude options, which can be called multiple times to create lists 3889 of patterns. 
*/ 3890 3891 else if (op->type == OP_PATLIST) 3892 { 3893 patdatastr *pd = (patdatastr *)op->dataptr; 3894 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data), 3895 *(pd->lastptr)); 3896 if (*(pd->lastptr) == NULL) goto EXIT2; 3897 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr); 3898 } 3899 3900 /* If the option type is OP_FILELIST, it's one of the options that names a 3901 file. */ 3902 3903 else if (op->type == OP_FILELIST) 3904 { 3905 fndatastr *fd = (fndatastr *)op->dataptr; 3906 fn = (fnstr *)malloc(sizeof(fnstr)); 3907 if (fn == NULL) 3908 { 3909 fprintf(stderr, "pcre2grep: malloc failed\n"); 3910 goto EXIT2; 3911 } 3912 fn->next = NULL; 3913 fn->name = option_data; 3914 if (*(fd->anchor) == NULL) 3915 *(fd->anchor) = fn; 3916 else 3917 (*(fd->lastptr))->next = fn; 3918 *(fd->lastptr) = fn; 3919 } 3920 3921 /* Handle OP_BINARY_FILES */ 3922 3923 else if (op->type == OP_BINFILES) 3924 { 3925 if (strcmp(option_data, "binary") == 0) 3926 binary_files = BIN_BINARY; 3927 else if (strcmp(option_data, "without-match") == 0) 3928 binary_files = BIN_NOMATCH; 3929 else if (strcmp(option_data, "text") == 0) 3930 binary_files = BIN_TEXT; 3931 else 3932 { 3933 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n", 3934 option_data); 3935 pcre2grep_exit(usage(2)); 3936 } 3937 } 3938 3939 /* Otherwise, deal with a single string or numeric data value. */ 3940 3941 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER && 3942 op->type != OP_OP_NUMBER && op->type != OP_SIZE) 3943 { 3944 *((char **)op->dataptr) = option_data; 3945 } 3946 else 3947 { 3948 unsigned long int n = decode_number(option_data, op, longop); 3949 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n; 3950 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n; 3951 else *((int *)op->dataptr) = n; 3952 } 3953 } 3954 3955 /* Options have been decoded. If -C was used, its value is used as a default 3956 for -A and -B. 
*/ 3957 3958 if (both_context > 0) 3959 { 3960 if (after_context == 0) after_context = both_context; 3961 if (before_context == 0) before_context = both_context; 3962 } 3963 3964 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is 3965 permitted. They display, each in their own way, only the data that has matched. 3966 */ 3967 3968 only_matching_count = (only_matching != NULL) + (output_text != NULL) + 3969 file_offsets + line_offsets; 3970 3971 if (only_matching_count > 1) 3972 { 3973 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, " 3974 "--file-offsets and/or --line-offsets\n"); 3975 pcre2grep_exit(usage(2)); 3976 } 3977 3978 /* Check the text supplied to --output for errors. */ 3979 3980 if (output_text != NULL && 3981 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE)) 3982 goto EXIT2; 3983 3984 /* Put limits into the match data block. */ 3985 3986 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit); 3987 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); 3988 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit); 3989 3990 /* If a locale has not been provided as an option, see if the LC_CTYPE or 3991 LC_ALL environment variable is set, and if so, use it. */ 3992 3993 if (locale == NULL) 3994 { 3995 locale = getenv("LC_ALL"); 3996 locale_from = "LC_ALL"; 3997 } 3998 3999 if (locale == NULL) 4000 { 4001 locale = getenv("LC_CTYPE"); 4002 locale_from = "LC_CTYPE"; 4003 } 4004 4005 /* If a locale is set, use it to generate the tables the PCRE needs. Passing 4006 NULL to pcre2_maketables() means that malloc() is used to get the memory. 
*/ 4007 4008 if (locale != NULL) 4009 { 4010 if (setlocale(LC_CTYPE, locale) == NULL) 4011 { 4012 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n", 4013 locale, locale_from); 4014 goto EXIT2; 4015 } 4016 character_tables = pcre2_maketables(NULL); 4017 pcre2_set_character_tables(compile_context, character_tables); 4018 } 4019 4020 /* Sort out colouring */ 4021 4022 if (colour_option != NULL && strcmp(colour_option, "never") != 0) 4023 { 4024 if (strcmp(colour_option, "always") == 0) 4025 #ifdef WIN32 4026 do_ansi = !is_stdout_tty(), 4027 #endif 4028 do_colour = TRUE; 4029 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); 4030 else 4031 { 4032 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n", 4033 colour_option); 4034 goto EXIT2; 4035 } 4036 if (do_colour) 4037 { 4038 char *cs = getenv("PCRE2GREP_COLOUR"); 4039 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR"); 4040 if (cs == NULL) cs = getenv("PCREGREP_COLOUR"); 4041 if (cs == NULL) cs = getenv("PCREGREP_COLOR"); 4042 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS")); 4043 if (cs == NULL) cs = getenv("GREP_COLOR"); 4044 if (cs != NULL) 4045 { 4046 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs; 4047 } 4048 #ifdef WIN32 4049 init_colour_output(); 4050 #endif 4051 } 4052 } 4053 4054 /* Sort out a newline setting. 
*/ 4055 4056 if (newline_arg != NULL) 4057 { 4058 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *)); 4059 endlinetype++) 4060 { 4061 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break; 4062 } 4063 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *))) 4064 pcre2_set_newline(compile_context, endlinetype); 4065 else 4066 { 4067 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n", 4068 newline_arg); 4069 goto EXIT2; 4070 } 4071 } 4072 4073 /* Find default newline convention */ 4074 4075 else 4076 { 4077 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype); 4078 } 4079 4080 /* Interpret the text values for -d and -D */ 4081 4082 if (dee_option != NULL) 4083 { 4084 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; 4085 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; 4086 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; 4087 else 4088 { 4089 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option); 4090 goto EXIT2; 4091 } 4092 } 4093 4094 if (DEE_option != NULL) 4095 { 4096 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; 4097 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; 4098 else 4099 { 4100 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option); 4101 goto EXIT2; 4102 } 4103 } 4104 4105 /* Set the extra options */ 4106 4107 (void)pcre2_set_compile_extra_options(compile_context, extra_options); 4108 4109 /* Check the values for Jeffrey Friedl's debugging options. */ 4110 4111 #ifdef JFRIEDL_DEBUG 4112 if (S_arg > 9) 4113 { 4114 fprintf(stderr, "pcre2grep: bad value for -S option\n"); 4115 return 2; 4116 } 4117 if (jfriedl_XT != 0 || jfriedl_XR != 0) 4118 { 4119 if (jfriedl_XT == 0) jfriedl_XT = 1; 4120 if (jfriedl_XR == 0) jfriedl_XR = 1; 4121 } 4122 #endif 4123 4124 /* If use_jit is set, check whether JIT is available. If not, do not try 4125 to use JIT. 
*/ 4126 4127 if (use_jit) 4128 { 4129 uint32_t answer; 4130 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer); 4131 if (!answer) use_jit = FALSE; 4132 } 4133 4134 /* Get memory for the main buffer. */ 4135 4136 if (bufthird <= 0) 4137 { 4138 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n"); 4139 goto EXIT2; 4140 } 4141 4142 bufsize = 3*bufthird; 4143 main_buffer = (char *)malloc(bufsize); 4144 4145 if (main_buffer == NULL) 4146 { 4147 fprintf(stderr, "pcre2grep: malloc failed\n"); 4148 goto EXIT2; 4149 } 4150 4151 /* If no patterns were provided by -e, and there are no files provided by -f, 4152 the first argument is the one and only pattern, and it must exist. */ 4153 4154 if (patterns == NULL && pattern_files == NULL) 4155 { 4156 if (i >= argc) return usage(2); 4157 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]), 4158 NULL); 4159 i++; 4160 if (patterns == NULL) goto EXIT2; 4161 } 4162 4163 /* Compile the patterns that were provided on the command line, either by 4164 multiple uses of -e or as a single unkeyed pattern. We cannot do this until 4165 after all the command-line options are read so that we know which PCRE options 4166 to use. When -F is used, compile_pattern() may add another block into the 4167 chain, so we must not access the next pointer till after the compile. */ 4168 4169 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) 4170 { 4171 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line", 4172 (j == 1 && patterns->next == NULL)? 0 : j)) 4173 goto EXIT2; 4174 } 4175 4176 /* Read and compile the regular expressions that are provided in files. */ 4177 4178 for (fn = pattern_files; fn != NULL; fn = fn->next) 4179 { 4180 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2; 4181 } 4182 4183 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. 
*/ 4184 4185 #ifdef SUPPORT_PCRE2GREP_JIT 4186 if (use_jit) 4187 { 4188 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL); 4189 if (jit_stack != NULL ) 4190 pcre2_jit_stack_assign(match_context, NULL, jit_stack); 4191 } 4192 #endif 4193 4194 /* -F, -w, and -x do not apply to include or exclude patterns, so we must 4195 adjust the options. */ 4196 4197 pcre2_options &= ~PCRE2_LITERAL; 4198 (void)pcre2_set_compile_extra_options(compile_context, 0); 4199 4200 /* If there are include or exclude patterns read from the command line, compile 4201 them. */ 4202 4203 for (j = 0; j < 4; j++) 4204 { 4205 int k; 4206 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next) 4207 { 4208 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j], 4209 (k == 1 && cp->next == NULL)? 0 : k)) 4210 goto EXIT2; 4211 } 4212 } 4213 4214 /* Read and compile include/exclude patterns from files. */ 4215 4216 for (fn = include_from; fn != NULL; fn = fn->next) 4217 { 4218 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last)) 4219 goto EXIT2; 4220 } 4221 4222 for (fn = exclude_from; fn != NULL; fn = fn->next) 4223 { 4224 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last)) 4225 goto EXIT2; 4226 } 4227 4228 /* If there are no files that contain lists of files to search, and there are 4229 no file arguments, search stdin, and then exit. */ 4230 4231 if (file_lists == NULL && i >= argc) 4232 { 4233 rc = pcre2grep(stdin, FR_PLAIN, stdin_name, 4234 (filenames > FN_DEFAULT)? stdin_name : NULL); 4235 goto EXIT; 4236 } 4237 4238 /* If any files that contains a list of files to search have been specified, 4239 read them line by line and search the given files. 
*/ 4240 4241 for (fn = file_lists; fn != NULL; fn = fn->next) 4242 { 4243 char buffer[FNBUFSIZ]; 4244 FILE *fl; 4245 if (strcmp(fn->name, "-") == 0) fl = stdin; else 4246 { 4247 fl = fopen(fn->name, "rb"); 4248 if (fl == NULL) 4249 { 4250 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name, 4251 strerror(errno)); 4252 goto EXIT2; 4253 } 4254 } 4255 while (fgets(buffer, sizeof(buffer), fl) != NULL) 4256 { 4257 int frc; 4258 char *end = buffer + (int)strlen(buffer); 4259 while (end > buffer && isspace(end[-1])) end--; 4260 *end = 0; 4261 if (*buffer != 0) 4262 { 4263 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE); 4264 if (frc > 1) rc = frc; 4265 else if (frc == 0 && rc == 1) rc = 0; 4266 } 4267 } 4268 if (fl != stdin) fclose(fl); 4269 } 4270 4271 /* After handling file-list, work through remaining arguments. Pass in the fact 4272 that there is only one argument at top level - this suppresses the file name if 4273 the argument is not a directory and filenames are not otherwise forced. */ 4274 4275 only_one_at_top = i == argc - 1 && file_lists == NULL; 4276 4277 for (; i < argc; i++) 4278 { 4279 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, 4280 only_one_at_top); 4281 if (frc > 1) rc = frc; 4282 else if (frc == 0 && rc == 1) rc = 0; 4283 } 4284 4285 #ifdef SUPPORT_PCRE2GREP_CALLOUT 4286 /* If separating builtin echo callouts by implicit newline, add one more for 4287 the final item. */ 4288 4289 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0) 4290 fprintf(stdout, STDOUT_NL); 4291 #endif 4292 4293 /* Show the total number of matches if requested, but not if only one file's 4294 count was printed. 
*/ 4295 4296 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY) 4297 { 4298 if (counts_printed != 0 && filenames >= FN_DEFAULT) 4299 fprintf(stdout, "TOTAL:"); 4300 fprintf(stdout, "%lu" STDOUT_NL, total_count); 4301 } 4302 4303 EXIT: 4304 #ifdef SUPPORT_PCRE2GREP_JIT 4305 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack); 4306 #endif 4307 4308 free(main_buffer); 4309 free((void *)character_tables); 4310 4311 pcre2_compile_context_free(compile_context); 4312 pcre2_match_context_free(match_context); 4313 pcre2_match_data_free(match_data); 4314 4315 free_pattern_chain(patterns); 4316 free_pattern_chain(include_patterns); 4317 free_pattern_chain(include_dir_patterns); 4318 free_pattern_chain(exclude_patterns); 4319 free_pattern_chain(exclude_dir_patterns); 4320 4321 free_file_chain(exclude_from); 4322 free_file_chain(include_from); 4323 free_file_chain(pattern_files); 4324 free_file_chain(file_lists); 4325 4326 while (only_matching != NULL) 4327 { 4328 omstr *this = only_matching; 4329 only_matching = this->next; 4330 free(this); 4331 } 4332 4333 pcre2grep_exit(rc); 4334 4335 EXIT2: 4336 rc = 2; 4337 goto EXIT; 4338 } 4339 4340 /* End of pcre2grep */ 4341