1 /************************************************* 2 * pcre2grep program * 3 *************************************************/ 4 5 /* This is a grep program that uses the 8-bit PCRE regular expression library 6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows, 7 and native z/OS systems it can recurse into directories, and in z/OS it can 8 handle PDS files. 9 10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an 11 additional header is required. That header is not included in the main PCRE2 12 distribution because other apparatus is needed to compile pcre2grep for z/OS. 13 The header can be found in the special z/OS distribution, which is available 14 from www.zaconsultants.net or from www.cbttape.org. 15 16 Copyright (c) 1997-2016 University of Cambridge 17 18 ----------------------------------------------------------------------------- 19 Redistribution and use in source and binary forms, with or without 20 modification, are permitted provided that the following conditions are met: 21 22 * Redistributions of source code must retain the above copyright notice, 23 this list of conditions and the following disclaimer. 24 25 * Redistributions in binary form must reproduce the above copyright 26 notice, this list of conditions and the following disclaimer in the 27 documentation and/or other materials provided with the distribution. 28 29 * Neither the name of the University of Cambridge nor the names of its 30 contributors may be used to endorse or promote products derived from 31 this software without specific prior written permission. 32 33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 36 ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 43 POSSIBILITY OF SUCH DAMAGE. 44 ----------------------------------------------------------------------------- 45 */ 46 47 #ifdef HAVE_CONFIG_H 48 #include "config.h" 49 #endif 50 51 #include <ctype.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <string.h> 55 #include <stdlib.h> 56 #include <errno.h> 57 58 #include <sys/types.h> 59 #include <sys/stat.h> 60 61 #if defined(_WIN32) || defined(WIN32) 62 #include <io.h> /* For _setmode() */ 63 #include <fcntl.h> /* For _O_BINARY */ 64 #endif 65 66 #ifdef SUPPORT_PCRE2GREP_CALLOUT 67 #include <sys/wait.h> 68 #endif 69 70 #ifdef HAVE_UNISTD_H 71 #include <unistd.h> 72 #endif 73 74 #ifdef SUPPORT_LIBZ 75 #include <zlib.h> 76 #endif 77 78 #ifdef SUPPORT_LIBBZ2 79 #include <bzlib.h> 80 #endif 81 82 #define PCRE2_CODE_UNIT_WIDTH 8 83 #include "pcre2.h" 84 85 #define FALSE 0 86 #define TRUE 1 87 88 typedef int BOOL; 89 90 #define OFFSET_SIZE 33 91 92 #if BUFSIZ > 8192 93 #define MAXPATLEN BUFSIZ 94 #else 95 #define MAXPATLEN 8192 96 #endif 97 98 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */ 99 100 /* Values for the "filenames" variable, which specifies options for file name 101 output. The order is important; it is assumed that a file name is wanted for 102 all values greater than FN_DEFAULT. 
*/ 103 104 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; 105 106 /* File reading styles */ 107 108 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 }; 109 110 /* Actions for the -d and -D options */ 111 112 enum { dee_READ, dee_SKIP, dee_RECURSE }; 113 enum { DEE_READ, DEE_SKIP }; 114 115 /* Actions for special processing options (flag bits) */ 116 117 #define PO_WORD_MATCH 0x0001 118 #define PO_LINE_MATCH 0x0002 119 #define PO_FIXED_STRINGS 0x0004 120 121 /* Binary file options */ 122 123 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT }; 124 125 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some 126 environments), a warning is issued if the value of fwrite() is ignored. 127 Unfortunately, casting to (void) does not suppress the warning. To get round 128 this, we use a macro that compiles a fudge. Oddly, this does not also seem to 129 apply to fprintf(). */ 130 131 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {} 132 133 /* Under Windows, we have to set stdout to be binary, so that it does not 134 convert \r\n at the ends of output lines to \r\r\n. However, that means that 135 any messages written to stdout must have \r\n as their line terminator. This is 136 handled by using STDOUT_NL as the newline string. */ 137 138 #if defined(_WIN32) || defined(WIN32) 139 #define STDOUT_NL "\r\n" 140 #else 141 #define STDOUT_NL "\n" 142 #endif 143 144 145 146 /************************************************* 147 * Global variables * 148 *************************************************/ 149 150 /* Jeffrey Friedl has some debugging requirements that are not part of the 151 regular code. 
*/ 152 153 #ifdef JFRIEDL_DEBUG 154 static int S_arg = -1; 155 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ 156 static unsigned int jfriedl_XT = 0; /* replicate text this many times */ 157 static const char *jfriedl_prefix = ""; 158 static const char *jfriedl_postfix = ""; 159 #endif 160 161 static char *colour_string = (char *)"1;31"; 162 static char *colour_option = NULL; 163 static char *dee_option = NULL; 164 static char *DEE_option = NULL; 165 static char *locale = NULL; 166 static char *main_buffer = NULL; 167 static char *newline_arg = NULL; 168 static char *om_separator = (char *)""; 169 static char *stdin_name = (char *)"(standard input)"; 170 171 static int after_context = 0; 172 static int before_context = 0; 173 static int binary_files = BIN_BINARY; 174 static int both_context = 0; 175 static int bufthird = PCRE2GREP_BUFSIZE; 176 static int bufsize = 3*PCRE2GREP_BUFSIZE; 177 static int endlinetype; 178 179 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H 180 static int dee_action = dee_SKIP; 181 #else 182 static int dee_action = dee_READ; 183 #endif 184 static int DEE_action = DEE_READ; 185 static int error_count = 0; 186 static int filenames = FN_DEFAULT; 187 188 #ifdef SUPPORT_PCRE2GREP_JIT 189 static BOOL use_jit = TRUE; 190 #else 191 static BOOL use_jit = FALSE; 192 #endif 193 194 static const uint8_t *character_tables = NULL; 195 196 static uint32_t pcre2_options = 0; 197 static uint32_t process_options = 0; 198 static uint32_t match_limit = 0; 199 static uint32_t recursion_limit = 0; 200 201 static pcre2_compile_context *compile_context; 202 static pcre2_match_context *match_context; 203 static pcre2_match_data *match_data; 204 static PCRE2_SIZE *offsets; 205 206 static BOOL count_only = FALSE; 207 static BOOL do_colour = FALSE; 208 static BOOL file_offsets = FALSE; 209 static BOOL hyphenpending = FALSE; 210 static BOOL invert = FALSE; 211 static BOOL line_buffered = FALSE; 212 static BOOL line_offsets = FALSE; 213 
static BOOL multiline = FALSE; 214 static BOOL number = FALSE; 215 static BOOL omit_zero_count = FALSE; 216 static BOOL resource_error = FALSE; 217 static BOOL quiet = FALSE; 218 static BOOL show_only_matching = FALSE; 219 static BOOL silent = FALSE; 220 static BOOL utf = FALSE; 221 222 /* Structure for list of --only-matching capturing numbers. */ 223 224 typedef struct omstr { 225 struct omstr *next; 226 int groupnum; 227 } omstr; 228 229 static omstr *only_matching = NULL; 230 static omstr *only_matching_last = NULL; 231 232 /* Structure for holding the two variables that describe a number chain. */ 233 234 typedef struct omdatastr { 235 omstr **anchor; 236 omstr **lastptr; 237 } omdatastr; 238 239 static omdatastr only_matching_data = { &only_matching, &only_matching_last }; 240 241 /* Structure for list of file names (for -f and --{in,ex}clude-from) */ 242 243 typedef struct fnstr { 244 struct fnstr *next; 245 char *name; 246 } fnstr; 247 248 static fnstr *exclude_from = NULL; 249 static fnstr *exclude_from_last = NULL; 250 static fnstr *include_from = NULL; 251 static fnstr *include_from_last = NULL; 252 253 static fnstr *file_lists = NULL; 254 static fnstr *file_lists_last = NULL; 255 static fnstr *pattern_files = NULL; 256 static fnstr *pattern_files_last = NULL; 257 258 /* Structure for holding the two variables that describe a file name chain. */ 259 260 typedef struct fndatastr { 261 fnstr **anchor; 262 fnstr **lastptr; 263 } fndatastr; 264 265 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last }; 266 static fndatastr include_from_data = { &include_from, &include_from_last }; 267 static fndatastr file_lists_data = { &file_lists, &file_lists_last }; 268 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last }; 269 270 /* Structure for pattern and its compiled form; used for matching patterns and 271 also for include/exclude patterns. 
*/ 272 273 typedef struct patstr { 274 struct patstr *next; 275 char *string; 276 pcre2_code *compiled; 277 } patstr; 278 279 static patstr *patterns = NULL; 280 static patstr *patterns_last = NULL; 281 static patstr *include_patterns = NULL; 282 static patstr *include_patterns_last = NULL; 283 static patstr *exclude_patterns = NULL; 284 static patstr *exclude_patterns_last = NULL; 285 static patstr *include_dir_patterns = NULL; 286 static patstr *include_dir_patterns_last = NULL; 287 static patstr *exclude_dir_patterns = NULL; 288 static patstr *exclude_dir_patterns_last = NULL; 289 290 /* Structure holding the two variables that describe a pattern chain. A pointer 291 to such structures is used for each appropriate option. */ 292 293 typedef struct patdatastr { 294 patstr **anchor; 295 patstr **lastptr; 296 } patdatastr; 297 298 static patdatastr match_patdata = { &patterns, &patterns_last }; 299 static patdatastr include_patdata = { &include_patterns, &include_patterns_last }; 300 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last }; 301 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last }; 302 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last }; 303 304 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns, 305 &include_dir_patterns, &exclude_dir_patterns }; 306 307 static const char *incexname[4] = { "--include", "--exclude", 308 "--include-dir", "--exclude-dir" }; 309 310 /* Structure for options and list of them */ 311 312 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, 313 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES }; 314 315 typedef struct option_item { 316 int type; 317 int one_char; 318 void *dataptr; 319 const char *long_name; 320 const char *help_text; 321 } option_item; 322 323 /* Options without a single-letter equivalent get a negative value. This can be 324 used to identify them. 
*/ 325 326 #define N_COLOUR (-1) 327 #define N_EXCLUDE (-2) 328 #define N_EXCLUDE_DIR (-3) 329 #define N_HELP (-4) 330 #define N_INCLUDE (-5) 331 #define N_INCLUDE_DIR (-6) 332 #define N_LABEL (-7) 333 #define N_LOCALE (-8) 334 #define N_NULL (-9) 335 #define N_LOFFSETS (-10) 336 #define N_FOFFSETS (-11) 337 #define N_LBUFFER (-12) 338 #define N_M_LIMIT (-13) 339 #define N_M_LIMIT_REC (-14) 340 #define N_BUFSIZE (-15) 341 #define N_NOJIT (-16) 342 #define N_FILE_LIST (-17) 343 #define N_BINARY_FILES (-18) 344 #define N_EXCLUDE_FROM (-19) 345 #define N_INCLUDE_FROM (-20) 346 #define N_OM_SEPARATOR (-21) 347 348 static option_item optionlist[] = { 349 { OP_NODATA, N_NULL, NULL, "", "terminate options" }, 350 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, 351 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, 352 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" }, 353 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, 354 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" }, 355 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" }, 356 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, 357 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, 358 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, 359 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, 360 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, 361 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" }, 362 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" }, 363 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets 
of newline-separated strings" }, 364 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" }, 365 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" }, 366 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, 367 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, 368 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, 369 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" }, 370 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, 371 #ifdef SUPPORT_PCRE2GREP_JIT 372 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" }, 373 #else 374 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" }, 375 #endif 376 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, 377 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, 378 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, 379 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, 380 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, 381 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, 382 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" }, 383 { OP_U32NUMBER, N_M_LIMIT_REC, &recursion_limit, "recursion-limit=number", "set PCRE match recursion limit option" }, 384 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, 385 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, 386 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, 387 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that 
matched" }, 388 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, 389 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, 390 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, 391 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" }, 392 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" }, 393 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" }, 394 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" }, 395 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" }, 396 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" }, 397 #ifdef JFRIEDL_DEBUG 398 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, 399 #endif 400 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, 401 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" }, 402 { OP_NODATA, 'V', NULL, "version", "print version information and exit" }, 403 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, 404 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, 405 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, 406 { OP_NODATA, 0, NULL, NULL, NULL } 407 }; 408 409 /* Table of names for newline types. Must be kept in step with the definitions 410 of PCRE2_NEWLINE_xx in pcre2.h. */ 411 412 static const char *newlines[] = { 413 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" }; 414 415 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F 416 options. These set the 1, 2, and 4 bits in process_options, respectively. 
Note 417 that the combination of -w and -x has the same effect as -x on its own, so we 418 can treat them as the same. Note that the MAXPATLEN macro assumes the longest 419 prefix+suffix is 10 characters; if anything longer is added, it must be 420 adjusted. */ 421 422 static const char *prefix[] = { 423 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" }; 424 425 static const char *suffix[] = { 426 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; 427 428 /* UTF-8 tables - used only when the newline setting is "any". */ 429 430 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; 431 432 const char utf8_table4[] = { 433 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 434 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 435 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 436 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; 437 438 439 440 /************************************************* 441 * Case-independent string compare * 442 *************************************************/ 443 444 static int 445 strcmpic(const char *str1, const char *str2) 446 { 447 unsigned int c1, c2; 448 while (*str1 != '\0' || *str2 != '\0') 449 { 450 c1 = tolower(*str1++); 451 c2 = tolower(*str2++); 452 if (c1 != c2) return ((c1 > c2) << 1) - 1; 453 } 454 return 0; 455 } 456 457 458 459 /************************************************* 460 * Exit from the program * 461 *************************************************/ 462 463 /* If there has been a resource error, give a suitable message. 
464 465 Argument: the return code 466 Returns: does not return 467 */ 468 469 static void 470 pcre2grep_exit(int rc) 471 { 472 if (resource_error) 473 { 474 fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit " 475 "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, 476 PCRE2_ERROR_RECURSIONLIMIT); 477 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n"); 478 } 479 exit(rc); 480 } 481 482 483 /************************************************* 484 * Add item to chain of patterns * 485 *************************************************/ 486 487 /* Used to add an item onto a chain, or just return an unconnected item if the 488 "after" argument is NULL. 489 490 Arguments: 491 s pattern string to add 492 after if not NULL points to item to insert after 493 494 Returns: new pattern block or NULL on error 495 */ 496 497 static patstr * 498 add_pattern(char *s, patstr *after) 499 { 500 patstr *p = (patstr *)malloc(sizeof(patstr)); 501 if (p == NULL) 502 { 503 fprintf(stderr, "pcre2grep: malloc failed\n"); 504 pcre2grep_exit(2); 505 } 506 if (strlen(s) > MAXPATLEN) 507 { 508 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n", 509 MAXPATLEN); 510 free(p); 511 return NULL; 512 } 513 p->next = NULL; 514 p->string = s; 515 p->compiled = NULL; 516 517 if (after != NULL) 518 { 519 p->next = after->next; 520 after->next = p; 521 } 522 return p; 523 } 524 525 526 /************************************************* 527 * Free chain of patterns * 528 *************************************************/ 529 530 /* Used for several chains of patterns. 
531 532 Argument: pointer to start of chain 533 Returns: nothing 534 */ 535 536 static void 537 free_pattern_chain(patstr *pc) 538 { 539 while (pc != NULL) 540 { 541 patstr *p = pc; 542 pc = p->next; 543 if (p->compiled != NULL) pcre2_code_free(p->compiled); 544 free(p); 545 } 546 } 547 548 549 /************************************************* 550 * Free chain of file names * 551 *************************************************/ 552 553 /* 554 Argument: pointer to start of chain 555 Returns: nothing 556 */ 557 558 static void 559 free_file_chain(fnstr *fn) 560 { 561 while (fn != NULL) 562 { 563 fnstr *f = fn; 564 fn = f->next; 565 free(f); 566 } 567 } 568 569 570 /************************************************* 571 * OS-specific functions * 572 *************************************************/ 573 574 /* These functions are defined so that they can be made system specific. 575 At present there are versions for Unix-style environments, Windows, native 576 z/OS, and "no support". */ 577 578 579 /************* Directory scanning Unix-style and z/OS ***********/ 580 581 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS 582 #include <sys/types.h> 583 #include <sys/stat.h> 584 #include <dirent.h> 585 586 #if defined NATIVE_ZOS 587 /************* Directory and PDS/E scanning for z/OS ***********/ 588 /************* z/OS looks mostly like Unix with USS ************/ 589 /* However, z/OS needs the #include statements in this header */ 590 #include "pcrzosfs.h" 591 /* That header is not included in the main PCRE distribution because 592 other apparatus is needed to compile pcre2grep for z/OS. The header 593 can be found in the special z/OS distribution, which is available 594 from www.zaconsultants.net or from www.cbttape.org. 
*/ 595 #endif 596 597 typedef DIR directory_type; 598 #define FILESEP '/' 599 600 static int 601 isdirectory(char *filename) 602 { 603 struct stat statbuf; 604 if (stat(filename, &statbuf) < 0) 605 return 0; /* In the expectation that opening as a file will fail */ 606 return S_ISDIR(statbuf.st_mode); 607 } 608 609 static directory_type * 610 opendirectory(char *filename) 611 { 612 return opendir(filename); 613 } 614 615 static char * 616 readdirectory(directory_type *dir) 617 { 618 for (;;) 619 { 620 struct dirent *dent = readdir(dir); 621 if (dent == NULL) return NULL; 622 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) 623 return dent->d_name; 624 } 625 /* Control never reaches here */ 626 } 627 628 static void 629 closedirectory(directory_type *dir) 630 { 631 closedir(dir); 632 } 633 634 635 /************* Test for regular file, Unix-style **********/ 636 637 static int 638 isregfile(char *filename) 639 { 640 struct stat statbuf; 641 if (stat(filename, &statbuf) < 0) 642 return 1; /* In the expectation that opening as a file will fail */ 643 return S_ISREG(statbuf.st_mode); 644 } 645 646 647 #if defined NATIVE_ZOS 648 /************* Test for a terminal in z/OS **********/ 649 /* isatty() does not work in a TSO environment, so always give FALSE.*/ 650 651 static BOOL 652 is_stdout_tty(void) 653 { 654 return FALSE; 655 } 656 657 static BOOL 658 is_file_tty(FILE *f) 659 { 660 return FALSE; 661 } 662 663 664 /************* Test for a terminal, Unix-style **********/ 665 666 #else 667 static BOOL 668 is_stdout_tty(void) 669 { 670 return isatty(fileno(stdout)); 671 } 672 673 static BOOL 674 is_file_tty(FILE *f) 675 { 676 return isatty(fileno(f)); 677 } 678 #endif 679 680 /* End of Unix-style or native z/OS environment functions. */ 681 682 683 /************* Directory scanning in Windows ***********/ 684 685 /* I (Philip Hazel) have no means of testing this code. It was contributed by 686 Lionel Fourquaux. 
David Burgess added a patch to define INVALID_FILE_ATTRIBUTES 687 when it did not exist. David Byron added a patch that moved the #include of 688 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. 689 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is 690 undefined when it is indeed undefined. */ 691 692 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H 693 694 #ifndef STRICT 695 # define STRICT 696 #endif 697 #ifndef WIN32_LEAN_AND_MEAN 698 # define WIN32_LEAN_AND_MEAN 699 #endif 700 701 #include <windows.h> 702 703 #ifndef INVALID_FILE_ATTRIBUTES 704 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF 705 #endif 706 707 typedef struct directory_type 708 { 709 HANDLE handle; 710 BOOL first; 711 WIN32_FIND_DATA data; 712 } directory_type; 713 714 #define FILESEP '/' 715 716 int 717 isdirectory(char *filename) 718 { 719 DWORD attr = GetFileAttributes(filename); 720 if (attr == INVALID_FILE_ATTRIBUTES) 721 return 0; 722 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0; 723 } 724 725 directory_type * 726 opendirectory(char *filename) 727 { 728 size_t len; 729 char *pattern; 730 directory_type *dir; 731 DWORD err; 732 len = strlen(filename); 733 pattern = (char *)malloc(len + 3); 734 dir = (directory_type *)malloc(sizeof(*dir)); 735 if ((pattern == NULL) || (dir == NULL)) 736 { 737 fprintf(stderr, "pcre2grep: malloc failed\n"); 738 pcre2grep_exit(2); 739 } 740 memcpy(pattern, filename, len); 741 memcpy(&(pattern[len]), "\\*", 3); 742 dir->handle = FindFirstFile(pattern, &(dir->data)); 743 if (dir->handle != INVALID_HANDLE_VALUE) 744 { 745 free(pattern); 746 dir->first = TRUE; 747 return dir; 748 } 749 err = GetLastError(); 750 free(pattern); 751 free(dir); 752 errno = (err == ERROR_ACCESS_DENIED) ? 
EACCES : ENOENT; 753 return NULL; 754 } 755 756 char * 757 readdirectory(directory_type *dir) 758 { 759 for (;;) 760 { 761 if (!dir->first) 762 { 763 if (!FindNextFile(dir->handle, &(dir->data))) 764 return NULL; 765 } 766 else 767 { 768 dir->first = FALSE; 769 } 770 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) 771 return dir->data.cFileName; 772 } 773 #ifndef _MSC_VER 774 return NULL; /* Keep compiler happy; never executed */ 775 #endif 776 } 777 778 void 779 closedirectory(directory_type *dir) 780 { 781 FindClose(dir->handle); 782 free(dir); 783 } 784 785 786 /************* Test for regular file in Windows **********/ 787 788 /* I don't know how to do this, or if it can be done; assume all paths are 789 regular if they are not directories. */ 790 791 int isregfile(char *filename) 792 { 793 return !isdirectory(filename); 794 } 795 796 797 /************* Test for a terminal in Windows **********/ 798 799 /* I don't know how to do this; assume never */ 800 801 static BOOL 802 is_stdout_tty(void) 803 { 804 return FALSE; 805 } 806 807 static BOOL 808 is_file_tty(FILE *f) 809 { 810 return FALSE; 811 } 812 813 /* End of Windows functions */ 814 815 816 /************* Directory scanning when we can't do it ***********/ 817 818 /* The type is void, and apart from isdirectory(), the functions do nothing. */ 819 820 #else 821 822 #define FILESEP 0 823 typedef void directory_type; 824 825 int isdirectory(char *filename) { return 0; } 826 directory_type * opendirectory(char *filename) { return (directory_type*)0;} 827 char *readdirectory(directory_type *dir) { return (char*)0;} 828 void closedirectory(directory_type *dir) {} 829 830 831 /************* Test for regular file when we can't do it **********/ 832 833 /* Assume all files are regular. 
*/ 834 835 int isregfile(char *filename) { return 1; } 836 837 838 /************* Test for a terminal when we can't do it **********/ 839 840 static BOOL 841 is_stdout_tty(void) 842 { 843 return FALSE; 844 } 845 846 static BOOL 847 is_file_tty(FILE *f) 848 { 849 return FALSE; 850 } 851 852 #endif /* End of system-specific functions */ 853 854 855 856 #ifndef HAVE_STRERROR 857 /************************************************* 858 * Provide strerror() for non-ANSI libraries * 859 *************************************************/ 860 861 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 862 in their libraries, but can provide the same facility by this simple 863 alternative function. */ 864 865 extern int sys_nerr; 866 extern char *sys_errlist[]; 867 868 char * 869 strerror(int n) 870 { 871 if (n < 0 || n >= sys_nerr) return "unknown error number"; 872 return sys_errlist[n]; 873 } 874 #endif /* HAVE_STRERROR */ 875 876 877 878 /************************************************* 879 * Usage function * 880 *************************************************/ 881 882 static int 883 usage(int rc) 884 { 885 option_item *op; 886 fprintf(stderr, "Usage: pcre2grep [-"); 887 for (op = optionlist; op->one_char != 0; op++) 888 { 889 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); 890 } 891 fprintf(stderr, "] [long options] [pattern] [files]\n"); 892 fprintf(stderr, "Type `pcre2grep --help' for more information and the long " 893 "options.\n"); 894 return rc; 895 } 896 897 898 899 /************************************************* 900 * Help function * 901 *************************************************/ 902 903 static void 904 help(void) 905 { 906 option_item *op; 907 908 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL); 909 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL); 910 printf("PATTERN must be present if neither -e nor -f is used." 
STDOUT_NL); 911 912 #ifdef SUPPORT_PCRE2GREP_CALLOUT 913 printf("Callout scripts in patterns are supported." STDOUT_NL); 914 #else 915 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL); 916 #endif 917 918 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL); 919 920 #ifdef SUPPORT_LIBZ 921 printf("Files whose names end in .gz are read using zlib." STDOUT_NL); 922 #endif 923 924 #ifdef SUPPORT_LIBBZ2 925 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL); 926 #endif 927 928 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 929 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL); 930 #else 931 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL); 932 #endif 933 934 printf("Example: pcre2grep -i 'hello.*world' menu.h main.c" STDOUT_NL STDOUT_NL); 935 printf("Options:" STDOUT_NL); 936 937 for (op = optionlist; op->one_char != 0; op++) 938 { 939 int n; 940 char s[4]; 941 942 if (op->one_char > 0 && (op->long_name)[0] == 0) 943 n = 31 - printf(" -%c", op->one_char); 944 else 945 { 946 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); 947 else strcpy(s, " "); 948 n = 31 - printf(" %s --%s", s, op->long_name); 949 } 950 951 if (n < 1) n = 1; 952 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text); 953 } 954 955 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --buffer-size=100K." STDOUT_NL); 956 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE); 957 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL); 958 printf("space is removed and blank lines are ignored." STDOUT_NL); 959 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN); 960 961 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL); 962 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." 
STDOUT_NL); 963 } 964 965 966 967 /************************************************* 968 * Test exclude/includes * 969 *************************************************/ 970 971 /* If any exclude pattern matches, the path is excluded. Otherwise, unless 972 there are no includes, the path must match an include pattern. 973 974 Arguments: 975 path the path to be matched 976 ip the chain of include patterns 977 ep the chain of exclude patterns 978 979 Returns: TRUE if the path is not excluded 980 */ 981 982 static BOOL 983 test_incexc(char *path, patstr *ip, patstr *ep) 984 { 985 int plen = strlen((const char *)path); 986 987 for (; ep != NULL; ep = ep->next) 988 { 989 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) 990 return FALSE; 991 } 992 993 if (ip == NULL) return TRUE; 994 995 for (; ip != NULL; ip = ip->next) 996 { 997 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) 998 return TRUE; 999 } 1000 1001 return FALSE; 1002 } 1003 1004 1005 1006 /************************************************* 1007 * Decode integer argument value * 1008 *************************************************/ 1009 1010 /* Integer arguments can be followed by K or M. Avoid the use of strtoul() 1011 because SunOS4 doesn't have it. This is used only for unpicking arguments, so 1012 just keep it simple. 
1013 1014 Arguments: 1015 option_data the option data string 1016 op the option item (for error messages) 1017 longop TRUE if option given in long form 1018 1019 Returns: a long integer 1020 */ 1021 1022 static long int 1023 decode_number(char *option_data, option_item *op, BOOL longop) 1024 { 1025 unsigned long int n = 0; 1026 char *endptr = option_data; 1027 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++; 1028 while (isdigit((unsigned char)(*endptr))) 1029 n = n * 10 + (int)(*endptr++ - '0'); 1030 if (toupper(*endptr) == 'K') 1031 { 1032 n *= 1024; 1033 endptr++; 1034 } 1035 else if (toupper(*endptr) == 'M') 1036 { 1037 n *= 1024*1024; 1038 endptr++; 1039 } 1040 1041 if (*endptr != 0) /* Error */ 1042 { 1043 if (longop) 1044 { 1045 char *equals = strchr(op->long_name, '='); 1046 int nlen = (equals == NULL)? (int)strlen(op->long_name) : 1047 (int)(equals - op->long_name); 1048 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n", 1049 option_data, nlen, op->long_name); 1050 } 1051 else 1052 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n", 1053 option_data, op->one_char); 1054 pcre2grep_exit(usage(2)); 1055 } 1056 1057 return n; 1058 } 1059 1060 1061 1062 /************************************************* 1063 * Add item to a chain of numbers * 1064 *************************************************/ 1065 1066 /* Used to add an item onto a chain, or just return an unconnected item if the 1067 "after" argument is NULL. 
1068 1069 Arguments: 1070 n the number to add 1071 after if not NULL points to item to insert after 1072 1073 Returns: new number block 1074 */ 1075 1076 static omstr * 1077 add_number(int n, omstr *after) 1078 { 1079 omstr *om = (omstr *)malloc(sizeof(omstr)); 1080 1081 if (om == NULL) 1082 { 1083 fprintf(stderr, "pcre2grep: malloc failed\n"); 1084 pcre2grep_exit(2); 1085 } 1086 om->next = NULL; 1087 om->groupnum = n; 1088 1089 if (after != NULL) 1090 { 1091 om->next = after->next; 1092 after->next = om; 1093 } 1094 return om; 1095 } 1096 1097 1098 1099 /************************************************* 1100 * Read one line of input * 1101 *************************************************/ 1102 1103 /* Normally, input is read using fread() into a large buffer, so many lines may 1104 be read at once. However, doing this for tty input means that no output appears 1105 until a lot of input has been typed. Instead, tty input is handled line by 1106 line. We cannot use fgets() for this, because it does not stop at a binary 1107 zero, and therefore there is no way of telling how many characters it has read, 1108 because there may be binary zeros embedded in the data. 1109 1110 Arguments: 1111 buffer the buffer to read into 1112 length the maximum number of characters to read 1113 f the file 1114 1115 Returns: the number of characters read, zero at end of file 1116 */ 1117 1118 static unsigned int 1119 read_one_line(char *buffer, int length, FILE *f) 1120 { 1121 int c; 1122 int yield = 0; 1123 while ((c = fgetc(f)) != EOF) 1124 { 1125 buffer[yield++] = c; 1126 if (c == '\n' || yield >= length) break; 1127 } 1128 return yield; 1129 } 1130 1131 1132 1133 /************************************************* 1134 * Find end of line * 1135 *************************************************/ 1136 1137 /* The length of the endline sequence that is found is set via lenptr. This may 1138 be zero at the very end of the file if there is no line-ending sequence there. 

The newline convention is taken from the global endlinetype. For the ANYCRLF
and ANY conventions the data is scanned character by character, and when the
global utf flag is set, multi-byte UTF-8 characters are decoded so that the
Unicode line endings can be recognized.

Arguments:
  p         current position in line
  endptr    end of available data
  lenptr    where to put the length of the eol sequence

Returns:    pointer after the last byte of the line,
            including the newline byte(s)
*/

static char *
end_of_line(char *p, char *endptr, int *lenptr)
{
switch(endlinetype)
  {
  /* Single-byte terminators: scan forward for the byte; if the data runs out
  first, the line is unterminated and the reported length is zero. */

  default:      /* Just in case */
  case PCRE2_NEWLINE_LF:
  while (p < endptr && *p != '\n') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  case PCRE2_NEWLINE_CR:
  while (p < endptr && *p != '\r') p++;
  if (p < endptr)
    {
    *lenptr = 1;
    return p + 1;
    }
  *lenptr = 0;
  return endptr;

  /* CRLF: find a CR and accept it only if the next byte is LF; otherwise carry
  on scanning from the byte after the CR. A CR that is the very last byte of
  the data is not treated as a terminator. */

  case PCRE2_NEWLINE_CRLF:
  for (;;)
    {
    while (p < endptr && *p != '\r') p++;
    if (++p >= endptr)
      {
      *lenptr = 0;
      return endptr;
      }
    if (*p == '\n')
      {
      *lenptr = 2;
      return p + 1;
      }
    }
  break;

  /* ANYCRLF: CR, LF, or CRLF all end a line. */

  case PCRE2_NEWLINE_ANYCRLF:
  while (p < endptr)
    {
    int extra = 0;
    register int c = *((unsigned char *)p);

    /* In UTF mode a lead byte >= 0xc0 starts a multi-byte character. The
    lead byte is masked with utf8_table3[extra] and the low six bits of each
    following byte are merged in below it.
    NOTE(review): the following bytes are read without being checked against
    endptr - presumably the data never ends part way through a character;
    confirm. */

    if (utf && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    /* Step over the whole character before testing it, so that p is already
    the correct return value if it turns out to be a line ending. */

    p += 1 + extra;

    switch (c)
      {
      case '\n':
      *lenptr = 1;
      return p;

      /* A CR directly followed by LF counts as one two-byte ending. */

      case '\r':
      if (p < endptr && *p == '\n')
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

      default:
      break;
      }
    }   /* End of loop for ANYCRLF case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;

  /* ANY: as ANYCRLF, with VT and FF added and, except when EBCDIC is defined,
  NEL, LS and PS as well. */

  case PCRE2_NEWLINE_ANY:
  while (p < endptr)
    {
    int extra = 0;
    register int c = *((unsigned char *)p);

    /* Same UTF-8 decoding as in the ANYCRLF case above. */

    if (utf && c >= 0xc0)
      {
      int gcii, gcss;
      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
      gcss = 6*extra;
      c = (c & utf8_table3[extra]) << gcss;
      for (gcii = 1; gcii <= extra; gcii++)
        {
        gcss -= 6;
        c |= (p[gcii] & 0x3f) << gcss;
        }
      }

    p += 1 + extra;

    switch (c)
      {
      case '\n':    /* LF */
      case '\v':    /* VT */
      case '\f':    /* FF */
      *lenptr = 1;
      return p;

      case '\r':    /* CR */
      if (p < endptr && *p == '\n')
        {
        *lenptr = 2;
        p++;
        }
      else *lenptr = 1;
      return p;

      /* The reported lengths are byte counts: NEL is reported as two bytes in
      UTF mode and one byte otherwise; LS and PS are reported as three. */

#ifndef EBCDIC
      case 0x85:    /* Unicode NEL */
      *lenptr = utf? 2 : 1;
      return p;

      case 0x2028:  /* Unicode LS */
      case 0x2029:  /* Unicode PS */
      *lenptr = 3;
      return p;
#endif  /* Not EBCDIC */

      default:
      break;
      }
    }   /* End of loop for ANY case */

  *lenptr = 0;  /* Must have hit the end */
  return endptr;
  }     /* End of overall switch */
}



/*************************************************
*         Find start of previous line            *
*************************************************/

/* This is called when looking back for before lines to print.
1302 1303 Arguments: 1304 p start of the subsequent line 1305 startptr start of available data 1306 1307 Returns: pointer to the start of the previous line 1308 */ 1309 1310 static char * 1311 previous_line(char *p, char *startptr) 1312 { 1313 switch(endlinetype) 1314 { 1315 default: /* Just in case */ 1316 case PCRE2_NEWLINE_LF: 1317 p--; 1318 while (p > startptr && p[-1] != '\n') p--; 1319 return p; 1320 1321 case PCRE2_NEWLINE_CR: 1322 p--; 1323 while (p > startptr && p[-1] != '\n') p--; 1324 return p; 1325 1326 case PCRE2_NEWLINE_CRLF: 1327 for (;;) 1328 { 1329 p -= 2; 1330 while (p > startptr && p[-1] != '\n') p--; 1331 if (p <= startptr + 1 || p[-2] == '\r') return p; 1332 } 1333 /* Control can never get here */ 1334 1335 case PCRE2_NEWLINE_ANY: 1336 case PCRE2_NEWLINE_ANYCRLF: 1337 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; 1338 if (utf) while ((*p & 0xc0) == 0x80) p--; 1339 1340 while (p > startptr) 1341 { 1342 register unsigned int c; 1343 char *pp = p - 1; 1344 1345 if (utf) 1346 { 1347 int extra = 0; 1348 while ((*pp & 0xc0) == 0x80) pp--; 1349 c = *((unsigned char *)pp); 1350 if (c >= 0xc0) 1351 { 1352 int gcii, gcss; 1353 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 1354 gcss = 6*extra; 1355 c = (c & utf8_table3[extra]) << gcss; 1356 for (gcii = 1; gcii <= extra; gcii++) 1357 { 1358 gcss -= 6; 1359 c |= (pp[gcii] & 0x3f) << gcss; 1360 } 1361 } 1362 } 1363 else c = *((unsigned char *)pp); 1364 1365 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c) 1366 { 1367 case '\n': /* LF */ 1368 case '\r': /* CR */ 1369 return p; 1370 1371 default: 1372 break; 1373 } 1374 1375 else switch (c) 1376 { 1377 case '\n': /* LF */ 1378 case '\v': /* VT */ 1379 case '\f': /* FF */ 1380 case '\r': /* CR */ 1381 #ifndef EBCDIE 1382 case 0x85: /* Unicode NEL */ 1383 case 0x2028: /* Unicode LS */ 1384 case 0x2029: /* Unicode PS */ 1385 #endif /* Not EBCDIC */ 1386 return p; 1387 1388 default: 1389 break; 1390 } 1391 1392 p = pp; /* Back one 
character */ 1393 } /* End of loop for ANY case */ 1394 1395 return startptr; /* Hit start of data */ 1396 } /* End of overall switch */ 1397 } 1398 1399 1400 1401 1402 1403 /************************************************* 1404 * Print the previous "after" lines * 1405 *************************************************/ 1406 1407 /* This is called if we are about to lose said lines because of buffer filling, 1408 and at the end of the file. The data in the line is written using fwrite() so 1409 that a binary zero does not terminate it. 1410 1411 Arguments: 1412 lastmatchnumber the number of the last matching line, plus one 1413 lastmatchrestart where we restarted after the last match 1414 endptr end of available data 1415 printname filename for printing 1416 1417 Returns: nothing 1418 */ 1419 1420 static void 1421 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr, 1422 char *printname) 1423 { 1424 if (after_context > 0 && lastmatchnumber > 0) 1425 { 1426 int count = 0; 1427 while (lastmatchrestart < endptr && count++ < after_context) 1428 { 1429 int ellength; 1430 char *pp = lastmatchrestart; 1431 if (printname != NULL) fprintf(stdout, "%s-", printname); 1432 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 1433 pp = end_of_line(pp, endptr, &ellength); 1434 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 1435 lastmatchrestart = pp; 1436 } 1437 hyphenpending = TRUE; 1438 } 1439 } 1440 1441 1442 1443 /************************************************* 1444 * Apply patterns to subject till one matches * 1445 *************************************************/ 1446 1447 /* This function is called to run through all patterns, looking for a match. It 1448 is used multiple times for the same subject when colouring is enabled, in order 1449 to find all possible matches. 
1450 1451 Arguments: 1452 matchptr the start of the subject 1453 length the length of the subject to match 1454 options options for pcre_exec 1455 startoffset where to start matching 1456 mrc address of where to put the result of pcre2_match() 1457 1458 Returns: TRUE if there was a match 1459 FALSE if there was no match 1460 invert if there was a non-fatal error 1461 */ 1462 1463 static BOOL 1464 match_patterns(char *matchptr, size_t length, unsigned int options, 1465 size_t startoffset, int *mrc) 1466 { 1467 int i; 1468 size_t slen = length; 1469 patstr *p = patterns; 1470 const char *msg = "this text:\n\n"; 1471 1472 if (slen > 200) 1473 { 1474 slen = 200; 1475 msg = "text that starts:\n\n"; 1476 } 1477 for (i = 1; p != NULL; p = p->next, i++) 1478 { 1479 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length, 1480 startoffset, options, match_data, match_context); 1481 if (*mrc >= 0) return TRUE; 1482 if (*mrc == PCRE2_ERROR_NOMATCH) continue; 1483 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc); 1484 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i); 1485 fprintf(stderr, "%s", msg); 1486 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */ 1487 fprintf(stderr, "\n\n"); 1488 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_RECURSIONLIMIT || 1489 *mrc == PCRE2_ERROR_JIT_STACKLIMIT) 1490 resource_error = TRUE; 1491 if (error_count++ > 20) 1492 { 1493 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n"); 1494 pcre2grep_exit(2); 1495 } 1496 return invert; /* No more matching; don't show the line again */ 1497 } 1498 1499 return FALSE; /* No match, no errors */ 1500 } 1501 1502 1503 #ifdef SUPPORT_PCRE2GREP_CALLOUT 1504 1505 /************************************************* 1506 * Parse and execute callout scripts * 1507 *************************************************/ 1508 1509 /* This function parses a callout string block and executes the 1510 program specified by 
the string. The string is a list of substrings
separated by pipe characters. The first substring represents the
executable name, and the following substrings specify the arguments:

  program_name|param1|param2|...

Any substring (including the program name) can contain escape sequences
started by the dollar character. The escape sequences are substituted as
follows:

  $<digits> or ${<digits>} is replaced by the captured substring of the given
  decimal number, which must be greater than zero. If the number is greater
  than the number of capturing substrings, or if the capture is unset, the
  replacement is empty.

  Any other character is substituted by itself. E.g: $$ is replaced by a single
  dollar or $| replaced by a pipe character.

Example:

  echo -e "abcde\n12345" | pcre2grep \
    '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -

Output:

  Arg1: [a] [bcd] [d] Arg2: |a| ()
  abcde
  Arg1: [1] [234] [4] Arg2: |1| ()
  12345

The string is processed twice. The first pass checks the syntax and works out
how much memory is needed for the argument text and the argument vector; the
second pass builds them. The program is then run in a child process using
fork() and execv(), and its status is collected with waitpid().

Arguments:
  calloutptr   the callout block
  unused       not used

Returns:       0 if there is no callout string, if the string has a syntax
                 error or more than the permitted number of arguments, if
                 memory cannot be obtained, or if the status stored by
                 waitpid() is zero
               1 if the status stored by waitpid() is not zero
*/

static int
pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
{
PCRE2_SIZE length = calloutptr->callout_string_length;
PCRE2_SPTR string = calloutptr->callout_string;
PCRE2_SPTR subject = calloutptr->subject;
PCRE2_SIZE *ovector = calloutptr->offset_vector;
PCRE2_SIZE capture_top = calloutptr->capture_top;
PCRE2_SIZE argsvectorlen = 2;   /* Program name plus the terminating NULL */
PCRE2_SIZE argslen = 1;         /* The terminating zero of the last string */
char *args;
char *argsptr;
char **argsvector;
char **argsvectorptr;
pid_t pid;
int result = 0;

(void)unused;   /* Avoid compiler warning */

/* Only callout with strings are supported. */
if (string == NULL || length == 0) return 0;

/* Checking syntax and compute the number of string fragments. Callout strings
are ignored in case of a syntax error. */

while (length > 0)
  {
  if (*string == '|')
    {
    argsvectorlen++;

    /* Maximum 10000 arguments allowed. */
    if (argsvectorlen > 10000) return 0;
    }
  else if (*string == '$')
    {
    PCRE2_SIZE capture_id = 0;

    string++;
    length--;

    /* Syntax error: a character must be present after $. */
    if (length == 0) return 0;

    if (*string >= '1' && *string <= '9')
      {
      do
        {
        /* Maximum capture id is 65535. */
        if (capture_id <= 65535)
          capture_id = capture_id * 10 + (*string - '0');

        string++;
        length--;
        }
      while (length > 0 && *string >= '0' && *string <= '9');

      /* To negate the effect of string++ below. */
      string--;
      length++;
      }
    else if (*string == '{')
      {
      /* Must be a decimal number in braces, e.g: {5} or {38} */
      string++;
      length--;

      /* Syntax error: a decimal number required. */
      if (length == 0) return 0;
      if (*string < '1' || *string > '9') return 0;

      do
        {
        /* Maximum capture id is 65535. */
        if (capture_id <= 65535)
          capture_id = capture_id * 10 + (*string - '0');

        string++;
        length--;

        /* Syntax error: no more characters */
        if (length == 0) return 0;
        }
      while (*string >= '0' && *string <= '9');

      /* Syntax error: closing brace is missing. */
      if (*string != '}') return 0;
      }

    /* A capture reference contributes the length of the captured substring
    instead of one character; a number that is not less than capture_top
    contributes nothing. When capture_id is zero, the character after the
    dollar stands for itself and is counted by the argslen++ below. */

    if (capture_id > 0)
      {
      if (capture_id < capture_top)
        {
        capture_id *= 2;
        argslen += ovector[capture_id + 1] - ovector[capture_id];
        }

      /* To negate the effect of argslen++ below. */
      argslen--;
      }
    }

  string++;
  length--;
  argslen++;
  }

/* Get memory for the argument text and for the vector of pointers into it. */

args = (char*)malloc(argslen);
if (args == NULL) return 0;

argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
if (argsvector == NULL)
  {
  free(args);
  return 0;
  }

argsptr = args;
argsvectorptr = argsvector;

*argsvectorptr++ = argsptr;

/* Second pass: build the argument strings. The syntax was checked in the
first pass, so no further error checks are made here. */

length = calloutptr->callout_string_length;
string = calloutptr->callout_string;

while (length > 0)
  {
  if (*string == '|')
    {
    /* End the current argument and start the next one. */

    *argsptr++ = '\0';
    *argsvectorptr++ = argsptr;
    }
  else if (*string == '$')
    {
    string++;
    length--;

    if ((*string >= '1' && *string <= '9') || *string == '{')
      {
      PCRE2_SIZE capture_id = 0;

      if (*string != '{')
        {
        do
          {
          /* Maximum capture id is 65535. */
          if (capture_id <= 65535)
            capture_id = capture_id * 10 + (*string - '0');

          string++;
          length--;
          }
        while (length > 0 && *string >= '0' && *string <= '9');

        /* To negate the effect of string++ below. */
        string--;
        length++;
        }
      else
        {
        string++;
        length--;

        do
          {
          /* Maximum capture id is 65535. */
          if (capture_id <= 65535)
            capture_id = capture_id * 10 + (*string - '0');

          string++;
          length--;
          }
        while (*string != '}');
        }

      /* Copy the captured substring out of the subject; nothing is copied
      for a number that is not less than capture_top. */

      if (capture_id < capture_top)
        {
        PCRE2_SIZE capturesize;
        capture_id *= 2;

        capturesize = ovector[capture_id + 1] - ovector[capture_id];
        memcpy(argsptr, subject + ovector[capture_id], capturesize);
        argsptr += capturesize;
        }
      }
    else
      {
      /* Any other character after a dollar stands for itself. */

      *argsptr++ = *string;
      }
    }
  else
    {
    *argsptr++ = *string;
    }

  string++;
  length--;
  }

*argsptr++ = '\0';
*argsvectorptr = NULL;

/* Run the program in a child process and wait for it. If fork() fails, result
keeps its initial value of zero. */

pid = fork();

if (pid == 0)
  {
  (void)execv(argsvector[0], argsvector);
  /* Control gets here if there is an error, e.g. a non-existent program */
  exit(1);
  }
else if (pid > 0)
  (void)waitpid(pid, &result, 0);

free(args);
free(argsvector);

/* Currently negative return values are not supported, only zero (match
continues) or non-zero (match fails). */

return result != 0;
}

#endif



/*************************************************
*            Grep an individual file             *
*************************************************/

/* This is called from grep_or_recurse() below. It uses a buffer that is three
times the value of bufthird. The matching point is never allowed to stray into
the top third of the buffer, thus keeping more of the file available for
context printing or for multiline scanning. For large files, the pointer will
be in the middle third most of the time, so the bottom third is available for
"before" context printing.
1782 1783 Arguments: 1784 handle the fopened FILE stream for a normal file 1785 the gzFile pointer when reading is via libz 1786 the BZFILE pointer when reading is via libbz2 1787 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 1788 filename the file name or NULL (for errors) 1789 printname the file name if it is to be printed for each match 1790 or NULL if the file name is not to be printed 1791 it cannot be NULL if filenames[_nomatch]_only is set 1792 1793 Returns: 0 if there was at least one match 1794 1 otherwise (no matches) 1795 2 if an overlong line is encountered 1796 3 if there is a read error on a .bz2 file 1797 */ 1798 1799 static int 1800 pcre2grep(void *handle, int frtype, char *filename, char *printname) 1801 { 1802 int rc = 1; 1803 int linenumber = 1; 1804 int lastmatchnumber = 0; 1805 int count = 0; 1806 int filepos = 0; 1807 char *lastmatchrestart = NULL; 1808 char *ptr = main_buffer; 1809 char *endptr; 1810 size_t bufflength; 1811 BOOL binary = FALSE; 1812 BOOL endhyphenpending = FALSE; 1813 BOOL input_line_buffered = line_buffered; 1814 FILE *in = NULL; /* Ensure initialized */ 1815 1816 #ifdef SUPPORT_LIBZ 1817 gzFile ingz = NULL; 1818 #endif 1819 1820 #ifdef SUPPORT_LIBBZ2 1821 BZFILE *inbz2 = NULL; 1822 #endif 1823 1824 1825 /* Do the first read into the start of the buffer and set up the pointer to end 1826 of what we have. In the case of libz, a non-zipped .gz file will be read as a 1827 plain file. However, if a .bz2 file isn't actually bzipped, the first read will 1828 fail. */ 1829 1830 (void)frtype; 1831 1832 #ifdef SUPPORT_LIBZ 1833 if (frtype == FR_LIBZ) 1834 { 1835 ingz = (gzFile)handle; 1836 bufflength = gzread (ingz, main_buffer, bufsize); 1837 } 1838 else 1839 #endif 1840 1841 #ifdef SUPPORT_LIBBZ2 1842 if (frtype == FR_LIBBZ2) 1843 { 1844 inbz2 = (BZFILE *)handle; 1845 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize); 1846 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ 1847 } /* without the cast it is unsigned. 
*/ 1848 else 1849 #endif 1850 1851 { 1852 in = (FILE *)handle; 1853 if (is_file_tty(in)) input_line_buffered = TRUE; 1854 bufflength = input_line_buffered? 1855 read_one_line(main_buffer, bufsize, in) : 1856 fread(main_buffer, 1, bufsize, in); 1857 } 1858 1859 endptr = main_buffer + bufflength; 1860 1861 /* Unless binary-files=text, see if we have a binary file. This uses the same 1862 rule as GNU grep, namely, a search for a binary zero byte near the start of the 1863 file. */ 1864 1865 if (binary_files != BIN_TEXT) 1866 { 1867 binary = 1868 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL; 1869 if (binary && binary_files == BIN_NOMATCH) return 1; 1870 } 1871 1872 /* Loop while the current pointer is not at the end of the file. For large 1873 files, endptr will be at the end of the buffer when we are in the middle of the 1874 file, but ptr will never get there, because as soon as it gets over 2/3 of the 1875 way, the buffer is shifted left and re-filled. */ 1876 1877 while (ptr < endptr) 1878 { 1879 int endlinelength; 1880 int mrc = 0; 1881 unsigned int options = 0; 1882 BOOL match; 1883 char *matchptr = ptr; 1884 char *t = ptr; 1885 size_t length, linelength; 1886 size_t startoffset = 0; 1887 1888 /* At this point, ptr is at the start of a line. We need to find the length 1889 of the subject string to pass to pcre2_match(). In multiline mode, it is the 1890 length remainder of the data in the buffer. Otherwise, it is the length of 1891 the next line, excluding the terminating newline. After matching, we always 1892 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE 1893 option is used for compiling, so that any match is constrained to be in the 1894 first line. */ 1895 1896 t = end_of_line(t, endptr, &endlinelength); 1897 linelength = t - ptr - endlinelength; 1898 length = multiline? 
(size_t)(endptr - ptr) : linelength; 1899 1900 /* Check to see if the line we are looking at extends right to the very end 1901 of the buffer without a line terminator. This means the line is too long to 1902 handle. */ 1903 1904 if (endlinelength == 0 && t == main_buffer + bufsize) 1905 { 1906 fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n" 1907 "pcre2grep: the buffer size is %d\n" 1908 "pcre2grep: use the --buffer-size option to change it\n", 1909 linenumber, 1910 (filename == NULL)? "" : " of file ", 1911 (filename == NULL)? "" : filename, 1912 bufthird); 1913 return 2; 1914 } 1915 1916 /* Extra processing for Jeffrey Friedl's debugging. */ 1917 1918 #ifdef JFRIEDL_DEBUG 1919 if (jfriedl_XT || jfriedl_XR) 1920 { 1921 # include <sys/time.h> 1922 # include <time.h> 1923 struct timeval start_time, end_time; 1924 struct timezone dummy; 1925 int i; 1926 1927 if (jfriedl_XT) 1928 { 1929 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); 1930 const char *orig = ptr; 1931 ptr = malloc(newlen + 1); 1932 if (!ptr) { 1933 printf("out of memory"); 1934 pcre2grep_exit(2); 1935 } 1936 endptr = ptr; 1937 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); 1938 for (i = 0; i < jfriedl_XT; i++) { 1939 strncpy(endptr, orig, length); 1940 endptr += length; 1941 } 1942 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); 1943 length = newlen; 1944 } 1945 1946 if (gettimeofday(&start_time, &dummy) != 0) 1947 perror("bad gettimeofday"); 1948 1949 1950 for (i = 0; i < jfriedl_XR; i++) 1951 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0, 1952 PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); 1953 1954 if (gettimeofday(&end_time, &dummy) != 0) 1955 perror("bad gettimeofday"); 1956 1957 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) 1958 - 1959 (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); 1960 1961 printf("%s TIMER[%.4f]\n", match ? 
"MATCH" : "FAIL", delta); 1962 return 0; 1963 } 1964 #endif 1965 1966 /* We come back here after a match when show_only_matching is set, in order 1967 to find any further matches in the same line. This applies to 1968 --only-matching, --file-offsets, and --line-offsets. */ 1969 1970 ONLY_MATCHING_RESTART: 1971 1972 /* Run through all the patterns until one matches or there is an error other 1973 than NOMATCH. This code is in a subroutine so that it can be re-used for 1974 finding subsequent matches when colouring matched lines. After finding one 1975 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in 1976 this line. */ 1977 1978 match = match_patterns(matchptr, length, options, startoffset, &mrc); 1979 options = PCRE2_NOTEMPTY; 1980 1981 /* If it's a match or a not-match (as required), do what's wanted. */ 1982 1983 if (match != invert) 1984 { 1985 BOOL hyphenprinted = FALSE; 1986 1987 /* We've failed if we want a file that doesn't have any matches. */ 1988 1989 if (filenames == FN_NOMATCH_ONLY) return 1; 1990 1991 /* If all we want is a yes/no answer, we can return immediately. */ 1992 1993 if (quiet) return 0; 1994 1995 /* Just count if just counting is wanted. */ 1996 1997 else if (count_only) count++; 1998 1999 /* When handling a binary file and binary-files==binary, the "binary" 2000 variable will be set true (it's false in all other cases). In this 2001 situation we just want to output the file name. No need to scan further. */ 2002 2003 else if (binary) 2004 { 2005 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename); 2006 return 0; 2007 } 2008 2009 /* Likewise, if all we want is a file name, there is no need to scan any 2010 more lines in the file. 
*/ 2011 2012 else if (filenames == FN_MATCH_ONLY) 2013 { 2014 fprintf(stdout, "%s" STDOUT_NL, printname); 2015 return 0; 2016 } 2017 2018 /* The --only-matching option prints just the substring that matched, 2019 and/or one or more captured portions of it, as long as these strings are 2020 not empty. The --file-offsets and --line-offsets options output offsets for 2021 the matching substring (all three set show_only_matching). None of these 2022 mutually exclusive options prints any context. Afterwards, adjust the start 2023 and then jump back to look for further matches in the same line. If we are 2024 in invert mode, however, nothing is printed and we do not restart - this 2025 could still be useful because the return code is set. */ 2026 2027 else if (show_only_matching) 2028 { 2029 if (!invert) 2030 { 2031 size_t oldstartoffset; 2032 2033 if (printname != NULL) fprintf(stdout, "%s:", printname); 2034 if (number) fprintf(stdout, "%d:", linenumber); 2035 2036 /* Handle --line-offsets */ 2037 2038 if (line_offsets) 2039 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(matchptr + offsets[0] - ptr), 2040 (int)(offsets[1] - offsets[0])); 2041 2042 /* Handle --file-offsets */ 2043 2044 else if (file_offsets) 2045 fprintf(stdout, "%d,%d" STDOUT_NL, 2046 (int)(filepos + matchptr + offsets[0] - ptr), 2047 (int)(offsets[1] - offsets[0])); 2048 2049 /* Handle --only-matching, which may occur many times */ 2050 2051 else 2052 { 2053 BOOL printed = FALSE; 2054 omstr *om; 2055 2056 for (om = only_matching; om != NULL; om = om->next) 2057 { 2058 int n = om->groupnum; 2059 if (n < mrc) 2060 { 2061 int plen = offsets[2*n + 1] - offsets[2*n]; 2062 if (plen > 0) 2063 { 2064 if (printed) fprintf(stdout, "%s", om_separator); 2065 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); 2066 FWRITE(matchptr + offsets[n*2], 1, plen, stdout); 2067 if (do_colour) fprintf(stdout, "%c[00m", 0x1b); 2068 printed = TRUE; 2069 } 2070 } 2071 } 2072 2073 if (printed || printname != NULL || 
number) 2074 fprintf(stdout, STDOUT_NL); 2075 } 2076 2077 /* Prepare to repeat to find the next match in the line. */ 2078 2079 match = FALSE; 2080 if (line_buffered) fflush(stdout); 2081 rc = 0; /* Had some success */ 2082 2083 /* If the current match ended past the end of the line (only possible 2084 in multiline mode), we are done with this line. */ 2085 2086 if (offsets[1] > linelength) goto END_ONE_MATCH; 2087 2088 /* If the pattern contained a lookbehind that included \K, it is 2089 possible that the end of the match might be at or before the actual 2090 starting offset we have just used. In this case, start one character 2091 further on. */ 2092 2093 startoffset = offsets[1]; /* Restart after the match */ 2094 oldstartoffset = pcre2_get_startchar(match_data); 2095 if (startoffset <= oldstartoffset) 2096 { 2097 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */ 2098 startoffset = oldstartoffset + 1; 2099 if (utf) 2100 while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; 2101 } 2102 goto ONLY_MATCHING_RESTART; 2103 } 2104 } 2105 2106 /* This is the default case when none of the above options is set. We print 2107 the matching lines(s), possibly preceded and/or followed by other lines of 2108 context. */ 2109 2110 else 2111 { 2112 /* See if there is a requirement to print some "after" lines from a 2113 previous match. We never print any overlaps. */ 2114 2115 if (after_context > 0 && lastmatchnumber > 0) 2116 { 2117 int ellength; 2118 int linecount = 0; 2119 char *p = lastmatchrestart; 2120 2121 while (p < ptr && linecount < after_context) 2122 { 2123 p = end_of_line(p, ptr, &ellength); 2124 linecount++; 2125 } 2126 2127 /* It is important to advance lastmatchrestart during this printing so 2128 that it interacts correctly with any "before" printing below. Print 2129 each line's data using fwrite() in case there are binary zeroes. 
*/ 2130 2131 while (lastmatchrestart < p) 2132 { 2133 char *pp = lastmatchrestart; 2134 if (printname != NULL) fprintf(stdout, "%s-", printname); 2135 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 2136 pp = end_of_line(pp, endptr, &ellength); 2137 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 2138 lastmatchrestart = pp; 2139 } 2140 if (lastmatchrestart != ptr) hyphenpending = TRUE; 2141 } 2142 2143 /* If there were non-contiguous lines printed above, insert hyphens. */ 2144 2145 if (hyphenpending) 2146 { 2147 fprintf(stdout, "--" STDOUT_NL); 2148 hyphenpending = FALSE; 2149 hyphenprinted = TRUE; 2150 } 2151 2152 /* See if there is a requirement to print some "before" lines for this 2153 match. Again, don't print overlaps. */ 2154 2155 if (before_context > 0) 2156 { 2157 int linecount = 0; 2158 char *p = ptr; 2159 2160 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && 2161 linecount < before_context) 2162 { 2163 linecount++; 2164 p = previous_line(p, main_buffer); 2165 } 2166 2167 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) 2168 fprintf(stdout, "--" STDOUT_NL); 2169 2170 while (p < ptr) 2171 { 2172 int ellength; 2173 char *pp = p; 2174 if (printname != NULL) fprintf(stdout, "%s-", printname); 2175 if (number) fprintf(stdout, "%d-", linenumber - linecount--); 2176 pp = end_of_line(pp, endptr, &ellength); 2177 FWRITE(p, 1, pp - p, stdout); 2178 p = pp; 2179 } 2180 } 2181 2182 /* Now print the matching line(s); ensure we set hyphenpending at the end 2183 of the file if any context lines are being output. 
*/ 2184 2185 if (after_context > 0 || before_context > 0) 2186 endhyphenpending = TRUE; 2187 2188 if (printname != NULL) fprintf(stdout, "%s:", printname); 2189 if (number) fprintf(stdout, "%d:", linenumber); 2190 2191 /* In multiline mode, we want to print to the end of the line in which 2192 the end of the matched string is found, so we adjust linelength and the 2193 line number appropriately, but only when there actually was a match 2194 (invert not set). Because the PCRE2_FIRSTLINE option is set, the start of 2195 the match will always be before the first newline sequence. */ 2196 2197 if (multiline & !invert) 2198 { 2199 char *endmatch = ptr + offsets[1]; 2200 t = ptr; 2201 while (t <= endmatch) 2202 { 2203 t = end_of_line(t, endptr, &endlinelength); 2204 if (t < endmatch) linenumber++; else break; 2205 } 2206 linelength = t - ptr - endlinelength; 2207 } 2208 2209 /*** NOTE: Use only fwrite() to output the data line, so that binary 2210 zeroes are treated as just another data character. */ 2211 2212 /* This extra option, for Jeffrey Friedl's debugging requirements, 2213 replaces the matched string, or a specific captured string if it exists, 2214 with X. When this happens, colouring is ignored. */ 2215 2216 #ifdef JFRIEDL_DEBUG 2217 if (S_arg >= 0 && S_arg < mrc) 2218 { 2219 int first = S_arg * 2; 2220 int last = first + 1; 2221 FWRITE(ptr, 1, offsets[first], stdout); 2222 fprintf(stdout, "X"); 2223 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout); 2224 } 2225 else 2226 #endif 2227 2228 /* We have to split the line(s) up if colouring, and search for further 2229 matches, but not of course if the line is a non-match. 
*/ 2230 2231 if (do_colour && !invert) 2232 { 2233 int plength; 2234 FWRITE(ptr, 1, offsets[0], stdout); 2235 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 2236 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 2237 fprintf(stdout, "%c[00m", 0x1b); 2238 for (;;) 2239 { 2240 startoffset = offsets[1]; 2241 if (startoffset >= linelength + endlinelength || 2242 !match_patterns(matchptr, length, options, startoffset, &mrc)) 2243 break; 2244 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout); 2245 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 2246 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 2247 fprintf(stdout, "%c[00m", 0x1b); 2248 } 2249 2250 /* In multiline mode, we may have already printed the complete line 2251 and its line-ending characters (if they matched the pattern), so there 2252 may be no more to print. */ 2253 2254 plength = (int)((linelength + endlinelength) - startoffset); 2255 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout); 2256 } 2257 2258 /* Not colouring; no need to search for further matches */ 2259 2260 else FWRITE(ptr, 1, linelength + endlinelength, stdout); 2261 } 2262 2263 /* End of doing what has to be done for a match. If --line-buffered was 2264 given, flush the output. */ 2265 2266 if (line_buffered) fflush(stdout); 2267 rc = 0; /* Had some success */ 2268 2269 /* Remember where the last match happened for after_context. We remember 2270 where we are about to restart, and that line's number. */ 2271 2272 lastmatchrestart = ptr + linelength + endlinelength; 2273 lastmatchnumber = linenumber + 1; 2274 } 2275 2276 /* For a match in multiline inverted mode (which of course did not cause 2277 anything to be printed), we have to move on to the end of the match before 2278 proceeding. 
*/ 2279 2280 if (multiline && invert && match) 2281 { 2282 int ellength; 2283 char *endmatch = ptr + offsets[1]; 2284 t = ptr; 2285 while (t < endmatch) 2286 { 2287 t = end_of_line(t, endptr, &ellength); 2288 if (t <= endmatch) linenumber++; else break; 2289 } 2290 endmatch = end_of_line(endmatch, endptr, &ellength); 2291 linelength = endmatch - ptr - ellength; 2292 } 2293 2294 /* Advance to after the newline and increment the line number. The file 2295 offset to the current line is maintained in filepos. */ 2296 2297 END_ONE_MATCH: 2298 ptr += linelength + endlinelength; 2299 filepos += (int)(linelength + endlinelength); 2300 linenumber++; 2301 2302 /* If input is line buffered, and the buffer is not yet full, read another 2303 line and add it into the buffer. */ 2304 2305 if (input_line_buffered && bufflength < (size_t)bufsize) 2306 { 2307 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in); 2308 bufflength += add; 2309 endptr += add; 2310 } 2311 2312 /* If we haven't yet reached the end of the file (the buffer is full), and 2313 the current point is in the top 1/3 of the buffer, slide the buffer down by 2314 1/3 and refill it. Before we do this, if some unprinted "after" lines are 2315 about to be lost, print them. 
*/ 2316 2317 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird) 2318 { 2319 if (after_context > 0 && 2320 lastmatchnumber > 0 && 2321 lastmatchrestart < main_buffer + bufthird) 2322 { 2323 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 2324 lastmatchnumber = 0; 2325 } 2326 2327 /* Now do the shuffle */ 2328 2329 memmove(main_buffer, main_buffer + bufthird, 2*bufthird); 2330 ptr -= bufthird; 2331 2332 #ifdef SUPPORT_LIBZ 2333 if (frtype == FR_LIBZ) 2334 bufflength = 2*bufthird + 2335 gzread (ingz, main_buffer + 2*bufthird, bufthird); 2336 else 2337 #endif 2338 2339 #ifdef SUPPORT_LIBBZ2 2340 if (frtype == FR_LIBBZ2) 2341 bufflength = 2*bufthird + 2342 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird); 2343 else 2344 #endif 2345 2346 bufflength = 2*bufthird + 2347 (input_line_buffered? 2348 read_one_line(main_buffer + 2*bufthird, bufthird, in) : 2349 fread(main_buffer + 2*bufthird, 1, bufthird, in)); 2350 endptr = main_buffer + bufflength; 2351 2352 /* Adjust any last match point */ 2353 2354 if (lastmatchnumber > 0) lastmatchrestart -= bufthird; 2355 } 2356 } /* Loop through the whole file */ 2357 2358 /* End of file; print final "after" lines if wanted; do_after_lines sets 2359 hyphenpending if it prints something. */ 2360 2361 if (!show_only_matching && !count_only) 2362 { 2363 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 2364 hyphenpending |= endhyphenpending; 2365 } 2366 2367 /* Print the file name if we are looking for those without matches and there 2368 were none. If we found a match, we won't have got this far. 
*/ 2369 2370 if (filenames == FN_NOMATCH_ONLY) 2371 { 2372 fprintf(stdout, "%s" STDOUT_NL, printname); 2373 return 0; 2374 } 2375 2376 /* Print the match count if wanted */ 2377 2378 if (count_only && !quiet) 2379 { 2380 if (count > 0 || !omit_zero_count) 2381 { 2382 if (printname != NULL && filenames != FN_NONE) 2383 fprintf(stdout, "%s:", printname); 2384 fprintf(stdout, "%d" STDOUT_NL, count); 2385 } 2386 } 2387 2388 return rc; 2389 } 2390 2391 2392 2393 /************************************************* 2394 * Grep a file or recurse into a directory * 2395 *************************************************/ 2396 2397 /* Given a path name, if it's a directory, scan all the files if we are 2398 recursing; if it's a file, grep it. 2399 2400 Arguments: 2401 pathname the path to investigate 2402 dir_recurse TRUE if recursing is wanted (-r or -drecurse) 2403 only_one_at_top TRUE if the path is the only one at toplevel 2404 2405 Returns: -1 the file/directory was skipped 2406 0 if there was at least one match 2407 1 if there were no matches 2408 2 there was some kind of error 2409 2410 However, file opening failures are suppressed if "silent" is set. 2411 */ 2412 2413 static int 2414 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) 2415 { 2416 int rc = 1; 2417 int frtype; 2418 void *handle; 2419 char *lastcomp; 2420 FILE *in = NULL; /* Ensure initialized */ 2421 2422 #ifdef SUPPORT_LIBZ 2423 gzFile ingz = NULL; 2424 #endif 2425 2426 #ifdef SUPPORT_LIBBZ2 2427 BZFILE *inbz2 = NULL; 2428 #endif 2429 2430 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 2431 int pathlen; 2432 #endif 2433 2434 #if defined NATIVE_ZOS 2435 int zos_type; 2436 FILE *zos_test_file; 2437 #endif 2438 2439 /* If the file name is "-" we scan stdin */ 2440 2441 if (strcmp(pathname, "-") == 0) 2442 { 2443 return pcre2grep(stdin, FR_PLAIN, stdin_name, 2444 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? 
2445 stdin_name : NULL); 2446 } 2447 2448 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to 2449 directories, whereas --include and --exclude apply to everything else. The test 2450 is against the final component of the path. */ 2451 2452 lastcomp = strrchr(pathname, FILESEP); 2453 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1; 2454 2455 /* If the file is a directory, skip if not recursing or if explicitly excluded. 2456 Otherwise, scan the directory and recurse for each path within it. The scanning 2457 code is localized so it can be made system-specific. */ 2458 2459 2460 /* For z/OS, determine the file type. */ 2461 2462 #if defined NATIVE_ZOS 2463 zos_test_file = fopen(pathname,"rb"); 2464 2465 if (zos_test_file == NULL) 2466 { 2467 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n", 2468 pathname, strerror(errno)); 2469 return -1; 2470 } 2471 zos_type = identifyzosfiletype (zos_test_file); 2472 fclose (zos_test_file); 2473 2474 /* Handle a PDS in separate code */ 2475 2476 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE) 2477 { 2478 return travelonpdsdir (pathname, only_one_at_top); 2479 } 2480 2481 /* Deal with regular files in the normal way below. These types are: 2482 zos_type == __ZOS_PDS_MEMBER 2483 zos_type == __ZOS_PS 2484 zos_type == __ZOS_VSAM_KSDS 2485 zos_type == __ZOS_VSAM_ESDS 2486 zos_type == __ZOS_VSAM_RRDS 2487 */ 2488 2489 /* Handle a z/OS directory using common code. 
*/ 2490 2491 else if (zos_type == __ZOS_HFS) 2492 { 2493 #endif /* NATIVE_ZOS */ 2494 2495 2496 /* Handle directories: common code for all OS */ 2497 2498 if (isdirectory(pathname)) 2499 { 2500 if (dee_action == dee_SKIP || 2501 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns)) 2502 return -1; 2503 2504 if (dee_action == dee_RECURSE) 2505 { 2506 char buffer[1024]; 2507 char *nextfile; 2508 directory_type *dir = opendirectory(pathname); 2509 2510 if (dir == NULL) 2511 { 2512 if (!silent) 2513 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname, 2514 strerror(errno)); 2515 return 2; 2516 } 2517 2518 while ((nextfile = readdirectory(dir)) != NULL) 2519 { 2520 int frc; 2521 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile); 2522 frc = grep_or_recurse(buffer, dir_recurse, FALSE); 2523 if (frc > 1) rc = frc; 2524 else if (frc == 0 && rc == 1) rc = 0; 2525 } 2526 2527 closedirectory(dir); 2528 return rc; 2529 } 2530 } 2531 2532 #if defined NATIVE_ZOS 2533 } 2534 #endif 2535 2536 /* If the file is not a directory, check for a regular file, and if it is not, 2537 skip it if that's been requested. Otherwise, check for an explicit inclusion or 2538 exclusion. */ 2539 2540 else if ( 2541 #if defined NATIVE_ZOS 2542 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) || 2543 #else /* all other OS */ 2544 (!isregfile(pathname) && DEE_action == DEE_SKIP) || 2545 #endif 2546 !test_incexc(lastcomp, include_patterns, exclude_patterns)) 2547 return -1; /* File skipped */ 2548 2549 /* Control reaches here if we have a regular file, or if we have a directory 2550 and recursion or skipping was not requested, or if we have anything else and 2551 skipping was not requested. The scan proceeds. If this is the first and only 2552 argument at top level, we don't show the file name, unless we are only showing 2553 the file name, or the filename was forced (-H). 
*/ 2554 2555 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 2556 pathlen = (int)(strlen(pathname)); 2557 #endif 2558 2559 /* Open using zlib if it is supported and the file name ends with .gz. */ 2560 2561 #ifdef SUPPORT_LIBZ 2562 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) 2563 { 2564 ingz = gzopen(pathname, "rb"); 2565 if (ingz == NULL) 2566 { 2567 if (!silent) 2568 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname, 2569 strerror(errno)); 2570 return 2; 2571 } 2572 handle = (void *)ingz; 2573 frtype = FR_LIBZ; 2574 } 2575 else 2576 #endif 2577 2578 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ 2579 2580 #ifdef SUPPORT_LIBBZ2 2581 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) 2582 { 2583 inbz2 = BZ2_bzopen(pathname, "rb"); 2584 handle = (void *)inbz2; 2585 frtype = FR_LIBBZ2; 2586 } 2587 else 2588 #endif 2589 2590 /* Otherwise use plain fopen(). The label is so that we can come back here if 2591 an attempt to read a .bz2 file indicates that it really is a plain file. */ 2592 2593 #ifdef SUPPORT_LIBBZ2 2594 PLAIN_FILE: 2595 #endif 2596 { 2597 in = fopen(pathname, "rb"); 2598 handle = (void *)in; 2599 frtype = FR_PLAIN; 2600 } 2601 2602 /* All the opening methods return errno when they fail. */ 2603 2604 if (handle == NULL) 2605 { 2606 if (!silent) 2607 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname, 2608 strerror(errno)); 2609 return 2; 2610 } 2611 2612 /* Now grep the file */ 2613 2614 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT || 2615 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); 2616 2617 /* Close in an appropriate manner. */ 2618 2619 #ifdef SUPPORT_LIBZ 2620 if (frtype == FR_LIBZ) 2621 gzclose(ingz); 2622 else 2623 #endif 2624 2625 /* If it is a .bz2 file and the result is 3, it means that the first attempt to 2626 read failed. 
If the error indicates that the file isn't in fact bzipped, try 2627 again as a normal file. */ 2628 2629 #ifdef SUPPORT_LIBBZ2 2630 if (frtype == FR_LIBBZ2) 2631 { 2632 if (rc == 3) 2633 { 2634 int errnum; 2635 const char *err = BZ2_bzerror(inbz2, &errnum); 2636 if (errnum == BZ_DATA_ERROR_MAGIC) 2637 { 2638 BZ2_bzclose(inbz2); 2639 goto PLAIN_FILE; 2640 } 2641 else if (!silent) 2642 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n", 2643 pathname, err); 2644 rc = 2; /* The normal "something went wrong" code */ 2645 } 2646 BZ2_bzclose(inbz2); 2647 } 2648 else 2649 #endif 2650 2651 /* Normal file close */ 2652 2653 fclose(in); 2654 2655 /* Pass back the yield from pcre2grep(). */ 2656 2657 return rc; 2658 } 2659 2660 2661 2662 /************************************************* 2663 * Handle a single-letter, no data option * 2664 *************************************************/ 2665 2666 static int 2667 handle_option(int letter, int options) 2668 { 2669 switch(letter) 2670 { 2671 case N_FOFFSETS: file_offsets = TRUE; break; 2672 case N_HELP: help(); pcre2grep_exit(0); 2673 case N_LBUFFER: line_buffered = TRUE; break; 2674 case N_LOFFSETS: line_offsets = number = TRUE; break; 2675 case N_NOJIT: use_jit = FALSE; break; 2676 case 'a': binary_files = BIN_TEXT; break; 2677 case 'c': count_only = TRUE; break; 2678 case 'F': process_options |= PO_FIXED_STRINGS; break; 2679 case 'H': filenames = FN_FORCE; break; 2680 case 'I': binary_files = BIN_NOMATCH; break; 2681 case 'h': filenames = FN_NONE; break; 2682 case 'i': options |= PCRE2_CASELESS; break; 2683 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; 2684 case 'L': filenames = FN_NOMATCH_ONLY; break; 2685 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break; 2686 case 'n': number = TRUE; break; 2687 2688 case 'o': 2689 only_matching_last = add_number(0, only_matching_last); 2690 if (only_matching == NULL) only_matching = only_matching_last; 2691 break; 2692 
2693 case 'q': quiet = TRUE; break; 2694 case 'r': dee_action = dee_RECURSE; break; 2695 case 's': silent = TRUE; break; 2696 case 'u': options |= PCRE2_UTF; utf = TRUE; break; 2697 case 'v': invert = TRUE; break; 2698 case 'w': process_options |= PO_WORD_MATCH; break; 2699 case 'x': process_options |= PO_LINE_MATCH; break; 2700 2701 case 'V': 2702 { 2703 unsigned char buffer[128]; 2704 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer); 2705 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer); 2706 } 2707 pcre2grep_exit(0); 2708 break; 2709 2710 default: 2711 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter); 2712 pcre2grep_exit(usage(2)); 2713 } 2714 2715 return options; 2716 } 2717 2718 2719 2720 2721 /************************************************* 2722 * Construct printed ordinal * 2723 *************************************************/ 2724 2725 /* This turns a number into "1st", "3rd", etc. */ 2726 2727 static char * 2728 ordin(int n) 2729 { 2730 static char buffer[14]; 2731 char *p = buffer; 2732 sprintf(p, "%d", n); 2733 while (*p != 0) p++; 2734 switch (n%10) 2735 { 2736 case 1: strcpy(p, "st"); break; 2737 case 2: strcpy(p, "nd"); break; 2738 case 3: strcpy(p, "rd"); break; 2739 default: strcpy(p, "th"); break; 2740 } 2741 return buffer; 2742 } 2743 2744 2745 2746 /************************************************* 2747 * Compile a single pattern * 2748 *************************************************/ 2749 2750 /* Do nothing if the pattern has already been compiled. This is the case for 2751 include/exclude patterns read from a file. 2752 2753 When the -F option has been used, each "pattern" may be a list of strings, 2754 separated by line breaks. They will be matched literally. We split such a 2755 string and compile the first substring, inserting an additional block into the 2756 pattern chain. 
2757 2758 Arguments: 2759 p points to the pattern block 2760 options the PCRE options 2761 popts the processing options 2762 fromfile TRUE if the pattern was read from a file 2763 fromtext file name or identifying text (e.g. "include") 2764 count 0 if this is the only command line pattern, or 2765 number of the command line pattern, or 2766 linenumber for a pattern from a file 2767 2768 Returns: TRUE on success, FALSE after an error 2769 */ 2770 2771 static BOOL 2772 compile_pattern(patstr *p, int options, int popts, int fromfile, 2773 const char *fromtext, int count) 2774 { 2775 unsigned char buffer[PATBUFSIZE]; 2776 PCRE2_SIZE erroffset; 2777 char *ps = p->string; 2778 unsigned int patlen = strlen(ps); 2779 int errcode; 2780 2781 if (p->compiled != NULL) return TRUE; 2782 2783 if ((popts & PO_FIXED_STRINGS) != 0) 2784 { 2785 int ellength; 2786 char *eop = ps + patlen; 2787 char *pe = end_of_line(ps, eop, &ellength); 2788 2789 if (ellength != 0) 2790 { 2791 if (add_pattern(pe, p) == NULL) return FALSE; 2792 patlen = (int)(pe - ps - ellength); 2793 } 2794 } 2795 2796 sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); 2797 p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode, 2798 &erroffset, compile_context); 2799 2800 /* Handle successful compile */ 2801 2802 if (p->compiled != NULL) 2803 { 2804 #ifdef SUPPORT_PCRE2GREP_JIT 2805 if (use_jit) 2806 { 2807 errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE); 2808 if (errcode == 0) return TRUE; 2809 erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */ 2810 } 2811 else 2812 #endif 2813 return TRUE; 2814 } 2815 2816 /* Handle compile and JIT compile errors */ 2817 2818 erroffset -= (int)strlen(prefix[popts]); 2819 if (erroffset > patlen) erroffset = patlen; 2820 pcre2_get_error_message(errcode, buffer, PATBUFSIZE); 2821 2822 if (fromfile) 2823 { 2824 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s " 2825 "at offset %d: %s\n", count, 
fromtext, (int)erroffset, buffer); 2826 } 2827 else 2828 { 2829 if (count == 0) 2830 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n", 2831 fromtext, (int)erroffset, buffer); 2832 else 2833 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n", 2834 ordin(count), fromtext, (int)erroffset, buffer); 2835 } 2836 2837 return FALSE; 2838 } 2839 2840 2841 2842 /************************************************* 2843 * Read and compile a file of patterns * 2844 *************************************************/ 2845 2846 /* This is used for --filelist, --include-from, and --exclude-from. 2847 2848 Arguments: 2849 name the name of the file; "-" is stdin 2850 patptr pointer to the pattern chain anchor 2851 patlastptr pointer to the last pattern pointer 2852 popts the process options to pass to pattern_compile() 2853 2854 Returns: TRUE if all went well 2855 */ 2856 2857 static BOOL 2858 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts) 2859 { 2860 int linenumber = 0; 2861 FILE *f; 2862 char *filename; 2863 char buffer[PATBUFSIZE]; 2864 2865 if (strcmp(name, "-") == 0) 2866 { 2867 f = stdin; 2868 filename = stdin_name; 2869 } 2870 else 2871 { 2872 f = fopen(name, "r"); 2873 if (f == NULL) 2874 { 2875 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno)); 2876 return FALSE; 2877 } 2878 filename = name; 2879 } 2880 2881 while (fgets(buffer, PATBUFSIZE, f) != NULL) 2882 { 2883 char *s = buffer + (int)strlen(buffer); 2884 while (s > buffer && isspace((unsigned char)(s[-1]))) s--; 2885 *s = 0; 2886 linenumber++; 2887 if (buffer[0] == 0) continue; /* Skip blank lines */ 2888 2889 /* Note: this call to add_pattern() puts a pointer to the local variable 2890 "buffer" into the pattern chain. However, that pointer is used only when 2891 compiling the pattern, which happens immediately below, so we flatten it 2892 afterwards, as a precaution against any later code trying to use it. 
*/ 2893 2894 *patlastptr = add_pattern(buffer, *patlastptr); 2895 if (*patlastptr == NULL) 2896 { 2897 if (f != stdin) fclose(f); 2898 return FALSE; 2899 } 2900 if (*patptr == NULL) *patptr = *patlastptr; 2901 2902 /* This loop is needed because compiling a "pattern" when -F is set may add 2903 on additional literal patterns if the original contains a newline. In the 2904 common case, it never will, because fgets() stops at a newline. However, 2905 the -N option can be used to give pcre2grep a different newline setting. */ 2906 2907 for(;;) 2908 { 2909 if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename, 2910 linenumber)) 2911 { 2912 if (f != stdin) fclose(f); 2913 return FALSE; 2914 } 2915 (*patlastptr)->string = NULL; /* Insurance */ 2916 if ((*patlastptr)->next == NULL) break; 2917 *patlastptr = (*patlastptr)->next; 2918 } 2919 } 2920 2921 if (f != stdin) fclose(f); 2922 return TRUE; 2923 } 2924 2925 2926 2927 /************************************************* 2928 * Main program * 2929 *************************************************/ 2930 2931 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ 2932 2933 int 2934 main(int argc, char **argv) 2935 { 2936 int i, j; 2937 int rc = 1; 2938 BOOL only_one_at_top; 2939 patstr *cp; 2940 fnstr *fn; 2941 const char *locale_from = "--locale"; 2942 2943 #ifdef SUPPORT_PCRE2GREP_JIT 2944 pcre2_jit_stack *jit_stack = NULL; 2945 #endif 2946 2947 /* In Windows, stdout is set up as a text stream, which means that \n is 2948 converted to \r\n. This causes output lines that are copied from the input to 2949 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure 2950 that stdout is a binary stream. Note that this means all other output to stdout 2951 must use STDOUT_NL to terminate lines. 
*/ 2952 2953 #if defined(_WIN32) || defined(WIN32) 2954 _setmode( _fileno(stdout), _O_BINARY); 2955 #endif 2956 2957 /* Set up a default compile and match contexts and a match data block. */ 2958 2959 compile_context = pcre2_compile_context_create(NULL); 2960 match_context = pcre2_match_context_create(NULL); 2961 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL); 2962 offsets = pcre2_get_ovector_pointer(match_data); 2963 2964 /* If string (script) callouts are supported, set up the callout processing 2965 function. */ 2966 2967 #ifdef SUPPORT_PCRE2GREP_CALLOUT 2968 pcre2_set_callout(match_context, pcre2grep_callout, NULL); 2969 #endif 2970 2971 /* Process the options */ 2972 2973 for (i = 1; i < argc; i++) 2974 { 2975 option_item *op = NULL; 2976 char *option_data = (char *)""; /* default to keep compiler happy */ 2977 BOOL longop; 2978 BOOL longopwasequals = FALSE; 2979 2980 if (argv[i][0] != '-') break; 2981 2982 /* If we hit an argument that is just "-", it may be a reference to STDIN, 2983 but only if we have previously had -e or -f to define the patterns. */ 2984 2985 if (argv[i][1] == 0) 2986 { 2987 if (pattern_files != NULL || patterns != NULL) break; 2988 else pcre2grep_exit(usage(2)); 2989 } 2990 2991 /* Handle a long name option, or -- to terminate the options */ 2992 2993 if (argv[i][1] == '-') 2994 { 2995 char *arg = argv[i] + 2; 2996 char *argequals = strchr(arg, '='); 2997 2998 if (*arg == 0) /* -- terminates options */ 2999 { 3000 i++; 3001 break; /* out of the options-handling loop */ 3002 } 3003 3004 longop = TRUE; 3005 3006 /* Some long options have data that follows after =, for example file=name. 3007 Some options have variations in the long name spelling: specifically, we 3008 allow "regexp" because GNU grep allows it, though I personally go along 3009 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". 3010 These options are entered in the table as "regex(p)". Options can be in 3011 both these categories. 
*/ 3012 3013 for (op = optionlist; op->one_char != 0; op++) 3014 { 3015 char *opbra = strchr(op->long_name, '('); 3016 char *equals = strchr(op->long_name, '='); 3017 3018 /* Handle options with only one spelling of the name */ 3019 3020 if (opbra == NULL) /* Does not contain '(' */ 3021 { 3022 if (equals == NULL) /* Not thing=data case */ 3023 { 3024 if (strcmp(arg, op->long_name) == 0) break; 3025 } 3026 else /* Special case xxx=data */ 3027 { 3028 int oplen = (int)(equals - op->long_name); 3029 int arglen = (argequals == NULL)? 3030 (int)strlen(arg) : (int)(argequals - arg); 3031 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) 3032 { 3033 option_data = arg + arglen; 3034 if (*option_data == '=') 3035 { 3036 option_data++; 3037 longopwasequals = TRUE; 3038 } 3039 break; 3040 } 3041 } 3042 } 3043 3044 /* Handle options with an alternate spelling of the name */ 3045 3046 else 3047 { 3048 char buff1[24]; 3049 char buff2[24]; 3050 3051 int baselen = (int)(opbra - op->long_name); 3052 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1); 3053 int arglen = (argequals == NULL || equals == NULL)? 
3054 (int)strlen(arg) : (int)(argequals - arg); 3055 3056 sprintf(buff1, "%.*s", baselen, op->long_name); 3057 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1); 3058 3059 if (strncmp(arg, buff1, arglen) == 0 || 3060 strncmp(arg, buff2, arglen) == 0) 3061 { 3062 if (equals != NULL && argequals != NULL) 3063 { 3064 option_data = argequals; 3065 if (*option_data == '=') 3066 { 3067 option_data++; 3068 longopwasequals = TRUE; 3069 } 3070 } 3071 break; 3072 } 3073 } 3074 } 3075 3076 if (op->one_char == 0) 3077 { 3078 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]); 3079 pcre2grep_exit(usage(2)); 3080 } 3081 } 3082 3083 /* Jeffrey Friedl's debugging harness uses these additional options which 3084 are not in the right form for putting in the option table because they use 3085 only one hyphen, yet are more than one character long. By putting them 3086 separately here, they will not get displayed as part of the help() output, 3087 but I don't think Jeffrey will care about that. */ 3088 3089 #ifdef JFRIEDL_DEBUG 3090 else if (strcmp(argv[i], "-pre") == 0) { 3091 jfriedl_prefix = argv[++i]; 3092 continue; 3093 } else if (strcmp(argv[i], "-post") == 0) { 3094 jfriedl_postfix = argv[++i]; 3095 continue; 3096 } else if (strcmp(argv[i], "-XT") == 0) { 3097 sscanf(argv[++i], "%d", &jfriedl_XT); 3098 continue; 3099 } else if (strcmp(argv[i], "-XR") == 0) { 3100 sscanf(argv[++i], "%d", &jfriedl_XR); 3101 continue; 3102 } 3103 #endif 3104 3105 3106 /* One-char options; many that have no data may be in a single argument; we 3107 continue till we hit the last one or one that needs data. 
*/ 3108 3109 else 3110 { 3111 char *s = argv[i] + 1; 3112 longop = FALSE; 3113 3114 while (*s != 0) 3115 { 3116 for (op = optionlist; op->one_char != 0; op++) 3117 { 3118 if (*s == op->one_char) break; 3119 } 3120 if (op->one_char == 0) 3121 { 3122 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n", 3123 *s, argv[i]); 3124 pcre2grep_exit(usage(2)); 3125 } 3126 3127 option_data = s+1; 3128 3129 /* Break out if this is the last character in the string; it's handled 3130 below like a single multi-char option. */ 3131 3132 if (*option_data == 0) break; 3133 3134 /* Check for a single-character option that has data: OP_OP_NUMBER(S) 3135 are used for ones that either have a numerical number or defaults, i.e. 3136 the data is optional. If a digit follows, there is data; if not, carry on 3137 with other single-character options in the same string. */ 3138 3139 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS) 3140 { 3141 if (isdigit((unsigned char)s[1])) break; 3142 } 3143 else /* Check for an option with data */ 3144 { 3145 if (op->type != OP_NODATA) break; 3146 } 3147 3148 /* Handle a single-character option with no data, then loop for the 3149 next character in the string. */ 3150 3151 pcre2_options = handle_option(*s++, pcre2_options); 3152 } 3153 } 3154 3155 /* At this point we should have op pointing to a matched option. If the type 3156 is NO_DATA, it means that there is no data, and the option might set 3157 something in the PCRE options. */ 3158 3159 if (op->type == OP_NODATA) 3160 { 3161 pcre2_options = handle_option(op->one_char, pcre2_options); 3162 continue; 3163 } 3164 3165 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that 3166 either has a value or defaults to something. It cannot have data in a 3167 separate item. At the moment, the only such options are "colo(u)r", 3168 "only-matching", and Jeffrey Friedl's special -S debugging option. 
*/ 3169 3170 if (*option_data == 0 && 3171 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER || 3172 op->type == OP_OP_NUMBERS)) 3173 { 3174 switch (op->one_char) 3175 { 3176 case N_COLOUR: 3177 colour_option = (char *)"auto"; 3178 break; 3179 3180 case 'o': 3181 only_matching_last = add_number(0, only_matching_last); 3182 if (only_matching == NULL) only_matching = only_matching_last; 3183 break; 3184 3185 #ifdef JFRIEDL_DEBUG 3186 case 'S': 3187 S_arg = 0; 3188 break; 3189 #endif 3190 } 3191 continue; 3192 } 3193 3194 /* Otherwise, find the data string for the option. */ 3195 3196 if (*option_data == 0) 3197 { 3198 if (i >= argc - 1 || longopwasequals) 3199 { 3200 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]); 3201 pcre2grep_exit(usage(2)); 3202 } 3203 option_data = argv[++i]; 3204 } 3205 3206 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be 3207 added to a chain of numbers. */ 3208 3209 if (op->type == OP_OP_NUMBERS) 3210 { 3211 unsigned long int n = decode_number(option_data, op, longop); 3212 omdatastr *omd = (omdatastr *)op->dataptr; 3213 *(omd->lastptr) = add_number((int)n, *(omd->lastptr)); 3214 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr); 3215 } 3216 3217 /* If the option type is OP_PATLIST, it's the -e option, or one of the 3218 include/exclude options, which can be called multiple times to create lists 3219 of patterns. */ 3220 3221 else if (op->type == OP_PATLIST) 3222 { 3223 patdatastr *pd = (patdatastr *)op->dataptr; 3224 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr)); 3225 if (*(pd->lastptr) == NULL) goto EXIT2; 3226 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr); 3227 } 3228 3229 /* If the option type is OP_FILELIST, it's one of the options that names a 3230 file. 
*/ 3231 3232 else if (op->type == OP_FILELIST) 3233 { 3234 fndatastr *fd = (fndatastr *)op->dataptr; 3235 fn = (fnstr *)malloc(sizeof(fnstr)); 3236 if (fn == NULL) 3237 { 3238 fprintf(stderr, "pcre2grep: malloc failed\n"); 3239 goto EXIT2; 3240 } 3241 fn->next = NULL; 3242 fn->name = option_data; 3243 if (*(fd->anchor) == NULL) 3244 *(fd->anchor) = fn; 3245 else 3246 (*(fd->lastptr))->next = fn; 3247 *(fd->lastptr) = fn; 3248 } 3249 3250 /* Handle OP_BINARY_FILES */ 3251 3252 else if (op->type == OP_BINFILES) 3253 { 3254 if (strcmp(option_data, "binary") == 0) 3255 binary_files = BIN_BINARY; 3256 else if (strcmp(option_data, "without-match") == 0) 3257 binary_files = BIN_NOMATCH; 3258 else if (strcmp(option_data, "text") == 0) 3259 binary_files = BIN_TEXT; 3260 else 3261 { 3262 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n", 3263 option_data); 3264 pcre2grep_exit(usage(2)); 3265 } 3266 } 3267 3268 /* Otherwise, deal with a single string or numeric data value. */ 3269 3270 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER && 3271 op->type != OP_OP_NUMBER) 3272 { 3273 *((char **)op->dataptr) = option_data; 3274 } 3275 else 3276 { 3277 unsigned long int n = decode_number(option_data, op, longop); 3278 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n; 3279 else *((int *)op->dataptr) = n; 3280 } 3281 } 3282 3283 /* Options have been decoded. If -C was used, its value is used as a default 3284 for -A and -B. */ 3285 3286 if (both_context > 0) 3287 { 3288 if (after_context == 0) after_context = both_context; 3289 if (before_context == 0) before_context = both_context; 3290 } 3291 3292 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. 3293 However, all three set show_only_matching because they display, each in their 3294 own way, only the data that has matched. 
*/ 3295 3296 if ((only_matching != NULL && (file_offsets || line_offsets)) || 3297 (file_offsets && line_offsets)) 3298 { 3299 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets " 3300 "and/or --line-offsets\n"); 3301 pcre2grep_exit(usage(2)); 3302 } 3303 3304 /* Put limits into the match data block. */ 3305 3306 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); 3307 if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit); 3308 3309 if (only_matching != NULL || file_offsets || line_offsets) 3310 show_only_matching = TRUE; 3311 3312 /* If a locale has not been provided as an option, see if the LC_CTYPE or 3313 LC_ALL environment variable is set, and if so, use it. */ 3314 3315 if (locale == NULL) 3316 { 3317 locale = getenv("LC_ALL"); 3318 locale_from = "LCC_ALL"; 3319 } 3320 3321 if (locale == NULL) 3322 { 3323 locale = getenv("LC_CTYPE"); 3324 locale_from = "LC_CTYPE"; 3325 } 3326 3327 /* If a locale is set, use it to generate the tables the PCRE needs. Passing 3328 NULL to pcre2_maketables() means that malloc() is used to get the memory. 
*/ 3329 3330 if (locale != NULL) 3331 { 3332 if (setlocale(LC_CTYPE, locale) == NULL) 3333 { 3334 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n", 3335 locale, locale_from); 3336 goto EXIT2; 3337 } 3338 character_tables = pcre2_maketables(NULL); 3339 pcre2_set_character_tables(compile_context, character_tables); 3340 } 3341 3342 /* Sort out colouring */ 3343 3344 if (colour_option != NULL && strcmp(colour_option, "never") != 0) 3345 { 3346 if (strcmp(colour_option, "always") == 0) do_colour = TRUE; 3347 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); 3348 else 3349 { 3350 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n", 3351 colour_option); 3352 goto EXIT2; 3353 } 3354 if (do_colour) 3355 { 3356 char *cs = getenv("PCRE2GREP_COLOUR"); 3357 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR"); 3358 if (cs != NULL) colour_string = cs; 3359 } 3360 } 3361 3362 /* Sort out a newline setting. */ 3363 3364 if (newline_arg != NULL) 3365 { 3366 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *)); 3367 endlinetype++) 3368 { 3369 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break; 3370 } 3371 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *))) 3372 pcre2_set_newline(compile_context, endlinetype); 3373 else 3374 { 3375 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n", 3376 newline_arg); 3377 goto EXIT2; 3378 } 3379 } 3380 3381 /* Find default newline convention */ 3382 3383 else 3384 { 3385 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype); 3386 } 3387 3388 /* Interpret the text values for -d and -D */ 3389 3390 if (dee_option != NULL) 3391 { 3392 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; 3393 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; 3394 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; 3395 else 3396 { 3397 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option); 3398 goto EXIT2; 
3399 } 3400 } 3401 3402 if (DEE_option != NULL) 3403 { 3404 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; 3405 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; 3406 else 3407 { 3408 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option); 3409 goto EXIT2; 3410 } 3411 } 3412 3413 /* Check the values for Jeffrey Friedl's debugging options. */ 3414 3415 #ifdef JFRIEDL_DEBUG 3416 if (S_arg > 9) 3417 { 3418 fprintf(stderr, "pcre2grep: bad value for -S option\n"); 3419 return 2; 3420 } 3421 if (jfriedl_XT != 0 || jfriedl_XR != 0) 3422 { 3423 if (jfriedl_XT == 0) jfriedl_XT = 1; 3424 if (jfriedl_XR == 0) jfriedl_XR = 1; 3425 } 3426 #endif 3427 3428 /* Get memory for the main buffer. */ 3429 3430 bufsize = 3*bufthird; 3431 main_buffer = (char *)malloc(bufsize); 3432 3433 if (main_buffer == NULL) 3434 { 3435 fprintf(stderr, "pcre2grep: malloc failed\n"); 3436 goto EXIT2; 3437 } 3438 3439 /* If no patterns were provided by -e, and there are no files provided by -f, 3440 the first argument is the one and only pattern, and it must exist. */ 3441 3442 if (patterns == NULL && pattern_files == NULL) 3443 { 3444 if (i >= argc) return usage(2); 3445 patterns = patterns_last = add_pattern(argv[i++], NULL); 3446 if (patterns == NULL) goto EXIT2; 3447 } 3448 3449 /* Compile the patterns that were provided on the command line, either by 3450 multiple uses of -e or as a single unkeyed pattern. We cannot do this until 3451 after all the command-line options are read so that we know which PCRE options 3452 to use. When -F is used, compile_pattern() may add another block into the 3453 chain, so we must not access the next pointer till after the compile. */ 3454 3455 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) 3456 { 3457 if (!compile_pattern(cp, pcre2_options, process_options, FALSE, "command-line", 3458 (j == 1 && patterns->next == NULL)? 
0 : j)) 3459 goto EXIT2; 3460 } 3461 3462 /* Read and compile the regular expressions that are provided in files. */ 3463 3464 for (fn = pattern_files; fn != NULL; fn = fn->next) 3465 { 3466 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options)) 3467 goto EXIT2; 3468 } 3469 3470 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */ 3471 3472 #ifdef SUPPORT_PCRE2GREP_JIT 3473 if (use_jit) 3474 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL); 3475 #endif 3476 3477 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next) 3478 { 3479 #ifdef SUPPORT_PCRE2GREP_JIT 3480 if (jit_stack != NULL && cp->compiled != NULL) 3481 pcre2_jit_stack_assign(match_context, NULL, jit_stack); 3482 #endif 3483 } 3484 3485 /* If there are include or exclude patterns read from the command line, compile 3486 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is 3487 0. */ 3488 3489 for (j = 0; j < 4; j++) 3490 { 3491 int k; 3492 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next) 3493 { 3494 if (!compile_pattern(cp, pcre2_options, 0, FALSE, incexname[j], 3495 (k == 1 && cp->next == NULL)? 0 : k)) 3496 goto EXIT2; 3497 } 3498 } 3499 3500 /* Read and compile include/exclude patterns from files. */ 3501 3502 for (fn = include_from; fn != NULL; fn = fn->next) 3503 { 3504 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0)) 3505 goto EXIT2; 3506 } 3507 3508 for (fn = exclude_from; fn != NULL; fn = fn->next) 3509 { 3510 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0)) 3511 goto EXIT2; 3512 } 3513 3514 /* If there are no files that contain lists of files to search, and there are 3515 no file arguments, search stdin, and then exit. */ 3516 3517 if (file_lists == NULL && i >= argc) 3518 { 3519 rc = pcre2grep(stdin, FR_PLAIN, stdin_name, 3520 (filenames > FN_DEFAULT)? 
stdin_name : NULL); 3521 goto EXIT; 3522 } 3523 3524 /* If any files that contains a list of files to search have been specified, 3525 read them line by line and search the given files. */ 3526 3527 for (fn = file_lists; fn != NULL; fn = fn->next) 3528 { 3529 char buffer[PATBUFSIZE]; 3530 FILE *fl; 3531 if (strcmp(fn->name, "-") == 0) fl = stdin; else 3532 { 3533 fl = fopen(fn->name, "rb"); 3534 if (fl == NULL) 3535 { 3536 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name, 3537 strerror(errno)); 3538 goto EXIT2; 3539 } 3540 } 3541 while (fgets(buffer, PATBUFSIZE, fl) != NULL) 3542 { 3543 int frc; 3544 char *end = buffer + (int)strlen(buffer); 3545 while (end > buffer && isspace(end[-1])) end--; 3546 *end = 0; 3547 if (*buffer != 0) 3548 { 3549 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE); 3550 if (frc > 1) rc = frc; 3551 else if (frc == 0 && rc == 1) rc = 0; 3552 } 3553 } 3554 if (fl != stdin) fclose(fl); 3555 } 3556 3557 /* After handling file-list, work through remaining arguments. Pass in the fact 3558 that there is only one argument at top level - this suppresses the file name if 3559 the argument is not a directory and filenames are not otherwise forced. 
*/ 3560 3561 only_one_at_top = i == argc - 1 && file_lists == NULL; 3562 3563 for (; i < argc; i++) 3564 { 3565 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, 3566 only_one_at_top); 3567 if (frc > 1) rc = frc; 3568 else if (frc == 0 && rc == 1) rc = 0; 3569 } 3570 3571 EXIT: 3572 #ifdef SUPPORT_PCRE2GREP_JIT 3573 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack); 3574 #endif 3575 3576 free(main_buffer); 3577 free((void *)character_tables); 3578 3579 pcre2_compile_context_free(compile_context); 3580 pcre2_match_context_free(match_context); 3581 pcre2_match_data_free(match_data); 3582 3583 free_pattern_chain(patterns); 3584 free_pattern_chain(include_patterns); 3585 free_pattern_chain(include_dir_patterns); 3586 free_pattern_chain(exclude_patterns); 3587 free_pattern_chain(exclude_dir_patterns); 3588 3589 free_file_chain(exclude_from); 3590 free_file_chain(include_from); 3591 free_file_chain(pattern_files); 3592 free_file_chain(file_lists); 3593 3594 while (only_matching != NULL) 3595 { 3596 omstr *this = only_matching; 3597 only_matching = this->next; 3598 free(this); 3599 } 3600 3601 pcre2grep_exit(rc); 3602 3603 EXIT2: 3604 rc = 2; 3605 goto EXIT; 3606 } 3607 3608 /* End of pcre2grep */ 3609