1 /************************************************* 2 * pcregrep program * 3 *************************************************/ 4 5 /* This is a grep program that uses the PCRE regular expression library to do 6 its pattern matching. On a Unix or Win32 system it can recurse into 7 directories. 8 9 Copyright (c) 1997-2011 University of Cambridge 10 11 ----------------------------------------------------------------------------- 12 Redistribution and use in source and binary forms, with or without 13 modification, are permitted provided that the following conditions are met: 14 15 * Redistributions of source code must retain the above copyright notice, 16 this list of conditions and the following disclaimer. 17 18 * Redistributions in binary form must reproduce the above copyright 19 notice, this list of conditions and the following disclaimer in the 20 documentation and/or other materials provided with the distribution. 21 22 * Neither the name of the University of Cambridge nor the names of its 23 contributors may be used to endorse or promote products derived from 24 this software without specific prior written permission. 25 26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 POSSIBILITY OF SUCH DAMAGE. 37 ----------------------------------------------------------------------------- 38 */ 39 40 #ifdef HAVE_CONFIG_H 41 #include "config.h" 42 #endif 43 44 #include <ctype.h> 45 #include <locale.h> 46 #include <stdio.h> 47 #include <string.h> 48 #include <stdlib.h> 49 #include <errno.h> 50 51 #include <sys/types.h> 52 #include <sys/stat.h> 53 54 #ifdef HAVE_UNISTD_H 55 #include <unistd.h> 56 #endif 57 58 #ifdef SUPPORT_LIBZ 59 #include <zlib.h> 60 #endif 61 62 #ifdef SUPPORT_LIBBZ2 63 #include <bzlib.h> 64 #endif 65 66 #include "pcre.h" 67 68 #define FALSE 0 69 #define TRUE 1 70 71 typedef int BOOL; 72 73 #define MAX_PATTERN_COUNT 100 74 #define OFFSET_SIZE 99 75 76 #if BUFSIZ > 8192 77 #define MBUFTHIRD BUFSIZ 78 #else 79 #define MBUFTHIRD 8192 80 #endif 81 82 /* Values for the "filenames" variable, which specifies options for file name 83 output. The order is important; it is assumed that a file name is wanted for 84 all values greater than FN_DEFAULT. */ 85 86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; 87 88 /* File reading styles */ 89 90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 }; 91 92 /* Actions for the -d and -D options */ 93 94 enum { dee_READ, dee_SKIP, dee_RECURSE }; 95 enum { DEE_READ, DEE_SKIP }; 96 97 /* Actions for special processing options (flag bits) */ 98 99 #define PO_WORD_MATCH 0x0001 100 #define PO_LINE_MATCH 0x0002 101 #define PO_FIXED_STRINGS 0x0004 102 103 /* Line ending types */ 104 105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF }; 106 107 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some 108 environments), a warning is issued if the value of fwrite() is ignored. 109 Unfortunately, casting to (void) does not suppress the warning. To get round 110 this, we use a macro that compiles a fudge. Oddly, this does not also seem to 111 apply to fprintf(). */ 112 113 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {} 114 115 116 117 /************************************************* 118 * Global variables * 119 *************************************************/ 120 121 /* Jeffrey Friedl has some debugging requirements that are not part of the 122 regular code. */ 123 124 #ifdef JFRIEDL_DEBUG 125 static int S_arg = -1; 126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ 127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */ 128 static const char *jfriedl_prefix = ""; 129 static const char *jfriedl_postfix = ""; 130 #endif 131 132 static int endlinetype; 133 134 static char *colour_string = (char *)"1;31"; 135 static char *colour_option = NULL; 136 static char *dee_option = NULL; 137 static char *DEE_option = NULL; 138 static char *newline = NULL; 139 static char *pattern_filename = NULL; 140 static char *stdin_name = (char *)"(standard input)"; 141 static char *locale = NULL; 142 143 static const unsigned char *pcretables = NULL; 144 145 static int pattern_count = 0; 146 static pcre **pattern_list = NULL; 147 static pcre_extra **hints_list = NULL; 148 149 static char *include_pattern = NULL; 150 static char *exclude_pattern = NULL; 151 static char *include_dir_pattern = NULL; 152 static char *exclude_dir_pattern = NULL; 153 154 static pcre *include_compiled = NULL; 155 static pcre *exclude_compiled = NULL; 156 static pcre *include_dir_compiled = NULL; 157 static pcre *exclude_dir_compiled = NULL; 158 159 static int after_context = 0; 160 static int before_context = 0; 161 static int both_context = 0; 162 static int dee_action = dee_READ; 163 static int DEE_action = DEE_READ; 164 static int error_count = 0; 165 static int filenames = FN_DEFAULT; 166 static int only_matching = -1; 167 static int process_options = 0; 168 169 static unsigned long int match_limit = 0; 170 static unsigned long int match_limit_recursion = 0; 171 172 static BOOL count_only = FALSE; 173 static BOOL do_colour = FALSE; 174 static BOOL file_offsets = FALSE; 175 static BOOL hyphenpending = FALSE; 176 static BOOL invert = FALSE; 177 static BOOL line_buffered = FALSE; 178 static BOOL line_offsets = FALSE; 179 static BOOL multiline = FALSE; 180 static BOOL number = FALSE; 181 static BOOL omit_zero_count = FALSE; 182 static BOOL resource_error = FALSE; 183 static BOOL quiet = FALSE; 184 static BOOL silent = FALSE; 185 static BOOL utf8 = FALSE; 186 187 /* Structure for options and list of them */ 188 189 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER, 190 OP_OP_NUMBER, OP_PATLIST }; 191 192 typedef struct option_item { 193 int type; 194 int one_char; 195 void *dataptr; 196 const char *long_name; 197 const char *help_text; 198 } option_item; 199 200 /* Options without a single-letter equivalent get a negative value. This can be 201 used to identify them. */ 202 203 #define N_COLOUR (-1) 204 #define N_EXCLUDE (-2) 205 #define N_EXCLUDE_DIR (-3) 206 #define N_HELP (-4) 207 #define N_INCLUDE (-5) 208 #define N_INCLUDE_DIR (-6) 209 #define N_LABEL (-7) 210 #define N_LOCALE (-8) 211 #define N_NULL (-9) 212 #define N_LOFFSETS (-10) 213 #define N_FOFFSETS (-11) 214 #define N_LBUFFER (-12) 215 #define N_M_LIMIT (-13) 216 #define N_M_LIMIT_REC (-14) 217 218 static option_item optionlist[] = { 219 { OP_NODATA, N_NULL, NULL, "", " terminate options" }, 220 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, 221 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, 222 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, 223 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, 224 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, 225 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, 226 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, 227 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, 228 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" }, 229 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" }, 230 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" }, 231 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, 232 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, 233 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, 234 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, 235 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, 236 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, 237 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, 238 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, 239 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, 240 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, 241 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, 242 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" }, 243 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" }, 244 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, 245 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, 246 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, 247 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" }, 248 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, 249 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, 250 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, 251 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" }, 252 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" }, 253 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" }, 254 255 /* These two were accidentally implemented with underscores instead of 256 hyphens in the option names. As this was not discovered for several releases, 257 the incorrect versions are left in the table for compatibility. However, the 258 --help function misses out any option that has an underscore in its name. */ 259 260 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" }, 261 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" }, 262 263 #ifdef JFRIEDL_DEBUG 264 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, 265 #endif 266 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, 267 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" }, 268 { OP_NODATA, 'V', NULL, "version", "print version information and exit" }, 269 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, 270 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, 271 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, 272 { OP_NODATA, 0, NULL, NULL, NULL } 273 }; 274 275 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F 276 options. These set the 1, 2, and 4 bits in process_options, respectively. Note 277 that the combination of -w and -x has the same effect as -x on its own, so we 278 can treat them as the same. */ 279 280 static const char *prefix[] = { 281 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" }; 282 283 static const char *suffix[] = { 284 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; 285 286 /* UTF-8 tables - used only when the newline setting is "any". */ 287 288 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; 289 290 const char utf8_table4[] = { 291 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 292 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 293 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 294 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; 295 296 297 298 /************************************************* 299 * Exit from the program * 300 *************************************************/ 301 302 /* If there has been a resource error, give a suitable message. 303 304 Argument: the return code 305 Returns: does not return 306 */ 307 308 static void 309 pcregrep_exit(int rc) 310 { 311 if (resource_error) 312 { 313 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit " 314 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT); 315 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n"); 316 } 317 318 exit(rc); 319 } 320 321 322 /************************************************* 323 * OS-specific functions * 324 *************************************************/ 325 326 /* These functions are defined so that they can be made system specific, 327 although at present the only ones are for Unix, Win32, and for "no support". */ 328 329 330 /************* Directory scanning in Unix ***********/ 331 332 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H 333 #include <sys/types.h> 334 #include <sys/stat.h> 335 #include <dirent.h> 336 337 typedef DIR directory_type; 338 339 static int 340 isdirectory(char *filename) 341 { 342 struct stat statbuf; 343 if (stat(filename, &statbuf) < 0) 344 return 0; /* In the expectation that opening as a file will fail */ 345 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0; 346 } 347 348 static directory_type * 349 opendirectory(char *filename) 350 { 351 return opendir(filename); 352 } 353 354 static char * 355 readdirectory(directory_type *dir) 356 { 357 for (;;) 358 { 359 struct dirent *dent = readdir(dir); 360 if (dent == NULL) return NULL; 361 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) 362 return dent->d_name; 363 } 364 /* Control never reaches here */ 365 } 366 367 static void 368 closedirectory(directory_type *dir) 369 { 370 closedir(dir); 371 } 372 373 374 /************* Test for regular file in Unix **********/ 375 376 static int 377 isregfile(char *filename) 378 { 379 struct stat statbuf; 380 if (stat(filename, &statbuf) < 0) 381 return 1; /* In the expectation that opening as a file will fail */ 382 return (statbuf.st_mode & S_IFMT) == S_IFREG; 383 } 384 385 386 /************* Test for a terminal in Unix **********/ 387 388 static BOOL 389 is_stdout_tty(void) 390 { 391 return isatty(fileno(stdout)); 392 } 393 394 static BOOL 395 is_file_tty(FILE *f) 396 { 397 return isatty(fileno(f)); 398 } 399 400 401 /************* Directory scanning in Win32 ***********/ 402 403 /* I (Philip Hazel) have no means of testing this code. It was contributed by 404 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES 405 when it did not exist. David Byron added a patch that moved the #include of 406 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. 407 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is 408 undefined when it is indeed undefined. */ 409 410 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H 411 412 #ifndef STRICT 413 # define STRICT 414 #endif 415 #ifndef WIN32_LEAN_AND_MEAN 416 # define WIN32_LEAN_AND_MEAN 417 #endif 418 419 #include <windows.h> 420 421 #ifndef INVALID_FILE_ATTRIBUTES 422 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF 423 #endif 424 425 typedef struct directory_type 426 { 427 HANDLE handle; 428 BOOL first; 429 WIN32_FIND_DATA data; 430 } directory_type; 431 432 int 433 isdirectory(char *filename) 434 { 435 DWORD attr = GetFileAttributes(filename); 436 if (attr == INVALID_FILE_ATTRIBUTES) 437 return 0; 438 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0; 439 } 440 441 directory_type * 442 opendirectory(char *filename) 443 { 444 size_t len; 445 char *pattern; 446 directory_type *dir; 447 DWORD err; 448 len = strlen(filename); 449 pattern = (char *) malloc(len + 3); 450 dir = (directory_type *) malloc(sizeof(*dir)); 451 if ((pattern == NULL) || (dir == NULL)) 452 { 453 fprintf(stderr, "pcregrep: malloc failed\n"); 454 pcregrep_exit(2); 455 } 456 memcpy(pattern, filename, len); 457 memcpy(&(pattern[len]), "\\*", 3); 458 dir->handle = FindFirstFile(pattern, &(dir->data)); 459 if (dir->handle != INVALID_HANDLE_VALUE) 460 { 461 free(pattern); 462 dir->first = TRUE; 463 return dir; 464 } 465 err = GetLastError(); 466 free(pattern); 467 free(dir); 468 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT; 469 return NULL; 470 } 471 472 char * 473 readdirectory(directory_type *dir) 474 { 475 for (;;) 476 { 477 if (!dir->first) 478 { 479 if (!FindNextFile(dir->handle, &(dir->data))) 480 return NULL; 481 } 482 else 483 { 484 dir->first = FALSE; 485 } 486 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) 487 return dir->data.cFileName; 488 } 489 #ifndef _MSC_VER 490 return NULL; /* Keep compiler happy; never executed */ 491 #endif 492 } 493 494 void 495 closedirectory(directory_type *dir) 496 { 497 FindClose(dir->handle); 498 free(dir); 499 } 500 501 502 /************* Test for regular file in Win32 **********/ 503 504 /* I don't know how to do this, or if it can be done; assume all paths are 505 regular if they are not directories. */ 506 507 int isregfile(char *filename) 508 { 509 return !isdirectory(filename); 510 } 511 512 513 /************* Test for a terminal in Win32 **********/ 514 515 /* I don't know how to do this; assume never */ 516 517 static BOOL 518 is_stdout_tty(void) 519 { 520 return FALSE; 521 } 522 523 static BOOL 524 is_file_tty(FILE *f) 525 { 526 return FALSE; 527 } 528 529 530 /************* Directory scanning when we can't do it ***********/ 531 532 /* The type is void, and apart from isdirectory(), the functions do nothing. */ 533 534 #else 535 536 typedef void directory_type; 537 538 int isdirectory(char *filename) { return 0; } 539 directory_type * opendirectory(char *filename) { return (directory_type*)0;} 540 char *readdirectory(directory_type *dir) { return (char*)0;} 541 void closedirectory(directory_type *dir) {} 542 543 544 /************* Test for regular when we can't do it **********/ 545 546 /* Assume all files are regular. */ 547 548 int isregfile(char *filename) { return 1; } 549 550 551 /************* Test for a terminal when we can't do it **********/ 552 553 static BOOL 554 is_stdout_tty(void) 555 { 556 return FALSE; 557 } 558 559 static BOOL 560 is_file_tty(FILE *f) 561 { 562 return FALSE; 563 } 564 565 #endif 566 567 568 569 #ifndef HAVE_STRERROR 570 /************************************************* 571 * Provide strerror() for non-ANSI libraries * 572 *************************************************/ 573 574 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror() 575 in their libraries, but can provide the same facility by this simple 576 alternative function. */ 577 578 extern int sys_nerr; 579 extern char *sys_errlist[]; 580 581 char * 582 strerror(int n) 583 { 584 if (n < 0 || n >= sys_nerr) return "unknown error number"; 585 return sys_errlist[n]; 586 } 587 #endif /* HAVE_STRERROR */ 588 589 590 591 /************************************************* 592 * Read one line of input * 593 *************************************************/ 594 595 /* Normally, input is read using fread() into a large buffer, so many lines may 596 be read at once. However, doing this for tty input means that no output appears 597 until a lot of input has been typed. Instead, tty input is handled line by 598 line. We cannot use fgets() for this, because it does not stop at a binary 599 zero, and therefore there is no way of telling how many characters it has read, 600 because there may be binary zeros embedded in the data. 601 602 Arguments: 603 buffer the buffer to read into 604 length the maximum number of characters to read 605 f the file 606 607 Returns: the number of characters read, zero at end of file 608 */ 609 610 static int 611 read_one_line(char *buffer, int length, FILE *f) 612 { 613 int c; 614 int yield = 0; 615 while ((c = fgetc(f)) != EOF) 616 { 617 buffer[yield++] = c; 618 if (c == '\n' || yield >= length) break; 619 } 620 return yield; 621 } 622 623 624 625 /************************************************* 626 * Find end of line * 627 *************************************************/ 628 629 /* The length of the endline sequence that is found is set via lenptr. This may 630 be zero at the very end of the file if there is no line-ending sequence there. 631 632 Arguments: 633 p current position in line 634 endptr end of available data 635 lenptr where to put the length of the eol sequence 636 637 Returns: pointer to the last byte of the line, including the newline byte(s) 638 */ 639 640 static char * 641 end_of_line(char *p, char *endptr, int *lenptr) 642 { 643 switch(endlinetype) 644 { 645 default: /* Just in case */ 646 case EL_LF: 647 while (p < endptr && *p != '\n') p++; 648 if (p < endptr) 649 { 650 *lenptr = 1; 651 return p + 1; 652 } 653 *lenptr = 0; 654 return endptr; 655 656 case EL_CR: 657 while (p < endptr && *p != '\r') p++; 658 if (p < endptr) 659 { 660 *lenptr = 1; 661 return p + 1; 662 } 663 *lenptr = 0; 664 return endptr; 665 666 case EL_CRLF: 667 for (;;) 668 { 669 while (p < endptr && *p != '\r') p++; 670 if (++p >= endptr) 671 { 672 *lenptr = 0; 673 return endptr; 674 } 675 if (*p == '\n') 676 { 677 *lenptr = 2; 678 return p + 1; 679 } 680 } 681 break; 682 683 case EL_ANYCRLF: 684 while (p < endptr) 685 { 686 int extra = 0; 687 register int c = *((unsigned char *)p); 688 689 if (utf8 && c >= 0xc0) 690 { 691 int gcii, gcss; 692 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 693 gcss = 6*extra; 694 c = (c & utf8_table3[extra]) << gcss; 695 for (gcii = 1; gcii <= extra; gcii++) 696 { 697 gcss -= 6; 698 c |= (p[gcii] & 0x3f) << gcss; 699 } 700 } 701 702 p += 1 + extra; 703 704 switch (c) 705 { 706 case 0x0a: /* LF */ 707 *lenptr = 1; 708 return p; 709 710 case 0x0d: /* CR */ 711 if (p < endptr && *p == 0x0a) 712 { 713 *lenptr = 2; 714 p++; 715 } 716 else *lenptr = 1; 717 return p; 718 719 default: 720 break; 721 } 722 } /* End of loop for ANYCRLF case */ 723 724 *lenptr = 0; /* Must have hit the end */ 725 return endptr; 726 727 case EL_ANY: 728 while (p < endptr) 729 { 730 int extra = 0; 731 register int c = *((unsigned char *)p); 732 733 if (utf8 && c >= 0xc0) 734 { 735 int gcii, gcss; 736 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 737 gcss = 6*extra; 738 c = (c & utf8_table3[extra]) << gcss; 739 for (gcii = 1; gcii <= extra; gcii++) 740 { 741 gcss -= 6; 742 c |= (p[gcii] & 0x3f) << gcss; 743 } 744 } 745 746 p += 1 + extra; 747 748 switch (c) 749 { 750 case 0x0a: /* LF */ 751 case 0x0b: /* VT */ 752 case 0x0c: /* FF */ 753 *lenptr = 1; 754 return p; 755 756 case 0x0d: /* CR */ 757 if (p < endptr && *p == 0x0a) 758 { 759 *lenptr = 2; 760 p++; 761 } 762 else *lenptr = 1; 763 return p; 764 765 case 0x85: /* NEL */ 766 *lenptr = utf8? 2 : 1; 767 return p; 768 769 case 0x2028: /* LS */ 770 case 0x2029: /* PS */ 771 *lenptr = 3; 772 return p; 773 774 default: 775 break; 776 } 777 } /* End of loop for ANY case */ 778 779 *lenptr = 0; /* Must have hit the end */ 780 return endptr; 781 } /* End of overall switch */ 782 } 783 784 785 786 /************************************************* 787 * Find start of previous line * 788 *************************************************/ 789 790 /* This is called when looking back for before lines to print. 791 792 Arguments: 793 p start of the subsequent line 794 startptr start of available data 795 796 Returns: pointer to the start of the previous line 797 */ 798 799 static char * 800 previous_line(char *p, char *startptr) 801 { 802 switch(endlinetype) 803 { 804 default: /* Just in case */ 805 case EL_LF: 806 p--; 807 while (p > startptr && p[-1] != '\n') p--; 808 return p; 809 810 case EL_CR: 811 p--; 812 while (p > startptr && p[-1] != '\n') p--; 813 return p; 814 815 case EL_CRLF: 816 for (;;) 817 { 818 p -= 2; 819 while (p > startptr && p[-1] != '\n') p--; 820 if (p <= startptr + 1 || p[-2] == '\r') return p; 821 } 822 return p; /* But control should never get here */ 823 824 case EL_ANY: 825 case EL_ANYCRLF: 826 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; 827 if (utf8) while ((*p & 0xc0) == 0x80) p--; 828 829 while (p > startptr) 830 { 831 register int c; 832 char *pp = p - 1; 833 834 if (utf8) 835 { 836 int extra = 0; 837 while ((*pp & 0xc0) == 0x80) pp--; 838 c = *((unsigned char *)pp); 839 if (c >= 0xc0) 840 { 841 int gcii, gcss; 842 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ 843 gcss = 6*extra; 844 c = (c & utf8_table3[extra]) << gcss; 845 for (gcii = 1; gcii <= extra; gcii++) 846 { 847 gcss -= 6; 848 c |= (pp[gcii] & 0x3f) << gcss; 849 } 850 } 851 } 852 else c = *((unsigned char *)pp); 853 854 if (endlinetype == EL_ANYCRLF) switch (c) 855 { 856 case 0x0a: /* LF */ 857 case 0x0d: /* CR */ 858 return p; 859 860 default: 861 break; 862 } 863 864 else switch (c) 865 { 866 case 0x0a: /* LF */ 867 case 0x0b: /* VT */ 868 case 0x0c: /* FF */ 869 case 0x0d: /* CR */ 870 case 0x85: /* NEL */ 871 case 0x2028: /* LS */ 872 case 0x2029: /* PS */ 873 return p; 874 875 default: 876 break; 877 } 878 879 p = pp; /* Back one character */ 880 } /* End of loop for ANY case */ 881 882 return startptr; /* Hit start of data */ 883 } /* End of overall switch */ 884 } 885 886 887 888 889 890 /************************************************* 891 * Print the previous "after" lines * 892 *************************************************/ 893 894 /* This is called if we are about to lose said lines because of buffer filling, 895 and at the end of the file. The data in the line is written using fwrite() so 896 that a binary zero does not terminate it. 897 898 Arguments: 899 lastmatchnumber the number of the last matching line, plus one 900 lastmatchrestart where we restarted after the last match 901 endptr end of available data 902 printname filename for printing 903 904 Returns: nothing 905 */ 906 907 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart, 908 char *endptr, char *printname) 909 { 910 if (after_context > 0 && lastmatchnumber > 0) 911 { 912 int count = 0; 913 while (lastmatchrestart < endptr && count++ < after_context) 914 { 915 int ellength; 916 char *pp = lastmatchrestart; 917 if (printname != NULL) fprintf(stdout, "%s-", printname); 918 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 919 pp = end_of_line(pp, endptr, &ellength); 920 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 921 lastmatchrestart = pp; 922 } 923 hyphenpending = TRUE; 924 } 925 } 926 927 928 929 /************************************************* 930 * Apply patterns to subject till one matches * 931 *************************************************/ 932 933 /* This function is called to run through all patterns, looking for a match. It 934 is used multiple times for the same subject when colouring is enabled, in order 935 to find all possible matches. 936 937 Arguments: 938 matchptr the start of the subject 939 length the length of the subject to match 940 offsets the offets vector to fill in 941 mrc address of where to put the result of pcre_exec() 942 943 Returns: TRUE if there was a match 944 FALSE if there was no match 945 invert if there was a non-fatal error 946 */ 947 948 static BOOL 949 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc) 950 { 951 int i; 952 size_t slen = length; 953 const char *msg = "this text:\n\n"; 954 if (slen > 200) 955 { 956 slen = 200; 957 msg = "text that starts:\n\n"; 958 } 959 for (i = 0; i < pattern_count; i++) 960 { 961 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0, 962 PCRE_NOTEMPTY, offsets, OFFSET_SIZE); 963 if (*mrc >= 0) return TRUE; 964 if (*mrc == PCRE_ERROR_NOMATCH) continue; 965 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc); 966 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); 967 fprintf(stderr, "%s", msg); 968 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */ 969 fprintf(stderr, "\n\n"); 970 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT) 971 resource_error = TRUE; 972 if (error_count++ > 20) 973 { 974 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n"); 975 pcregrep_exit(2); 976 } 977 return invert; /* No more matching; don't show the line again */ 978 } 979 980 return FALSE; /* No match, no errors */ 981 } 982 983 984 985 /************************************************* 986 * Grep an individual file * 987 *************************************************/ 988 989 /* This is called from grep_or_recurse() below. It uses a buffer that is three 990 times the value of MBUFTHIRD. The matching point is never allowed to stray into 991 the top third of the buffer, thus keeping more of the file available for 992 context printing or for multiline scanning. For large files, the pointer will 993 be in the middle third most of the time, so the bottom third is available for 994 "before" context printing. 995 996 Arguments: 997 handle the fopened FILE stream for a normal file 998 the gzFile pointer when reading is via libz 999 the BZFILE pointer when reading is via libbz2 1000 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 1001 printname the file name if it is to be printed for each match 1002 or NULL if the file name is not to be printed 1003 it cannot be NULL if filenames[_nomatch]_only is set 1004 1005 Returns: 0 if there was at least one match 1006 1 otherwise (no matches) 1007 2 if there is a read error on a .bz2 file 1008 */ 1009 1010 static int 1011 pcregrep(void *handle, int frtype, char *printname) 1012 { 1013 int rc = 1; 1014 int linenumber = 1; 1015 int lastmatchnumber = 0; 1016 int count = 0; 1017 int filepos = 0; 1018 int offsets[OFFSET_SIZE]; 1019 char *lastmatchrestart = NULL; 1020 char buffer[3*MBUFTHIRD]; 1021 char *ptr = buffer; 1022 char *endptr; 1023 size_t bufflength; 1024 BOOL endhyphenpending = FALSE; 1025 BOOL input_line_buffered = line_buffered; 1026 FILE *in = NULL; /* Ensure initialized */ 1027 1028 #ifdef SUPPORT_LIBZ 1029 gzFile ingz = NULL; 1030 #endif 1031 1032 #ifdef SUPPORT_LIBBZ2 1033 BZFILE *inbz2 = NULL; 1034 #endif 1035 1036 1037 /* Do the first read into the start of the buffer and set up the pointer to end 1038 of what we have. In the case of libz, a non-zipped .gz file will be read as a 1039 plain file. However, if a .bz2 file isn't actually bzipped, the first read will 1040 fail. */ 1041 1042 #ifdef SUPPORT_LIBZ 1043 if (frtype == FR_LIBZ) 1044 { 1045 ingz = (gzFile)handle; 1046 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD); 1047 } 1048 else 1049 #endif 1050 1051 #ifdef SUPPORT_LIBBZ2 1052 if (frtype == FR_LIBBZ2) 1053 { 1054 inbz2 = (BZFILE *)handle; 1055 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD); 1056 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ 1057 } /* without the cast it is unsigned. */ 1058 else 1059 #endif 1060 1061 { 1062 in = (FILE *)handle; 1063 if (is_file_tty(in)) input_line_buffered = TRUE; 1064 bufflength = input_line_buffered? 1065 read_one_line(buffer, 3*MBUFTHIRD, in) : 1066 fread(buffer, 1, 3*MBUFTHIRD, in); 1067 } 1068 1069 endptr = buffer + bufflength; 1070 1071 /* Loop while the current pointer is not at the end of the file. For large 1072 files, endptr will be at the end of the buffer when we are in the middle of the 1073 file, but ptr will never get there, because as soon as it gets over 2/3 of the 1074 way, the buffer is shifted left and re-filled. */ 1075 1076 while (ptr < endptr) 1077 { 1078 int endlinelength; 1079 int mrc = 0; 1080 BOOL match; 1081 char *matchptr = ptr; 1082 char *t = ptr; 1083 size_t length, linelength; 1084 1085 /* At this point, ptr is at the start of a line. We need to find the length 1086 of the subject string to pass to pcre_exec(). In multiline mode, it is the 1087 length remainder of the data in the buffer. Otherwise, it is the length of 1088 the next line, excluding the terminating newline. After matching, we always 1089 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE 1090 option is used for compiling, so that any match is constrained to be in the 1091 first line. */ 1092 1093 t = end_of_line(t, endptr, &endlinelength); 1094 linelength = t - ptr - endlinelength; 1095 length = multiline? (size_t)(endptr - ptr) : linelength; 1096 1097 /* Extra processing for Jeffrey Friedl's debugging. */ 1098 1099 #ifdef JFRIEDL_DEBUG 1100 if (jfriedl_XT || jfriedl_XR) 1101 { 1102 #include <sys/time.h> 1103 #include <time.h> 1104 struct timeval start_time, end_time; 1105 struct timezone dummy; 1106 int i; 1107 1108 if (jfriedl_XT) 1109 { 1110 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); 1111 const char *orig = ptr; 1112 ptr = malloc(newlen + 1); 1113 if (!ptr) { 1114 printf("out of memory"); 1115 pcregrep_exit(2); 1116 } 1117 endptr = ptr; 1118 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); 1119 for (i = 0; i < jfriedl_XT; i++) { 1120 strncpy(endptr, orig, length); 1121 endptr += length; 1122 } 1123 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); 1124 length = newlen; 1125 } 1126 1127 if (gettimeofday(&start_time, &dummy) != 0) 1128 perror("bad gettimeofday"); 1129 1130 1131 for (i = 0; i < jfriedl_XR; i++) 1132 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 1133 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); 1134 1135 if (gettimeofday(&end_time, &dummy) != 0) 1136 perror("bad gettimeofday"); 1137 1138 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) 1139 - 1140 (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); 1141 1142 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta); 1143 return 0; 1144 } 1145 #endif 1146 1147 /* We come back here after a match when the -o option (only_matching) is set, 1148 in order to find any further matches in the same line. */ 1149 1150 ONLY_MATCHING_RESTART: 1151 1152 /* Run through all the patterns until one matches or there is an error other 1153 than NOMATCH. This code is in a subroutine so that it can be re-used for 1154 finding subsequent matches when colouring matched lines. */ 1155 1156 match = match_patterns(matchptr, length, offsets, &mrc); 1157 1158 /* If it's a match or a not-match (as required), do what's wanted. */ 1159 1160 if (match != invert) 1161 { 1162 BOOL hyphenprinted = FALSE; 1163 1164 /* We've failed if we want a file that doesn't have any matches. */ 1165 1166 if (filenames == FN_NOMATCH_ONLY) return 1; 1167 1168 /* Just count if just counting is wanted. */ 1169 1170 if (count_only) count++; 1171 1172 /* If all we want is a file name, there is no need to scan any more lines 1173 in the file. */ 1174 1175 else if (filenames == FN_MATCH_ONLY) 1176 { 1177 fprintf(stdout, "%s\n", printname); 1178 return 0; 1179 } 1180 1181 /* Likewise, if all we want is a yes/no answer. */ 1182 1183 else if (quiet) return 0; 1184 1185 /* The --only-matching option prints just the substring that matched, or a 1186 captured portion of it, as long as this string is not empty, and the 1187 --file-offsets and --line-offsets options output offsets for the matching 1188 substring (they both force --only-matching = 0). None of these options 1189 prints any context. Afterwards, adjust the start and length, and then jump 1190 back to look for further matches in the same line. If we are in invert 1191 mode, however, nothing is printed and we do not restart - this could still 1192 be useful because the return code is set. */ 1193 1194 else if (only_matching >= 0) 1195 { 1196 if (!invert) 1197 { 1198 if (printname != NULL) fprintf(stdout, "%s:", printname); 1199 if (number) fprintf(stdout, "%d:", linenumber); 1200 if (line_offsets) 1201 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), 1202 offsets[1] - offsets[0]); 1203 else if (file_offsets) 1204 fprintf(stdout, "%d,%d\n", 1205 (int)(filepos + matchptr + offsets[0] - ptr), 1206 offsets[1] - offsets[0]); 1207 else if (only_matching < mrc) 1208 { 1209 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching]; 1210 if (plen > 0) 1211 { 1212 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1213 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout); 1214 if (do_colour) fprintf(stdout, "%c[00m", 0x1b); 1215 fprintf(stdout, "\n"); 1216 } 1217 } 1218 else if (printname != NULL || number) fprintf(stdout, "\n"); 1219 matchptr += offsets[1]; 1220 length -= offsets[1]; 1221 match = FALSE; 1222 if (line_buffered) fflush(stdout); 1223 rc = 0; /* Had some success */ 1224 goto ONLY_MATCHING_RESTART; 1225 } 1226 } 1227 1228 /* This is the default case when none of the above options is set. We print 1229 the matching lines(s), possibly preceded and/or followed by other lines of 1230 context. */ 1231 1232 else 1233 { 1234 /* See if there is a requirement to print some "after" lines from a 1235 previous match. We never print any overlaps. */ 1236 1237 if (after_context > 0 && lastmatchnumber > 0) 1238 { 1239 int ellength; 1240 int linecount = 0; 1241 char *p = lastmatchrestart; 1242 1243 while (p < ptr && linecount < after_context) 1244 { 1245 p = end_of_line(p, ptr, &ellength); 1246 linecount++; 1247 } 1248 1249 /* It is important to advance lastmatchrestart during this printing so 1250 that it interacts correctly with any "before" printing below. Print 1251 each line's data using fwrite() in case there are binary zeroes. */ 1252 1253 while (lastmatchrestart < p) 1254 { 1255 char *pp = lastmatchrestart; 1256 if (printname != NULL) fprintf(stdout, "%s-", printname); 1257 if (number) fprintf(stdout, "%d-", lastmatchnumber++); 1258 pp = end_of_line(pp, endptr, &ellength); 1259 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); 1260 lastmatchrestart = pp; 1261 } 1262 if (lastmatchrestart != ptr) hyphenpending = TRUE; 1263 } 1264 1265 /* If there were non-contiguous lines printed above, insert hyphens. */ 1266 1267 if (hyphenpending) 1268 { 1269 fprintf(stdout, "--\n"); 1270 hyphenpending = FALSE; 1271 hyphenprinted = TRUE; 1272 } 1273 1274 /* See if there is a requirement to print some "before" lines for this 1275 match. Again, don't print overlaps. */ 1276 1277 if (before_context > 0) 1278 { 1279 int linecount = 0; 1280 char *p = ptr; 1281 1282 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && 1283 linecount < before_context) 1284 { 1285 linecount++; 1286 p = previous_line(p, buffer); 1287 } 1288 1289 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) 1290 fprintf(stdout, "--\n"); 1291 1292 while (p < ptr) 1293 { 1294 int ellength; 1295 char *pp = p; 1296 if (printname != NULL) fprintf(stdout, "%s-", printname); 1297 if (number) fprintf(stdout, "%d-", linenumber - linecount--); 1298 pp = end_of_line(pp, endptr, &ellength); 1299 FWRITE(p, 1, pp - p, stdout); 1300 p = pp; 1301 } 1302 } 1303 1304 /* Now print the matching line(s); ensure we set hyphenpending at the end 1305 of the file if any context lines are being output. */ 1306 1307 if (after_context > 0 || before_context > 0) 1308 endhyphenpending = TRUE; 1309 1310 if (printname != NULL) fprintf(stdout, "%s:", printname); 1311 if (number) fprintf(stdout, "%d:", linenumber); 1312 1313 /* In multiline mode, we want to print to the end of the line in which 1314 the end of the matched string is found, so we adjust linelength and the 1315 line number appropriately, but only when there actually was a match 1316 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of 1317 the match will always be before the first newline sequence. */ 1318 1319 if (multiline & !invert) 1320 { 1321 char *endmatch = ptr + offsets[1]; 1322 t = ptr; 1323 while (t < endmatch) 1324 { 1325 t = end_of_line(t, endptr, &endlinelength); 1326 if (t < endmatch) linenumber++; else break; 1327 } 1328 linelength = t - ptr - endlinelength; 1329 } 1330 1331 /*** NOTE: Use only fwrite() to output the data line, so that binary 1332 zeroes are treated as just another data character. */ 1333 1334 /* This extra option, for Jeffrey Friedl's debugging requirements, 1335 replaces the matched string, or a specific captured string if it exists, 1336 with X. When this happens, colouring is ignored. */ 1337 1338 #ifdef JFRIEDL_DEBUG 1339 if (S_arg >= 0 && S_arg < mrc) 1340 { 1341 int first = S_arg * 2; 1342 int last = first + 1; 1343 FWRITE(ptr, 1, offsets[first], stdout); 1344 fprintf(stdout, "X"); 1345 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout); 1346 } 1347 else 1348 #endif 1349 1350 /* We have to split the line(s) up if colouring, and search for further 1351 matches, but not of course if the line is a non-match. */ 1352 1353 if (do_colour && !invert) 1354 { 1355 int plength; 1356 int last_offset = 0; 1357 FWRITE(ptr, 1, offsets[0], stdout); 1358 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1359 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 1360 fprintf(stdout, "%c[00m", 0x1b); 1361 for (;;) 1362 { 1363 last_offset += offsets[1]; 1364 matchptr += offsets[1]; 1365 length -= offsets[1]; 1366 if (last_offset >= linelength + endlinelength || 1367 !match_patterns(matchptr, length, offsets, &mrc)) break; 1368 FWRITE(matchptr, 1, offsets[0], stdout); 1369 fprintf(stdout, "%c[%sm", 0x1b, colour_string); 1370 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); 1371 fprintf(stdout, "%c[00m", 0x1b); 1372 } 1373 1374 /* In multiline mode, we may have already printed the complete line 1375 and its line-ending characters (if they matched the pattern), so there 1376 may be no more to print. */ 1377 1378 plength = (linelength + endlinelength) - last_offset; 1379 if (plength > 0) 1380 FWRITE(ptr + last_offset, 1, plength, stdout); 1381 } 1382 1383 /* Not colouring; no need to search for further matches */ 1384 1385 else FWRITE(ptr, 1, linelength + endlinelength, stdout); 1386 } 1387 1388 /* End of doing what has to be done for a match. If --line-buffered was 1389 given, flush the output. */ 1390 1391 if (line_buffered) fflush(stdout); 1392 rc = 0; /* Had some success */ 1393 1394 /* Remember where the last match happened for after_context. We remember 1395 where we are about to restart, and that line's number. */ 1396 1397 lastmatchrestart = ptr + linelength + endlinelength; 1398 lastmatchnumber = linenumber + 1; 1399 } 1400 1401 /* For a match in multiline inverted mode (which of course did not cause 1402 anything to be printed), we have to move on to the end of the match before 1403 proceeding. */ 1404 1405 if (multiline && invert && match) 1406 { 1407 int ellength; 1408 char *endmatch = ptr + offsets[1]; 1409 t = ptr; 1410 while (t < endmatch) 1411 { 1412 t = end_of_line(t, endptr, &ellength); 1413 if (t <= endmatch) linenumber++; else break; 1414 } 1415 endmatch = end_of_line(endmatch, endptr, &ellength); 1416 linelength = endmatch - ptr - ellength; 1417 } 1418 1419 /* Advance to after the newline and increment the line number. The file 1420 offset to the current line is maintained in filepos. */ 1421 1422 ptr += linelength + endlinelength; 1423 filepos += (int)(linelength + endlinelength); 1424 linenumber++; 1425 1426 /* If input is line buffered, and the buffer is not yet full, read another 1427 line and add it into the buffer. */ 1428 1429 if (input_line_buffered && bufflength < sizeof(buffer)) 1430 { 1431 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in); 1432 bufflength += add; 1433 endptr += add; 1434 } 1435 1436 /* If we haven't yet reached the end of the file (the buffer is full), and 1437 the current point is in the top 1/3 of the buffer, slide the buffer down by 1438 1/3 and refill it. Before we do this, if some unprinted "after" lines are 1439 about to be lost, print them. */ 1440 1441 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD) 1442 { 1443 if (after_context > 0 && 1444 lastmatchnumber > 0 && 1445 lastmatchrestart < buffer + MBUFTHIRD) 1446 { 1447 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 1448 lastmatchnumber = 0; 1449 } 1450 1451 /* Now do the shuffle */ 1452 1453 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD); 1454 ptr -= MBUFTHIRD; 1455 1456 #ifdef SUPPORT_LIBZ 1457 if (frtype == FR_LIBZ) 1458 bufflength = 2*MBUFTHIRD + 1459 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD); 1460 else 1461 #endif 1462 1463 #ifdef SUPPORT_LIBBZ2 1464 if (frtype == FR_LIBBZ2) 1465 bufflength = 2*MBUFTHIRD + 1466 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD); 1467 else 1468 #endif 1469 1470 bufflength = 2*MBUFTHIRD + 1471 (input_line_buffered? 1472 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) : 1473 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in)); 1474 endptr = buffer + bufflength; 1475 1476 /* Adjust any last match point */ 1477 1478 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD; 1479 } 1480 } /* Loop through the whole file */ 1481 1482 /* End of file; print final "after" lines if wanted; do_after_lines sets 1483 hyphenpending if it prints something. */ 1484 1485 if (only_matching < 0 && !count_only) 1486 { 1487 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); 1488 hyphenpending |= endhyphenpending; 1489 } 1490 1491 /* Print the file name if we are looking for those without matches and there 1492 were none. If we found a match, we won't have got this far. */ 1493 1494 if (filenames == FN_NOMATCH_ONLY) 1495 { 1496 fprintf(stdout, "%s\n", printname); 1497 return 0; 1498 } 1499 1500 /* Print the match count if wanted */ 1501 1502 if (count_only) 1503 { 1504 if (count > 0 || !omit_zero_count) 1505 { 1506 if (printname != NULL && filenames != FN_NONE) 1507 fprintf(stdout, "%s:", printname); 1508 fprintf(stdout, "%d\n", count); 1509 } 1510 } 1511 1512 return rc; 1513 } 1514 1515 1516 1517 /************************************************* 1518 * Grep a file or recurse into a directory * 1519 *************************************************/ 1520 1521 /* Given a path name, if it's a directory, scan all the files if we are 1522 recursing; if it's a file, grep it. 1523 1524 Arguments: 1525 pathname the path to investigate 1526 dir_recurse TRUE if recursing is wanted (-r or -drecurse) 1527 only_one_at_top TRUE if the path is the only one at toplevel 1528 1529 Returns: 0 if there was at least one match 1530 1 if there were no matches 1531 2 there was some kind of error 1532 1533 However, file opening failures are suppressed if "silent" is set. 1534 */ 1535 1536 static int 1537 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) 1538 { 1539 int rc = 1; 1540 int sep; 1541 int frtype; 1542 int pathlen; 1543 void *handle; 1544 FILE *in = NULL; /* Ensure initialized */ 1545 1546 #ifdef SUPPORT_LIBZ 1547 gzFile ingz = NULL; 1548 #endif 1549 1550 #ifdef SUPPORT_LIBBZ2 1551 BZFILE *inbz2 = NULL; 1552 #endif 1553 1554 /* If the file name is "-" we scan stdin */ 1555 1556 if (strcmp(pathname, "-") == 0) 1557 { 1558 return pcregrep(stdin, FR_PLAIN, 1559 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? 1560 stdin_name : NULL); 1561 } 1562 1563 /* If the file is a directory, skip if skipping or if we are recursing, scan 1564 each file and directory within it, subject to any include or exclude patterns 1565 that were set. The scanning code is localized so it can be made 1566 system-specific. */ 1567 1568 if ((sep = isdirectory(pathname)) != 0) 1569 { 1570 if (dee_action == dee_SKIP) return 1; 1571 if (dee_action == dee_RECURSE) 1572 { 1573 char buffer[1024]; 1574 char *nextfile; 1575 directory_type *dir = opendirectory(pathname); 1576 1577 if (dir == NULL) 1578 { 1579 if (!silent) 1580 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname, 1581 strerror(errno)); 1582 return 2; 1583 } 1584 1585 while ((nextfile = readdirectory(dir)) != NULL) 1586 { 1587 int frc, nflen; 1588 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); 1589 nflen = (int)(strlen(nextfile)); 1590 1591 if (isdirectory(buffer)) 1592 { 1593 if (exclude_dir_compiled != NULL && 1594 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) 1595 continue; 1596 1597 if (include_dir_compiled != NULL && 1598 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) 1599 continue; 1600 } 1601 else 1602 { 1603 if (exclude_compiled != NULL && 1604 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) 1605 continue; 1606 1607 if (include_compiled != NULL && 1608 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) 1609 continue; 1610 } 1611 1612 frc = grep_or_recurse(buffer, dir_recurse, FALSE); 1613 if (frc > 1) rc = frc; 1614 else if (frc == 0 && rc == 1) rc = 0; 1615 } 1616 1617 closedirectory(dir); 1618 return rc; 1619 } 1620 } 1621 1622 /* If the file is not a directory and not a regular file, skip it if that's 1623 been requested. */ 1624 1625 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1; 1626 1627 /* Control reaches here if we have a regular file, or if we have a directory 1628 and recursion or skipping was not requested, or if we have anything else and 1629 skipping was not requested. The scan proceeds. If this is the first and only 1630 argument at top level, we don't show the file name, unless we are only showing 1631 the file name, or the filename was forced (-H). */ 1632 1633 pathlen = (int)(strlen(pathname)); 1634 1635 /* Open using zlib if it is supported and the file name ends with .gz. */ 1636 1637 #ifdef SUPPORT_LIBZ 1638 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) 1639 { 1640 ingz = gzopen(pathname, "rb"); 1641 if (ingz == NULL) 1642 { 1643 if (!silent) 1644 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, 1645 strerror(errno)); 1646 return 2; 1647 } 1648 handle = (void *)ingz; 1649 frtype = FR_LIBZ; 1650 } 1651 else 1652 #endif 1653 1654 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ 1655 1656 #ifdef SUPPORT_LIBBZ2 1657 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) 1658 { 1659 inbz2 = BZ2_bzopen(pathname, "rb"); 1660 handle = (void *)inbz2; 1661 frtype = FR_LIBBZ2; 1662 } 1663 else 1664 #endif 1665 1666 /* Otherwise use plain fopen(). The label is so that we can come back here if 1667 an attempt to read a .bz2 file indicates that it really is a plain file. */ 1668 1669 #ifdef SUPPORT_LIBBZ2 1670 PLAIN_FILE: 1671 #endif 1672 { 1673 in = fopen(pathname, "rb"); 1674 handle = (void *)in; 1675 frtype = FR_PLAIN; 1676 } 1677 1678 /* All the opening methods return errno when they fail. */ 1679 1680 if (handle == NULL) 1681 { 1682 if (!silent) 1683 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, 1684 strerror(errno)); 1685 return 2; 1686 } 1687 1688 /* Now grep the file */ 1689 1690 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT || 1691 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); 1692 1693 /* Close in an appropriate manner. */ 1694 1695 #ifdef SUPPORT_LIBZ 1696 if (frtype == FR_LIBZ) 1697 gzclose(ingz); 1698 else 1699 #endif 1700 1701 /* If it is a .bz2 file and the result is 2, it means that the first attempt to 1702 read failed. If the error indicates that the file isn't in fact bzipped, try 1703 again as a normal file. */ 1704 1705 #ifdef SUPPORT_LIBBZ2 1706 if (frtype == FR_LIBBZ2) 1707 { 1708 if (rc == 2) 1709 { 1710 int errnum; 1711 const char *err = BZ2_bzerror(inbz2, &errnum); 1712 if (errnum == BZ_DATA_ERROR_MAGIC) 1713 { 1714 BZ2_bzclose(inbz2); 1715 goto PLAIN_FILE; 1716 } 1717 else if (!silent) 1718 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n", 1719 pathname, err); 1720 } 1721 BZ2_bzclose(inbz2); 1722 } 1723 else 1724 #endif 1725 1726 /* Normal file close */ 1727 1728 fclose(in); 1729 1730 /* Pass back the yield from pcregrep(). */ 1731 1732 return rc; 1733 } 1734 1735 1736 1737 1738 /************************************************* 1739 * Usage function * 1740 *************************************************/ 1741 1742 static int 1743 usage(int rc) 1744 { 1745 option_item *op; 1746 fprintf(stderr, "Usage: pcregrep [-"); 1747 for (op = optionlist; op->one_char != 0; op++) 1748 { 1749 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); 1750 } 1751 fprintf(stderr, "] [long options] [pattern] [files]\n"); 1752 fprintf(stderr, "Type `pcregrep --help' for more information and the long " 1753 "options.\n"); 1754 return rc; 1755 } 1756 1757 1758 1759 1760 /************************************************* 1761 * Help function * 1762 *************************************************/ 1763 1764 static void 1765 help(void) 1766 { 1767 option_item *op; 1768 1769 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); 1770 printf("Search for PATTERN in each FILE or standard input.\n"); 1771 printf("PATTERN must be present if neither -e nor -f is used.\n"); 1772 printf("\"-\" can be used as a file name to mean STDIN.\n"); 1773 1774 #ifdef SUPPORT_LIBZ 1775 printf("Files whose names end in .gz are read using zlib.\n"); 1776 #endif 1777 1778 #ifdef SUPPORT_LIBBZ2 1779 printf("Files whose names end in .bz2 are read using bzlib2.\n"); 1780 #endif 1781 1782 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 1783 printf("Other files and the standard input are read as plain files.\n\n"); 1784 #else 1785 printf("All files are read as plain files, without any interpretation.\n\n"); 1786 #endif 1787 1788 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); 1789 printf("Options:\n"); 1790 1791 for (op = optionlist; op->one_char != 0; op++) 1792 { 1793 int n; 1794 char s[4]; 1795 1796 /* Two options were accidentally implemented and documented with underscores 1797 instead of hyphens in their names, something that was not noticed for quite a 1798 few releases. When fixing this, I left the underscored versions in the list 1799 in case people were using them. However, we don't want to display them in the 1800 help data. There are no other options that contain underscores, and we do not 1801 expect ever to implement such options. Therefore, just omit any option that 1802 contains an underscore. */ 1803 1804 if (strchr(op->long_name, '_') != NULL) continue; 1805 1806 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); 1807 n = 31 - printf(" %s --%s", s, op->long_name); 1808 if (n < 1) n = 1; 1809 printf("%.*s%s\n", n, " ", op->help_text); 1810 } 1811 1812 printf("\nWhen reading patterns from a file instead of using a command line option,\n"); 1813 printf("trailing white space is removed and blank lines are ignored.\n"); 1814 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT); 1815 1816 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n"); 1817 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); 1818 } 1819 1820 1821 1822 1823 /************************************************* 1824 * Handle a single-letter, no data option * 1825 *************************************************/ 1826 1827 static int 1828 handle_option(int letter, int options) 1829 { 1830 switch(letter) 1831 { 1832 case N_FOFFSETS: file_offsets = TRUE; break; 1833 case N_HELP: help(); pcregrep_exit(0); 1834 case N_LOFFSETS: line_offsets = number = TRUE; break; 1835 case N_LBUFFER: line_buffered = TRUE; break; 1836 case 'c': count_only = TRUE; break; 1837 case 'F': process_options |= PO_FIXED_STRINGS; break; 1838 case 'H': filenames = FN_FORCE; break; 1839 case 'h': filenames = FN_NONE; break; 1840 case 'i': options |= PCRE_CASELESS; break; 1841 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; 1842 case 'L': filenames = FN_NOMATCH_ONLY; break; 1843 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; 1844 case 'n': number = TRUE; break; 1845 case 'o': only_matching = 0; break; 1846 case 'q': quiet = TRUE; break; 1847 case 'r': dee_action = dee_RECURSE; break; 1848 case 's': silent = TRUE; break; 1849 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break; 1850 case 'v': invert = TRUE; break; 1851 case 'w': process_options |= PO_WORD_MATCH; break; 1852 case 'x': process_options |= PO_LINE_MATCH; break; 1853 1854 case 'V': 1855 fprintf(stderr, "pcregrep version %s\n", pcre_version()); 1856 pcregrep_exit(0); 1857 break; 1858 1859 default: 1860 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); 1861 pcregrep_exit(usage(2)); 1862 } 1863 1864 return options; 1865 } 1866 1867 1868 1869 1870 /************************************************* 1871 * Construct printed ordinal * 1872 *************************************************/ 1873 1874 /* This turns a number into "1st", "3rd", etc. */ 1875 1876 static char * 1877 ordin(int n) 1878 { 1879 static char buffer[8]; 1880 char *p = buffer; 1881 sprintf(p, "%d", n); 1882 while (*p != 0) p++; 1883 switch (n%10) 1884 { 1885 case 1: strcpy(p, "st"); break; 1886 case 2: strcpy(p, "nd"); break; 1887 case 3: strcpy(p, "rd"); break; 1888 default: strcpy(p, "th"); break; 1889 } 1890 return buffer; 1891 } 1892 1893 1894 1895 /************************************************* 1896 * Compile a single pattern * 1897 *************************************************/ 1898 1899 /* When the -F option has been used, this is called for each substring. 1900 Otherwise it's called for each supplied pattern. 1901 1902 Arguments: 1903 pattern the pattern string 1904 options the PCRE options 1905 filename the file name, or NULL for a command-line pattern 1906 count 0 if this is the only command line pattern, or 1907 number of the command line pattern, or 1908 linenumber for a pattern from a file 1909 1910 Returns: TRUE on success, FALSE after an error 1911 */ 1912 1913 static BOOL 1914 compile_single_pattern(char *pattern, int options, char *filename, int count) 1915 { 1916 char buffer[MBUFTHIRD + 16]; 1917 const char *error; 1918 int errptr; 1919 1920 if (pattern_count >= MAX_PATTERN_COUNT) 1921 { 1922 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n", 1923 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT); 1924 return FALSE; 1925 } 1926 1927 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, 1928 suffix[process_options]); 1929 pattern_list[pattern_count] = 1930 pcre_compile(buffer, options, &error, &errptr, pcretables); 1931 if (pattern_list[pattern_count] != NULL) 1932 { 1933 pattern_count++; 1934 return TRUE; 1935 } 1936 1937 /* Handle compile errors */ 1938 1939 errptr -= (int)strlen(prefix[process_options]); 1940 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern); 1941 1942 if (filename == NULL) 1943 { 1944 if (count == 0) 1945 fprintf(stderr, "pcregrep: Error in command-line regex " 1946 "at offset %d: %s\n", errptr, error); 1947 else 1948 fprintf(stderr, "pcregrep: Error in %s command-line regex " 1949 "at offset %d: %s\n", ordin(count), errptr, error); 1950 } 1951 else 1952 { 1953 fprintf(stderr, "pcregrep: Error in regex in line %d of %s " 1954 "at offset %d: %s\n", count, filename, errptr, error); 1955 } 1956 1957 return FALSE; 1958 } 1959 1960 1961 1962 /************************************************* 1963 * Compile one supplied pattern * 1964 *************************************************/ 1965 1966 /* When the -F option has been used, each string may be a list of strings, 1967 separated by line breaks. They will be matched literally. 1968 1969 Arguments: 1970 pattern the pattern string 1971 options the PCRE options 1972 filename the file name, or NULL for a command-line pattern 1973 count 0 if this is the only command line pattern, or 1974 number of the command line pattern, or 1975 linenumber for a pattern from a file 1976 1977 Returns: TRUE on success, FALSE after an error 1978 */ 1979 1980 static BOOL 1981 compile_pattern(char *pattern, int options, char *filename, int count) 1982 { 1983 if ((process_options & PO_FIXED_STRINGS) != 0) 1984 { 1985 char *eop = pattern + strlen(pattern); 1986 char buffer[MBUFTHIRD]; 1987 for(;;) 1988 { 1989 int ellength; 1990 char *p = end_of_line(pattern, eop, &ellength); 1991 if (ellength == 0) 1992 return compile_single_pattern(pattern, options, filename, count); 1993 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern); 1994 pattern = p; 1995 if (!compile_single_pattern(buffer, options, filename, count)) 1996 return FALSE; 1997 } 1998 } 1999 else return compile_single_pattern(pattern, options, filename, count); 2000 } 2001 2002 2003 2004 /************************************************* 2005 * Main program * 2006 *************************************************/ 2007 2008 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ 2009 2010 int 2011 main(int argc, char **argv) 2012 { 2013 int i, j; 2014 int rc = 1; 2015 int pcre_options = 0; 2016 int cmd_pattern_count = 0; 2017 int hint_count = 0; 2018 int errptr; 2019 BOOL only_one_at_top; 2020 char *patterns[MAX_PATTERN_COUNT]; 2021 const char *locale_from = "--locale"; 2022 const char *error; 2023 2024 /* Set the default line ending value from the default in the PCRE library; 2025 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". 2026 Note that the return values from pcre_config(), though derived from the ASCII 2027 codes, are the same in EBCDIC environments, so we must use the actual values 2028 rather than escapes such as as '\r'. */ 2029 2030 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i); 2031 switch(i) 2032 { 2033 default: newline = (char *)"lf"; break; 2034 case 13: newline = (char *)"cr"; break; 2035 case (13 << 8) | 10: newline = (char *)"crlf"; break; 2036 case -1: newline = (char *)"any"; break; 2037 case -2: newline = (char *)"anycrlf"; break; 2038 } 2039 2040 /* Process the options */ 2041 2042 for (i = 1; i < argc; i++) 2043 { 2044 option_item *op = NULL; 2045 char *option_data = (char *)""; /* default to keep compiler happy */ 2046 BOOL longop; 2047 BOOL longopwasequals = FALSE; 2048 2049 if (argv[i][0] != '-') break; 2050 2051 /* If we hit an argument that is just "-", it may be a reference to STDIN, 2052 but only if we have previously had -e or -f to define the patterns. */ 2053 2054 if (argv[i][1] == 0) 2055 { 2056 if (pattern_filename != NULL || pattern_count > 0) break; 2057 else pcregrep_exit(usage(2)); 2058 } 2059 2060 /* Handle a long name option, or -- to terminate the options */ 2061 2062 if (argv[i][1] == '-') 2063 { 2064 char *arg = argv[i] + 2; 2065 char *argequals = strchr(arg, '='); 2066 2067 if (*arg == 0) /* -- terminates options */ 2068 { 2069 i++; 2070 break; /* out of the options-handling loop */ 2071 } 2072 2073 longop = TRUE; 2074 2075 /* Some long options have data that follows after =, for example file=name. 2076 Some options have variations in the long name spelling: specifically, we 2077 allow "regexp" because GNU grep allows it, though I personally go along 2078 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". 2079 These options are entered in the table as "regex(p)". Options can be in 2080 both these categories. */ 2081 2082 for (op = optionlist; op->one_char != 0; op++) 2083 { 2084 char *opbra = strchr(op->long_name, '('); 2085 char *equals = strchr(op->long_name, '='); 2086 2087 /* Handle options with only one spelling of the name */ 2088 2089 if (opbra == NULL) /* Does not contain '(' */ 2090 { 2091 if (equals == NULL) /* Not thing=data case */ 2092 { 2093 if (strcmp(arg, op->long_name) == 0) break; 2094 } 2095 else /* Special case xxx=data */ 2096 { 2097 int oplen = (int)(equals - op->long_name); 2098 int arglen = (argequals == NULL)? 2099 (int)strlen(arg) : (int)(argequals - arg); 2100 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) 2101 { 2102 option_data = arg + arglen; 2103 if (*option_data == '=') 2104 { 2105 option_data++; 2106 longopwasequals = TRUE; 2107 } 2108 break; 2109 } 2110 } 2111 } 2112 2113 /* Handle options with an alternate spelling of the name */ 2114 2115 else 2116 { 2117 char buff1[24]; 2118 char buff2[24]; 2119 2120 int baselen = (int)(opbra - op->long_name); 2121 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1); 2122 int arglen = (argequals == NULL || equals == NULL)? 2123 (int)strlen(arg) : (int)(argequals - arg); 2124 2125 sprintf(buff1, "%.*s", baselen, op->long_name); 2126 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1); 2127 2128 if (strncmp(arg, buff1, arglen) == 0 || 2129 strncmp(arg, buff2, arglen) == 0) 2130 { 2131 if (equals != NULL && argequals != NULL) 2132 { 2133 option_data = argequals; 2134 if (*option_data == '=') 2135 { 2136 option_data++; 2137 longopwasequals = TRUE; 2138 } 2139 } 2140 break; 2141 } 2142 } 2143 } 2144 2145 if (op->one_char == 0) 2146 { 2147 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); 2148 pcregrep_exit(usage(2)); 2149 } 2150 } 2151 2152 /* Jeffrey Friedl's debugging harness uses these additional options which 2153 are not in the right form for putting in the option table because they use 2154 only one hyphen, yet are more than one character long. By putting them 2155 separately here, they will not get displayed as part of the help() output, 2156 but I don't think Jeffrey will care about that. */ 2157 2158 #ifdef JFRIEDL_DEBUG 2159 else if (strcmp(argv[i], "-pre") == 0) { 2160 jfriedl_prefix = argv[++i]; 2161 continue; 2162 } else if (strcmp(argv[i], "-post") == 0) { 2163 jfriedl_postfix = argv[++i]; 2164 continue; 2165 } else if (strcmp(argv[i], "-XT") == 0) { 2166 sscanf(argv[++i], "%d", &jfriedl_XT); 2167 continue; 2168 } else if (strcmp(argv[i], "-XR") == 0) { 2169 sscanf(argv[++i], "%d", &jfriedl_XR); 2170 continue; 2171 } 2172 #endif 2173 2174 2175 /* One-char options; many that have no data may be in a single argument; we 2176 continue till we hit the last one or one that needs data. */ 2177 2178 else 2179 { 2180 char *s = argv[i] + 1; 2181 longop = FALSE; 2182 while (*s != 0) 2183 { 2184 for (op = optionlist; op->one_char != 0; op++) 2185 { 2186 if (*s == op->one_char) break; 2187 } 2188 if (op->one_char == 0) 2189 { 2190 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n", 2191 *s, argv[i]); 2192 pcregrep_exit(usage(2)); 2193 } 2194 2195 /* Check for a single-character option that has data: OP_OP_NUMBER 2196 is used for one that either has a numerical number or defaults, i.e. the 2197 data is optional. If a digit follows, there is data; if not, carry on 2198 with other single-character options in the same string. */ 2199 2200 option_data = s+1; 2201 if (op->type == OP_OP_NUMBER) 2202 { 2203 if (isdigit((unsigned char)s[1])) break; 2204 } 2205 else /* Check for end or a dataless option */ 2206 { 2207 if (op->type != OP_NODATA || s[1] == 0) break; 2208 } 2209 2210 /* Handle a single-character option with no data, then loop for the 2211 next character in the string. */ 2212 2213 pcre_options = handle_option(*s++, pcre_options); 2214 } 2215 } 2216 2217 /* At this point we should have op pointing to a matched option. If the type 2218 is NO_DATA, it means that there is no data, and the option might set 2219 something in the PCRE options. */ 2220 2221 if (op->type == OP_NODATA) 2222 { 2223 pcre_options = handle_option(op->one_char, pcre_options); 2224 continue; 2225 } 2226 2227 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that 2228 either has a value or defaults to something. It cannot have data in a 2229 separate item. At the moment, the only such options are "colo(u)r", 2230 "only-matching", and Jeffrey Friedl's special -S debugging option. */ 2231 2232 if (*option_data == 0 && 2233 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER)) 2234 { 2235 switch (op->one_char) 2236 { 2237 case N_COLOUR: 2238 colour_option = (char *)"auto"; 2239 break; 2240 2241 case 'o': 2242 only_matching = 0; 2243 break; 2244 2245 #ifdef JFRIEDL_DEBUG 2246 case 'S': 2247 S_arg = 0; 2248 break; 2249 #endif 2250 } 2251 continue; 2252 } 2253 2254 /* Otherwise, find the data string for the option. */ 2255 2256 if (*option_data == 0) 2257 { 2258 if (i >= argc - 1 || longopwasequals) 2259 { 2260 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]); 2261 pcregrep_exit(usage(2)); 2262 } 2263 option_data = argv[++i]; 2264 } 2265 2266 /* If the option type is OP_PATLIST, it's the -e option, which can be called 2267 multiple times to create a list of patterns. */ 2268 2269 if (op->type == OP_PATLIST) 2270 { 2271 if (cmd_pattern_count >= MAX_PATTERN_COUNT) 2272 { 2273 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n", 2274 MAX_PATTERN_COUNT); 2275 return 2; 2276 } 2277 patterns[cmd_pattern_count++] = option_data; 2278 } 2279 2280 /* Otherwise, deal with single string or numeric data values. */ 2281 2282 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER && 2283 op->type != OP_OP_NUMBER) 2284 { 2285 *((char **)op->dataptr) = option_data; 2286 } 2287 2288 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used 2289 only for unpicking arguments, so just keep it simple. */ 2290 2291 else 2292 { 2293 unsigned long int n = 0; 2294 char *endptr = option_data; 2295 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++; 2296 while (isdigit((unsigned char)(*endptr))) 2297 n = n * 10 + (int)(*endptr++ - '0'); 2298 if (*endptr != 0) 2299 { 2300 if (longop) 2301 { 2302 char *equals = strchr(op->long_name, '='); 2303 int nlen = (equals == NULL)? (int)strlen(op->long_name) : 2304 (int)(equals - op->long_name); 2305 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n", 2306 option_data, nlen, op->long_name); 2307 } 2308 else 2309 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n", 2310 option_data, op->one_char); 2311 pcregrep_exit(usage(2)); 2312 } 2313 if (op->type == OP_LONGNUMBER) 2314 *((unsigned long int *)op->dataptr) = n; 2315 else 2316 *((int *)op->dataptr) = n; 2317 } 2318 } 2319 2320 /* Options have been decoded. If -C was used, its value is used as a default 2321 for -A and -B. */ 2322 2323 if (both_context > 0) 2324 { 2325 if (after_context == 0) after_context = both_context; 2326 if (before_context == 0) before_context = both_context; 2327 } 2328 2329 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. 2330 However, the latter two set only_matching. */ 2331 2332 if ((only_matching >= 0 && (file_offsets || line_offsets)) || 2333 (file_offsets && line_offsets)) 2334 { 2335 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets " 2336 "and/or --line-offsets\n"); 2337 pcregrep_exit(usage(2)); 2338 } 2339 2340 if (file_offsets || line_offsets) only_matching = 0; 2341 2342 /* If a locale has not been provided as an option, see if the LC_CTYPE or 2343 LC_ALL environment variable is set, and if so, use it. */ 2344 2345 if (locale == NULL) 2346 { 2347 locale = getenv("LC_ALL"); 2348 locale_from = "LCC_ALL"; 2349 } 2350 2351 if (locale == NULL) 2352 { 2353 locale = getenv("LC_CTYPE"); 2354 locale_from = "LC_CTYPE"; 2355 } 2356 2357 /* If a locale has been provided, set it, and generate the tables the PCRE 2358 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */ 2359 2360 if (locale != NULL) 2361 { 2362 if (setlocale(LC_CTYPE, locale) == NULL) 2363 { 2364 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n", 2365 locale, locale_from); 2366 return 2; 2367 } 2368 pcretables = pcre_maketables(); 2369 } 2370 2371 /* Sort out colouring */ 2372 2373 if (colour_option != NULL && strcmp(colour_option, "never") != 0) 2374 { 2375 if (strcmp(colour_option, "always") == 0) do_colour = TRUE; 2376 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); 2377 else 2378 { 2379 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n", 2380 colour_option); 2381 return 2; 2382 } 2383 if (do_colour) 2384 { 2385 char *cs = getenv("PCREGREP_COLOUR"); 2386 if (cs == NULL) cs = getenv("PCREGREP_COLOR"); 2387 if (cs != NULL) colour_string = cs; 2388 } 2389 } 2390 2391 /* Interpret the newline type; the default settings are Unix-like. */ 2392 2393 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0) 2394 { 2395 pcre_options |= PCRE_NEWLINE_CR; 2396 endlinetype = EL_CR; 2397 } 2398 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0) 2399 { 2400 pcre_options |= PCRE_NEWLINE_LF; 2401 endlinetype = EL_LF; 2402 } 2403 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0) 2404 { 2405 pcre_options |= PCRE_NEWLINE_CRLF; 2406 endlinetype = EL_CRLF; 2407 } 2408 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0) 2409 { 2410 pcre_options |= PCRE_NEWLINE_ANY; 2411 endlinetype = EL_ANY; 2412 } 2413 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) 2414 { 2415 pcre_options |= PCRE_NEWLINE_ANYCRLF; 2416 endlinetype = EL_ANYCRLF; 2417 } 2418 else 2419 { 2420 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); 2421 return 2; 2422 } 2423 2424 /* Interpret the text values for -d and -D */ 2425 2426 if (dee_option != NULL) 2427 { 2428 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; 2429 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; 2430 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; 2431 else 2432 { 2433 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option); 2434 return 2; 2435 } 2436 } 2437 2438 if (DEE_option != NULL) 2439 { 2440 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; 2441 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; 2442 else 2443 { 2444 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option); 2445 return 2; 2446 } 2447 } 2448 2449 /* Check the values for Jeffrey Friedl's debugging options. */ 2450 2451 #ifdef JFRIEDL_DEBUG 2452 if (S_arg > 9) 2453 { 2454 fprintf(stderr, "pcregrep: bad value for -S option\n"); 2455 return 2; 2456 } 2457 if (jfriedl_XT != 0 || jfriedl_XR != 0) 2458 { 2459 if (jfriedl_XT == 0) jfriedl_XT = 1; 2460 if (jfriedl_XR == 0) jfriedl_XR = 1; 2461 } 2462 #endif 2463 2464 /* Get memory to store the pattern and hints lists. */ 2465 2466 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); 2467 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); 2468 2469 if (pattern_list == NULL || hints_list == NULL) 2470 { 2471 fprintf(stderr, "pcregrep: malloc failed\n"); 2472 goto EXIT2; 2473 } 2474 2475 /* If no patterns were provided by -e, and there is no file provided by -f, 2476 the first argument is the one and only pattern, and it must exist. */ 2477 2478 if (cmd_pattern_count == 0 && pattern_filename == NULL) 2479 { 2480 if (i >= argc) return usage(2); 2481 patterns[cmd_pattern_count++] = argv[i++]; 2482 } 2483 2484 /* Compile the patterns that were provided on the command line, either by 2485 multiple uses of -e or as a single unkeyed pattern. */ 2486 2487 for (j = 0; j < cmd_pattern_count; j++) 2488 { 2489 if (!compile_pattern(patterns[j], pcre_options, NULL, 2490 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1)) 2491 goto EXIT2; 2492 } 2493 2494 /* Compile the regular expressions that are provided in a file. */ 2495 2496 if (pattern_filename != NULL) 2497 { 2498 int linenumber = 0; 2499 FILE *f; 2500 char *filename; 2501 char buffer[MBUFTHIRD]; 2502 2503 if (strcmp(pattern_filename, "-") == 0) 2504 { 2505 f = stdin; 2506 filename = stdin_name; 2507 } 2508 else 2509 { 2510 f = fopen(pattern_filename, "r"); 2511 if (f == NULL) 2512 { 2513 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, 2514 strerror(errno)); 2515 goto EXIT2; 2516 } 2517 filename = pattern_filename; 2518 } 2519 2520 while (fgets(buffer, MBUFTHIRD, f) != NULL) 2521 { 2522 char *s = buffer + (int)strlen(buffer); 2523 while (s > buffer && isspace((unsigned char)(s[-1]))) s--; 2524 *s = 0; 2525 linenumber++; 2526 if (buffer[0] == 0) continue; /* Skip blank lines */ 2527 if (!compile_pattern(buffer, pcre_options, filename, linenumber)) 2528 goto EXIT2; 2529 } 2530 2531 if (f != stdin) fclose(f); 2532 } 2533 2534 /* Study the regular expressions, as we will be running them many times */ 2535 2536 for (j = 0; j < pattern_count; j++) 2537 { 2538 hints_list[j] = pcre_study(pattern_list[j], 0, &error); 2539 if (error != NULL) 2540 { 2541 char s[16]; 2542 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); 2543 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); 2544 goto EXIT2; 2545 } 2546 hint_count++; 2547 } 2548 2549 /* If --match-limit or --recursion-limit was set, put the value(s) into the 2550 pcre_extra block for each pattern. */ 2551 2552 if (match_limit > 0 || match_limit_recursion > 0) 2553 { 2554 for (j = 0; j < pattern_count; j++) 2555 { 2556 if (hints_list[j] == NULL) 2557 { 2558 hints_list[j] = malloc(sizeof(pcre_extra)); 2559 if (hints_list[j] == NULL) 2560 { 2561 fprintf(stderr, "pcregrep: malloc failed\n"); 2562 pcregrep_exit(2); 2563 } 2564 } 2565 if (match_limit > 0) 2566 { 2567 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT; 2568 hints_list[j]->match_limit = match_limit; 2569 } 2570 if (match_limit_recursion > 0) 2571 { 2572 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 2573 hints_list[j]->match_limit_recursion = match_limit_recursion; 2574 } 2575 } 2576 } 2577 2578 /* If there are include or exclude patterns, compile them. */ 2579 2580 if (exclude_pattern != NULL) 2581 { 2582 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, 2583 pcretables); 2584 if (exclude_compiled == NULL) 2585 { 2586 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n", 2587 errptr, error); 2588 goto EXIT2; 2589 } 2590 } 2591 2592 if (include_pattern != NULL) 2593 { 2594 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, 2595 pcretables); 2596 if (include_compiled == NULL) 2597 { 2598 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n", 2599 errptr, error); 2600 goto EXIT2; 2601 } 2602 } 2603 2604 if (exclude_dir_pattern != NULL) 2605 { 2606 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr, 2607 pcretables); 2608 if (exclude_dir_compiled == NULL) 2609 { 2610 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n", 2611 errptr, error); 2612 goto EXIT2; 2613 } 2614 } 2615 2616 if (include_dir_pattern != NULL) 2617 { 2618 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr, 2619 pcretables); 2620 if (include_dir_compiled == NULL) 2621 { 2622 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n", 2623 errptr, error); 2624 goto EXIT2; 2625 } 2626 } 2627 2628 /* If there are no further arguments, do the business on stdin and exit. */ 2629 2630 if (i >= argc) 2631 { 2632 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL); 2633 goto EXIT; 2634 } 2635 2636 /* Otherwise, work through the remaining arguments as files or directories. 2637 Pass in the fact that there is only one argument at top level - this suppresses 2638 the file name if the argument is not a directory and filenames are not 2639 otherwise forced. */ 2640 2641 only_one_at_top = i == argc - 1; /* Catch initial value of i */ 2642 2643 for (; i < argc; i++) 2644 { 2645 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, 2646 only_one_at_top); 2647 if (frc > 1) rc = frc; 2648 else if (frc == 0 && rc == 1) rc = 0; 2649 } 2650 2651 EXIT: 2652 if (pattern_list != NULL) 2653 { 2654 for (i = 0; i < pattern_count; i++) free(pattern_list[i]); 2655 free(pattern_list); 2656 } 2657 if (hints_list != NULL) 2658 { 2659 for (i = 0; i < hint_count; i++) 2660 { 2661 if (hints_list[i] != NULL) free(hints_list[i]); 2662 } 2663 free(hints_list); 2664 } 2665 pcregrep_exit(rc); 2666 2667 EXIT2: 2668 rc = 2; 2669 goto EXIT; 2670 } 2671 2672 /* End of pcregrep */ 2673