Home | History | Annotate | Download | only in pcre
      1 /*************************************************
      2 *               pcregrep program                 *
      3 *************************************************/
      4 
      5 /* This is a grep program that uses the PCRE regular expression library to do
      6 its pattern matching. On a Unix or Win32 system it can recurse into
      7 directories.
      8 
      9            Copyright (c) 1997-2011 University of Cambridge
     10 
     11 -----------------------------------------------------------------------------
     12 Redistribution and use in source and binary forms, with or without
     13 modification, are permitted provided that the following conditions are met:
     14 
     15     * Redistributions of source code must retain the above copyright notice,
     16       this list of conditions and the following disclaimer.
     17 
     18     * Redistributions in binary form must reproduce the above copyright
     19       notice, this list of conditions and the following disclaimer in the
     20       documentation and/or other materials provided with the distribution.
     21 
     22     * Neither the name of the University of Cambridge nor the names of its
     23       contributors may be used to endorse or promote products derived from
     24       this software without specific prior written permission.
     25 
     26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36 POSSIBILITY OF SUCH DAMAGE.
     37 -----------------------------------------------------------------------------
     38 */
     39 
     40 #ifdef HAVE_CONFIG_H
     41 #include "config.h"
     42 #endif
     43 
     44 #include <ctype.h>
     45 #include <locale.h>
     46 #include <stdio.h>
     47 #include <string.h>
     48 #include <stdlib.h>
     49 #include <errno.h>
     50 
     51 #include <sys/types.h>
     52 #include <sys/stat.h>
     53 
     54 #ifdef HAVE_UNISTD_H
     55 #include <unistd.h>
     56 #endif
     57 
     58 #ifdef SUPPORT_LIBZ
     59 #include <zlib.h>
     60 #endif
     61 
     62 #ifdef SUPPORT_LIBBZ2
     63 #include <bzlib.h>
     64 #endif
     65 
     66 #include "pcre.h"
     67 
     68 #define FALSE 0
     69 #define TRUE 1
     70 
     71 typedef int BOOL;
     72 
     73 #define MAX_PATTERN_COUNT 100
     74 #define OFFSET_SIZE 99
     75 
     76 #if BUFSIZ > 8192
     77 #define MBUFTHIRD BUFSIZ
     78 #else
     79 #define MBUFTHIRD 8192
     80 #endif
     81 
     82 /* Values for the "filenames" variable, which specifies options for file name
     83 output. The order is important; it is assumed that a file name is wanted for
     84 all values greater than FN_DEFAULT. */
     85 
     86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
     87 
     88 /* File reading styles */
     89 
     90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
     91 
     92 /* Actions for the -d and -D options */
     93 
     94 enum { dee_READ, dee_SKIP, dee_RECURSE };
     95 enum { DEE_READ, DEE_SKIP };
     96 
     97 /* Actions for special processing options (flag bits) */
     98 
     99 #define PO_WORD_MATCH     0x0001
    100 #define PO_LINE_MATCH     0x0002
    101 #define PO_FIXED_STRINGS  0x0004
    102 
    103 /* Line ending types */
    104 
    105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
    106 
    107 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
    108 environments), a warning is issued if the value of fwrite() is ignored.
    109 Unfortunately, casting to (void) does not suppress the warning. To get round
    110 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
    111 apply to fprintf(). */
    112 
    113 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
    114 
    115 
    116 
    117 /*************************************************
    118 *               Global variables                 *
    119 *************************************************/
    120 
    121 /* Jeffrey Friedl has some debugging requirements that are not part of the
    122 regular code. */
    123 
    124 #ifdef JFRIEDL_DEBUG
    125 static int S_arg = -1;
    126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
    127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
    128 static const char *jfriedl_prefix = "";
    129 static const char *jfriedl_postfix = "";
    130 #endif
    131 
    132 static int  endlinetype;
    133 
    134 static char *colour_string = (char *)"1;31";
    135 static char *colour_option = NULL;
    136 static char *dee_option = NULL;
    137 static char *DEE_option = NULL;
    138 static char *newline = NULL;
    139 static char *pattern_filename = NULL;
    140 static char *stdin_name = (char *)"(standard input)";
    141 static char *locale = NULL;
    142 
    143 static const unsigned char *pcretables = NULL;
    144 
    145 static int  pattern_count = 0;
    146 static pcre **pattern_list = NULL;
    147 static pcre_extra **hints_list = NULL;
    148 
    149 static char *include_pattern = NULL;
    150 static char *exclude_pattern = NULL;
    151 static char *include_dir_pattern = NULL;
    152 static char *exclude_dir_pattern = NULL;
    153 
    154 static pcre *include_compiled = NULL;
    155 static pcre *exclude_compiled = NULL;
    156 static pcre *include_dir_compiled = NULL;
    157 static pcre *exclude_dir_compiled = NULL;
    158 
    159 static int after_context = 0;
    160 static int before_context = 0;
    161 static int both_context = 0;
    162 static int dee_action = dee_READ;
    163 static int DEE_action = DEE_READ;
    164 static int error_count = 0;
    165 static int filenames = FN_DEFAULT;
    166 static int only_matching = -1;
    167 static int process_options = 0;
    168 
    169 static unsigned long int match_limit = 0;
    170 static unsigned long int match_limit_recursion = 0;
    171 
    172 static BOOL count_only = FALSE;
    173 static BOOL do_colour = FALSE;
    174 static BOOL file_offsets = FALSE;
    175 static BOOL hyphenpending = FALSE;
    176 static BOOL invert = FALSE;
    177 static BOOL line_buffered = FALSE;
    178 static BOOL line_offsets = FALSE;
    179 static BOOL multiline = FALSE;
    180 static BOOL number = FALSE;
    181 static BOOL omit_zero_count = FALSE;
    182 static BOOL resource_error = FALSE;
    183 static BOOL quiet = FALSE;
    184 static BOOL silent = FALSE;
    185 static BOOL utf8 = FALSE;
    186 
    187 /* Structure for options and list of them */
    188 
    189 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
    190        OP_OP_NUMBER, OP_PATLIST };
    191 
    192 typedef struct option_item {
    193   int type;
    194   int one_char;
    195   void *dataptr;
    196   const char *long_name;
    197   const char *help_text;
    198 } option_item;
    199 
    200 /* Options without a single-letter equivalent get a negative value. This can be
    201 used to identify them. */
    202 
    203 #define N_COLOUR       (-1)
    204 #define N_EXCLUDE      (-2)
    205 #define N_EXCLUDE_DIR  (-3)
    206 #define N_HELP         (-4)
    207 #define N_INCLUDE      (-5)
    208 #define N_INCLUDE_DIR  (-6)
    209 #define N_LABEL        (-7)
    210 #define N_LOCALE       (-8)
    211 #define N_NULL         (-9)
    212 #define N_LOFFSETS     (-10)
    213 #define N_FOFFSETS     (-11)
    214 #define N_LBUFFER      (-12)
    215 #define N_M_LIMIT      (-13)
    216 #define N_M_LIMIT_REC  (-14)
    217 
    218 static option_item optionlist[] = {
    219   { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
    220   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
    221   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
    222   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
    223   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
    224   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
    225   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
    226   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
    227   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
    228   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
    229   { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
    230   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
    231   { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
    232   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
    233   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
    234   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
    235   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
    236   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
    237   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
    238   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
    239   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
    240   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
    241   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
    242   { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
    243   { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
    244   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
    245   { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
    246   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
    247   { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
    248   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
    249   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
    250   { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
    251   { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
    252   { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
    253   { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
    254 
    255   /* These two were accidentally implemented with underscores instead of
    256   hyphens in the option names. As this was not discovered for several releases,
    257   the incorrect versions are left in the table for compatibility. However, the
    258   --help function misses out any option that has an underscore in its name. */
    259 
    260   { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
    261   { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
    262 
    263 #ifdef JFRIEDL_DEBUG
    264   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
    265 #endif
    266   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
    267   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
    268   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
    269   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
    270   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
    271   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
    272   { OP_NODATA,    0,        NULL,               NULL,            NULL }
    273 };
    274 
    275 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
    276 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
    277 that the combination of -w and -x has the same effect as -x on its own, so we
    278 can treat them as the same. */
    279 
    280 static const char *prefix[] = {
    281   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
    282 
    283 static const char *suffix[] = {
    284   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
    285 
    286 /* UTF-8 tables - used only when the newline setting is "any". */
    287 
    288 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
    289 
    290 const char utf8_table4[] = {
    291   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    292   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    293   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    294   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
    295 
    296 
    297 
    298 /*************************************************
    299 *         Exit from the program                  *
    300 *************************************************/
    301 
    302 /* If there has been a resource error, give a suitable message.
    303 
    304 Argument:  the return code
    305 Returns:   does not return
    306 */
    307 
    308 static void
    309 pcregrep_exit(int rc)
    310 {
    311 if (resource_error)
    312   {
    313   fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
    314     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
    315   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
    316   }
    317 
    318 exit(rc);
    319 }
    320 
    321 
    322 /*************************************************
    323 *            OS-specific functions               *
    324 *************************************************/
    325 
    326 /* These functions are defined so that they can be made system specific,
    327 although at present the only ones are for Unix, Win32, and for "no support". */
    328 
    329 
    330 /************* Directory scanning in Unix ***********/
    331 
    332 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
    333 #include <sys/types.h>
    334 #include <sys/stat.h>
    335 #include <dirent.h>
    336 
    337 typedef DIR directory_type;
    338 
    339 static int
    340 isdirectory(char *filename)
    341 {
    342 struct stat statbuf;
    343 if (stat(filename, &statbuf) < 0)
    344   return 0;        /* In the expectation that opening as a file will fail */
    345 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
    346 }
    347 
    348 static directory_type *
    349 opendirectory(char *filename)
    350 {
    351 return opendir(filename);
    352 }
    353 
    354 static char *
    355 readdirectory(directory_type *dir)
    356 {
    357 for (;;)
    358   {
    359   struct dirent *dent = readdir(dir);
    360   if (dent == NULL) return NULL;
    361   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
    362     return dent->d_name;
    363   }
    364 /* Control never reaches here */
    365 }
    366 
    367 static void
    368 closedirectory(directory_type *dir)
    369 {
    370 closedir(dir);
    371 }
    372 
    373 
    374 /************* Test for regular file in Unix **********/
    375 
    376 static int
    377 isregfile(char *filename)
    378 {
    379 struct stat statbuf;
    380 if (stat(filename, &statbuf) < 0)
    381   return 1;        /* In the expectation that opening as a file will fail */
    382 return (statbuf.st_mode & S_IFMT) == S_IFREG;
    383 }
    384 
    385 
    386 /************* Test for a terminal in Unix **********/
    387 
    388 static BOOL
    389 is_stdout_tty(void)
    390 {
    391 return isatty(fileno(stdout));
    392 }
    393 
    394 static BOOL
    395 is_file_tty(FILE *f)
    396 {
    397 return isatty(fileno(f));
    398 }
    399 
    400 
    401 /************* Directory scanning in Win32 ***********/
    402 
    403 /* I (Philip Hazel) have no means of testing this code. It was contributed by
    404 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
    405 when it did not exist. David Byron added a patch that moved the #include of
    406 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
    407 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
    408 undefined when it is indeed undefined. */
    409 
    410 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
    411 
    412 #ifndef STRICT
    413 # define STRICT
    414 #endif
    415 #ifndef WIN32_LEAN_AND_MEAN
    416 # define WIN32_LEAN_AND_MEAN
    417 #endif
    418 
    419 #include <windows.h>
    420 
    421 #ifndef INVALID_FILE_ATTRIBUTES
    422 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
    423 #endif
    424 
    425 typedef struct directory_type
    426 {
    427 HANDLE handle;
    428 BOOL first;
    429 WIN32_FIND_DATA data;
    430 } directory_type;
    431 
    432 int
    433 isdirectory(char *filename)
    434 {
    435 DWORD attr = GetFileAttributes(filename);
    436 if (attr == INVALID_FILE_ATTRIBUTES)
    437   return 0;
    438 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
    439 }
    440 
    441 directory_type *
    442 opendirectory(char *filename)
    443 {
    444 size_t len;
    445 char *pattern;
    446 directory_type *dir;
    447 DWORD err;
    448 len = strlen(filename);
    449 pattern = (char *) malloc(len + 3);
    450 dir = (directory_type *) malloc(sizeof(*dir));
    451 if ((pattern == NULL) || (dir == NULL))
    452   {
    453   fprintf(stderr, "pcregrep: malloc failed\n");
    454   pcregrep_exit(2);
    455   }
    456 memcpy(pattern, filename, len);
    457 memcpy(&(pattern[len]), "\\*", 3);
    458 dir->handle = FindFirstFile(pattern, &(dir->data));
    459 if (dir->handle != INVALID_HANDLE_VALUE)
    460   {
    461   free(pattern);
    462   dir->first = TRUE;
    463   return dir;
    464   }
    465 err = GetLastError();
    466 free(pattern);
    467 free(dir);
    468 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
    469 return NULL;
    470 }
    471 
    472 char *
    473 readdirectory(directory_type *dir)
    474 {
    475 for (;;)
    476   {
    477   if (!dir->first)
    478     {
    479     if (!FindNextFile(dir->handle, &(dir->data)))
    480       return NULL;
    481     }
    482   else
    483     {
    484     dir->first = FALSE;
    485     }
    486   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
    487     return dir->data.cFileName;
    488   }
    489 #ifndef _MSC_VER
    490 return NULL;   /* Keep compiler happy; never executed */
    491 #endif
    492 }
    493 
    494 void
    495 closedirectory(directory_type *dir)
    496 {
    497 FindClose(dir->handle);
    498 free(dir);
    499 }
    500 
    501 
    502 /************* Test for regular file in Win32 **********/
    503 
    504 /* I don't know how to do this, or if it can be done; assume all paths are
    505 regular if they are not directories. */
    506 
    507 int isregfile(char *filename)
    508 {
    509 return !isdirectory(filename);
    510 }
    511 
    512 
    513 /************* Test for a terminal in Win32 **********/
    514 
    515 /* I don't know how to do this; assume never */
    516 
    517 static BOOL
    518 is_stdout_tty(void)
    519 {
    520 return FALSE;
    521 }
    522 
    523 static BOOL
    524 is_file_tty(FILE *f)
    525 {
    526 return FALSE;
    527 }
    528 
    529 
    530 /************* Directory scanning when we can't do it ***********/
    531 
    532 /* The type is void, and apart from isdirectory(), the functions do nothing. */
    533 
    534 #else
    535 
    536 typedef void directory_type;
    537 
    538 int isdirectory(char *filename) { return 0; }
    539 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
    540 char *readdirectory(directory_type *dir) { return (char*)0;}
    541 void closedirectory(directory_type *dir) {}
    542 
    543 
    544 /************* Test for regular when we can't do it **********/
    545 
    546 /* Assume all files are regular. */
    547 
    548 int isregfile(char *filename) { return 1; }
    549 
    550 
    551 /************* Test for a terminal when we can't do it **********/
    552 
    553 static BOOL
    554 is_stdout_tty(void)
    555 {
    556 return FALSE;
    557 }
    558 
    559 static BOOL
    560 is_file_tty(FILE *f)
    561 {
    562 return FALSE;
    563 }
    564 
    565 #endif
    566 
    567 
    568 
    569 #ifndef HAVE_STRERROR
    570 /*************************************************
    571 *     Provide strerror() for non-ANSI libraries  *
    572 *************************************************/
    573 
    574 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
    575 in their libraries, but can provide the same facility by this simple
    576 alternative function. */
    577 
    578 extern int   sys_nerr;
    579 extern char *sys_errlist[];
    580 
    581 char *
    582 strerror(int n)
    583 {
    584 if (n < 0 || n >= sys_nerr) return "unknown error number";
    585 return sys_errlist[n];
    586 }
    587 #endif /* HAVE_STRERROR */
    588 
    589 
    590 
    591 /*************************************************
    592 *            Read one line of input              *
    593 *************************************************/
    594 
    595 /* Normally, input is read using fread() into a large buffer, so many lines may
    596 be read at once. However, doing this for tty input means that no output appears
    597 until a lot of input has been typed. Instead, tty input is handled line by
    598 line. We cannot use fgets() for this, because it does not stop at a binary
    599 zero, and therefore there is no way of telling how many characters it has read,
    600 because there may be binary zeros embedded in the data.
    601 
    602 Arguments:
    603   buffer     the buffer to read into
    604   length     the maximum number of characters to read
    605   f          the file
    606 
    607 Returns:     the number of characters read, zero at end of file
    608 */
    609 
    610 static int
    611 read_one_line(char *buffer, int length, FILE *f)
    612 {
    613 int c;
    614 int yield = 0;
    615 while ((c = fgetc(f)) != EOF)
    616   {
    617   buffer[yield++] = c;
    618   if (c == '\n' || yield >= length) break;
    619   }
    620 return yield;
    621 }
    622 
    623 
    624 
    625 /*************************************************
    626 *             Find end of line                   *
    627 *************************************************/
    628 
    629 /* The length of the endline sequence that is found is set via lenptr. This may
    630 be zero at the very end of the file if there is no line-ending sequence there.
    631 
    632 Arguments:
    633   p         current position in line
    634   endptr    end of available data
    635   lenptr    where to put the length of the eol sequence
    636 
    637 Returns:    pointer to the last byte of the line, including the newline byte(s)
    638 */
    639 
    640 static char *
    641 end_of_line(char *p, char *endptr, int *lenptr)
    642 {
    643 switch(endlinetype)
    644   {
    645   default:      /* Just in case */
    646   case EL_LF:
    647   while (p < endptr && *p != '\n') p++;
    648   if (p < endptr)
    649     {
    650     *lenptr = 1;
    651     return p + 1;
    652     }
    653   *lenptr = 0;
    654   return endptr;
    655 
    656   case EL_CR:
    657   while (p < endptr && *p != '\r') p++;
    658   if (p < endptr)
    659     {
    660     *lenptr = 1;
    661     return p + 1;
    662     }
    663   *lenptr = 0;
    664   return endptr;
    665 
    666   case EL_CRLF:
    667   for (;;)
    668     {
    669     while (p < endptr && *p != '\r') p++;
    670     if (++p >= endptr)
    671       {
    672       *lenptr = 0;
    673       return endptr;
    674       }
    675     if (*p == '\n')
    676       {
    677       *lenptr = 2;
    678       return p + 1;
    679       }
    680     }
    681   break;
    682 
    683   case EL_ANYCRLF:
    684   while (p < endptr)
    685     {
    686     int extra = 0;
    687     register int c = *((unsigned char *)p);
    688 
    689     if (utf8 && c >= 0xc0)
    690       {
    691       int gcii, gcss;
    692       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
    693       gcss = 6*extra;
    694       c = (c & utf8_table3[extra]) << gcss;
    695       for (gcii = 1; gcii <= extra; gcii++)
    696         {
    697         gcss -= 6;
    698         c |= (p[gcii] & 0x3f) << gcss;
    699         }
    700       }
    701 
    702     p += 1 + extra;
    703 
    704     switch (c)
    705       {
    706       case 0x0a:    /* LF */
    707       *lenptr = 1;
    708       return p;
    709 
    710       case 0x0d:    /* CR */
    711       if (p < endptr && *p == 0x0a)
    712         {
    713         *lenptr = 2;
    714         p++;
    715         }
    716       else *lenptr = 1;
    717       return p;
    718 
    719       default:
    720       break;
    721       }
    722     }   /* End of loop for ANYCRLF case */
    723 
    724   *lenptr = 0;  /* Must have hit the end */
    725   return endptr;
    726 
    727   case EL_ANY:
    728   while (p < endptr)
    729     {
    730     int extra = 0;
    731     register int c = *((unsigned char *)p);
    732 
    733     if (utf8 && c >= 0xc0)
    734       {
    735       int gcii, gcss;
    736       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
    737       gcss = 6*extra;
    738       c = (c & utf8_table3[extra]) << gcss;
    739       for (gcii = 1; gcii <= extra; gcii++)
    740         {
    741         gcss -= 6;
    742         c |= (p[gcii] & 0x3f) << gcss;
    743         }
    744       }
    745 
    746     p += 1 + extra;
    747 
    748     switch (c)
    749       {
    750       case 0x0a:    /* LF */
    751       case 0x0b:    /* VT */
    752       case 0x0c:    /* FF */
    753       *lenptr = 1;
    754       return p;
    755 
    756       case 0x0d:    /* CR */
    757       if (p < endptr && *p == 0x0a)
    758         {
    759         *lenptr = 2;
    760         p++;
    761         }
    762       else *lenptr = 1;
    763       return p;
    764 
    765       case 0x85:    /* NEL */
    766       *lenptr = utf8? 2 : 1;
    767       return p;
    768 
    769       case 0x2028:  /* LS */
    770       case 0x2029:  /* PS */
    771       *lenptr = 3;
    772       return p;
    773 
    774       default:
    775       break;
    776       }
    777     }   /* End of loop for ANY case */
    778 
    779   *lenptr = 0;  /* Must have hit the end */
    780   return endptr;
    781   }     /* End of overall switch */
    782 }
    783 
    784 
    785 
    786 /*************************************************
    787 *         Find start of previous line            *
    788 *************************************************/
    789 
    790 /* This is called when looking back for before lines to print.
    791 
    792 Arguments:
    793   p         start of the subsequent line
    794   startptr  start of available data
    795 
    796 Returns:    pointer to the start of the previous line
    797 */
    798 
    799 static char *
    800 previous_line(char *p, char *startptr)
    801 {
    802 switch(endlinetype)
    803   {
    804   default:      /* Just in case */
    805   case EL_LF:
    806   p--;
    807   while (p > startptr && p[-1] != '\n') p--;
    808   return p;
    809 
    810   case EL_CR:
    811   p--;
    812   while (p > startptr && p[-1] != '\n') p--;
    813   return p;
    814 
    815   case EL_CRLF:
    816   for (;;)
    817     {
    818     p -= 2;
    819     while (p > startptr && p[-1] != '\n') p--;
    820     if (p <= startptr + 1 || p[-2] == '\r') return p;
    821     }
    822   return p;   /* But control should never get here */
    823 
    824   case EL_ANY:
    825   case EL_ANYCRLF:
    826   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
    827   if (utf8) while ((*p & 0xc0) == 0x80) p--;
    828 
    829   while (p > startptr)
    830     {
    831     register int c;
    832     char *pp = p - 1;
    833 
    834     if (utf8)
    835       {
    836       int extra = 0;
    837       while ((*pp & 0xc0) == 0x80) pp--;
    838       c = *((unsigned char *)pp);
    839       if (c >= 0xc0)
    840         {
    841         int gcii, gcss;
    842         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
    843         gcss = 6*extra;
    844         c = (c & utf8_table3[extra]) << gcss;
    845         for (gcii = 1; gcii <= extra; gcii++)
    846           {
    847           gcss -= 6;
    848           c |= (pp[gcii] & 0x3f) << gcss;
    849           }
    850         }
    851       }
    852     else c = *((unsigned char *)pp);
    853 
    854     if (endlinetype == EL_ANYCRLF) switch (c)
    855       {
    856       case 0x0a:    /* LF */
    857       case 0x0d:    /* CR */
    858       return p;
    859 
    860       default:
    861       break;
    862       }
    863 
    864     else switch (c)
    865       {
    866       case 0x0a:    /* LF */
    867       case 0x0b:    /* VT */
    868       case 0x0c:    /* FF */
    869       case 0x0d:    /* CR */
    870       case 0x85:    /* NEL */
    871       case 0x2028:  /* LS */
    872       case 0x2029:  /* PS */
    873       return p;
    874 
    875       default:
    876       break;
    877       }
    878 
    879     p = pp;  /* Back one character */
    880     }        /* End of loop for ANY case */
    881 
    882   return startptr;  /* Hit start of data */
    883   }     /* End of overall switch */
    884 }
    885 
    886 
    887 
    888 
    889 
    890 /*************************************************
    891 *       Print the previous "after" lines         *
    892 *************************************************/
    893 
    894 /* This is called if we are about to lose said lines because of buffer filling,
    895 and at the end of the file. The data in the line is written using fwrite() so
    896 that a binary zero does not terminate it.
    897 
    898 Arguments:
    899   lastmatchnumber   the number of the last matching line, plus one
    900   lastmatchrestart  where we restarted after the last match
    901   endptr            end of available data
    902   printname         filename for printing
    903 
    904 Returns:            nothing
    905 */
    906 
    907 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
    908   char *endptr, char *printname)
    909 {
    910 if (after_context > 0 && lastmatchnumber > 0)
    911   {
    912   int count = 0;
    913   while (lastmatchrestart < endptr && count++ < after_context)
    914     {
    915     int ellength;
    916     char *pp = lastmatchrestart;
    917     if (printname != NULL) fprintf(stdout, "%s-", printname);
    918     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
    919     pp = end_of_line(pp, endptr, &ellength);
    920     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
    921     lastmatchrestart = pp;
    922     }
    923   hyphenpending = TRUE;
    924   }
    925 }
    926 
    927 
    928 
    929 /*************************************************
    930 *   Apply patterns to subject till one matches   *
    931 *************************************************/
    932 
    933 /* This function is called to run through all patterns, looking for a match. It
    934 is used multiple times for the same subject when colouring is enabled, in order
    935 to find all possible matches.
    936 
    937 Arguments:
    938   matchptr    the start of the subject
    939   length      the length of the subject to match
    940   offsets     the offets vector to fill in
    941   mrc         address of where to put the result of pcre_exec()
    942 
    943 Returns:      TRUE if there was a match
    944               FALSE if there was no match
    945               invert if there was a non-fatal error
    946 */
    947 
    948 static BOOL
    949 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
    950 {
    951 int i;
    952 size_t slen = length;
    953 const char *msg = "this text:\n\n";
    954 if (slen > 200)
    955   {
    956   slen = 200;
    957   msg = "text that starts:\n\n";
    958   }
    959 for (i = 0; i < pattern_count; i++)
    960   {
    961   *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
    962     PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
    963   if (*mrc >= 0) return TRUE;
    964   if (*mrc == PCRE_ERROR_NOMATCH) continue;
    965   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
    966   if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
    967   fprintf(stderr, "%s", msg);
    968   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
    969   fprintf(stderr, "\n\n");
    970   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
    971     resource_error = TRUE;
    972   if (error_count++ > 20)
    973     {
    974     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
    975     pcregrep_exit(2);
    976     }
    977   return invert;    /* No more matching; don't show the line again */
    978   }
    979 
    980 return FALSE;  /* No match, no errors */
    981 }
    982 
    983 
    984 
    985 /*************************************************
    986 *            Grep an individual file             *
    987 *************************************************/
    988 
    989 /* This is called from grep_or_recurse() below. It uses a buffer that is three
    990 times the value of MBUFTHIRD. The matching point is never allowed to stray into
    991 the top third of the buffer, thus keeping more of the file available for
    992 context printing or for multiline scanning. For large files, the pointer will
    993 be in the middle third most of the time, so the bottom third is available for
    994 "before" context printing.
    995 
    996 Arguments:
    997   handle       the fopened FILE stream for a normal file
    998                the gzFile pointer when reading is via libz
    999                the BZFILE pointer when reading is via libbz2
   1000   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
   1001   printname    the file name if it is to be printed for each match
   1002                or NULL if the file name is not to be printed
   1003                it cannot be NULL if filenames[_nomatch]_only is set
   1004 
   1005 Returns:       0 if there was at least one match
   1006                1 otherwise (no matches)
   1007                2 if there is a read error on a .bz2 file
   1008 */
   1009 
   1010 static int
   1011 pcregrep(void *handle, int frtype, char *printname)
   1012 {
   1013 int rc = 1;
   1014 int linenumber = 1;
   1015 int lastmatchnumber = 0;
   1016 int count = 0;
   1017 int filepos = 0;
   1018 int offsets[OFFSET_SIZE];
   1019 char *lastmatchrestart = NULL;
   1020 char buffer[3*MBUFTHIRD];
   1021 char *ptr = buffer;
   1022 char *endptr;
   1023 size_t bufflength;
   1024 BOOL endhyphenpending = FALSE;
   1025 BOOL input_line_buffered = line_buffered;
   1026 FILE *in = NULL;                    /* Ensure initialized */
   1027 
   1028 #ifdef SUPPORT_LIBZ
   1029 gzFile ingz = NULL;
   1030 #endif
   1031 
   1032 #ifdef SUPPORT_LIBBZ2
   1033 BZFILE *inbz2 = NULL;
   1034 #endif
   1035 
   1036 
   1037 /* Do the first read into the start of the buffer and set up the pointer to end
   1038 of what we have. In the case of libz, a non-zipped .gz file will be read as a
   1039 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
   1040 fail. */
   1041 
   1042 #ifdef SUPPORT_LIBZ
   1043 if (frtype == FR_LIBZ)
   1044   {
   1045   ingz = (gzFile)handle;
   1046   bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
   1047   }
   1048 else
   1049 #endif
   1050 
   1051 #ifdef SUPPORT_LIBBZ2
   1052 if (frtype == FR_LIBBZ2)
   1053   {
   1054   inbz2 = (BZFILE *)handle;
   1055   bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
   1056   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
   1057   }                                    /* without the cast it is unsigned. */
   1058 else
   1059 #endif
   1060 
   1061   {
   1062   in = (FILE *)handle;
   1063   if (is_file_tty(in)) input_line_buffered = TRUE;
   1064   bufflength = input_line_buffered?
   1065     read_one_line(buffer, 3*MBUFTHIRD, in) :
   1066     fread(buffer, 1, 3*MBUFTHIRD, in);
   1067   }
   1068 
   1069 endptr = buffer + bufflength;
   1070 
   1071 /* Loop while the current pointer is not at the end of the file. For large
   1072 files, endptr will be at the end of the buffer when we are in the middle of the
   1073 file, but ptr will never get there, because as soon as it gets over 2/3 of the
   1074 way, the buffer is shifted left and re-filled. */
   1075 
   1076 while (ptr < endptr)
   1077   {
   1078   int endlinelength;
   1079   int mrc = 0;
   1080   BOOL match;
   1081   char *matchptr = ptr;
   1082   char *t = ptr;
   1083   size_t length, linelength;
   1084 
   1085   /* At this point, ptr is at the start of a line. We need to find the length
   1086   of the subject string to pass to pcre_exec(). In multiline mode, it is the
   1087   length remainder of the data in the buffer. Otherwise, it is the length of
   1088   the next line, excluding the terminating newline. After matching, we always
   1089   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
   1090   option is used for compiling, so that any match is constrained to be in the
   1091   first line. */
   1092 
   1093   t = end_of_line(t, endptr, &endlinelength);
   1094   linelength = t - ptr - endlinelength;
   1095   length = multiline? (size_t)(endptr - ptr) : linelength;
   1096 
   1097   /* Extra processing for Jeffrey Friedl's debugging. */
   1098 
   1099 #ifdef JFRIEDL_DEBUG
   1100   if (jfriedl_XT || jfriedl_XR)
   1101   {
   1102       #include <sys/time.h>
   1103       #include <time.h>
   1104       struct timeval start_time, end_time;
   1105       struct timezone dummy;
   1106       int i;
   1107 
   1108       if (jfriedl_XT)
   1109       {
   1110           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
   1111           const char *orig = ptr;
   1112           ptr = malloc(newlen + 1);
   1113           if (!ptr) {
   1114                   printf("out of memory");
   1115                   pcregrep_exit(2);
   1116           }
   1117           endptr = ptr;
   1118           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
   1119           for (i = 0; i < jfriedl_XT; i++) {
   1120                   strncpy(endptr, orig,  length);
   1121                   endptr += length;
   1122           }
   1123           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
   1124           length = newlen;
   1125       }
   1126 
   1127       if (gettimeofday(&start_time, &dummy) != 0)
   1128               perror("bad gettimeofday");
   1129 
   1130 
   1131       for (i = 0; i < jfriedl_XR; i++)
   1132           match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
   1133               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
   1134 
   1135       if (gettimeofday(&end_time, &dummy) != 0)
   1136               perror("bad gettimeofday");
   1137 
   1138       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
   1139                       -
   1140                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
   1141 
   1142       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
   1143       return 0;
   1144   }
   1145 #endif
   1146 
   1147   /* We come back here after a match when the -o option (only_matching) is set,
   1148   in order to find any further matches in the same line. */
   1149 
   1150   ONLY_MATCHING_RESTART:
   1151 
   1152   /* Run through all the patterns until one matches or there is an error other
   1153   than NOMATCH. This code is in a subroutine so that it can be re-used for
   1154   finding subsequent matches when colouring matched lines. */
   1155 
   1156   match = match_patterns(matchptr, length, offsets, &mrc);
   1157 
   1158   /* If it's a match or a not-match (as required), do what's wanted. */
   1159 
   1160   if (match != invert)
   1161     {
   1162     BOOL hyphenprinted = FALSE;
   1163 
   1164     /* We've failed if we want a file that doesn't have any matches. */
   1165 
   1166     if (filenames == FN_NOMATCH_ONLY) return 1;
   1167 
   1168     /* Just count if just counting is wanted. */
   1169 
   1170     if (count_only) count++;
   1171 
   1172     /* If all we want is a file name, there is no need to scan any more lines
   1173     in the file. */
   1174 
   1175     else if (filenames == FN_MATCH_ONLY)
   1176       {
   1177       fprintf(stdout, "%s\n", printname);
   1178       return 0;
   1179       }
   1180 
   1181     /* Likewise, if all we want is a yes/no answer. */
   1182 
   1183     else if (quiet) return 0;
   1184 
   1185     /* The --only-matching option prints just the substring that matched, or a
   1186     captured portion of it, as long as this string is not empty, and the
   1187     --file-offsets and --line-offsets options output offsets for the matching
   1188     substring (they both force --only-matching = 0). None of these options
   1189     prints any context. Afterwards, adjust the start and length, and then jump
   1190     back to look for further matches in the same line. If we are in invert
   1191     mode, however, nothing is printed and we do not restart - this could still
   1192     be useful because the return code is set. */
   1193 
   1194     else if (only_matching >= 0)
   1195       {
   1196       if (!invert)
   1197         {
   1198         if (printname != NULL) fprintf(stdout, "%s:", printname);
   1199         if (number) fprintf(stdout, "%d:", linenumber);
   1200         if (line_offsets)
   1201           fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
   1202             offsets[1] - offsets[0]);
   1203         else if (file_offsets)
   1204           fprintf(stdout, "%d,%d\n",
   1205             (int)(filepos + matchptr + offsets[0] - ptr),
   1206             offsets[1] - offsets[0]);
   1207         else if (only_matching < mrc)
   1208           {
   1209           int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
   1210           if (plen > 0)
   1211             {
   1212             if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
   1213             FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
   1214             if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
   1215             fprintf(stdout, "\n");
   1216             }
   1217           }
   1218         else if (printname != NULL || number) fprintf(stdout, "\n");
   1219         matchptr += offsets[1];
   1220         length -= offsets[1];
   1221         match = FALSE;
   1222         if (line_buffered) fflush(stdout);
   1223         rc = 0;    /* Had some success */
   1224         goto ONLY_MATCHING_RESTART;
   1225         }
   1226       }
   1227 
   1228     /* This is the default case when none of the above options is set. We print
   1229     the matching lines(s), possibly preceded and/or followed by other lines of
   1230     context. */
   1231 
   1232     else
   1233       {
   1234       /* See if there is a requirement to print some "after" lines from a
   1235       previous match. We never print any overlaps. */
   1236 
   1237       if (after_context > 0 && lastmatchnumber > 0)
   1238         {
   1239         int ellength;
   1240         int linecount = 0;
   1241         char *p = lastmatchrestart;
   1242 
   1243         while (p < ptr && linecount < after_context)
   1244           {
   1245           p = end_of_line(p, ptr, &ellength);
   1246           linecount++;
   1247           }
   1248 
   1249         /* It is important to advance lastmatchrestart during this printing so
   1250         that it interacts correctly with any "before" printing below. Print
   1251         each line's data using fwrite() in case there are binary zeroes. */
   1252 
   1253         while (lastmatchrestart < p)
   1254           {
   1255           char *pp = lastmatchrestart;
   1256           if (printname != NULL) fprintf(stdout, "%s-", printname);
   1257           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
   1258           pp = end_of_line(pp, endptr, &ellength);
   1259           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
   1260           lastmatchrestart = pp;
   1261           }
   1262         if (lastmatchrestart != ptr) hyphenpending = TRUE;
   1263         }
   1264 
   1265       /* If there were non-contiguous lines printed above, insert hyphens. */
   1266 
   1267       if (hyphenpending)
   1268         {
   1269         fprintf(stdout, "--\n");
   1270         hyphenpending = FALSE;
   1271         hyphenprinted = TRUE;
   1272         }
   1273 
   1274       /* See if there is a requirement to print some "before" lines for this
   1275       match. Again, don't print overlaps. */
   1276 
   1277       if (before_context > 0)
   1278         {
   1279         int linecount = 0;
   1280         char *p = ptr;
   1281 
   1282         while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
   1283                linecount < before_context)
   1284           {
   1285           linecount++;
   1286           p = previous_line(p, buffer);
   1287           }
   1288 
   1289         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
   1290           fprintf(stdout, "--\n");
   1291 
   1292         while (p < ptr)
   1293           {
   1294           int ellength;
   1295           char *pp = p;
   1296           if (printname != NULL) fprintf(stdout, "%s-", printname);
   1297           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
   1298           pp = end_of_line(pp, endptr, &ellength);
   1299           FWRITE(p, 1, pp - p, stdout);
   1300           p = pp;
   1301           }
   1302         }
   1303 
   1304       /* Now print the matching line(s); ensure we set hyphenpending at the end
   1305       of the file if any context lines are being output. */
   1306 
   1307       if (after_context > 0 || before_context > 0)
   1308         endhyphenpending = TRUE;
   1309 
   1310       if (printname != NULL) fprintf(stdout, "%s:", printname);
   1311       if (number) fprintf(stdout, "%d:", linenumber);
   1312 
   1313       /* In multiline mode, we want to print to the end of the line in which
   1314       the end of the matched string is found, so we adjust linelength and the
   1315       line number appropriately, but only when there actually was a match
   1316       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
   1317       the match will always be before the first newline sequence. */
   1318 
   1319       if (multiline & !invert)
   1320         {
   1321         char *endmatch = ptr + offsets[1];
   1322         t = ptr;
   1323         while (t < endmatch)
   1324           {
   1325           t = end_of_line(t, endptr, &endlinelength);
   1326           if (t < endmatch) linenumber++; else break;
   1327           }
   1328         linelength = t - ptr - endlinelength;
   1329         }
   1330 
   1331       /*** NOTE: Use only fwrite() to output the data line, so that binary
   1332       zeroes are treated as just another data character. */
   1333 
   1334       /* This extra option, for Jeffrey Friedl's debugging requirements,
   1335       replaces the matched string, or a specific captured string if it exists,
   1336       with X. When this happens, colouring is ignored. */
   1337 
   1338 #ifdef JFRIEDL_DEBUG
   1339       if (S_arg >= 0 && S_arg < mrc)
   1340         {
   1341         int first = S_arg * 2;
   1342         int last  = first + 1;
   1343         FWRITE(ptr, 1, offsets[first], stdout);
   1344         fprintf(stdout, "X");
   1345         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
   1346         }
   1347       else
   1348 #endif
   1349 
   1350       /* We have to split the line(s) up if colouring, and search for further
   1351       matches, but not of course if the line is a non-match. */
   1352 
   1353       if (do_colour && !invert)
   1354         {
   1355         int plength;
   1356         int last_offset = 0;
   1357         FWRITE(ptr, 1, offsets[0], stdout);
   1358         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
   1359         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
   1360         fprintf(stdout, "%c[00m", 0x1b);
   1361         for (;;)
   1362           {
   1363           last_offset += offsets[1];
   1364           matchptr += offsets[1];
   1365           length -= offsets[1];
   1366           if (last_offset >= linelength + endlinelength ||
   1367               !match_patterns(matchptr, length, offsets, &mrc)) break;
   1368           FWRITE(matchptr, 1, offsets[0], stdout);
   1369           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
   1370           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
   1371           fprintf(stdout, "%c[00m", 0x1b);
   1372           }
   1373 
   1374         /* In multiline mode, we may have already printed the complete line
   1375         and its line-ending characters (if they matched the pattern), so there
   1376         may be no more to print. */
   1377 
   1378         plength = (linelength + endlinelength) - last_offset;
   1379         if (plength > 0)
   1380           FWRITE(ptr + last_offset, 1, plength, stdout);
   1381         }
   1382 
   1383       /* Not colouring; no need to search for further matches */
   1384 
   1385       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
   1386       }
   1387 
   1388     /* End of doing what has to be done for a match. If --line-buffered was
   1389     given, flush the output. */
   1390 
   1391     if (line_buffered) fflush(stdout);
   1392     rc = 0;    /* Had some success */
   1393 
   1394     /* Remember where the last match happened for after_context. We remember
   1395     where we are about to restart, and that line's number. */
   1396 
   1397     lastmatchrestart = ptr + linelength + endlinelength;
   1398     lastmatchnumber = linenumber + 1;
   1399     }
   1400 
   1401   /* For a match in multiline inverted mode (which of course did not cause
   1402   anything to be printed), we have to move on to the end of the match before
   1403   proceeding. */
   1404 
   1405   if (multiline && invert && match)
   1406     {
   1407     int ellength;
   1408     char *endmatch = ptr + offsets[1];
   1409     t = ptr;
   1410     while (t < endmatch)
   1411       {
   1412       t = end_of_line(t, endptr, &ellength);
   1413       if (t <= endmatch) linenumber++; else break;
   1414       }
   1415     endmatch = end_of_line(endmatch, endptr, &ellength);
   1416     linelength = endmatch - ptr - ellength;
   1417     }
   1418 
   1419   /* Advance to after the newline and increment the line number. The file
   1420   offset to the current line is maintained in filepos. */
   1421 
   1422   ptr += linelength + endlinelength;
   1423   filepos += (int)(linelength + endlinelength);
   1424   linenumber++;
   1425 
   1426   /* If input is line buffered, and the buffer is not yet full, read another
   1427   line and add it into the buffer. */
   1428 
   1429   if (input_line_buffered && bufflength < sizeof(buffer))
   1430     {
   1431     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
   1432     bufflength += add;
   1433     endptr += add;
   1434     }
   1435 
   1436   /* If we haven't yet reached the end of the file (the buffer is full), and
   1437   the current point is in the top 1/3 of the buffer, slide the buffer down by
   1438   1/3 and refill it. Before we do this, if some unprinted "after" lines are
   1439   about to be lost, print them. */
   1440 
   1441   if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
   1442     {
   1443     if (after_context > 0 &&
   1444         lastmatchnumber > 0 &&
   1445         lastmatchrestart < buffer + MBUFTHIRD)
   1446       {
   1447       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
   1448       lastmatchnumber = 0;
   1449       }
   1450 
   1451     /* Now do the shuffle */
   1452 
   1453     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
   1454     ptr -= MBUFTHIRD;
   1455 
   1456 #ifdef SUPPORT_LIBZ
   1457     if (frtype == FR_LIBZ)
   1458       bufflength = 2*MBUFTHIRD +
   1459         gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
   1460     else
   1461 #endif
   1462 
   1463 #ifdef SUPPORT_LIBBZ2
   1464     if (frtype == FR_LIBBZ2)
   1465       bufflength = 2*MBUFTHIRD +
   1466         BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
   1467     else
   1468 #endif
   1469 
   1470     bufflength = 2*MBUFTHIRD +
   1471       (input_line_buffered?
   1472        read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
   1473        fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
   1474     endptr = buffer + bufflength;
   1475 
   1476     /* Adjust any last match point */
   1477 
   1478     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
   1479     }
   1480   }     /* Loop through the whole file */
   1481 
   1482 /* End of file; print final "after" lines if wanted; do_after_lines sets
   1483 hyphenpending if it prints something. */
   1484 
   1485 if (only_matching < 0 && !count_only)
   1486   {
   1487   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
   1488   hyphenpending |= endhyphenpending;
   1489   }
   1490 
   1491 /* Print the file name if we are looking for those without matches and there
   1492 were none. If we found a match, we won't have got this far. */
   1493 
   1494 if (filenames == FN_NOMATCH_ONLY)
   1495   {
   1496   fprintf(stdout, "%s\n", printname);
   1497   return 0;
   1498   }
   1499 
   1500 /* Print the match count if wanted */
   1501 
   1502 if (count_only)
   1503   {
   1504   if (count > 0 || !omit_zero_count)
   1505     {
   1506     if (printname != NULL && filenames != FN_NONE)
   1507       fprintf(stdout, "%s:", printname);
   1508     fprintf(stdout, "%d\n", count);
   1509     }
   1510   }
   1511 
   1512 return rc;
   1513 }
   1514 
   1515 
   1516 
   1517 /*************************************************
   1518 *     Grep a file or recurse into a directory    *
   1519 *************************************************/
   1520 
   1521 /* Given a path name, if it's a directory, scan all the files if we are
   1522 recursing; if it's a file, grep it.
   1523 
   1524 Arguments:
   1525   pathname          the path to investigate
   1526   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   1527   only_one_at_top   TRUE if the path is the only one at toplevel
   1528 
   1529 Returns:   0 if there was at least one match
   1530            1 if there were no matches
   1531            2 there was some kind of error
   1532 
   1533 However, file opening failures are suppressed if "silent" is set.
   1534 */
   1535 
   1536 static int
   1537 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   1538 {
   1539 int rc = 1;
   1540 int sep;
   1541 int frtype;
   1542 int pathlen;
   1543 void *handle;
   1544 FILE *in = NULL;           /* Ensure initialized */
   1545 
   1546 #ifdef SUPPORT_LIBZ
   1547 gzFile ingz = NULL;
   1548 #endif
   1549 
   1550 #ifdef SUPPORT_LIBBZ2
   1551 BZFILE *inbz2 = NULL;
   1552 #endif
   1553 
   1554 /* If the file name is "-" we scan stdin */
   1555 
   1556 if (strcmp(pathname, "-") == 0)
   1557   {
   1558   return pcregrep(stdin, FR_PLAIN,
   1559     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
   1560       stdin_name : NULL);
   1561   }
   1562 
   1563 /* If the file is a directory, skip if skipping or if we are recursing, scan
   1564 each file and directory within it, subject to any include or exclude patterns
   1565 that were set. The scanning code is localized so it can be made
   1566 system-specific. */
   1567 
   1568 if ((sep = isdirectory(pathname)) != 0)
   1569   {
   1570   if (dee_action == dee_SKIP) return 1;
   1571   if (dee_action == dee_RECURSE)
   1572     {
   1573     char buffer[1024];
   1574     char *nextfile;
   1575     directory_type *dir = opendirectory(pathname);
   1576 
   1577     if (dir == NULL)
   1578       {
   1579       if (!silent)
   1580         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
   1581           strerror(errno));
   1582       return 2;
   1583       }
   1584 
   1585     while ((nextfile = readdirectory(dir)) != NULL)
   1586       {
   1587       int frc, nflen;
   1588       sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
   1589       nflen = (int)(strlen(nextfile));
   1590 
   1591       if (isdirectory(buffer))
   1592         {
   1593         if (exclude_dir_compiled != NULL &&
   1594             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
   1595           continue;
   1596 
   1597         if (include_dir_compiled != NULL &&
   1598             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
   1599           continue;
   1600         }
   1601       else
   1602         {
   1603         if (exclude_compiled != NULL &&
   1604             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
   1605           continue;
   1606 
   1607         if (include_compiled != NULL &&
   1608             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
   1609           continue;
   1610         }
   1611 
   1612       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
   1613       if (frc > 1) rc = frc;
   1614        else if (frc == 0 && rc == 1) rc = 0;
   1615       }
   1616 
   1617     closedirectory(dir);
   1618     return rc;
   1619     }
   1620   }
   1621 
   1622 /* If the file is not a directory and not a regular file, skip it if that's
   1623 been requested. */
   1624 
   1625 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
   1626 
   1627 /* Control reaches here if we have a regular file, or if we have a directory
   1628 and recursion or skipping was not requested, or if we have anything else and
   1629 skipping was not requested. The scan proceeds. If this is the first and only
   1630 argument at top level, we don't show the file name, unless we are only showing
   1631 the file name, or the filename was forced (-H). */
   1632 
   1633 pathlen = (int)(strlen(pathname));
   1634 
   1635 /* Open using zlib if it is supported and the file name ends with .gz. */
   1636 
   1637 #ifdef SUPPORT_LIBZ
   1638 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
   1639   {
   1640   ingz = gzopen(pathname, "rb");
   1641   if (ingz == NULL)
   1642     {
   1643     if (!silent)
   1644       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
   1645         strerror(errno));
   1646     return 2;
   1647     }
   1648   handle = (void *)ingz;
   1649   frtype = FR_LIBZ;
   1650   }
   1651 else
   1652 #endif
   1653 
   1654 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
   1655 
   1656 #ifdef SUPPORT_LIBBZ2
   1657 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
   1658   {
   1659   inbz2 = BZ2_bzopen(pathname, "rb");
   1660   handle = (void *)inbz2;
   1661   frtype = FR_LIBBZ2;
   1662   }
   1663 else
   1664 #endif
   1665 
   1666 /* Otherwise use plain fopen(). The label is so that we can come back here if
   1667 an attempt to read a .bz2 file indicates that it really is a plain file. */
   1668 
   1669 #ifdef SUPPORT_LIBBZ2
   1670 PLAIN_FILE:
   1671 #endif
   1672   {
   1673   in = fopen(pathname, "rb");
   1674   handle = (void *)in;
   1675   frtype = FR_PLAIN;
   1676   }
   1677 
   1678 /* All the opening methods return errno when they fail. */
   1679 
   1680 if (handle == NULL)
   1681   {
   1682   if (!silent)
   1683     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
   1684       strerror(errno));
   1685   return 2;
   1686   }
   1687 
   1688 /* Now grep the file */
   1689 
   1690 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
   1691   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
   1692 
   1693 /* Close in an appropriate manner. */
   1694 
   1695 #ifdef SUPPORT_LIBZ
   1696 if (frtype == FR_LIBZ)
   1697   gzclose(ingz);
   1698 else
   1699 #endif
   1700 
   1701 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
   1702 read failed. If the error indicates that the file isn't in fact bzipped, try
   1703 again as a normal file. */
   1704 
   1705 #ifdef SUPPORT_LIBBZ2
   1706 if (frtype == FR_LIBBZ2)
   1707   {
   1708   if (rc == 2)
   1709     {
   1710     int errnum;
   1711     const char *err = BZ2_bzerror(inbz2, &errnum);
   1712     if (errnum == BZ_DATA_ERROR_MAGIC)
   1713       {
   1714       BZ2_bzclose(inbz2);
   1715       goto PLAIN_FILE;
   1716       }
   1717     else if (!silent)
   1718       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
   1719         pathname, err);
   1720     }
   1721   BZ2_bzclose(inbz2);
   1722   }
   1723 else
   1724 #endif
   1725 
   1726 /* Normal file close */
   1727 
   1728 fclose(in);
   1729 
   1730 /* Pass back the yield from pcregrep(). */
   1731 
   1732 return rc;
   1733 }
   1734 
   1735 
   1736 
   1737 
   1738 /*************************************************
   1739 *                Usage function                  *
   1740 *************************************************/
   1741 
   1742 static int
   1743 usage(int rc)
   1744 {
   1745 option_item *op;
   1746 fprintf(stderr, "Usage: pcregrep [-");
   1747 for (op = optionlist; op->one_char != 0; op++)
   1748   {
   1749   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
   1750   }
   1751 fprintf(stderr, "] [long options] [pattern] [files]\n");
   1752 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
   1753   "options.\n");
   1754 return rc;
   1755 }
   1756 
   1757 
   1758 
   1759 
   1760 /*************************************************
   1761 *                Help function                   *
   1762 *************************************************/
   1763 
   1764 static void
   1765 help(void)
   1766 {
   1767 option_item *op;
   1768 
   1769 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
   1770 printf("Search for PATTERN in each FILE or standard input.\n");
   1771 printf("PATTERN must be present if neither -e nor -f is used.\n");
   1772 printf("\"-\" can be used as a file name to mean STDIN.\n");
   1773 
   1774 #ifdef SUPPORT_LIBZ
   1775 printf("Files whose names end in .gz are read using zlib.\n");
   1776 #endif
   1777 
   1778 #ifdef SUPPORT_LIBBZ2
   1779 printf("Files whose names end in .bz2 are read using bzlib2.\n");
   1780 #endif
   1781 
   1782 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
   1783 printf("Other files and the standard input are read as plain files.\n\n");
   1784 #else
   1785 printf("All files are read as plain files, without any interpretation.\n\n");
   1786 #endif
   1787 
   1788 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
   1789 printf("Options:\n");
   1790 
   1791 for (op = optionlist; op->one_char != 0; op++)
   1792   {
   1793   int n;
   1794   char s[4];
   1795 
   1796   /* Two options were accidentally implemented and documented with underscores
   1797   instead of hyphens in their names, something that was not noticed for quite a
   1798   few releases. When fixing this, I left the underscored versions in the list
   1799   in case people were using them. However, we don't want to display them in the
   1800   help data. There are no other options that contain underscores, and we do not
   1801   expect ever to implement such options. Therefore, just omit any option that
   1802   contains an underscore. */
   1803 
   1804   if (strchr(op->long_name, '_') != NULL) continue;
   1805 
   1806   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
   1807   n = 31 - printf("  %s --%s", s, op->long_name);
   1808   if (n < 1) n = 1;
   1809   printf("%.*s%s\n", n, "                     ", op->help_text);
   1810   }
   1811 
   1812 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
   1813 printf("trailing white space is removed and blank lines are ignored.\n");
   1814 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
   1815 
   1816 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
   1817 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
   1818 }
   1819 
   1820 
   1821 
   1822 
   1823 /*************************************************
   1824 *    Handle a single-letter, no data option      *
   1825 *************************************************/
   1826 
   1827 static int
   1828 handle_option(int letter, int options)
   1829 {
   1830 switch(letter)
   1831   {
   1832   case N_FOFFSETS: file_offsets = TRUE; break;
   1833   case N_HELP: help(); pcregrep_exit(0);
   1834   case N_LOFFSETS: line_offsets = number = TRUE; break;
   1835   case N_LBUFFER: line_buffered = TRUE; break;
   1836   case 'c': count_only = TRUE; break;
   1837   case 'F': process_options |= PO_FIXED_STRINGS; break;
   1838   case 'H': filenames = FN_FORCE; break;
   1839   case 'h': filenames = FN_NONE; break;
   1840   case 'i': options |= PCRE_CASELESS; break;
   1841   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
   1842   case 'L': filenames = FN_NOMATCH_ONLY; break;
   1843   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
   1844   case 'n': number = TRUE; break;
   1845   case 'o': only_matching = 0; break;
   1846   case 'q': quiet = TRUE; break;
   1847   case 'r': dee_action = dee_RECURSE; break;
   1848   case 's': silent = TRUE; break;
   1849   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
   1850   case 'v': invert = TRUE; break;
   1851   case 'w': process_options |= PO_WORD_MATCH; break;
   1852   case 'x': process_options |= PO_LINE_MATCH; break;
   1853 
   1854   case 'V':
   1855   fprintf(stderr, "pcregrep version %s\n", pcre_version());
   1856   pcregrep_exit(0);
   1857   break;
   1858 
   1859   default:
   1860   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
   1861   pcregrep_exit(usage(2));
   1862   }
   1863 
   1864 return options;
   1865 }
   1866 
   1867 
   1868 
   1869 
   1870 /*************************************************
   1871 *          Construct printed ordinal             *
   1872 *************************************************/
   1873 
   1874 /* This turns a number into "1st", "3rd", etc. */
   1875 
   1876 static char *
   1877 ordin(int n)
   1878 {
   1879 static char buffer[8];
   1880 char *p = buffer;
   1881 sprintf(p, "%d", n);
   1882 while (*p != 0) p++;
   1883 switch (n%10)
   1884   {
   1885   case 1: strcpy(p, "st"); break;
   1886   case 2: strcpy(p, "nd"); break;
   1887   case 3: strcpy(p, "rd"); break;
   1888   default: strcpy(p, "th"); break;
   1889   }
   1890 return buffer;
   1891 }
   1892 
   1893 
   1894 
   1895 /*************************************************
   1896 *          Compile a single pattern              *
   1897 *************************************************/
   1898 
   1899 /* When the -F option has been used, this is called for each substring.
   1900 Otherwise it's called for each supplied pattern.
   1901 
   1902 Arguments:
   1903   pattern        the pattern string
   1904   options        the PCRE options
   1905   filename       the file name, or NULL for a command-line pattern
   1906   count          0 if this is the only command line pattern, or
   1907                  number of the command line pattern, or
   1908                  linenumber for a pattern from a file
   1909 
   1910 Returns:         TRUE on success, FALSE after an error
   1911 */
   1912 
   1913 static BOOL
   1914 compile_single_pattern(char *pattern, int options, char *filename, int count)
   1915 {
   1916 char buffer[MBUFTHIRD + 16];
   1917 const char *error;
   1918 int errptr;
   1919 
   1920 if (pattern_count >= MAX_PATTERN_COUNT)
   1921   {
   1922   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
   1923     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
   1924   return FALSE;
   1925   }
   1926 
   1927 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
   1928   suffix[process_options]);
   1929 pattern_list[pattern_count] =
   1930   pcre_compile(buffer, options, &error, &errptr, pcretables);
   1931 if (pattern_list[pattern_count] != NULL)
   1932   {
   1933   pattern_count++;
   1934   return TRUE;
   1935   }
   1936 
   1937 /* Handle compile errors */
   1938 
   1939 errptr -= (int)strlen(prefix[process_options]);
   1940 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
   1941 
   1942 if (filename == NULL)
   1943   {
   1944   if (count == 0)
   1945     fprintf(stderr, "pcregrep: Error in command-line regex "
   1946       "at offset %d: %s\n", errptr, error);
   1947   else
   1948     fprintf(stderr, "pcregrep: Error in %s command-line regex "
   1949       "at offset %d: %s\n", ordin(count), errptr, error);
   1950   }
   1951 else
   1952   {
   1953   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
   1954     "at offset %d: %s\n", count, filename, errptr, error);
   1955   }
   1956 
   1957 return FALSE;
   1958 }
   1959 
   1960 
   1961 
   1962 /*************************************************
   1963 *           Compile one supplied pattern         *
   1964 *************************************************/
   1965 
   1966 /* When the -F option has been used, each string may be a list of strings,
   1967 separated by line breaks. They will be matched literally.
   1968 
   1969 Arguments:
   1970   pattern        the pattern string
   1971   options        the PCRE options
   1972   filename       the file name, or NULL for a command-line pattern
   1973   count          0 if this is the only command line pattern, or
   1974                  number of the command line pattern, or
   1975                  linenumber for a pattern from a file
   1976 
   1977 Returns:         TRUE on success, FALSE after an error
   1978 */
   1979 
   1980 static BOOL
   1981 compile_pattern(char *pattern, int options, char *filename, int count)
   1982 {
   1983 if ((process_options & PO_FIXED_STRINGS) != 0)
   1984   {
   1985   char *eop = pattern + strlen(pattern);
   1986   char buffer[MBUFTHIRD];
   1987   for(;;)
   1988     {
   1989     int ellength;
   1990     char *p = end_of_line(pattern, eop, &ellength);
   1991     if (ellength == 0)
   1992       return compile_single_pattern(pattern, options, filename, count);
   1993     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
   1994     pattern = p;
   1995     if (!compile_single_pattern(buffer, options, filename, count))
   1996       return FALSE;
   1997     }
   1998   }
   1999 else return compile_single_pattern(pattern, options, filename, count);
   2000 }
   2001 
   2002 
   2003 
   2004 /*************************************************
   2005 *                Main program                    *
   2006 *************************************************/
   2007 
   2008 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
   2009 
   2010 int
   2011 main(int argc, char **argv)
   2012 {
   2013 int i, j;
   2014 int rc = 1;
   2015 int pcre_options = 0;
   2016 int cmd_pattern_count = 0;
   2017 int hint_count = 0;
   2018 int errptr;
   2019 BOOL only_one_at_top;
   2020 char *patterns[MAX_PATTERN_COUNT];
   2021 const char *locale_from = "--locale";
   2022 const char *error;
   2023 
   2024 /* Set the default line ending value from the default in the PCRE library;
   2025 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
   2026 Note that the return values from pcre_config(), though derived from the ASCII
   2027 codes, are the same in EBCDIC environments, so we must use the actual values
   2028 rather than escapes such as '\r'. */
   2029 
   2030 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
   2031 switch(i)
   2032   {
   2033   default:               newline = (char *)"lf"; break;
   2034   case 13:               newline = (char *)"cr"; break;
   2035   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
   2036   case -1:               newline = (char *)"any"; break;
   2037   case -2:               newline = (char *)"anycrlf"; break;
   2038   }
   2039 
   2040 /* Process the options */
   2041 
   2042 for (i = 1; i < argc; i++)
   2043   {
   2044   option_item *op = NULL;
   2045   char *option_data = (char *)"";    /* default to keep compiler happy */
   2046   BOOL longop;
   2047   BOOL longopwasequals = FALSE;
   2048 
   2049   if (argv[i][0] != '-') break;
   2050 
   2051   /* If we hit an argument that is just "-", it may be a reference to STDIN,
   2052   but only if we have previously had -e or -f to define the patterns. */
   2053 
   2054   if (argv[i][1] == 0)
   2055     {
   2056     if (pattern_filename != NULL || pattern_count > 0) break;
   2057       else pcregrep_exit(usage(2));
   2058     }
   2059 
   2060   /* Handle a long name option, or -- to terminate the options */
   2061 
   2062   if (argv[i][1] == '-')
   2063     {
   2064     char *arg = argv[i] + 2;
   2065     char *argequals = strchr(arg, '=');
   2066 
   2067     if (*arg == 0)    /* -- terminates options */
   2068       {
   2069       i++;
   2070       break;                /* out of the options-handling loop */
   2071       }
   2072 
   2073     longop = TRUE;
   2074 
   2075     /* Some long options have data that follows after =, for example file=name.
   2076     Some options have variations in the long name spelling: specifically, we
   2077     allow "regexp" because GNU grep allows it, though I personally go along
   2078     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
   2079     These options are entered in the table as "regex(p)". Options can be in
   2080     both these categories. */
   2081 
   2082     for (op = optionlist; op->one_char != 0; op++)
   2083       {
   2084       char *opbra = strchr(op->long_name, '(');
   2085       char *equals = strchr(op->long_name, '=');
   2086 
   2087       /* Handle options with only one spelling of the name */
   2088 
   2089       if (opbra == NULL)     /* Does not contain '(' */
   2090         {
   2091         if (equals == NULL)  /* Not thing=data case */
   2092           {
   2093           if (strcmp(arg, op->long_name) == 0) break;
   2094           }
   2095         else                 /* Special case xxx=data */
   2096           {
   2097           int oplen = (int)(equals - op->long_name);
   2098           int arglen = (argequals == NULL)?
   2099             (int)strlen(arg) : (int)(argequals - arg);
   2100           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
   2101             {
   2102             option_data = arg + arglen;
   2103             if (*option_data == '=')
   2104               {
   2105               option_data++;
   2106               longopwasequals = TRUE;
   2107               }
   2108             break;
   2109             }
   2110           }
   2111         }
   2112 
   2113       /* Handle options with an alternate spelling of the name */
   2114 
   2115       else
   2116         {
   2117         char buff1[24];
   2118         char buff2[24];
   2119 
   2120         int baselen = (int)(opbra - op->long_name);
   2121         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
   2122         int arglen = (argequals == NULL || equals == NULL)?
   2123           (int)strlen(arg) : (int)(argequals - arg);
   2124 
   2125         sprintf(buff1, "%.*s", baselen, op->long_name);
   2126         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
   2127 
   2128         if (strncmp(arg, buff1, arglen) == 0 ||
   2129            strncmp(arg, buff2, arglen) == 0)
   2130           {
   2131           if (equals != NULL && argequals != NULL)
   2132             {
   2133             option_data = argequals;
   2134             if (*option_data == '=')
   2135               {
   2136               option_data++;
   2137               longopwasequals = TRUE;
   2138               }
   2139             }
   2140           break;
   2141           }
   2142         }
   2143       }
   2144 
   2145     if (op->one_char == 0)
   2146       {
   2147       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
   2148       pcregrep_exit(usage(2));
   2149       }
   2150     }
   2151 
   2152   /* Jeffrey Friedl's debugging harness uses these additional options which
   2153   are not in the right form for putting in the option table because they use
   2154   only one hyphen, yet are more than one character long. By putting them
   2155   separately here, they will not get displayed as part of the help() output,
   2156   but I don't think Jeffrey will care about that. */
   2157 
   2158 #ifdef JFRIEDL_DEBUG
   2159   else if (strcmp(argv[i], "-pre") == 0) {
   2160           jfriedl_prefix = argv[++i];
   2161           continue;
   2162   } else if (strcmp(argv[i], "-post") == 0) {
   2163           jfriedl_postfix = argv[++i];
   2164           continue;
   2165   } else if (strcmp(argv[i], "-XT") == 0) {
   2166           sscanf(argv[++i], "%d", &jfriedl_XT);
   2167           continue;
   2168   } else if (strcmp(argv[i], "-XR") == 0) {
   2169           sscanf(argv[++i], "%d", &jfriedl_XR);
   2170           continue;
   2171   }
   2172 #endif
   2173 
   2174 
   2175   /* One-char options; many that have no data may be in a single argument; we
   2176   continue till we hit the last one or one that needs data. */
   2177 
   2178   else
   2179     {
   2180     char *s = argv[i] + 1;
   2181     longop = FALSE;
   2182     while (*s != 0)
   2183       {
   2184       for (op = optionlist; op->one_char != 0; op++)
   2185         {
   2186         if (*s == op->one_char) break;
   2187         }
   2188       if (op->one_char == 0)
   2189         {
   2190         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
   2191           *s, argv[i]);
   2192         pcregrep_exit(usage(2));
   2193         }
   2194 
   2195       /* Check for a single-character option that has data: OP_OP_NUMBER
   2196       is used for one that either has a numerical number or defaults, i.e. the
   2197       data is optional. If a digit follows, there is data; if not, carry on
   2198       with other single-character options in the same string. */
   2199 
   2200       option_data = s+1;
   2201       if (op->type == OP_OP_NUMBER)
   2202         {
   2203         if (isdigit((unsigned char)s[1])) break;
   2204         }
   2205       else   /* Check for end or a dataless option */
   2206         {
   2207         if (op->type != OP_NODATA || s[1] == 0) break;
   2208         }
   2209 
   2210       /* Handle a single-character option with no data, then loop for the
   2211       next character in the string. */
   2212 
   2213       pcre_options = handle_option(*s++, pcre_options);
   2214       }
   2215     }
   2216 
   2217   /* At this point we should have op pointing to a matched option. If the type
   2218   is NO_DATA, it means that there is no data, and the option might set
   2219   something in the PCRE options. */
   2220 
   2221   if (op->type == OP_NODATA)
   2222     {
   2223     pcre_options = handle_option(op->one_char, pcre_options);
   2224     continue;
   2225     }
   2226 
   2227   /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
   2228   either has a value or defaults to something. It cannot have data in a
   2229   separate item. At the moment, the only such options are "colo(u)r",
   2230   "only-matching", and Jeffrey Friedl's special -S debugging option. */
   2231 
   2232   if (*option_data == 0 &&
   2233       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   2234     {
   2235     switch (op->one_char)
   2236       {
   2237       case N_COLOUR:
   2238       colour_option = (char *)"auto";
   2239       break;
   2240 
   2241       case 'o':
   2242       only_matching = 0;
   2243       break;
   2244 
   2245 #ifdef JFRIEDL_DEBUG
   2246       case 'S':
   2247       S_arg = 0;
   2248       break;
   2249 #endif
   2250       }
   2251     continue;
   2252     }
   2253 
   2254   /* Otherwise, find the data string for the option. */
   2255 
   2256   if (*option_data == 0)
   2257     {
   2258     if (i >= argc - 1 || longopwasequals)
   2259       {
   2260       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
   2261       pcregrep_exit(usage(2));
   2262       }
   2263     option_data = argv[++i];
   2264     }
   2265 
   2266   /* If the option type is OP_PATLIST, it's the -e option, which can be called
   2267   multiple times to create a list of patterns. */
   2268 
   2269   if (op->type == OP_PATLIST)
   2270     {
   2271     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
   2272       {
   2273       fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
   2274         MAX_PATTERN_COUNT);
   2275       return 2;
   2276       }
   2277     patterns[cmd_pattern_count++] = option_data;
   2278     }
   2279 
   2280   /* Otherwise, deal with single string or numeric data values. */
   2281 
   2282   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
   2283            op->type != OP_OP_NUMBER)
   2284     {
   2285     *((char **)op->dataptr) = option_data;
   2286     }
   2287 
   2288   /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
   2289   only for unpicking arguments, so just keep it simple. */
   2290 
   2291   else
   2292     {
   2293     unsigned long int n = 0;
   2294     char *endptr = option_data;
   2295     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
   2296     while (isdigit((unsigned char)(*endptr)))
   2297       n = n * 10 + (int)(*endptr++ - '0');
   2298     if (*endptr != 0)
   2299       {
   2300       if (longop)
   2301         {
   2302         char *equals = strchr(op->long_name, '=');
   2303         int nlen = (equals == NULL)? (int)strlen(op->long_name) :
   2304           (int)(equals - op->long_name);
   2305         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
   2306           option_data, nlen, op->long_name);
   2307         }
   2308       else
   2309         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
   2310           option_data, op->one_char);
   2311       pcregrep_exit(usage(2));
   2312       }
   2313     if (op->type == OP_LONGNUMBER)
   2314         *((unsigned long int *)op->dataptr) = n;
   2315     else
   2316         *((int *)op->dataptr) = n;
   2317     }
   2318   }
   2319 
   2320 /* Options have been decoded. If -C was used, its value is used as a default
   2321 for -A and -B. */
   2322 
   2323 if (both_context > 0)
   2324   {
   2325   if (after_context == 0) after_context = both_context;
   2326   if (before_context == 0) before_context = both_context;
   2327   }
   2328 
   2329 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
   2330 However, the latter two set only_matching. */
   2331 
   2332 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
   2333     (file_offsets && line_offsets))
   2334   {
   2335   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
   2336     "and/or --line-offsets\n");
   2337   pcregrep_exit(usage(2));
   2338   }
   2339 
   2340 if (file_offsets || line_offsets) only_matching = 0;
   2341 
   2342 /* If a locale has not been provided as an option, see if the LC_CTYPE or
   2343 LC_ALL environment variable is set, and if so, use it. */
   2344 
   2345 if (locale == NULL)
   2346   {
   2347   locale = getenv("LC_ALL");
   2348   locale_from = "LCC_ALL";
   2349   }
   2350 
   2351 if (locale == NULL)
   2352   {
   2353   locale = getenv("LC_CTYPE");
   2354   locale_from = "LC_CTYPE";
   2355   }
   2356 
   2357 /* If a locale has been provided, set it, and generate the tables the PCRE
   2358 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
   2359 
   2360 if (locale != NULL)
   2361   {
   2362   if (setlocale(LC_CTYPE, locale) == NULL)
   2363     {
   2364     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
   2365       locale, locale_from);
   2366     return 2;
   2367     }
   2368   pcretables = pcre_maketables();
   2369   }
   2370 
   2371 /* Sort out colouring */
   2372 
   2373 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
   2374   {
   2375   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
   2376   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
   2377   else
   2378     {
   2379     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
   2380       colour_option);
   2381     return 2;
   2382     }
   2383   if (do_colour)
   2384     {
   2385     char *cs = getenv("PCREGREP_COLOUR");
   2386     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
   2387     if (cs != NULL) colour_string = cs;
   2388     }
   2389   }
   2390 
   2391 /* Interpret the newline type; the default settings are Unix-like. */
   2392 
   2393 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
   2394   {
   2395   pcre_options |= PCRE_NEWLINE_CR;
   2396   endlinetype = EL_CR;
   2397   }
   2398 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
   2399   {
   2400   pcre_options |= PCRE_NEWLINE_LF;
   2401   endlinetype = EL_LF;
   2402   }
   2403 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
   2404   {
   2405   pcre_options |= PCRE_NEWLINE_CRLF;
   2406   endlinetype = EL_CRLF;
   2407   }
   2408 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
   2409   {
   2410   pcre_options |= PCRE_NEWLINE_ANY;
   2411   endlinetype = EL_ANY;
   2412   }
   2413 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
   2414   {
   2415   pcre_options |= PCRE_NEWLINE_ANYCRLF;
   2416   endlinetype = EL_ANYCRLF;
   2417   }
   2418 else
   2419   {
   2420   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
   2421   return 2;
   2422   }
   2423 
   2424 /* Interpret the text values for -d and -D */
   2425 
   2426 if (dee_option != NULL)
   2427   {
   2428   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
   2429   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
   2430   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
   2431   else
   2432     {
   2433     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
   2434     return 2;
   2435     }
   2436   }
   2437 
   2438 if (DEE_option != NULL)
   2439   {
   2440   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
   2441   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
   2442   else
   2443     {
   2444     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
   2445     return 2;
   2446     }
   2447   }
   2448 
   2449 /* Check the values for Jeffrey Friedl's debugging options. */
   2450 
   2451 #ifdef JFRIEDL_DEBUG
   2452 if (S_arg > 9)
   2453   {
   2454   fprintf(stderr, "pcregrep: bad value for -S option\n");
   2455   return 2;
   2456   }
   2457 if (jfriedl_XT != 0 || jfriedl_XR != 0)
   2458   {
   2459   if (jfriedl_XT == 0) jfriedl_XT = 1;
   2460   if (jfriedl_XR == 0) jfriedl_XR = 1;
   2461   }
   2462 #endif
   2463 
   2464 /* Get memory to store the pattern and hints lists. */
   2465 
   2466 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
   2467 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
   2468 
   2469 if (pattern_list == NULL || hints_list == NULL)
   2470   {
   2471   fprintf(stderr, "pcregrep: malloc failed\n");
   2472   goto EXIT2;
   2473   }
   2474 
   2475 /* If no patterns were provided by -e, and there is no file provided by -f,
   2476 the first argument is the one and only pattern, and it must exist. */
   2477 
   2478 if (cmd_pattern_count == 0 && pattern_filename == NULL)
   2479   {
   2480   if (i >= argc) return usage(2);
   2481   patterns[cmd_pattern_count++] = argv[i++];
   2482   }
   2483 
   2484 /* Compile the patterns that were provided on the command line, either by
   2485 multiple uses of -e or as a single unkeyed pattern. */
   2486 
   2487 for (j = 0; j < cmd_pattern_count; j++)
   2488   {
   2489   if (!compile_pattern(patterns[j], pcre_options, NULL,
   2490        (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
   2491     goto EXIT2;
   2492   }
   2493 
   2494 /* Compile the regular expressions that are provided in a file. */
   2495 
   2496 if (pattern_filename != NULL)
   2497   {
   2498   int linenumber = 0;
   2499   FILE *f;
   2500   char *filename;
   2501   char buffer[MBUFTHIRD];
   2502 
   2503   if (strcmp(pattern_filename, "-") == 0)
   2504     {
   2505     f = stdin;
   2506     filename = stdin_name;
   2507     }
   2508   else
   2509     {
   2510     f = fopen(pattern_filename, "r");
   2511     if (f == NULL)
   2512       {
   2513       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
   2514         strerror(errno));
   2515       goto EXIT2;
   2516       }
   2517     filename = pattern_filename;
   2518     }
   2519 
   2520   while (fgets(buffer, MBUFTHIRD, f) != NULL)
   2521     {
   2522     char *s = buffer + (int)strlen(buffer);
   2523     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
   2524     *s = 0;
   2525     linenumber++;
   2526     if (buffer[0] == 0) continue;   /* Skip blank lines */
   2527     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
   2528       goto EXIT2;
   2529     }
   2530 
   2531   if (f != stdin) fclose(f);
   2532   }
   2533 
   2534 /* Study the regular expressions, as we will be running them many times */
   2535 
   2536 for (j = 0; j < pattern_count; j++)
   2537   {
   2538   hints_list[j] = pcre_study(pattern_list[j], 0, &error);
   2539   if (error != NULL)
   2540     {
   2541     char s[16];
   2542     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
   2543     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
   2544     goto EXIT2;
   2545     }
   2546   hint_count++;
   2547   }
   2548 
   2549 /* If --match-limit or --recursion-limit was set, put the value(s) into the
   2550 pcre_extra block for each pattern. */
   2551 
   2552 if (match_limit > 0 || match_limit_recursion > 0)
   2553   {
   2554   for (j = 0; j < pattern_count; j++)
   2555     {
   2556     if (hints_list[j] == NULL)
   2557       {
   2558       hints_list[j] = malloc(sizeof(pcre_extra));
   2559       if (hints_list[j] == NULL)
   2560         {
   2561         fprintf(stderr, "pcregrep: malloc failed\n");
   2562         pcregrep_exit(2);
   2563         }
   2564       }
   2565     if (match_limit > 0)
   2566       {
   2567       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
   2568       hints_list[j]->match_limit = match_limit;
   2569       }
   2570     if (match_limit_recursion > 0)
   2571       {
   2572       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
   2573       hints_list[j]->match_limit_recursion = match_limit_recursion;
   2574       }
   2575     }
   2576   }
   2577 
   2578 /* If there are include or exclude patterns, compile them. */
   2579 
   2580 if (exclude_pattern != NULL)
   2581   {
   2582   exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
   2583     pcretables);
   2584   if (exclude_compiled == NULL)
   2585     {
   2586     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
   2587       errptr, error);
   2588     goto EXIT2;
   2589     }
   2590   }
   2591 
   2592 if (include_pattern != NULL)
   2593   {
   2594   include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
   2595     pcretables);
   2596   if (include_compiled == NULL)
   2597     {
   2598     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
   2599       errptr, error);
   2600     goto EXIT2;
   2601     }
   2602   }
   2603 
   2604 if (exclude_dir_pattern != NULL)
   2605   {
   2606   exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
   2607     pcretables);
   2608   if (exclude_dir_compiled == NULL)
   2609     {
   2610     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
   2611       errptr, error);
   2612     goto EXIT2;
   2613     }
   2614   }
   2615 
   2616 if (include_dir_pattern != NULL)
   2617   {
   2618   include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
   2619     pcretables);
   2620   if (include_dir_compiled == NULL)
   2621     {
   2622     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
   2623       errptr, error);
   2624     goto EXIT2;
   2625     }
   2626   }
   2627 
   2628 /* If there are no further arguments, do the business on stdin and exit. */
   2629 
   2630 if (i >= argc)
   2631   {
   2632   rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
   2633   goto EXIT;
   2634   }
   2635 
   2636 /* Otherwise, work through the remaining arguments as files or directories.
   2637 Pass in the fact that there is only one argument at top level - this suppresses
   2638 the file name if the argument is not a directory and filenames are not
   2639 otherwise forced. */
   2640 
   2641 only_one_at_top = i == argc - 1;   /* Catch initial value of i */
   2642 
   2643 for (; i < argc; i++)
   2644   {
   2645   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
   2646     only_one_at_top);
   2647   if (frc > 1) rc = frc;
   2648     else if (frc == 0 && rc == 1) rc = 0;
   2649   }
   2650 
   2651 EXIT:
   2652 if (pattern_list != NULL)
   2653   {
   2654   for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
   2655   free(pattern_list);
   2656   }
   2657 if (hints_list != NULL)
   2658   {
   2659   for (i = 0; i < hint_count; i++)
   2660     {
   2661     if (hints_list[i] != NULL) free(hints_list[i]);
   2662     }
   2663   free(hints_list);
   2664   }
   2665 pcregrep_exit(rc);
   2666 
   2667 EXIT2:
   2668 rc = 2;
   2669 goto EXIT;
   2670 }
   2671 
   2672 /* End of pcregrep */
   2673