Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *               pcre2grep program                *
      3 *************************************************/
      4 
      5 /* This is a grep program that uses the 8-bit PCRE regular expression library
      6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
      7 and native z/OS systems it can recurse into directories, and in z/OS it can
      8 handle PDS files.
      9 
     10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
     11 additional header is required. That header is not included in the main PCRE2
     12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
     13 The header can be found in the special z/OS distribution, which is available
     14 from www.zaconsultants.net or from www.cbttape.org.
     15 
     16            Copyright (c) 1997-2016 University of Cambridge
     17 
     18 -----------------------------------------------------------------------------
     19 Redistribution and use in source and binary forms, with or without
     20 modification, are permitted provided that the following conditions are met:
     21 
     22     * Redistributions of source code must retain the above copyright notice,
     23       this list of conditions and the following disclaimer.
     24 
     25     * Redistributions in binary form must reproduce the above copyright
     26       notice, this list of conditions and the following disclaimer in the
     27       documentation and/or other materials provided with the distribution.
     28 
     29     * Neither the name of the University of Cambridge nor the names of its
     30       contributors may be used to endorse or promote products derived from
     31       this software without specific prior written permission.
     32 
     33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     43 POSSIBILITY OF SUCH DAMAGE.
     44 -----------------------------------------------------------------------------
     45 */
     46 
     47 #ifdef HAVE_CONFIG_H
     48 #include "config.h"
     49 #endif
     50 
     51 #include <ctype.h>
     52 #include <locale.h>
     53 #include <stdio.h>
     54 #include <string.h>
     55 #include <stdlib.h>
     56 #include <errno.h>
     57 
     58 #include <sys/types.h>
     59 #include <sys/stat.h>
     60 
     61 #if defined(_WIN32) || defined(WIN32)
     62 #include <io.h>                /* For _setmode() */
     63 #include <fcntl.h>             /* For _O_BINARY */
     64 #endif
     65 
     66 #ifdef SUPPORT_PCRE2GREP_CALLOUT
     67 #include <sys/wait.h>
     68 #endif
     69 
     70 #ifdef HAVE_UNISTD_H
     71 #include <unistd.h>
     72 #endif
     73 
     74 #ifdef SUPPORT_LIBZ
     75 #include <zlib.h>
     76 #endif
     77 
     78 #ifdef SUPPORT_LIBBZ2
     79 #include <bzlib.h>
     80 #endif
     81 
     82 #define PCRE2_CODE_UNIT_WIDTH 8
     83 #include "pcre2.h"
     84 
     85 #define FALSE 0
     86 #define TRUE 1
     87 
     88 typedef int BOOL;
     89 
     90 #define OFFSET_SIZE 33
     91 
     92 #if BUFSIZ > 8192
     93 #define MAXPATLEN BUFSIZ
     94 #else
     95 #define MAXPATLEN 8192
     96 #endif
     97 
     98 #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
     99 
    100 /* Values for the "filenames" variable, which specifies options for file name
    101 output. The order is important; it is assumed that a file name is wanted for
    102 all values greater than FN_DEFAULT. */
    103 
    104 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
    105 
    106 /* File reading styles */
    107 
    108 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
    109 
    110 /* Actions for the -d and -D options */
    111 
    112 enum { dee_READ, dee_SKIP, dee_RECURSE };
    113 enum { DEE_READ, DEE_SKIP };
    114 
    115 /* Actions for special processing options (flag bits) */
    116 
    117 #define PO_WORD_MATCH     0x0001
    118 #define PO_LINE_MATCH     0x0002
    119 #define PO_FIXED_STRINGS  0x0004
    120 
    121 /* Binary file options */
    122 
    123 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
    124 
    125 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
    126 environments), a warning is issued if the value of fwrite() is ignored.
    127 Unfortunately, casting to (void) does not suppress the warning. To get round
    128 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
    129 apply to fprintf(). */
    130 
    131 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
    132 
    133 /* Under Windows, we have to set stdout to be binary, so that it does not
    134 convert \r\n at the ends of output lines to \r\r\n. However, that means that
    135 any messages written to stdout must have \r\n as their line terminator. This is
    136 handled by using STDOUT_NL as the newline string. */
    137 
    138 #if defined(_WIN32) || defined(WIN32)
    139 #define STDOUT_NL  "\r\n"
    140 #else
    141 #define STDOUT_NL  "\n"
    142 #endif
    143 
    144 
    145 
    146 /*************************************************
    147 *               Global variables                 *
    148 *************************************************/
    149 
    150 /* Jeffrey Friedl has some debugging requirements that are not part of the
    151 regular code. */
    152 
    153 #ifdef JFRIEDL_DEBUG
    154 static int S_arg = -1;
    155 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
    156 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
    157 static const char *jfriedl_prefix = "";
    158 static const char *jfriedl_postfix = "";
    159 #endif
    160 
    161 static char *colour_string = (char *)"1;31";
    162 static char *colour_option = NULL;
    163 static char *dee_option = NULL;
    164 static char *DEE_option = NULL;
    165 static char *locale = NULL;
    166 static char *main_buffer = NULL;
    167 static char *newline_arg = NULL;
    168 static char *om_separator = (char *)"";
    169 static char *stdin_name = (char *)"(standard input)";
    170 
    171 static int after_context = 0;
    172 static int before_context = 0;
    173 static int binary_files = BIN_BINARY;
    174 static int both_context = 0;
    175 static int bufthird = PCRE2GREP_BUFSIZE;
    176 static int bufsize = 3*PCRE2GREP_BUFSIZE;
    177 static int endlinetype;
    178 
    179 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
    180 static int dee_action = dee_SKIP;
    181 #else
    182 static int dee_action = dee_READ;
    183 #endif
    184 static int DEE_action = DEE_READ;
    185 static int error_count = 0;
    186 static int filenames = FN_DEFAULT;
    187 
    188 #ifdef SUPPORT_PCRE2GREP_JIT
    189 static BOOL use_jit = TRUE;
    190 #else
    191 static BOOL use_jit = FALSE;
    192 #endif
    193 
    194 static const uint8_t *character_tables = NULL;
    195 
    196 static uint32_t pcre2_options = 0;
    197 static uint32_t process_options = 0;
    198 static uint32_t match_limit = 0;
    199 static uint32_t recursion_limit = 0;
    200 
    201 static pcre2_compile_context *compile_context;
    202 static pcre2_match_context *match_context;
    203 static pcre2_match_data *match_data;
    204 static PCRE2_SIZE *offsets;
    205 
    206 static BOOL count_only = FALSE;
    207 static BOOL do_colour = FALSE;
    208 static BOOL file_offsets = FALSE;
    209 static BOOL hyphenpending = FALSE;
    210 static BOOL invert = FALSE;
    211 static BOOL line_buffered = FALSE;
    212 static BOOL line_offsets = FALSE;
    213 static BOOL multiline = FALSE;
    214 static BOOL number = FALSE;
    215 static BOOL omit_zero_count = FALSE;
    216 static BOOL resource_error = FALSE;
    217 static BOOL quiet = FALSE;
    218 static BOOL show_only_matching = FALSE;
    219 static BOOL silent = FALSE;
    220 static BOOL utf = FALSE;
    221 
    222 /* Structure for list of --only-matching capturing numbers. */
    223 
    224 typedef struct omstr {
    225   struct omstr *next;
    226   int groupnum;
    227 } omstr;
    228 
    229 static omstr *only_matching = NULL;
    230 static omstr *only_matching_last = NULL;
    231 
    232 /* Structure for holding the two variables that describe a number chain. */
    233 
    234 typedef struct omdatastr {
    235   omstr **anchor;
    236   omstr **lastptr;
    237 } omdatastr;
    238 
    239 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
    240 
    241 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
    242 
    243 typedef struct fnstr {
    244   struct fnstr *next;
    245   char *name;
    246 } fnstr;
    247 
    248 static fnstr *exclude_from = NULL;
    249 static fnstr *exclude_from_last = NULL;
    250 static fnstr *include_from = NULL;
    251 static fnstr *include_from_last = NULL;
    252 
    253 static fnstr *file_lists = NULL;
    254 static fnstr *file_lists_last = NULL;
    255 static fnstr *pattern_files = NULL;
    256 static fnstr *pattern_files_last = NULL;
    257 
    258 /* Structure for holding the two variables that describe a file name chain. */
    259 
    260 typedef struct fndatastr {
    261   fnstr **anchor;
    262   fnstr **lastptr;
    263 } fndatastr;
    264 
    265 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
    266 static fndatastr include_from_data = { &include_from, &include_from_last };
    267 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
    268 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
    269 
    270 /* Structure for pattern and its compiled form; used for matching patterns and
    271 also for include/exclude patterns. */
    272 
    273 typedef struct patstr {
    274   struct patstr *next;
    275   char *string;
    276   pcre2_code *compiled;
    277 } patstr;
    278 
    279 static patstr *patterns = NULL;
    280 static patstr *patterns_last = NULL;
    281 static patstr *include_patterns = NULL;
    282 static patstr *include_patterns_last = NULL;
    283 static patstr *exclude_patterns = NULL;
    284 static patstr *exclude_patterns_last = NULL;
    285 static patstr *include_dir_patterns = NULL;
    286 static patstr *include_dir_patterns_last = NULL;
    287 static patstr *exclude_dir_patterns = NULL;
    288 static patstr *exclude_dir_patterns_last = NULL;
    289 
    290 /* Structure holding the two variables that describe a pattern chain. A pointer
    291 to such structures is used for each appropriate option. */
    292 
    293 typedef struct patdatastr {
    294   patstr **anchor;
    295   patstr **lastptr;
    296 } patdatastr;
    297 
    298 static patdatastr match_patdata = { &patterns, &patterns_last };
    299 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
    300 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
    301 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
    302 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
    303 
    304 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
    305                                  &include_dir_patterns, &exclude_dir_patterns };
    306 
    307 static const char *incexname[4] = { "--include", "--exclude",
    308                                     "--include-dir", "--exclude-dir" };
    309 
    310 /* Structure for options and list of them */
    311 
    312 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER,
    313        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
    314 
    315 typedef struct option_item {
    316   int type;
    317   int one_char;
    318   void *dataptr;
    319   const char *long_name;
    320   const char *help_text;
    321 } option_item;
    322 
    323 /* Options without a single-letter equivalent get a negative value. This can be
    324 used to identify them. */
    325 
    326 #define N_COLOUR       (-1)
    327 #define N_EXCLUDE      (-2)
    328 #define N_EXCLUDE_DIR  (-3)
    329 #define N_HELP         (-4)
    330 #define N_INCLUDE      (-5)
    331 #define N_INCLUDE_DIR  (-6)
    332 #define N_LABEL        (-7)
    333 #define N_LOCALE       (-8)
    334 #define N_NULL         (-9)
    335 #define N_LOFFSETS     (-10)
    336 #define N_FOFFSETS     (-11)
    337 #define N_LBUFFER      (-12)
    338 #define N_M_LIMIT      (-13)
    339 #define N_M_LIMIT_REC  (-14)
    340 #define N_BUFSIZE      (-15)
    341 #define N_NOJIT        (-16)
    342 #define N_FILE_LIST    (-17)
    343 #define N_BINARY_FILES (-18)
    344 #define N_EXCLUDE_FROM (-19)
    345 #define N_INCLUDE_FROM (-20)
    346 #define N_OM_SEPARATOR (-21)
    347 
    348 static option_item optionlist[] = {
    349   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
    350   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
    351   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
    352   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
    353   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
    354   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
    355   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
    356   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
    357   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
    358   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
    359   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
    360   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
    361   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
    362   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
    363   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
    364   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
    365   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
    366   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
    367   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
    368   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
    369   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
    370   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
    371 #ifdef SUPPORT_PCRE2GREP_JIT
    372   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
    373 #else
    374   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
    375 #endif
    376   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
    377   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
    378   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
    379   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
    380   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
    381   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
    382   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
    383   { OP_U32NUMBER,  N_M_LIMIT_REC, &recursion_limit, "recursion-limit=number", "set PCRE match recursion limit option" },
    384   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
    385   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
    386   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
    387   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
    388   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
    389   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
    390   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
    391   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
    392   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
    393   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
    394   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
    395   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
    396   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
    397 #ifdef JFRIEDL_DEBUG
    398   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
    399 #endif
    400   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
    401   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
    402   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
    403   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
    404   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
    405   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
    406   { OP_NODATA,    0,        NULL,               NULL,            NULL }
    407 };
    408 
    409 /* Table of names for newline types. Must be kept in step with the definitions
    410 of PCRE2_NEWLINE_xx in pcre2.h. */
    411 
    412 static const char *newlines[] = {
    413   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
    414 
    415 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
    416 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
    417 that the combination of -w and -x has the same effect as -x on its own, so we
    418 can treat them as the same. Note that the MAXPATLEN macro assumes the longest
    419 prefix+suffix is 10 characters; if anything longer is added, it must be
    420 adjusted. */
    421 
    422 static const char *prefix[] = {
    423   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
    424 
    425 static const char *suffix[] = {
    426   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
    427 
    428 /* UTF-8 tables - used only when the newline setting is "any". */
    429 
    430 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
    431 
    432 const char utf8_table4[] = {
    433   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    434   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    435   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    436   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
    437 
    438 
    439 
    440 /*************************************************
    441 *         Case-independent string compare        *
    442 *************************************************/
    443 
    444 static int
    445 strcmpic(const char *str1, const char *str2)
    446 {
    447 unsigned int c1, c2;
    448 while (*str1 != '\0' || *str2 != '\0')
    449   {
    450   c1 = tolower(*str1++);
    451   c2 = tolower(*str2++);
    452   if (c1 != c2) return ((c1 > c2) << 1) - 1;
    453   }
    454 return 0;
    455 }
    456 
    457 
    458 
    459 /*************************************************
    460 *         Exit from the program                  *
    461 *************************************************/
    462 
    463 /* If there has been a resource error, give a suitable message.
    464 
    465 Argument:  the return code
    466 Returns:   does not return
    467 */
    468 
    469 static void
    470 pcre2grep_exit(int rc)
    471 {
    472 if (resource_error)
    473   {
    474   fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
    475     "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
    476     PCRE2_ERROR_RECURSIONLIMIT);
    477   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
    478   }
    479 exit(rc);
    480 }
    481 
    482 
    483 /*************************************************
    484 *          Add item to chain of patterns         *
    485 *************************************************/
    486 
    487 /* Used to add an item onto a chain, or just return an unconnected item if the
    488 "after" argument is NULL.
    489 
    490 Arguments:
    491   s          pattern string to add
    492   after      if not NULL points to item to insert after
    493 
    494 Returns:     new pattern block or NULL on error
    495 */
    496 
    497 static patstr *
    498 add_pattern(char *s, patstr *after)
    499 {
    500 patstr *p = (patstr *)malloc(sizeof(patstr));
    501 if (p == NULL)
    502   {
    503   fprintf(stderr, "pcre2grep: malloc failed\n");
    504   pcre2grep_exit(2);
    505   }
    506 if (strlen(s) > MAXPATLEN)
    507   {
    508   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
    509     MAXPATLEN);
    510   free(p);
    511   return NULL;
    512   }
    513 p->next = NULL;
    514 p->string = s;
    515 p->compiled = NULL;
    516 
    517 if (after != NULL)
    518   {
    519   p->next = after->next;
    520   after->next = p;
    521   }
    522 return p;
    523 }
    524 
    525 
    526 /*************************************************
    527 *           Free chain of patterns               *
    528 *************************************************/
    529 
    530 /* Used for several chains of patterns.
    531 
    532 Argument: pointer to start of chain
    533 Returns:  nothing
    534 */
    535 
    536 static void
    537 free_pattern_chain(patstr *pc)
    538 {
    539 while (pc != NULL)
    540   {
    541   patstr *p = pc;
    542   pc = p->next;
    543   if (p->compiled != NULL) pcre2_code_free(p->compiled);
    544   free(p);
    545   }
    546 }
    547 
    548 
    549 /*************************************************
    550 *           Free chain of file names             *
    551 *************************************************/
    552 
    553 /*
    554 Argument: pointer to start of chain
    555 Returns:  nothing
    556 */
    557 
    558 static void
    559 free_file_chain(fnstr *fn)
    560 {
    561 while (fn != NULL)
    562   {
    563   fnstr *f = fn;
    564   fn = f->next;
    565   free(f);
    566   }
    567 }
    568 
    569 
    570 /*************************************************
    571 *            OS-specific functions               *
    572 *************************************************/
    573 
    574 /* These functions are defined so that they can be made system specific.
    575 At present there are versions for Unix-style environments, Windows, native
    576 z/OS, and "no support". */
    577 
    578 
    579 /************* Directory scanning Unix-style and z/OS ***********/
    580 
    581 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
    582 #include <sys/types.h>
    583 #include <sys/stat.h>
    584 #include <dirent.h>
    585 
    586 #if defined NATIVE_ZOS
    587 /************* Directory and PDS/E scanning for z/OS ***********/
    588 /************* z/OS looks mostly like Unix with USS ************/
    589 /* However, z/OS needs the #include statements in this header */
    590 #include "pcrzosfs.h"
    591 /* That header is not included in the main PCRE distribution because
    592    other apparatus is needed to compile pcre2grep for z/OS. The header
    593    can be found in the special z/OS distribution, which is available
    594    from www.zaconsultants.net or from www.cbttape.org. */
    595 #endif
    596 
    597 typedef DIR directory_type;
    598 #define FILESEP '/'
    599 
    600 static int
    601 isdirectory(char *filename)
    602 {
    603 struct stat statbuf;
    604 if (stat(filename, &statbuf) < 0)
    605   return 0;        /* In the expectation that opening as a file will fail */
    606 return S_ISDIR(statbuf.st_mode);
    607 }
    608 
    609 static directory_type *
    610 opendirectory(char *filename)
    611 {
    612 return opendir(filename);
    613 }
    614 
    615 static char *
    616 readdirectory(directory_type *dir)
    617 {
    618 for (;;)
    619   {
    620   struct dirent *dent = readdir(dir);
    621   if (dent == NULL) return NULL;
    622   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
    623     return dent->d_name;
    624   }
    625 /* Control never reaches here */
    626 }
    627 
    628 static void
    629 closedirectory(directory_type *dir)
    630 {
    631 closedir(dir);
    632 }
    633 
    634 
    635 /************* Test for regular file, Unix-style **********/
    636 
    637 static int
    638 isregfile(char *filename)
    639 {
    640 struct stat statbuf;
    641 if (stat(filename, &statbuf) < 0)
    642   return 1;        /* In the expectation that opening as a file will fail */
    643 return S_ISREG(statbuf.st_mode);
    644 }
    645 
    646 
    647 #if defined NATIVE_ZOS
    648 /************* Test for a terminal in z/OS **********/
    649 /* isatty() does not work in a TSO environment, so always give FALSE.*/
    650 
    651 static BOOL
    652 is_stdout_tty(void)
    653 {
    654 return FALSE;
    655 }
    656 
    657 static BOOL
    658 is_file_tty(FILE *f)
    659 {
    660 return FALSE;
    661 }
    662 
    663 
    664 /************* Test for a terminal, Unix-style **********/
    665 
    666 #else
    667 static BOOL
    668 is_stdout_tty(void)
    669 {
    670 return isatty(fileno(stdout));
    671 }
    672 
    673 static BOOL
    674 is_file_tty(FILE *f)
    675 {
    676 return isatty(fileno(f));
    677 }
    678 #endif
    679 
    680 /* End of Unix-style or native z/OS environment functions. */
    681 
    682 
    683 /************* Directory scanning in Windows ***********/
    684 
    685 /* I (Philip Hazel) have no means of testing this code. It was contributed by
    686 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
    687 when it did not exist. David Byron added a patch that moved the #include of
    688 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
    689 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
    690 undefined when it is indeed undefined. */
    691 
    692 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
    693 
    694 #ifndef STRICT
    695 # define STRICT
    696 #endif
    697 #ifndef WIN32_LEAN_AND_MEAN
    698 # define WIN32_LEAN_AND_MEAN
    699 #endif
    700 
    701 #include <windows.h>
    702 
    703 #ifndef INVALID_FILE_ATTRIBUTES
    704 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
    705 #endif
    706 
    707 typedef struct directory_type
    708 {
    709 HANDLE handle;
    710 BOOL first;
    711 WIN32_FIND_DATA data;
    712 } directory_type;
    713 
    714 #define FILESEP '/'
    715 
    716 int
    717 isdirectory(char *filename)
    718 {
    719 DWORD attr = GetFileAttributes(filename);
    720 if (attr == INVALID_FILE_ATTRIBUTES)
    721   return 0;
    722 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
    723 }
    724 
    725 directory_type *
    726 opendirectory(char *filename)
    727 {
    728 size_t len;
    729 char *pattern;
    730 directory_type *dir;
    731 DWORD err;
    732 len = strlen(filename);
    733 pattern = (char *)malloc(len + 3);
    734 dir = (directory_type *)malloc(sizeof(*dir));
    735 if ((pattern == NULL) || (dir == NULL))
    736   {
    737   fprintf(stderr, "pcre2grep: malloc failed\n");
    738   pcre2grep_exit(2);
    739   }
    740 memcpy(pattern, filename, len);
    741 memcpy(&(pattern[len]), "\\*", 3);
    742 dir->handle = FindFirstFile(pattern, &(dir->data));
    743 if (dir->handle != INVALID_HANDLE_VALUE)
    744   {
    745   free(pattern);
    746   dir->first = TRUE;
    747   return dir;
    748   }
    749 err = GetLastError();
    750 free(pattern);
    751 free(dir);
    752 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
    753 return NULL;
    754 }
    755 
    756 char *
    757 readdirectory(directory_type *dir)
    758 {
    759 for (;;)
    760   {
    761   if (!dir->first)
    762     {
    763     if (!FindNextFile(dir->handle, &(dir->data)))
    764       return NULL;
    765     }
    766   else
    767     {
    768     dir->first = FALSE;
    769     }
    770   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
    771     return dir->data.cFileName;
    772   }
    773 #ifndef _MSC_VER
    774 return NULL;   /* Keep compiler happy; never executed */
    775 #endif
    776 }
    777 
    778 void
    779 closedirectory(directory_type *dir)
    780 {
    781 FindClose(dir->handle);
    782 free(dir);
    783 }
    784 
    785 
    786 /************* Test for regular file in Windows **********/
    787 
    788 /* I don't know how to do this, or if it can be done; assume all paths are
    789 regular if they are not directories. */
    790 
    791 int isregfile(char *filename)
    792 {
    793 return !isdirectory(filename);
    794 }
    795 
    796 
    797 /************* Test for a terminal in Windows **********/
    798 
    799 /* I don't know how to do this; assume never */
    800 
    801 static BOOL
    802 is_stdout_tty(void)
    803 {
    804 return FALSE;
    805 }
    806 
    807 static BOOL
    808 is_file_tty(FILE *f)
    809 {
    810 return FALSE;
    811 }
    812 
    813 /* End of Windows functions */
    814 
    815 
    816 /************* Directory scanning when we can't do it ***********/
    817 
    818 /* The type is void, and apart from isdirectory(), the functions do nothing. */
    819 
    820 #else
    821 
    822 #define FILESEP 0
    823 typedef void directory_type;
    824 
    825 int isdirectory(char *filename) { return 0; }
    826 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
    827 char *readdirectory(directory_type *dir) { return (char*)0;}
    828 void closedirectory(directory_type *dir) {}
    829 
    830 
    831 /************* Test for regular file when we can't do it **********/
    832 
    833 /* Assume all files are regular. */
    834 
    835 int isregfile(char *filename) { return 1; }
    836 
    837 
    838 /************* Test for a terminal when we can't do it **********/
    839 
    840 static BOOL
    841 is_stdout_tty(void)
    842 {
    843 return FALSE;
    844 }
    845 
    846 static BOOL
    847 is_file_tty(FILE *f)
    848 {
    849 return FALSE;
    850 }
    851 
    852 #endif  /* End of system-specific functions */
    853 
    854 
    855 
    856 #ifndef HAVE_STRERROR
    857 /*************************************************
    858 *     Provide strerror() for non-ANSI libraries  *
    859 *************************************************/
    860 
    861 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
    862 in their libraries, but can provide the same facility by this simple
    863 alternative function. */
    864 
    865 extern int   sys_nerr;
    866 extern char *sys_errlist[];
    867 
    868 char *
    869 strerror(int n)
    870 {
    871 if (n < 0 || n >= sys_nerr) return "unknown error number";
    872 return sys_errlist[n];
    873 }
    874 #endif /* HAVE_STRERROR */
    875 
    876 
    877 
    878 /*************************************************
    879 *                Usage function                  *
    880 *************************************************/
    881 
    882 static int
    883 usage(int rc)
    884 {
    885 option_item *op;
    886 fprintf(stderr, "Usage: pcre2grep [-");
    887 for (op = optionlist; op->one_char != 0; op++)
    888   {
    889   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
    890   }
    891 fprintf(stderr, "] [long options] [pattern] [files]\n");
    892 fprintf(stderr, "Type `pcre2grep --help' for more information and the long "
    893   "options.\n");
    894 return rc;
    895 }
    896 
    897 
    898 
    899 /*************************************************
    900 *                Help function                   *
    901 *************************************************/
    902 
    903 static void
    904 help(void)
    905 {
    906 option_item *op;
    907 
    908 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
    909 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
    910 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
    911 
    912 #ifdef SUPPORT_PCRE2GREP_CALLOUT
    913 printf("Callout scripts in patterns are supported." STDOUT_NL);
    914 #else
    915 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
    916 #endif
    917 
    918 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
    919 
    920 #ifdef SUPPORT_LIBZ
    921 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
    922 #endif
    923 
    924 #ifdef SUPPORT_LIBBZ2
    925 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
    926 #endif
    927 
    928 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
    929 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
    930 #else
    931 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
    932 #endif
    933 
    934 printf("Example: pcre2grep -i 'hello.*world' menu.h main.c" STDOUT_NL STDOUT_NL);
    935 printf("Options:" STDOUT_NL);
    936 
    937 for (op = optionlist; op->one_char != 0; op++)
    938   {
    939   int n;
    940   char s[4];
    941 
    942   if (op->one_char > 0 && (op->long_name)[0] == 0)
    943     n = 31 - printf("  -%c", op->one_char);
    944   else
    945     {
    946     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
    947       else strcpy(s, "   ");
    948     n = 31 - printf("  %s --%s", s, op->long_name);
    949     }
    950 
    951   if (n < 1) n = 1;
    952   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
    953   }
    954 
    955 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --buffer-size=100K." STDOUT_NL);
    956 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
    957 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
    958 printf("space is removed and blank lines are ignored." STDOUT_NL);
    959 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
    960 
    961 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
    962 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
    963 }
    964 
    965 
    966 
    967 /*************************************************
    968 *            Test exclude/includes               *
    969 *************************************************/
    970 
    971 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
    972 there are no includes, the path must match an include pattern.
    973 
    974 Arguments:
    975   path      the path to be matched
    976   ip        the chain of include patterns
    977   ep        the chain of exclude patterns
    978 
    979 Returns:    TRUE if the path is not excluded
    980 */
    981 
    982 static BOOL
    983 test_incexc(char *path, patstr *ip, patstr *ep)
    984 {
    985 int plen = strlen((const char *)path);
    986 
    987 for (; ep != NULL; ep = ep->next)
    988   {
    989   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
    990     return FALSE;
    991   }
    992 
    993 if (ip == NULL) return TRUE;
    994 
    995 for (; ip != NULL; ip = ip->next)
    996   {
    997   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
    998     return TRUE;
    999   }
   1000 
   1001 return FALSE;
   1002 }
   1003 
   1004 
   1005 
   1006 /*************************************************
   1007 *         Decode integer argument value          *
   1008 *************************************************/
   1009 
   1010 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
   1011 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
   1012 just keep it simple.
   1013 
   1014 Arguments:
   1015   option_data   the option data string
   1016   op            the option item (for error messages)
   1017   longop        TRUE if option given in long form
   1018 
   1019 Returns:        a long integer
   1020 */
   1021 
   1022 static long int
   1023 decode_number(char *option_data, option_item *op, BOOL longop)
   1024 {
   1025 unsigned long int n = 0;
   1026 char *endptr = option_data;
   1027 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
   1028 while (isdigit((unsigned char)(*endptr)))
   1029   n = n * 10 + (int)(*endptr++ - '0');
   1030 if (toupper(*endptr) == 'K')
   1031   {
   1032   n *= 1024;
   1033   endptr++;
   1034   }
   1035 else if (toupper(*endptr) == 'M')
   1036   {
   1037   n *= 1024*1024;
   1038   endptr++;
   1039   }
   1040 
   1041 if (*endptr != 0)   /* Error */
   1042   {
   1043   if (longop)
   1044     {
   1045     char *equals = strchr(op->long_name, '=');
   1046     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
   1047       (int)(equals - op->long_name);
   1048     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
   1049       option_data, nlen, op->long_name);
   1050     }
   1051   else
   1052     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
   1053       option_data, op->one_char);
   1054   pcre2grep_exit(usage(2));
   1055   }
   1056 
   1057 return n;
   1058 }
   1059 
   1060 
   1061 
   1062 /*************************************************
   1063 *       Add item to a chain of numbers           *
   1064 *************************************************/
   1065 
   1066 /* Used to add an item onto a chain, or just return an unconnected item if the
   1067 "after" argument is NULL.
   1068 
   1069 Arguments:
   1070   n          the number to add
   1071   after      if not NULL points to item to insert after
   1072 
   1073 Returns:     new number block
   1074 */
   1075 
   1076 static omstr *
   1077 add_number(int n, omstr *after)
   1078 {
   1079 omstr *om = (omstr *)malloc(sizeof(omstr));
   1080 
   1081 if (om == NULL)
   1082   {
   1083   fprintf(stderr, "pcre2grep: malloc failed\n");
   1084   pcre2grep_exit(2);
   1085   }
   1086 om->next = NULL;
   1087 om->groupnum = n;
   1088 
   1089 if (after != NULL)
   1090   {
   1091   om->next = after->next;
   1092   after->next = om;
   1093   }
   1094 return om;
   1095 }
   1096 
   1097 
   1098 
   1099 /*************************************************
   1100 *            Read one line of input              *
   1101 *************************************************/
   1102 
   1103 /* Normally, input is read using fread() into a large buffer, so many lines may
   1104 be read at once. However, doing this for tty input means that no output appears
   1105 until a lot of input has been typed. Instead, tty input is handled line by
   1106 line. We cannot use fgets() for this, because it does not stop at a binary
   1107 zero, and therefore there is no way of telling how many characters it has read,
   1108 because there may be binary zeros embedded in the data.
   1109 
   1110 Arguments:
   1111   buffer     the buffer to read into
   1112   length     the maximum number of characters to read
   1113   f          the file
   1114 
   1115 Returns:     the number of characters read, zero at end of file
   1116 */
   1117 
   1118 static unsigned int
   1119 read_one_line(char *buffer, int length, FILE *f)
   1120 {
   1121 int c;
   1122 int yield = 0;
   1123 while ((c = fgetc(f)) != EOF)
   1124   {
   1125   buffer[yield++] = c;
   1126   if (c == '\n' || yield >= length) break;
   1127   }
   1128 return yield;
   1129 }
   1130 
   1131 
   1132 
   1133 /*************************************************
   1134 *             Find end of line                   *
   1135 *************************************************/
   1136 
   1137 /* The length of the endline sequence that is found is set via lenptr. This may
   1138 be zero at the very end of the file if there is no line-ending sequence there.
   1139 
   1140 Arguments:
   1141   p         current position in line
   1142   endptr    end of available data
   1143   lenptr    where to put the length of the eol sequence
   1144 
   1145 Returns:    pointer after the last byte of the line,
   1146             including the newline byte(s)
   1147 */
   1148 
   1149 static char *
   1150 end_of_line(char *p, char *endptr, int *lenptr)
   1151 {
   1152 switch(endlinetype)
   1153   {
   1154   default:      /* Just in case */
   1155   case PCRE2_NEWLINE_LF:
   1156   while (p < endptr && *p != '\n') p++;
   1157   if (p < endptr)
   1158     {
   1159     *lenptr = 1;
   1160     return p + 1;
   1161     }
   1162   *lenptr = 0;
   1163   return endptr;
   1164 
   1165   case PCRE2_NEWLINE_CR:
   1166   while (p < endptr && *p != '\r') p++;
   1167   if (p < endptr)
   1168     {
   1169     *lenptr = 1;
   1170     return p + 1;
   1171     }
   1172   *lenptr = 0;
   1173   return endptr;
   1174 
   1175   case PCRE2_NEWLINE_CRLF:
   1176   for (;;)
   1177     {
   1178     while (p < endptr && *p != '\r') p++;
   1179     if (++p >= endptr)
   1180       {
   1181       *lenptr = 0;
   1182       return endptr;
   1183       }
   1184     if (*p == '\n')
   1185       {
   1186       *lenptr = 2;
   1187       return p + 1;
   1188       }
   1189     }
   1190   break;
   1191 
   1192   case PCRE2_NEWLINE_ANYCRLF:
   1193   while (p < endptr)
   1194     {
   1195     int extra = 0;
   1196     register int c = *((unsigned char *)p);
   1197 
   1198     if (utf && c >= 0xc0)
   1199       {
   1200       int gcii, gcss;
   1201       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
   1202       gcss = 6*extra;
   1203       c = (c & utf8_table3[extra]) << gcss;
   1204       for (gcii = 1; gcii <= extra; gcii++)
   1205         {
   1206         gcss -= 6;
   1207         c |= (p[gcii] & 0x3f) << gcss;
   1208         }
   1209       }
   1210 
   1211     p += 1 + extra;
   1212 
   1213     switch (c)
   1214       {
   1215       case '\n':
   1216       *lenptr = 1;
   1217       return p;
   1218 
   1219       case '\r':
   1220       if (p < endptr && *p == '\n')
   1221         {
   1222         *lenptr = 2;
   1223         p++;
   1224         }
   1225       else *lenptr = 1;
   1226       return p;
   1227 
   1228       default:
   1229       break;
   1230       }
   1231     }   /* End of loop for ANYCRLF case */
   1232 
   1233   *lenptr = 0;  /* Must have hit the end */
   1234   return endptr;
   1235 
   1236   case PCRE2_NEWLINE_ANY:
   1237   while (p < endptr)
   1238     {
   1239     int extra = 0;
   1240     register int c = *((unsigned char *)p);
   1241 
   1242     if (utf && c >= 0xc0)
   1243       {
   1244       int gcii, gcss;
   1245       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
   1246       gcss = 6*extra;
   1247       c = (c & utf8_table3[extra]) << gcss;
   1248       for (gcii = 1; gcii <= extra; gcii++)
   1249         {
   1250         gcss -= 6;
   1251         c |= (p[gcii] & 0x3f) << gcss;
   1252         }
   1253       }
   1254 
   1255     p += 1 + extra;
   1256 
   1257     switch (c)
   1258       {
   1259       case '\n':    /* LF */
   1260       case '\v':    /* VT */
   1261       case '\f':    /* FF */
   1262       *lenptr = 1;
   1263       return p;
   1264 
   1265       case '\r':    /* CR */
   1266       if (p < endptr && *p == '\n')
   1267         {
   1268         *lenptr = 2;
   1269         p++;
   1270         }
   1271       else *lenptr = 1;
   1272       return p;
   1273 
   1274 #ifndef EBCDIC
   1275       case 0x85:    /* Unicode NEL */
   1276       *lenptr = utf? 2 : 1;
   1277       return p;
   1278 
   1279       case 0x2028:  /* Unicode LS */
   1280       case 0x2029:  /* Unicode PS */
   1281       *lenptr = 3;
   1282       return p;
   1283 #endif  /* Not EBCDIC */
   1284 
   1285       default:
   1286       break;
   1287       }
   1288     }   /* End of loop for ANY case */
   1289 
   1290   *lenptr = 0;  /* Must have hit the end */
   1291   return endptr;
   1292   }     /* End of overall switch */
   1293 }
   1294 
   1295 
   1296 
   1297 /*************************************************
   1298 *         Find start of previous line            *
   1299 *************************************************/
   1300 
   1301 /* This is called when looking back for before lines to print.
   1302 
   1303 Arguments:
   1304   p         start of the subsequent line
   1305   startptr  start of available data
   1306 
   1307 Returns:    pointer to the start of the previous line
   1308 */
   1309 
   1310 static char *
   1311 previous_line(char *p, char *startptr)
   1312 {
   1313 switch(endlinetype)
   1314   {
   1315   default:      /* Just in case */
   1316   case PCRE2_NEWLINE_LF:
   1317   p--;
   1318   while (p > startptr && p[-1] != '\n') p--;
   1319   return p;
   1320 
   1321   case PCRE2_NEWLINE_CR:
   1322   p--;
   1323   while (p > startptr && p[-1] != '\n') p--;
   1324   return p;
   1325 
   1326   case PCRE2_NEWLINE_CRLF:
   1327   for (;;)
   1328     {
   1329     p -= 2;
   1330     while (p > startptr && p[-1] != '\n') p--;
   1331     if (p <= startptr + 1 || p[-2] == '\r') return p;
   1332     }
   1333   /* Control can never get here */
   1334 
   1335   case PCRE2_NEWLINE_ANY:
   1336   case PCRE2_NEWLINE_ANYCRLF:
   1337   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
   1338   if (utf) while ((*p & 0xc0) == 0x80) p--;
   1339 
   1340   while (p > startptr)
   1341     {
   1342     register unsigned int c;
   1343     char *pp = p - 1;
   1344 
   1345     if (utf)
   1346       {
   1347       int extra = 0;
   1348       while ((*pp & 0xc0) == 0x80) pp--;
   1349       c = *((unsigned char *)pp);
   1350       if (c >= 0xc0)
   1351         {
   1352         int gcii, gcss;
   1353         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
   1354         gcss = 6*extra;
   1355         c = (c & utf8_table3[extra]) << gcss;
   1356         for (gcii = 1; gcii <= extra; gcii++)
   1357           {
   1358           gcss -= 6;
   1359           c |= (pp[gcii] & 0x3f) << gcss;
   1360           }
   1361         }
   1362       }
   1363     else c = *((unsigned char *)pp);
   1364 
   1365     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
   1366       {
   1367       case '\n':    /* LF */
   1368       case '\r':    /* CR */
   1369       return p;
   1370 
   1371       default:
   1372       break;
   1373       }
   1374 
   1375     else switch (c)
   1376       {
   1377       case '\n':    /* LF */
   1378       case '\v':    /* VT */
   1379       case '\f':    /* FF */
   1380       case '\r':    /* CR */
   1381 #ifndef EBCDIE
   1382       case 0x85:    /* Unicode NEL */
   1383       case 0x2028:  /* Unicode LS */
   1384       case 0x2029:  /* Unicode PS */
   1385 #endif  /* Not EBCDIC */
   1386       return p;
   1387 
   1388       default:
   1389       break;
   1390       }
   1391 
   1392     p = pp;  /* Back one character */
   1393     }        /* End of loop for ANY case */
   1394 
   1395   return startptr;  /* Hit start of data */
   1396   }     /* End of overall switch */
   1397 }
   1398 
   1399 
   1400 
   1401 
   1402 
   1403 /*************************************************
   1404 *       Print the previous "after" lines         *
   1405 *************************************************/
   1406 
   1407 /* This is called if we are about to lose said lines because of buffer filling,
   1408 and at the end of the file. The data in the line is written using fwrite() so
   1409 that a binary zero does not terminate it.
   1410 
   1411 Arguments:
   1412   lastmatchnumber   the number of the last matching line, plus one
   1413   lastmatchrestart  where we restarted after the last match
   1414   endptr            end of available data
   1415   printname         filename for printing
   1416 
   1417 Returns:            nothing
   1418 */
   1419 
   1420 static void
   1421 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
   1422   char *printname)
   1423 {
   1424 if (after_context > 0 && lastmatchnumber > 0)
   1425   {
   1426   int count = 0;
   1427   while (lastmatchrestart < endptr && count++ < after_context)
   1428     {
   1429     int ellength;
   1430     char *pp = lastmatchrestart;
   1431     if (printname != NULL) fprintf(stdout, "%s-", printname);
   1432     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
   1433     pp = end_of_line(pp, endptr, &ellength);
   1434     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
   1435     lastmatchrestart = pp;
   1436     }
   1437   hyphenpending = TRUE;
   1438   }
   1439 }
   1440 
   1441 
   1442 
   1443 /*************************************************
   1444 *   Apply patterns to subject till one matches   *
   1445 *************************************************/
   1446 
   1447 /* This function is called to run through all patterns, looking for a match. It
   1448 is used multiple times for the same subject when colouring is enabled, in order
   1449 to find all possible matches.
   1450 
   1451 Arguments:
   1452   matchptr     the start of the subject
   1453   length       the length of the subject to match
  options      options for pcre2_match()
   1455   startoffset  where to start matching
   1456   mrc          address of where to put the result of pcre2_match()
   1457 
   1458 Returns:      TRUE if there was a match
   1459               FALSE if there was no match
   1460               invert if there was a non-fatal error
   1461 */
   1462 
   1463 static BOOL
   1464 match_patterns(char *matchptr, size_t length, unsigned int options,
   1465   size_t startoffset, int *mrc)
   1466 {
   1467 int i;
   1468 size_t slen = length;
   1469 patstr *p = patterns;
   1470 const char *msg = "this text:\n\n";
   1471 
   1472 if (slen > 200)
   1473   {
   1474   slen = 200;
   1475   msg = "text that starts:\n\n";
   1476   }
   1477 for (i = 1; p != NULL; p = p->next, i++)
   1478   {
   1479   *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
   1480     startoffset, options, match_data, match_context);
   1481   if (*mrc >= 0) return TRUE;
   1482   if (*mrc == PCRE2_ERROR_NOMATCH) continue;
   1483   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
   1484   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
   1485   fprintf(stderr, "%s", msg);
   1486   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
   1487   fprintf(stderr, "\n\n");
   1488   if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_RECURSIONLIMIT ||
   1489       *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
   1490     resource_error = TRUE;
   1491   if (error_count++ > 20)
   1492     {
   1493     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
   1494     pcre2grep_exit(2);
   1495     }
   1496   return invert;    /* No more matching; don't show the line again */
   1497   }
   1498 
   1499 return FALSE;  /* No match, no errors */
   1500 }
   1501 
   1502 
   1503 #ifdef SUPPORT_PCRE2GREP_CALLOUT
   1504 
   1505 /*************************************************
   1506 *        Parse and execute callout scripts       *
   1507 *************************************************/
   1508 
   1509 /* This function parses a callout string block and executes the
   1510 program specified by the string. The string is a list of substrings
   1511 separated by pipe characters. The first substring represents the
   1512 executable name, and the following substrings specify the arguments:
   1513 
   1514   program_name|param1|param2|...
   1515 
Any substring (including the program name) can contain escape sequences
   1517 started by the dollar character. The escape sequences are substituted as
   1518 follows:
   1519 
   1520   $<digits> or ${<digits>} is replaced by the captured substring of the given
   1521   decimal number, which must be greater than zero. If the number is greater
   1522   than the number of capturing substrings, or if the capture is unset, the
   1523   replacement is empty.
   1524 
   1525   Any other character is substituted by itself. E.g: $$ is replaced by a single
   1526   dollar or $| replaced by a pipe character.
   1527 
   1528 Example:
   1529 
   1530   echo -e "abcde\n12345" | pcre2grep \
   1531     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
   1532 
   1533   Output:
   1534 
   1535     Arg1: [a] [bcd] [d] Arg2: |a| ()
   1536     abcde
   1537     Arg1: [1] [234] [4] Arg2: |1| ()
   1538     12345
   1539 
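Arguments:
  calloutptr   the callout block
  unused       not used

Returns:       0 if the callout string is absent, empty, or malformed, if
               memory cannot be obtained, or if the wait status of the
               program is zero; 1 if the wait status is non-zero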
   1544 */
   1545 
   1546 static int
   1547 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
   1548 {
   1549 PCRE2_SIZE length = calloutptr->callout_string_length;
   1550 PCRE2_SPTR string = calloutptr->callout_string;
   1551 PCRE2_SPTR subject = calloutptr->subject;
   1552 PCRE2_SIZE *ovector = calloutptr->offset_vector;
   1553 PCRE2_SIZE capture_top = calloutptr->capture_top;
   1554 PCRE2_SIZE argsvectorlen = 2;
   1555 PCRE2_SIZE argslen = 1;
   1556 char *args;
   1557 char *argsptr;
   1558 char **argsvector;
   1559 char **argsvectorptr;
   1560 pid_t pid;
   1561 int result = 0;
   1562 
   1563 (void)unused;   /* Avoid compiler warning */
   1564 
   1565 /* Only callout with strings are supported. */
   1566 if (string == NULL || length == 0) return 0;
   1567 
   1568 /* Checking syntax and compute the number of string fragments. Callout strings
   1569 are ignored in case of a syntax error. */
   1570 
   1571 while (length > 0)
   1572   {
   1573   if (*string == '|')
   1574     {
   1575     argsvectorlen++;
   1576 
   1577     /* Maximum 10000 arguments allowed. */
   1578     if (argsvectorlen > 10000) return 0;
   1579     }
   1580   else if (*string == '$')
   1581     {
   1582     PCRE2_SIZE capture_id = 0;
   1583 
   1584     string++;
   1585     length--;
   1586 
   1587     /* Syntax error: a character must be present after $. */
   1588     if (length == 0) return 0;
   1589 
   1590     if (*string >= '1' && *string <= '9')
   1591       {
   1592       do
   1593         {
   1594         /* Maximum capture id is 65535. */
   1595         if (capture_id <= 65535)
   1596           capture_id = capture_id * 10 + (*string - '0');
   1597 
   1598         string++;
   1599         length--;
   1600         }
   1601       while (length > 0 && *string >= '0' && *string <= '9');
   1602 
   1603       /* To negate the effect of string++ below. */
   1604       string--;
   1605       length++;
   1606       }
   1607     else if (*string == '{')
   1608       {
   1609       /* Must be a decimal number in parenthesis, e.g: (5) or (38) */
   1610       string++;
   1611       length--;
   1612 
   1613       /* Syntax error: a decimal number required. */
   1614       if (length == 0) return 0;
   1615       if (*string < '1' || *string > '9') return 0;
   1616 
   1617       do
   1618         {
   1619         /* Maximum capture id is 65535. */
   1620         if (capture_id <= 65535)
   1621           capture_id = capture_id * 10 + (*string - '0');
   1622 
   1623         string++;
   1624         length--;
   1625 
   1626         /* Syntax error: no more characters */
   1627         if (length == 0) return 0;
   1628         }
   1629       while (*string >= '0' && *string <= '9');
   1630 
   1631       /* Syntax error: close paren is missing. */
   1632       if (*string != '}') return 0;
   1633       }
   1634 
   1635     if (capture_id > 0)
   1636       {
   1637       if (capture_id < capture_top)
   1638         {
   1639         capture_id *= 2;
   1640         argslen += ovector[capture_id + 1] - ovector[capture_id];
   1641         }
   1642 
   1643       /* To negate the effect of argslen++ below. */
   1644       argslen--;
   1645       }
   1646     }
   1647 
   1648   string++;
   1649   length--;
   1650   argslen++;
   1651   }
   1652 
   1653 args = (char*)malloc(argslen);
   1654 if (args == NULL) return 0;
   1655 
   1656 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
   1657 if (argsvector == NULL)
   1658   {
   1659   free(args);
   1660   return 0;
   1661   }
   1662 
   1663 argsptr = args;
   1664 argsvectorptr = argsvector;
   1665 
   1666 *argsvectorptr++ = argsptr;
   1667 
   1668 length = calloutptr->callout_string_length;
   1669 string = calloutptr->callout_string;
   1670 
   1671 while (length > 0)
   1672   {
   1673   if (*string == '|')
   1674     {
   1675     *argsptr++ = '\0';
   1676     *argsvectorptr++ = argsptr;
   1677     }
   1678   else if (*string == '$')
   1679     {
   1680     string++;
   1681     length--;
   1682 
   1683     if ((*string >= '1' && *string <= '9') || *string == '{')
   1684       {
   1685       PCRE2_SIZE capture_id = 0;
   1686 
   1687       if (*string != '{')
   1688         {
   1689         do
   1690           {
   1691           /* Maximum capture id is 65535. */
   1692           if (capture_id <= 65535)
   1693             capture_id = capture_id * 10 + (*string - '0');
   1694 
   1695           string++;
   1696           length--;
   1697           }
   1698         while (length > 0 && *string >= '0' && *string <= '9');
   1699 
   1700         /* To negate the effect of string++ below. */
   1701         string--;
   1702         length++;
   1703         }
   1704       else
   1705         {
   1706         string++;
   1707         length--;
   1708 
   1709         do
   1710           {
   1711           /* Maximum capture id is 65535. */
   1712           if (capture_id <= 65535)
   1713             capture_id = capture_id * 10 + (*string - '0');
   1714 
   1715           string++;
   1716           length--;
   1717           }
   1718         while (*string != '}');
   1719         }
   1720 
   1721         if (capture_id < capture_top)
   1722           {
   1723           PCRE2_SIZE capturesize;
   1724           capture_id *= 2;
   1725 
   1726           capturesize = ovector[capture_id + 1] - ovector[capture_id];
   1727           memcpy(argsptr, subject + ovector[capture_id], capturesize);
   1728           argsptr += capturesize;
   1729           }
   1730       }
   1731     else
   1732       {
   1733       *argsptr++ = *string;
   1734       }
   1735     }
   1736   else
   1737     {
   1738     *argsptr++ = *string;
   1739     }
   1740 
   1741   string++;
   1742   length--;
   1743   }
   1744 
   1745 *argsptr++ = '\0';
   1746 *argsvectorptr = NULL;
   1747 
   1748 pid = fork();
   1749 
   1750 if (pid == 0)
   1751   {
   1752   (void)execv(argsvector[0], argsvector);
   1753   /* Control gets here if there is an error, e.g. a non-existent program */
   1754   exit(1);
   1755   }
   1756 else if (pid > 0)
   1757   (void)waitpid(pid, &result, 0);
   1758 
   1759 free(args);
   1760 free(argsvector);
   1761 
   1762 /* Currently negative return values are not supported, only zero (match
   1763 continues) or non-zero (match fails). */
   1764 
   1765 return result != 0;
   1766 }
   1767 
   1768 #endif
   1769 
   1770 
   1771 
   1772 /*************************************************
   1773 *            Grep an individual file             *
   1774 *************************************************/
   1775 
   1776 /* This is called from grep_or_recurse() below. It uses a buffer that is three
   1777 times the value of bufthird. The matching point is never allowed to stray into
   1778 the top third of the buffer, thus keeping more of the file available for
   1779 context printing or for multiline scanning. For large files, the pointer will
   1780 be in the middle third most of the time, so the bottom third is available for
   1781 "before" context printing.
   1782 
   1783 Arguments:
   1784   handle       the fopened FILE stream for a normal file
   1785                the gzFile pointer when reading is via libz
   1786                the BZFILE pointer when reading is via libbz2
   1787   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
   1788   filename     the file name or NULL (for errors)
   1789   printname    the file name if it is to be printed for each match
   1790                or NULL if the file name is not to be printed
   1791                it cannot be NULL if filenames[_nomatch]_only is set
   1792 
   1793 Returns:       0 if there was at least one match
   1794                1 otherwise (no matches)
   1795                2 if an overlong line is encountered
   1796                3 if there is a read error on a .bz2 file
   1797 */
   1798 
   1799 static int
   1800 pcre2grep(void *handle, int frtype, char *filename, char *printname)
   1801 {
   1802 int rc = 1;
   1803 int linenumber = 1;
   1804 int lastmatchnumber = 0;
   1805 int count = 0;
   1806 int filepos = 0;
   1807 char *lastmatchrestart = NULL;
   1808 char *ptr = main_buffer;
   1809 char *endptr;
   1810 size_t bufflength;
   1811 BOOL binary = FALSE;
   1812 BOOL endhyphenpending = FALSE;
   1813 BOOL input_line_buffered = line_buffered;
   1814 FILE *in = NULL;                    /* Ensure initialized */
   1815 
   1816 #ifdef SUPPORT_LIBZ
   1817 gzFile ingz = NULL;
   1818 #endif
   1819 
   1820 #ifdef SUPPORT_LIBBZ2
   1821 BZFILE *inbz2 = NULL;
   1822 #endif
   1823 
   1824 
   1825 /* Do the first read into the start of the buffer and set up the pointer to end
   1826 of what we have. In the case of libz, a non-zipped .gz file will be read as a
   1827 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
   1828 fail. */
   1829 
   1830 (void)frtype;
   1831 
   1832 #ifdef SUPPORT_LIBZ
   1833 if (frtype == FR_LIBZ)
   1834   {
   1835   ingz = (gzFile)handle;
   1836   bufflength = gzread (ingz, main_buffer, bufsize);
   1837   }
   1838 else
   1839 #endif
   1840 
   1841 #ifdef SUPPORT_LIBBZ2
   1842 if (frtype == FR_LIBBZ2)
   1843   {
   1844   inbz2 = (BZFILE *)handle;
   1845   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
   1846   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
   1847   }                                    /* without the cast it is unsigned. */
   1848 else
   1849 #endif
   1850 
   1851   {
   1852   in = (FILE *)handle;
   1853   if (is_file_tty(in)) input_line_buffered = TRUE;
   1854   bufflength = input_line_buffered?
   1855     read_one_line(main_buffer, bufsize, in) :
   1856     fread(main_buffer, 1, bufsize, in);
   1857   }
   1858 
   1859 endptr = main_buffer + bufflength;
   1860 
   1861 /* Unless binary-files=text, see if we have a binary file. This uses the same
   1862 rule as GNU grep, namely, a search for a binary zero byte near the start of the
   1863 file. */
   1864 
   1865 if (binary_files != BIN_TEXT)
   1866   {
   1867   binary =
   1868     memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
   1869   if (binary && binary_files == BIN_NOMATCH) return 1;
   1870   }
   1871 
   1872 /* Loop while the current pointer is not at the end of the file. For large
   1873 files, endptr will be at the end of the buffer when we are in the middle of the
   1874 file, but ptr will never get there, because as soon as it gets over 2/3 of the
   1875 way, the buffer is shifted left and re-filled. */
   1876 
   1877 while (ptr < endptr)
   1878   {
   1879   int endlinelength;
   1880   int mrc = 0;
   1881   unsigned int options = 0;
   1882   BOOL match;
   1883   char *matchptr = ptr;
   1884   char *t = ptr;
   1885   size_t length, linelength;
   1886   size_t startoffset = 0;
   1887 
   1888   /* At this point, ptr is at the start of a line. We need to find the length
   1889   of the subject string to pass to pcre2_match(). In multiline mode, it is the
   1890   length remainder of the data in the buffer. Otherwise, it is the length of
   1891   the next line, excluding the terminating newline. After matching, we always
   1892   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
   1893   option is used for compiling, so that any match is constrained to be in the
   1894   first line. */
   1895 
   1896   t = end_of_line(t, endptr, &endlinelength);
   1897   linelength = t - ptr - endlinelength;
   1898   length = multiline? (size_t)(endptr - ptr) : linelength;
   1899 
   1900   /* Check to see if the line we are looking at extends right to the very end
   1901   of the buffer without a line terminator. This means the line is too long to
   1902   handle. */
   1903 
   1904   if (endlinelength == 0 && t == main_buffer + bufsize)
   1905     {
   1906     fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n"
   1907                     "pcre2grep: the buffer size is %d\n"
   1908                     "pcre2grep: use the --buffer-size option to change it\n",
   1909                     linenumber,
   1910                     (filename == NULL)? "" : " of file ",
   1911                     (filename == NULL)? "" : filename,
   1912                     bufthird);
   1913     return 2;
   1914     }
   1915 
   1916   /* Extra processing for Jeffrey Friedl's debugging. */
   1917 
   1918 #ifdef JFRIEDL_DEBUG
   1919   if (jfriedl_XT || jfriedl_XR)
   1920   {
   1921 #     include <sys/time.h>
   1922 #     include <time.h>
   1923       struct timeval start_time, end_time;
   1924       struct timezone dummy;
   1925       int i;
   1926 
   1927       if (jfriedl_XT)
   1928       {
   1929           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
   1930           const char *orig = ptr;
   1931           ptr = malloc(newlen + 1);
   1932           if (!ptr) {
   1933                   printf("out of memory");
   1934                   pcre2grep_exit(2);
   1935           }
   1936           endptr = ptr;
   1937           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
   1938           for (i = 0; i < jfriedl_XT; i++) {
   1939                   strncpy(endptr, orig,  length);
   1940                   endptr += length;
   1941           }
   1942           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
   1943           length = newlen;
   1944       }
   1945 
   1946       if (gettimeofday(&start_time, &dummy) != 0)
   1947               perror("bad gettimeofday");
   1948 
   1949 
   1950       for (i = 0; i < jfriedl_XR; i++)
   1951           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
   1952               PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
   1953 
   1954       if (gettimeofday(&end_time, &dummy) != 0)
   1955               perror("bad gettimeofday");
   1956 
   1957       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
   1958                       -
   1959                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
   1960 
   1961       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
   1962       return 0;
   1963   }
   1964 #endif
   1965 
   1966   /* We come back here after a match when show_only_matching is set, in order
   1967   to find any further matches in the same line. This applies to
   1968   --only-matching, --file-offsets, and --line-offsets. */
   1969 
   1970   ONLY_MATCHING_RESTART:
   1971 
   1972   /* Run through all the patterns until one matches or there is an error other
   1973   than NOMATCH. This code is in a subroutine so that it can be re-used for
   1974   finding subsequent matches when colouring matched lines. After finding one
   1975   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
   1976   this line. */
   1977 
   1978   match = match_patterns(matchptr, length, options, startoffset, &mrc);
   1979   options = PCRE2_NOTEMPTY;
   1980 
   1981   /* If it's a match or a not-match (as required), do what's wanted. */
   1982 
   1983   if (match != invert)
   1984     {
   1985     BOOL hyphenprinted = FALSE;
   1986 
   1987     /* We've failed if we want a file that doesn't have any matches. */
   1988 
   1989     if (filenames == FN_NOMATCH_ONLY) return 1;
   1990 
   1991     /* If all we want is a yes/no answer, we can return immediately. */
   1992 
   1993     if (quiet) return 0;
   1994 
   1995     /* Just count if just counting is wanted. */
   1996 
   1997     else if (count_only) count++;
   1998 
   1999     /* When handling a binary file and binary-files==binary, the "binary"
   2000     variable will be set true (it's false in all other cases). In this
   2001     situation we just want to output the file name. No need to scan further. */
   2002 
   2003     else if (binary)
   2004       {
   2005       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
   2006       return 0;
   2007       }
   2008 
   2009     /* Likewise, if all we want is a file name, there is no need to scan any
   2010     more lines in the file. */
   2011 
   2012     else if (filenames == FN_MATCH_ONLY)
   2013       {
   2014       fprintf(stdout, "%s" STDOUT_NL, printname);
   2015       return 0;
   2016       }
   2017 
   2018     /* The --only-matching option prints just the substring that matched,
   2019     and/or one or more captured portions of it, as long as these strings are
   2020     not empty. The --file-offsets and --line-offsets options output offsets for
   2021     the matching substring (all three set show_only_matching). None of these
   2022     mutually exclusive options prints any context. Afterwards, adjust the start
   2023     and then jump back to look for further matches in the same line. If we are
   2024     in invert mode, however, nothing is printed and we do not restart - this
   2025     could still be useful because the return code is set. */
   2026 
   2027     else if (show_only_matching)
   2028       {
   2029       if (!invert)
   2030         {
   2031         size_t oldstartoffset;
   2032 
   2033         if (printname != NULL) fprintf(stdout, "%s:", printname);
   2034         if (number) fprintf(stdout, "%d:", linenumber);
   2035 
   2036         /* Handle --line-offsets */
   2037 
   2038         if (line_offsets)
   2039           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(matchptr + offsets[0] - ptr),
   2040             (int)(offsets[1] - offsets[0]));
   2041 
   2042         /* Handle --file-offsets */
   2043 
   2044         else if (file_offsets)
   2045           fprintf(stdout, "%d,%d" STDOUT_NL,
   2046             (int)(filepos + matchptr + offsets[0] - ptr),
   2047             (int)(offsets[1] - offsets[0]));
   2048 
   2049         /* Handle --only-matching, which may occur many times */
   2050 
   2051         else
   2052           {
   2053           BOOL printed = FALSE;
   2054           omstr *om;
   2055 
   2056           for (om = only_matching; om != NULL; om = om->next)
   2057             {
   2058             int n = om->groupnum;
   2059             if (n < mrc)
   2060               {
   2061               int plen = offsets[2*n + 1] - offsets[2*n];
   2062               if (plen > 0)
   2063                 {
   2064                 if (printed) fprintf(stdout, "%s", om_separator);
   2065                 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
   2066                 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
   2067                 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
   2068                 printed = TRUE;
   2069                 }
   2070               }
   2071             }
   2072 
   2073           if (printed || printname != NULL || number)
   2074             fprintf(stdout, STDOUT_NL);
   2075           }
   2076 
   2077         /* Prepare to repeat to find the next match in the line. */
   2078 
   2079         match = FALSE;
   2080         if (line_buffered) fflush(stdout);
   2081         rc = 0;                      /* Had some success */
   2082 
   2083         /* If the current match ended past the end of the line (only possible
   2084         in multiline mode), we are done with this line. */
   2085 
   2086         if (offsets[1] > linelength) goto END_ONE_MATCH;
   2087 
   2088         /* If the pattern contained a lookbehind that included \K, it is
   2089         possible that the end of the match might be at or before the actual
   2090         starting offset we have just used. In this case, start one character
   2091         further on. */
   2092 
   2093         startoffset = offsets[1];    /* Restart after the match */
   2094         oldstartoffset = pcre2_get_startchar(match_data);
   2095         if (startoffset <= oldstartoffset)
   2096           {
   2097           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
   2098           startoffset = oldstartoffset + 1;
   2099           if (utf)
   2100             while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
   2101           }
   2102         goto ONLY_MATCHING_RESTART;
   2103         }
   2104       }
   2105 
   2106     /* This is the default case when none of the above options is set. We print
   2107     the matching lines(s), possibly preceded and/or followed by other lines of
   2108     context. */
   2109 
   2110     else
   2111       {
   2112       /* See if there is a requirement to print some "after" lines from a
   2113       previous match. We never print any overlaps. */
   2114 
   2115       if (after_context > 0 && lastmatchnumber > 0)
   2116         {
   2117         int ellength;
   2118         int linecount = 0;
   2119         char *p = lastmatchrestart;
   2120 
   2121         while (p < ptr && linecount < after_context)
   2122           {
   2123           p = end_of_line(p, ptr, &ellength);
   2124           linecount++;
   2125           }
   2126 
   2127         /* It is important to advance lastmatchrestart during this printing so
   2128         that it interacts correctly with any "before" printing below. Print
   2129         each line's data using fwrite() in case there are binary zeroes. */
   2130 
   2131         while (lastmatchrestart < p)
   2132           {
   2133           char *pp = lastmatchrestart;
   2134           if (printname != NULL) fprintf(stdout, "%s-", printname);
   2135           if (number) fprintf(stdout, "%d-", lastmatchnumber++);
   2136           pp = end_of_line(pp, endptr, &ellength);
   2137           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
   2138           lastmatchrestart = pp;
   2139           }
   2140         if (lastmatchrestart != ptr) hyphenpending = TRUE;
   2141         }
   2142 
   2143       /* If there were non-contiguous lines printed above, insert hyphens. */
   2144 
   2145       if (hyphenpending)
   2146         {
   2147         fprintf(stdout, "--" STDOUT_NL);
   2148         hyphenpending = FALSE;
   2149         hyphenprinted = TRUE;
   2150         }
   2151 
   2152       /* See if there is a requirement to print some "before" lines for this
   2153       match. Again, don't print overlaps. */
   2154 
   2155       if (before_context > 0)
   2156         {
   2157         int linecount = 0;
   2158         char *p = ptr;
   2159 
   2160         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
   2161                linecount < before_context)
   2162           {
   2163           linecount++;
   2164           p = previous_line(p, main_buffer);
   2165           }
   2166 
   2167         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
   2168           fprintf(stdout, "--" STDOUT_NL);
   2169 
   2170         while (p < ptr)
   2171           {
   2172           int ellength;
   2173           char *pp = p;
   2174           if (printname != NULL) fprintf(stdout, "%s-", printname);
   2175           if (number) fprintf(stdout, "%d-", linenumber - linecount--);
   2176           pp = end_of_line(pp, endptr, &ellength);
   2177           FWRITE(p, 1, pp - p, stdout);
   2178           p = pp;
   2179           }
   2180         }
   2181 
   2182       /* Now print the matching line(s); ensure we set hyphenpending at the end
   2183       of the file if any context lines are being output. */
   2184 
   2185       if (after_context > 0 || before_context > 0)
   2186         endhyphenpending = TRUE;
   2187 
   2188       if (printname != NULL) fprintf(stdout, "%s:", printname);
   2189       if (number) fprintf(stdout, "%d:", linenumber);
   2190 
   2191       /* In multiline mode, we want to print to the end of the line in which
   2192       the end of the matched string is found, so we adjust linelength and the
   2193       line number appropriately, but only when there actually was a match
   2194       (invert not set). Because the PCRE2_FIRSTLINE option is set, the start of
   2195       the match will always be before the first newline sequence. */
   2196 
   2197       if (multiline & !invert)
   2198         {
   2199         char *endmatch = ptr + offsets[1];
   2200         t = ptr;
   2201         while (t <= endmatch)
   2202           {
   2203           t = end_of_line(t, endptr, &endlinelength);
   2204           if (t < endmatch) linenumber++; else break;
   2205           }
   2206         linelength = t - ptr - endlinelength;
   2207         }
   2208 
   2209       /*** NOTE: Use only fwrite() to output the data line, so that binary
   2210       zeroes are treated as just another data character. */
   2211 
   2212       /* This extra option, for Jeffrey Friedl's debugging requirements,
   2213       replaces the matched string, or a specific captured string if it exists,
   2214       with X. When this happens, colouring is ignored. */
   2215 
   2216 #ifdef JFRIEDL_DEBUG
   2217       if (S_arg >= 0 && S_arg < mrc)
   2218         {
   2219         int first = S_arg * 2;
   2220         int last  = first + 1;
   2221         FWRITE(ptr, 1, offsets[first], stdout);
   2222         fprintf(stdout, "X");
   2223         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
   2224         }
   2225       else
   2226 #endif
   2227 
   2228       /* We have to split the line(s) up if colouring, and search for further
   2229       matches, but not of course if the line is a non-match. */
   2230 
   2231       if (do_colour && !invert)
   2232         {
   2233         int plength;
   2234         FWRITE(ptr, 1, offsets[0], stdout);
   2235         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
   2236         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
   2237         fprintf(stdout, "%c[00m", 0x1b);
   2238         for (;;)
   2239           {
   2240           startoffset = offsets[1];
   2241           if (startoffset >= linelength + endlinelength ||
   2242               !match_patterns(matchptr, length, options, startoffset, &mrc))
   2243             break;
   2244           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
   2245           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
   2246           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
   2247           fprintf(stdout, "%c[00m", 0x1b);
   2248           }
   2249 
   2250         /* In multiline mode, we may have already printed the complete line
   2251         and its line-ending characters (if they matched the pattern), so there
   2252         may be no more to print. */
   2253 
   2254         plength = (int)((linelength + endlinelength) - startoffset);
   2255         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
   2256         }
   2257 
   2258       /* Not colouring; no need to search for further matches */
   2259 
   2260       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
   2261       }
   2262 
   2263     /* End of doing what has to be done for a match. If --line-buffered was
   2264     given, flush the output. */
   2265 
   2266     if (line_buffered) fflush(stdout);
   2267     rc = 0;    /* Had some success */
   2268 
   2269     /* Remember where the last match happened for after_context. We remember
   2270     where we are about to restart, and that line's number. */
   2271 
   2272     lastmatchrestart = ptr + linelength + endlinelength;
   2273     lastmatchnumber = linenumber + 1;
   2274     }
   2275 
   2276   /* For a match in multiline inverted mode (which of course did not cause
   2277   anything to be printed), we have to move on to the end of the match before
   2278   proceeding. */
   2279 
   2280   if (multiline && invert && match)
   2281     {
   2282     int ellength;
   2283     char *endmatch = ptr + offsets[1];
   2284     t = ptr;
   2285     while (t < endmatch)
   2286       {
   2287       t = end_of_line(t, endptr, &ellength);
   2288       if (t <= endmatch) linenumber++; else break;
   2289       }
   2290     endmatch = end_of_line(endmatch, endptr, &ellength);
   2291     linelength = endmatch - ptr - ellength;
   2292     }
   2293 
   2294   /* Advance to after the newline and increment the line number. The file
   2295   offset to the current line is maintained in filepos. */
   2296 
   2297   END_ONE_MATCH:
   2298   ptr += linelength + endlinelength;
   2299   filepos += (int)(linelength + endlinelength);
   2300   linenumber++;
   2301 
   2302   /* If input is line buffered, and the buffer is not yet full, read another
   2303   line and add it into the buffer. */
   2304 
   2305   if (input_line_buffered && bufflength < (size_t)bufsize)
   2306     {
   2307     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
   2308     bufflength += add;
   2309     endptr += add;
   2310     }
   2311 
   2312   /* If we haven't yet reached the end of the file (the buffer is full), and
   2313   the current point is in the top 1/3 of the buffer, slide the buffer down by
   2314   1/3 and refill it. Before we do this, if some unprinted "after" lines are
   2315   about to be lost, print them. */
   2316 
   2317   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
   2318     {
   2319     if (after_context > 0 &&
   2320         lastmatchnumber > 0 &&
   2321         lastmatchrestart < main_buffer + bufthird)
   2322       {
   2323       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
   2324       lastmatchnumber = 0;
   2325       }
   2326 
   2327     /* Now do the shuffle */
   2328 
   2329     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
   2330     ptr -= bufthird;
   2331 
   2332 #ifdef SUPPORT_LIBZ
   2333     if (frtype == FR_LIBZ)
   2334       bufflength = 2*bufthird +
   2335         gzread (ingz, main_buffer + 2*bufthird, bufthird);
   2336     else
   2337 #endif
   2338 
   2339 #ifdef SUPPORT_LIBBZ2
   2340     if (frtype == FR_LIBBZ2)
   2341       bufflength = 2*bufthird +
   2342         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
   2343     else
   2344 #endif
   2345 
   2346     bufflength = 2*bufthird +
   2347       (input_line_buffered?
   2348        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
   2349        fread(main_buffer + 2*bufthird, 1, bufthird, in));
   2350     endptr = main_buffer + bufflength;
   2351 
   2352     /* Adjust any last match point */
   2353 
   2354     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
   2355     }
   2356   }     /* Loop through the whole file */
   2357 
   2358 /* End of file; print final "after" lines if wanted; do_after_lines sets
   2359 hyphenpending if it prints something. */
   2360 
   2361 if (!show_only_matching && !count_only)
   2362   {
   2363   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
   2364   hyphenpending |= endhyphenpending;
   2365   }
   2366 
   2367 /* Print the file name if we are looking for those without matches and there
   2368 were none. If we found a match, we won't have got this far. */
   2369 
   2370 if (filenames == FN_NOMATCH_ONLY)
   2371   {
   2372   fprintf(stdout, "%s" STDOUT_NL, printname);
   2373   return 0;
   2374   }
   2375 
   2376 /* Print the match count if wanted */
   2377 
   2378 if (count_only && !quiet)
   2379   {
   2380   if (count > 0 || !omit_zero_count)
   2381     {
   2382     if (printname != NULL && filenames != FN_NONE)
   2383       fprintf(stdout, "%s:", printname);
   2384     fprintf(stdout, "%d" STDOUT_NL, count);
   2385     }
   2386   }
   2387 
   2388 return rc;
   2389 }
   2390 
   2391 
   2392 
   2393 /*************************************************
   2394 *     Grep a file or recurse into a directory    *
   2395 *************************************************/
   2396 
   2397 /* Given a path name, if it's a directory, scan all the files if we are
   2398 recursing; if it's a file, grep it.
   2399 
   2400 Arguments:
   2401   pathname          the path to investigate
   2402   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   2403   only_one_at_top   TRUE if the path is the only one at toplevel
   2404 
   2405 Returns:  -1 the file/directory was skipped
   2406            0 if there was at least one match
   2407            1 if there were no matches
   2408            2 there was some kind of error
   2409 
   2410 However, file opening failures are suppressed if "silent" is set.
   2411 */
   2412 
   2413 static int
   2414 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   2415 {
   2416 int rc = 1;
   2417 int frtype;
   2418 void *handle;
   2419 char *lastcomp;
   2420 FILE *in = NULL;           /* Ensure initialized */
   2421 
   2422 #ifdef SUPPORT_LIBZ
   2423 gzFile ingz = NULL;
   2424 #endif
   2425 
   2426 #ifdef SUPPORT_LIBBZ2
   2427 BZFILE *inbz2 = NULL;
   2428 #endif
   2429 
   2430 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
   2431 int pathlen;
   2432 #endif
   2433 
   2434 #if defined NATIVE_ZOS
   2435 int zos_type;
   2436 FILE *zos_test_file;
   2437 #endif
   2438 
   2439 /* If the file name is "-" we scan stdin */
   2440 
   2441 if (strcmp(pathname, "-") == 0)
   2442   {
   2443   return pcre2grep(stdin, FR_PLAIN, stdin_name,
   2444     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
   2445       stdin_name : NULL);
   2446   }
   2447 
   2448 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
   2449 directories, whereas --include and --exclude apply to everything else. The test
   2450 is against the final component of the path. */
   2451 
   2452 lastcomp = strrchr(pathname, FILESEP);
   2453 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
   2454 
   2455 /* If the file is a directory, skip if not recursing or if explicitly excluded.
   2456 Otherwise, scan the directory and recurse for each path within it. The scanning
   2457 code is localized so it can be made system-specific. */
   2458 
   2459 
   2460 /* For z/OS, determine the file type. */
   2461 
   2462 #if defined NATIVE_ZOS
   2463 zos_test_file =  fopen(pathname,"rb");
   2464 
   2465 if (zos_test_file == NULL)
   2466    {
   2467    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
   2468      pathname, strerror(errno));
   2469    return -1;
   2470    }
   2471 zos_type = identifyzosfiletype (zos_test_file);
   2472 fclose (zos_test_file);
   2473 
   2474 /* Handle a PDS in separate code */
   2475 
   2476 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
   2477    {
   2478    return travelonpdsdir (pathname, only_one_at_top);
   2479    }
   2480 
   2481 /* Deal with regular files in the normal way below. These types are:
   2482    zos_type == __ZOS_PDS_MEMBER
   2483    zos_type == __ZOS_PS
   2484    zos_type == __ZOS_VSAM_KSDS
   2485    zos_type == __ZOS_VSAM_ESDS
   2486    zos_type == __ZOS_VSAM_RRDS
   2487 */
   2488 
   2489 /* Handle a z/OS directory using common code. */
   2490 
   2491 else if (zos_type == __ZOS_HFS)
   2492  {
   2493 #endif  /* NATIVE_ZOS */
   2494 
   2495 
   2496 /* Handle directories: common code for all OS */
   2497 
   2498 if (isdirectory(pathname))
   2499   {
   2500   if (dee_action == dee_SKIP ||
   2501       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
   2502     return -1;
   2503 
   2504   if (dee_action == dee_RECURSE)
   2505     {
   2506     char buffer[1024];
   2507     char *nextfile;
   2508     directory_type *dir = opendirectory(pathname);
   2509 
   2510     if (dir == NULL)
   2511       {
   2512       if (!silent)
   2513         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
   2514           strerror(errno));
   2515       return 2;
   2516       }
   2517 
   2518     while ((nextfile = readdirectory(dir)) != NULL)
   2519       {
   2520       int frc;
   2521       sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
   2522       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
   2523       if (frc > 1) rc = frc;
   2524        else if (frc == 0 && rc == 1) rc = 0;
   2525       }
   2526 
   2527     closedirectory(dir);
   2528     return rc;
   2529     }
   2530   }
   2531 
   2532 #if defined NATIVE_ZOS
   2533  }
   2534 #endif
   2535 
   2536 /* If the file is not a directory, check for a regular file, and if it is not,
   2537 skip it if that's been requested. Otherwise, check for an explicit inclusion or
   2538 exclusion. */
   2539 
   2540 else if (
   2541 #if defined NATIVE_ZOS
   2542         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
   2543 #else  /* all other OS */
   2544         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
   2545 #endif
   2546         !test_incexc(lastcomp, include_patterns, exclude_patterns))
   2547   return -1;  /* File skipped */
   2548 
   2549 /* Control reaches here if we have a regular file, or if we have a directory
   2550 and recursion or skipping was not requested, or if we have anything else and
   2551 skipping was not requested. The scan proceeds. If this is the first and only
   2552 argument at top level, we don't show the file name, unless we are only showing
   2553 the file name, or the filename was forced (-H). */
   2554 
   2555 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
   2556 pathlen = (int)(strlen(pathname));
   2557 #endif
   2558 
   2559 /* Open using zlib if it is supported and the file name ends with .gz. */
   2560 
   2561 #ifdef SUPPORT_LIBZ
   2562 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
   2563   {
   2564   ingz = gzopen(pathname, "rb");
   2565   if (ingz == NULL)
   2566     {
   2567     if (!silent)
   2568       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
   2569         strerror(errno));
   2570     return 2;
   2571     }
   2572   handle = (void *)ingz;
   2573   frtype = FR_LIBZ;
   2574   }
   2575 else
   2576 #endif
   2577 
   2578 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
   2579 
   2580 #ifdef SUPPORT_LIBBZ2
   2581 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
   2582   {
   2583   inbz2 = BZ2_bzopen(pathname, "rb");
   2584   handle = (void *)inbz2;
   2585   frtype = FR_LIBBZ2;
   2586   }
   2587 else
   2588 #endif
   2589 
   2590 /* Otherwise use plain fopen(). The label is so that we can come back here if
   2591 an attempt to read a .bz2 file indicates that it really is a plain file. */
   2592 
   2593 #ifdef SUPPORT_LIBBZ2
   2594 PLAIN_FILE:
   2595 #endif
   2596   {
   2597   in = fopen(pathname, "rb");
   2598   handle = (void *)in;
   2599   frtype = FR_PLAIN;
   2600   }
   2601 
   2602 /* All the opening methods return errno when they fail. */
   2603 
   2604 if (handle == NULL)
   2605   {
   2606   if (!silent)
   2607     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
   2608       strerror(errno));
   2609   return 2;
   2610   }
   2611 
   2612 /* Now grep the file */
   2613 
   2614 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
   2615   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
   2616 
   2617 /* Close in an appropriate manner. */
   2618 
   2619 #ifdef SUPPORT_LIBZ
   2620 if (frtype == FR_LIBZ)
   2621   gzclose(ingz);
   2622 else
   2623 #endif
   2624 
   2625 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
   2626 read failed. If the error indicates that the file isn't in fact bzipped, try
   2627 again as a normal file. */
   2628 
   2629 #ifdef SUPPORT_LIBBZ2
   2630 if (frtype == FR_LIBBZ2)
   2631   {
   2632   if (rc == 3)
   2633     {
   2634     int errnum;
   2635     const char *err = BZ2_bzerror(inbz2, &errnum);
   2636     if (errnum == BZ_DATA_ERROR_MAGIC)
   2637       {
   2638       BZ2_bzclose(inbz2);
   2639       goto PLAIN_FILE;
   2640       }
   2641     else if (!silent)
   2642       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
   2643         pathname, err);
   2644     rc = 2;    /* The normal "something went wrong" code */
   2645     }
   2646   BZ2_bzclose(inbz2);
   2647   }
   2648 else
   2649 #endif
   2650 
   2651 /* Normal file close */
   2652 
   2653 fclose(in);
   2654 
   2655 /* Pass back the yield from pcre2grep(). */
   2656 
   2657 return rc;
   2658 }
   2659 
   2660 
   2661 
   2662 /*************************************************
   2663 *    Handle a single-letter, no data option      *
   2664 *************************************************/
   2665 
   2666 static int
   2667 handle_option(int letter, int options)
   2668 {
   2669 switch(letter)
   2670   {
   2671   case N_FOFFSETS: file_offsets = TRUE; break;
   2672   case N_HELP: help(); pcre2grep_exit(0);
   2673   case N_LBUFFER: line_buffered = TRUE; break;
   2674   case N_LOFFSETS: line_offsets = number = TRUE; break;
   2675   case N_NOJIT: use_jit = FALSE; break;
   2676   case 'a': binary_files = BIN_TEXT; break;
   2677   case 'c': count_only = TRUE; break;
   2678   case 'F': process_options |= PO_FIXED_STRINGS; break;
   2679   case 'H': filenames = FN_FORCE; break;
   2680   case 'I': binary_files = BIN_NOMATCH; break;
   2681   case 'h': filenames = FN_NONE; break;
   2682   case 'i': options |= PCRE2_CASELESS; break;
   2683   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
   2684   case 'L': filenames = FN_NOMATCH_ONLY; break;
   2685   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
   2686   case 'n': number = TRUE; break;
   2687 
   2688   case 'o':
   2689   only_matching_last = add_number(0, only_matching_last);
   2690   if (only_matching == NULL) only_matching = only_matching_last;
   2691   break;
   2692 
   2693   case 'q': quiet = TRUE; break;
   2694   case 'r': dee_action = dee_RECURSE; break;
   2695   case 's': silent = TRUE; break;
   2696   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
   2697   case 'v': invert = TRUE; break;
   2698   case 'w': process_options |= PO_WORD_MATCH; break;
   2699   case 'x': process_options |= PO_LINE_MATCH; break;
   2700 
   2701   case 'V':
   2702     {
   2703     unsigned char buffer[128];
   2704     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
   2705     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
   2706     }
   2707   pcre2grep_exit(0);
   2708   break;
   2709 
   2710   default:
   2711   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
   2712   pcre2grep_exit(usage(2));
   2713   }
   2714 
   2715 return options;
   2716 }
   2717 
   2718 
   2719 
   2720 
   2721 /*************************************************
   2722 *          Construct printed ordinal             *
   2723 *************************************************/
   2724 
   2725 /* This turns a number into "1st", "3rd", etc. */
   2726 
   2727 static char *
   2728 ordin(int n)
   2729 {
   2730 static char buffer[14];
   2731 char *p = buffer;
   2732 sprintf(p, "%d", n);
   2733 while (*p != 0) p++;
   2734 switch (n%10)
   2735   {
   2736   case 1: strcpy(p, "st"); break;
   2737   case 2: strcpy(p, "nd"); break;
   2738   case 3: strcpy(p, "rd"); break;
   2739   default: strcpy(p, "th"); break;
   2740   }
   2741 return buffer;
   2742 }
   2743 
   2744 
   2745 
   2746 /*************************************************
   2747 *          Compile a single pattern              *
   2748 *************************************************/
   2749 
   2750 /* Do nothing if the pattern has already been compiled. This is the case for
   2751 include/exclude patterns read from a file.
   2752 
   2753 When the -F option has been used, each "pattern" may be a list of strings,
   2754 separated by line breaks. They will be matched literally. We split such a
   2755 string and compile the first substring, inserting an additional block into the
   2756 pattern chain.
   2757 
   2758 Arguments:
   2759   p              points to the pattern block
   2760   options        the PCRE options
   2761   popts          the processing options
   2762   fromfile       TRUE if the pattern was read from a file
   2763   fromtext       file name or identifying text (e.g. "include")
   2764   count          0 if this is the only command line pattern, or
   2765                  number of the command line pattern, or
   2766                  linenumber for a pattern from a file
   2767 
   2768 Returns:         TRUE on success, FALSE after an error
   2769 */
   2770 
   2771 static BOOL
   2772 compile_pattern(patstr *p, int options, int popts, int fromfile,
   2773   const char *fromtext, int count)
   2774 {
   2775 unsigned char buffer[PATBUFSIZE];
   2776 PCRE2_SIZE erroffset;
   2777 char *ps = p->string;
   2778 unsigned int patlen = strlen(ps);
   2779 int errcode;
   2780 
   2781 if (p->compiled != NULL) return TRUE;
   2782 
   2783 if ((popts & PO_FIXED_STRINGS) != 0)
   2784   {
   2785   int ellength;
   2786   char *eop = ps + patlen;
   2787   char *pe = end_of_line(ps, eop, &ellength);
   2788 
   2789   if (ellength != 0)
   2790     {
   2791     if (add_pattern(pe, p) == NULL) return FALSE;
   2792     patlen = (int)(pe - ps - ellength);
   2793     }
   2794   }
   2795 
   2796 sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
   2797 p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
   2798   &erroffset, compile_context);
   2799 
   2800 /* Handle successful compile */
   2801 
   2802 if (p->compiled != NULL)
   2803   {
   2804 #ifdef SUPPORT_PCRE2GREP_JIT
   2805   if (use_jit)
   2806     {
   2807     errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
   2808     if (errcode == 0) return TRUE;
   2809     erroffset = PCRE2_SIZE_MAX;     /* Will get reduced to patlen below */
   2810     }
   2811   else
   2812 #endif
   2813   return TRUE;
   2814   }
   2815 
   2816 /* Handle compile and JIT compile errors */
   2817 
   2818 erroffset -= (int)strlen(prefix[popts]);
   2819 if (erroffset > patlen) erroffset = patlen;
   2820 pcre2_get_error_message(errcode, buffer, PATBUFSIZE);
   2821 
   2822 if (fromfile)
   2823   {
   2824   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
   2825     "at offset %d: %s\n", count, fromtext, (int)erroffset, buffer);
   2826   }
   2827 else
   2828   {
   2829   if (count == 0)
   2830     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
   2831       fromtext, (int)erroffset, buffer);
   2832   else
   2833     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
   2834       ordin(count), fromtext, (int)erroffset, buffer);
   2835   }
   2836 
   2837 return FALSE;
   2838 }
   2839 
   2840 
   2841 
   2842 /*************************************************
   2843 *     Read and compile a file of patterns        *
   2844 *************************************************/
   2845 
   2846 /* This is used for --filelist, --include-from, and --exclude-from.
   2847 
   2848 Arguments:
   2849   name         the name of the file; "-" is stdin
   2850   patptr       pointer to the pattern chain anchor
   2851   patlastptr   pointer to the last pattern pointer
   2852   popts        the process options to pass to pattern_compile()
   2853 
   2854 Returns:       TRUE if all went well
   2855 */
   2856 
   2857 static BOOL
   2858 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
   2859 {
   2860 int linenumber = 0;
   2861 FILE *f;
   2862 char *filename;
   2863 char buffer[PATBUFSIZE];
   2864 
   2865 if (strcmp(name, "-") == 0)
   2866   {
   2867   f = stdin;
   2868   filename = stdin_name;
   2869   }
   2870 else
   2871   {
   2872   f = fopen(name, "r");
   2873   if (f == NULL)
   2874     {
   2875     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
   2876     return FALSE;
   2877     }
   2878   filename = name;
   2879   }
   2880 
   2881 while (fgets(buffer, PATBUFSIZE, f) != NULL)
   2882   {
   2883   char *s = buffer + (int)strlen(buffer);
   2884   while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
   2885   *s = 0;
   2886   linenumber++;
   2887   if (buffer[0] == 0) continue;   /* Skip blank lines */
   2888 
   2889   /* Note: this call to add_pattern() puts a pointer to the local variable
   2890   "buffer" into the pattern chain. However, that pointer is used only when
   2891   compiling the pattern, which happens immediately below, so we flatten it
   2892   afterwards, as a precaution against any later code trying to use it. */
   2893 
   2894   *patlastptr = add_pattern(buffer, *patlastptr);
   2895   if (*patlastptr == NULL)
   2896     {
   2897     if (f != stdin) fclose(f);
   2898     return FALSE;
   2899     }
   2900   if (*patptr == NULL) *patptr = *patlastptr;
   2901 
   2902   /* This loop is needed because compiling a "pattern" when -F is set may add
   2903   on additional literal patterns if the original contains a newline. In the
   2904   common case, it never will, because fgets() stops at a newline. However,
   2905   the -N option can be used to give pcre2grep a different newline setting. */
   2906 
   2907   for(;;)
   2908     {
   2909     if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
   2910         linenumber))
   2911       {
   2912       if (f != stdin) fclose(f);
   2913       return FALSE;
   2914       }
   2915     (*patlastptr)->string = NULL;            /* Insurance */
   2916     if ((*patlastptr)->next == NULL) break;
   2917     *patlastptr = (*patlastptr)->next;
   2918     }
   2919   }
   2920 
   2921 if (f != stdin) fclose(f);
   2922 return TRUE;
   2923 }
   2924 
   2925 
   2926 
   2927 /*************************************************
   2928 *                Main program                    *
   2929 *************************************************/
   2930 
   2931 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
   2932 
   2933 int
   2934 main(int argc, char **argv)
   2935 {
   2936 int i, j;
   2937 int rc = 1;
   2938 BOOL only_one_at_top;
   2939 patstr *cp;
   2940 fnstr *fn;
   2941 const char *locale_from = "--locale";
   2942 
   2943 #ifdef SUPPORT_PCRE2GREP_JIT
   2944 pcre2_jit_stack *jit_stack = NULL;
   2945 #endif
   2946 
   2947 /* In Windows, stdout is set up as a text stream, which means that \n is
   2948 converted to \r\n. This causes output lines that are copied from the input to
   2949 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
   2950 that stdout is a binary stream. Note that this means all other output to stdout
   2951 must use STDOUT_NL to terminate lines. */
   2952 
   2953 #if defined(_WIN32) || defined(WIN32)
   2954 _setmode( _fileno(stdout), _O_BINARY);
   2955 #endif
   2956 
   2957 /* Set up a default compile and match contexts and a match data block. */
   2958 
   2959 compile_context = pcre2_compile_context_create(NULL);
   2960 match_context = pcre2_match_context_create(NULL);
   2961 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
   2962 offsets = pcre2_get_ovector_pointer(match_data);
   2963 
   2964 /* If string (script) callouts are supported, set up the callout processing
   2965 function. */
   2966 
   2967 #ifdef SUPPORT_PCRE2GREP_CALLOUT
   2968 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
   2969 #endif
   2970 
   2971 /* Process the options */
   2972 
   2973 for (i = 1; i < argc; i++)
   2974   {
   2975   option_item *op = NULL;
   2976   char *option_data = (char *)"";    /* default to keep compiler happy */
   2977   BOOL longop;
   2978   BOOL longopwasequals = FALSE;
   2979 
   2980   if (argv[i][0] != '-') break;
   2981 
   2982   /* If we hit an argument that is just "-", it may be a reference to STDIN,
   2983   but only if we have previously had -e or -f to define the patterns. */
   2984 
   2985   if (argv[i][1] == 0)
   2986     {
   2987     if (pattern_files != NULL || patterns != NULL) break;
   2988       else pcre2grep_exit(usage(2));
   2989     }
   2990 
   2991   /* Handle a long name option, or -- to terminate the options */
   2992 
   2993   if (argv[i][1] == '-')
   2994     {
   2995     char *arg = argv[i] + 2;
   2996     char *argequals = strchr(arg, '=');
   2997 
   2998     if (*arg == 0)    /* -- terminates options */
   2999       {
   3000       i++;
   3001       break;                /* out of the options-handling loop */
   3002       }
   3003 
   3004     longop = TRUE;
   3005 
   3006     /* Some long options have data that follows after =, for example file=name.
   3007     Some options have variations in the long name spelling: specifically, we
   3008     allow "regexp" because GNU grep allows it, though I personally go along
   3009     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
   3010     These options are entered in the table as "regex(p)". Options can be in
   3011     both these categories. */
   3012 
   3013     for (op = optionlist; op->one_char != 0; op++)
   3014       {
   3015       char *opbra = strchr(op->long_name, '(');
   3016       char *equals = strchr(op->long_name, '=');
   3017 
   3018       /* Handle options with only one spelling of the name */
   3019 
   3020       if (opbra == NULL)     /* Does not contain '(' */
   3021         {
   3022         if (equals == NULL)  /* Not thing=data case */
   3023           {
   3024           if (strcmp(arg, op->long_name) == 0) break;
   3025           }
   3026         else                 /* Special case xxx=data */
   3027           {
   3028           int oplen = (int)(equals - op->long_name);
   3029           int arglen = (argequals == NULL)?
   3030             (int)strlen(arg) : (int)(argequals - arg);
   3031           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
   3032             {
   3033             option_data = arg + arglen;
   3034             if (*option_data == '=')
   3035               {
   3036               option_data++;
   3037               longopwasequals = TRUE;
   3038               }
   3039             break;
   3040             }
   3041           }
   3042         }
   3043 
   3044       /* Handle options with an alternate spelling of the name */
   3045 
   3046       else
   3047         {
   3048         char buff1[24];
   3049         char buff2[24];
   3050 
   3051         int baselen = (int)(opbra - op->long_name);
   3052         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
   3053         int arglen = (argequals == NULL || equals == NULL)?
   3054           (int)strlen(arg) : (int)(argequals - arg);
   3055 
   3056         sprintf(buff1, "%.*s", baselen, op->long_name);
   3057         sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
   3058 
   3059         if (strncmp(arg, buff1, arglen) == 0 ||
   3060            strncmp(arg, buff2, arglen) == 0)
   3061           {
   3062           if (equals != NULL && argequals != NULL)
   3063             {
   3064             option_data = argequals;
   3065             if (*option_data == '=')
   3066               {
   3067               option_data++;
   3068               longopwasequals = TRUE;
   3069               }
   3070             }
   3071           break;
   3072           }
   3073         }
   3074       }
   3075 
   3076     if (op->one_char == 0)
   3077       {
   3078       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
   3079       pcre2grep_exit(usage(2));
   3080       }
   3081     }
   3082 
   3083   /* Jeffrey Friedl's debugging harness uses these additional options which
   3084   are not in the right form for putting in the option table because they use
   3085   only one hyphen, yet are more than one character long. By putting them
   3086   separately here, they will not get displayed as part of the help() output,
   3087   but I don't think Jeffrey will care about that. */
   3088 
   3089 #ifdef JFRIEDL_DEBUG
   3090   else if (strcmp(argv[i], "-pre") == 0) {
   3091           jfriedl_prefix = argv[++i];
   3092           continue;
   3093   } else if (strcmp(argv[i], "-post") == 0) {
   3094           jfriedl_postfix = argv[++i];
   3095           continue;
   3096   } else if (strcmp(argv[i], "-XT") == 0) {
   3097           sscanf(argv[++i], "%d", &jfriedl_XT);
   3098           continue;
   3099   } else if (strcmp(argv[i], "-XR") == 0) {
   3100           sscanf(argv[++i], "%d", &jfriedl_XR);
   3101           continue;
   3102   }
   3103 #endif
   3104 
   3105 
   3106   /* One-char options; many that have no data may be in a single argument; we
   3107   continue till we hit the last one or one that needs data. */
   3108 
   3109   else
   3110     {
   3111     char *s = argv[i] + 1;
   3112     longop = FALSE;
   3113 
   3114     while (*s != 0)
   3115       {
   3116       for (op = optionlist; op->one_char != 0; op++)
   3117         {
   3118         if (*s == op->one_char) break;
   3119         }
   3120       if (op->one_char == 0)
   3121         {
   3122         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
   3123           *s, argv[i]);
   3124         pcre2grep_exit(usage(2));
   3125         }
   3126 
   3127       option_data = s+1;
   3128 
   3129       /* Break out if this is the last character in the string; it's handled
   3130       below like a single multi-char option. */
   3131 
   3132       if (*option_data == 0) break;
   3133 
   3134       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
   3135       are used for ones that either have a numerical number or defaults, i.e.
   3136       the data is optional. If a digit follows, there is data; if not, carry on
   3137       with other single-character options in the same string. */
   3138 
   3139       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
   3140         {
   3141         if (isdigit((unsigned char)s[1])) break;
   3142         }
   3143       else   /* Check for an option with data */
   3144         {
   3145         if (op->type != OP_NODATA) break;
   3146         }
   3147 
   3148       /* Handle a single-character option with no data, then loop for the
   3149       next character in the string. */
   3150 
   3151       pcre2_options = handle_option(*s++, pcre2_options);
   3152       }
   3153     }
   3154 
   3155   /* At this point we should have op pointing to a matched option. If the type
   3156   is NO_DATA, it means that there is no data, and the option might set
   3157   something in the PCRE options. */
   3158 
   3159   if (op->type == OP_NODATA)
   3160     {
   3161     pcre2_options = handle_option(op->one_char, pcre2_options);
   3162     continue;
   3163     }
   3164 
   3165   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
   3166   either has a value or defaults to something. It cannot have data in a
   3167   separate item. At the moment, the only such options are "colo(u)r",
   3168   "only-matching", and Jeffrey Friedl's special -S debugging option. */
   3169 
   3170   if (*option_data == 0 &&
   3171       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
   3172        op->type == OP_OP_NUMBERS))
   3173     {
   3174     switch (op->one_char)
   3175       {
   3176       case N_COLOUR:
   3177       colour_option = (char *)"auto";
   3178       break;
   3179 
   3180       case 'o':
   3181       only_matching_last = add_number(0, only_matching_last);
   3182       if (only_matching == NULL) only_matching = only_matching_last;
   3183       break;
   3184 
   3185 #ifdef JFRIEDL_DEBUG
   3186       case 'S':
   3187       S_arg = 0;
   3188       break;
   3189 #endif
   3190       }
   3191     continue;
   3192     }
   3193 
   3194   /* Otherwise, find the data string for the option. */
   3195 
   3196   if (*option_data == 0)
   3197     {
   3198     if (i >= argc - 1 || longopwasequals)
   3199       {
   3200       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
   3201       pcre2grep_exit(usage(2));
   3202       }
   3203     option_data = argv[++i];
   3204     }
   3205 
   3206   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
   3207   added to a chain of numbers. */
   3208 
   3209   if (op->type == OP_OP_NUMBERS)
   3210     {
   3211     unsigned long int n = decode_number(option_data, op, longop);
   3212     omdatastr *omd = (omdatastr *)op->dataptr;
   3213     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
   3214     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
   3215     }
   3216 
   3217   /* If the option type is OP_PATLIST, it's the -e option, or one of the
   3218   include/exclude options, which can be called multiple times to create lists
   3219   of patterns. */
   3220 
   3221   else if (op->type == OP_PATLIST)
   3222     {
   3223     patdatastr *pd = (patdatastr *)op->dataptr;
   3224     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
   3225     if (*(pd->lastptr) == NULL) goto EXIT2;
   3226     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
   3227     }
   3228 
   3229   /* If the option type is OP_FILELIST, it's one of the options that names a
   3230   file. */
   3231 
   3232   else if (op->type == OP_FILELIST)
   3233     {
   3234     fndatastr *fd = (fndatastr *)op->dataptr;
   3235     fn = (fnstr *)malloc(sizeof(fnstr));
   3236     if (fn == NULL)
   3237       {
   3238       fprintf(stderr, "pcre2grep: malloc failed\n");
   3239       goto EXIT2;
   3240       }
   3241     fn->next = NULL;
   3242     fn->name = option_data;
   3243     if (*(fd->anchor) == NULL)
   3244       *(fd->anchor) = fn;
   3245     else
   3246       (*(fd->lastptr))->next = fn;
   3247     *(fd->lastptr) = fn;
   3248     }
   3249 
   3250   /* Handle OP_BINARY_FILES */
   3251 
   3252   else if (op->type == OP_BINFILES)
   3253     {
   3254     if (strcmp(option_data, "binary") == 0)
   3255       binary_files = BIN_BINARY;
   3256     else if (strcmp(option_data, "without-match") == 0)
   3257       binary_files = BIN_NOMATCH;
   3258     else if (strcmp(option_data, "text") == 0)
   3259       binary_files = BIN_TEXT;
   3260     else
   3261       {
   3262       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
   3263         option_data);
   3264       pcre2grep_exit(usage(2));
   3265       }
   3266     }
   3267 
   3268   /* Otherwise, deal with a single string or numeric data value. */
   3269 
   3270   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
   3271            op->type != OP_OP_NUMBER)
   3272     {
   3273     *((char **)op->dataptr) = option_data;
   3274     }
   3275   else
   3276     {
   3277     unsigned long int n = decode_number(option_data, op, longop);
   3278     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
   3279       else *((int *)op->dataptr) = n;
   3280     }
   3281   }
   3282 
   3283 /* Options have been decoded. If -C was used, its value is used as a default
   3284 for -A and -B. */
   3285 
   3286 if (both_context > 0)
   3287   {
   3288   if (after_context == 0) after_context = both_context;
   3289   if (before_context == 0) before_context = both_context;
   3290   }
   3291 
   3292 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
   3293 However, all three set show_only_matching because they display, each in their
   3294 own way, only the data that has matched. */
   3295 
   3296 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
   3297     (file_offsets && line_offsets))
   3298   {
   3299   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --file-offsets "
   3300     "and/or --line-offsets\n");
   3301   pcre2grep_exit(usage(2));
   3302   }
   3303 
   3304 /* Put limits into the match data block. */
   3305 
   3306 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
   3307 if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
   3308 
   3309 if (only_matching != NULL || file_offsets || line_offsets)
   3310   show_only_matching = TRUE;
   3311 
   3312 /* If a locale has not been provided as an option, see if the LC_CTYPE or
   3313 LC_ALL environment variable is set, and if so, use it. */
   3314 
   3315 if (locale == NULL)
   3316   {
   3317   locale = getenv("LC_ALL");
   3318   locale_from = "LCC_ALL";
   3319   }
   3320 
   3321 if (locale == NULL)
   3322   {
   3323   locale = getenv("LC_CTYPE");
   3324   locale_from = "LC_CTYPE";
   3325   }
   3326 
   3327 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
   3328 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
   3329 
   3330 if (locale != NULL)
   3331   {
   3332   if (setlocale(LC_CTYPE, locale) == NULL)
   3333     {
   3334     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
   3335       locale, locale_from);
   3336     goto EXIT2;
   3337     }
   3338   character_tables = pcre2_maketables(NULL);
   3339   pcre2_set_character_tables(compile_context, character_tables);
   3340   }
   3341 
   3342 /* Sort out colouring */
   3343 
   3344 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
   3345   {
   3346   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
   3347   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
   3348   else
   3349     {
   3350     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
   3351       colour_option);
   3352     goto EXIT2;
   3353     }
   3354   if (do_colour)
   3355     {
   3356     char *cs = getenv("PCRE2GREP_COLOUR");
   3357     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
   3358     if (cs != NULL) colour_string = cs;
   3359     }
   3360   }
   3361 
   3362 /* Sort out a newline setting. */
   3363 
   3364 if (newline_arg != NULL)
   3365   {
   3366   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
   3367        endlinetype++)
   3368     {
   3369     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
   3370     }
   3371   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
   3372     pcre2_set_newline(compile_context, endlinetype);
   3373   else
   3374     {
   3375     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
   3376       newline_arg);
   3377     goto EXIT2;
   3378     }
   3379   }
   3380 
   3381 /* Find default newline convention */
   3382 
   3383 else
   3384   {
   3385   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
   3386   }
   3387 
   3388 /* Interpret the text values for -d and -D */
   3389 
   3390 if (dee_option != NULL)
   3391   {
   3392   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
   3393   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
   3394   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
   3395   else
   3396     {
   3397     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
   3398     goto EXIT2;
   3399     }
   3400   }
   3401 
   3402 if (DEE_option != NULL)
   3403   {
   3404   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
   3405   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
   3406   else
   3407     {
   3408     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
   3409     goto EXIT2;
   3410     }
   3411   }
   3412 
   3413 /* Check the values for Jeffrey Friedl's debugging options. */
   3414 
   3415 #ifdef JFRIEDL_DEBUG
   3416 if (S_arg > 9)
   3417   {
   3418   fprintf(stderr, "pcre2grep: bad value for -S option\n");
   3419   return 2;
   3420   }
   3421 if (jfriedl_XT != 0 || jfriedl_XR != 0)
   3422   {
   3423   if (jfriedl_XT == 0) jfriedl_XT = 1;
   3424   if (jfriedl_XR == 0) jfriedl_XR = 1;
   3425   }
   3426 #endif
   3427 
   3428 /* Get memory for the main buffer. */
   3429 
   3430 bufsize = 3*bufthird;
   3431 main_buffer = (char *)malloc(bufsize);
   3432 
   3433 if (main_buffer == NULL)
   3434   {
   3435   fprintf(stderr, "pcre2grep: malloc failed\n");
   3436   goto EXIT2;
   3437   }
   3438 
   3439 /* If no patterns were provided by -e, and there are no files provided by -f,
   3440 the first argument is the one and only pattern, and it must exist. */
   3441 
   3442 if (patterns == NULL && pattern_files == NULL)
   3443   {
   3444   if (i >= argc) return usage(2);
   3445   patterns = patterns_last = add_pattern(argv[i++], NULL);
   3446   if (patterns == NULL) goto EXIT2;
   3447   }
   3448 
   3449 /* Compile the patterns that were provided on the command line, either by
   3450 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
   3451 after all the command-line options are read so that we know which PCRE options
   3452 to use. When -F is used, compile_pattern() may add another block into the
   3453 chain, so we must not access the next pointer till after the compile. */
   3454 
   3455 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
   3456   {
   3457   if (!compile_pattern(cp, pcre2_options, process_options, FALSE, "command-line",
   3458        (j == 1 && patterns->next == NULL)? 0 : j))
   3459     goto EXIT2;
   3460   }
   3461 
   3462 /* Read and compile the regular expressions that are provided in files. */
   3463 
   3464 for (fn = pattern_files; fn != NULL; fn = fn->next)
   3465   {
   3466   if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
   3467     goto EXIT2;
   3468   }
   3469 
   3470 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
   3471 
   3472 #ifdef SUPPORT_PCRE2GREP_JIT
   3473 if (use_jit)
   3474   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
   3475 #endif
   3476 
   3477 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
   3478   {
   3479 #ifdef SUPPORT_PCRE2GREP_JIT
   3480   if (jit_stack != NULL && cp->compiled != NULL)
   3481     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
   3482 #endif
   3483   }
   3484 
   3485 /* If there are include or exclude patterns read from the command line, compile
   3486 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
   3487 0. */
   3488 
   3489 for (j = 0; j < 4; j++)
   3490   {
   3491   int k;
   3492   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   3493     {
   3494     if (!compile_pattern(cp, pcre2_options, 0, FALSE, incexname[j],
   3495          (k == 1 && cp->next == NULL)? 0 : k))
   3496       goto EXIT2;
   3497     }
   3498   }
   3499 
   3500 /* Read and compile include/exclude patterns from files. */
   3501 
   3502 for (fn = include_from; fn != NULL; fn = fn->next)
   3503   {
   3504   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
   3505     goto EXIT2;
   3506   }
   3507 
   3508 for (fn = exclude_from; fn != NULL; fn = fn->next)
   3509   {
   3510   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
   3511     goto EXIT2;
   3512   }
   3513 
   3514 /* If there are no files that contain lists of files to search, and there are
   3515 no file arguments, search stdin, and then exit. */
   3516 
   3517 if (file_lists == NULL && i >= argc)
   3518   {
   3519   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
   3520     (filenames > FN_DEFAULT)? stdin_name : NULL);
   3521   goto EXIT;
   3522   }
   3523 
   3524 /* If any files that contain a list of files to search have been specified,
   3525 read them line by line and search the given files. */
   3526 
   3527 for (fn = file_lists; fn != NULL; fn = fn->next)
   3528   {
   3529   char buffer[PATBUFSIZE];
   3530   FILE *fl;
   3531   if (strcmp(fn->name, "-") == 0) fl = stdin; else
   3532     {
   3533     fl = fopen(fn->name, "rb");
   3534     if (fl == NULL)
   3535       {
   3536       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
   3537         strerror(errno));
   3538       goto EXIT2;
   3539       }
   3540     }
   3541   while (fgets(buffer, PATBUFSIZE, fl) != NULL)
   3542     {
   3543     int frc;
   3544     char *end = buffer + (int)strlen(buffer);
   3545     while (end > buffer && isspace(end[-1])) end--;
   3546     *end = 0;
   3547     if (*buffer != 0)
   3548       {
   3549       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
   3550       if (frc > 1) rc = frc;
   3551         else if (frc == 0 && rc == 1) rc = 0;
   3552       }
   3553     }
   3554   if (fl != stdin) fclose(fl);
   3555   }
   3556 
   3557 /* After handling file-list, work through remaining arguments. Pass in the fact
   3558 that there is only one argument at top level - this suppresses the file name if
   3559 the argument is not a directory and filenames are not otherwise forced. */
   3560 
   3561 only_one_at_top = i == argc - 1 && file_lists == NULL;
   3562 
   3563 for (; i < argc; i++)
   3564   {
   3565   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
   3566     only_one_at_top);
   3567   if (frc > 1) rc = frc;
   3568     else if (frc == 0 && rc == 1) rc = 0;
   3569   }
   3570 
   3571 EXIT:
   3572 #ifdef SUPPORT_PCRE2GREP_JIT
   3573 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
   3574 #endif
   3575 
   3576 free(main_buffer);
   3577 free((void *)character_tables);
   3578 
   3579 pcre2_compile_context_free(compile_context);
   3580 pcre2_match_context_free(match_context);
   3581 pcre2_match_data_free(match_data);
   3582 
   3583 free_pattern_chain(patterns);
   3584 free_pattern_chain(include_patterns);
   3585 free_pattern_chain(include_dir_patterns);
   3586 free_pattern_chain(exclude_patterns);
   3587 free_pattern_chain(exclude_dir_patterns);
   3588 
   3589 free_file_chain(exclude_from);
   3590 free_file_chain(include_from);
   3591 free_file_chain(pattern_files);
   3592 free_file_chain(file_lists);
   3593 
   3594 while (only_matching != NULL)
   3595   {
   3596   omstr *this = only_matching;
   3597   only_matching = this->next;
   3598   free(this);
   3599   }
   3600 
   3601 pcre2grep_exit(rc);
   3602 
   3603 EXIT2:
   3604 rc = 2;
   3605 goto EXIT;
   3606 }
   3607 
   3608 /* End of pcre2grep */
   3609