Home | History | Annotate | Download | only in posix
/* grep.c - print lines that match a given regular expression
      2  *
      3  * Copyright 2013 CE Strake <strake888 at gmail.com>
      4  *
      5  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html
      6  *
      7  * TODO: --color, "Binary file %s matches"
      8  *
 * POSIX doesn't even specify -r, so documenting deviations from it is silly.
     10 
     11 USE_GREP(NEWTOY(grep, "S(exclude)*M(include)*C#B#A#ZzEFHabhinorsvwclqe*f*m#x[!wx][!EFw]", TOYFLAG_BIN))
     12 USE_EGREP(OLDTOY(egrep, grep, TOYFLAG_BIN))
     13 USE_FGREP(OLDTOY(fgrep, grep, TOYFLAG_BIN))
     14 
     15 config GREP
     16   bool "grep"
     17   default y
     18   help
     19     usage: grep [-EFrivwcloqsHbhn] [-ABC NUM] [-m MAX] [-e REGEX]... [-MS PATTERN]... [-f REGFILE] [FILE]...
     20 
     21     Show lines matching regular expressions. If no -e, first argument is
     22     regular expression to match. With no files (or "-" filename) read stdin.
     23     Returns 0 if matched, 1 if no match found.
     24 
     25     -e  Regex to match. (May be repeated.)
     26     -f  File listing regular expressions to match.
     27 
     28     file search:
     29     -r  Recurse into subdirectories (defaults FILE to ".")
     30     -M  Match filename pattern (--include)
     31     -S  Skip filename pattern (--exclude)
     32 
     33     match type:
     34     -A  Show NUM lines after     -B  Show NUM lines before match
     35     -C  NUM lines context (A+B)  -E  extended regex syntax
     36     -F  fixed (literal match)    -i  case insensitive
     37     -m  match MAX many lines     -v  invert match
     38     -w  whole word (implies -E)  -x  whole line
     39     -z  input NUL terminated
     40 
     41     display modes: (default: matched line)
     42     -c  count of matching lines  -l  show matching filenames
     43     -o  only matching part       -q  quiet (errors only)
     44     -s  silent (no error msg)    -Z  output NUL terminated
     45 
     46     output prefix (default: filename if checking more than 1 file)
     47     -H  force filename           -b  byte offset of match
     48     -h  hide filename            -n  line number of match
     49 
     50 config EGREP
     51   bool
     52   default y
     53   depends on GREP
     54 
     55 config FGREP
     56   bool
     57   default y
     58   depends on GREP
     59 */
     60 
     61 #define FOR_grep
     62 #include "toys.h"
     63 #include <regex.h>
     64 
// Per-command state reached through TT. The leading fields receive option
// arguments (presumably filled by the option parser in an order tied to the
// NEWTOY() option string -- confirm before reordering); the trailing fields
// are set by this file's own code.
GLOBALS(
  long m;             // -m: stop reading a file once this many lines matched
  struct arg_list *f; // -f: files whose lines are added to the pattern list
  struct arg_list *e; // -e: patterns; parse_regex() replaces this with the split list
  long a;             // -A: lines of context shown after a match
  long b;             // -B: lines of context kept to show before a match
  long c;             // -C: default for a and b when those are zero
  struct arg_list *M; // -M/--include: recursive search only greps matching names
  struct arg_list *S; // -S/--exclude: recursive search skips matching names

  char indelim, outdelim; // input/output record delimiters: '\n', or 0 with -z/-Z
  int found;              // set once any match is seen; drives the exit status
)
     78 
     79 // Emit line with various potential prefixes and delimiter
     80 static void outline(char *line, char dash, char *name, long lcount, long bcount,
     81   int trim)
     82 {
     83   if (name && (toys.optflags&FLAG_H)) printf("%s%c", name, dash);
     84   if (!line || (lcount && (toys.optflags&FLAG_n)))
     85     printf("%ld%c", lcount, line ? dash : TT.outdelim);
     86   if (bcount && (toys.optflags&FLAG_b)) printf("%ld%c", bcount-1, dash);
     87   if (line) xprintf("%.*s%c", trim, line, TT.outdelim);
     88 }
     89 
     90 // Show matches in one file
     91 static void do_grep(int fd, char *name)
     92 {
     93   struct double_list *dlb = 0;
     94   FILE *file = fdopen(fd, "r");
     95   long lcount = 0, mcount = 0, offset = 0, after = 0, before = 0;
     96   char *bars = 0;
     97 
     98   if (!fd) name = "(standard input)";
     99 
    100   if (!file) {
    101     perror_msg("%s", name);
    102 
    103     return;
    104   }
    105 
    106   // Loop through lines of input
    107   for (;;) {
    108     char *line = 0, *start;
    109     regmatch_t matches;
    110     size_t unused;
    111     long len;
    112     int mmatch = 0;
    113 
    114     lcount++;
    115     errno = 0;
    116     len = getdelim(&line, &unused, TT.indelim, file);
    117     if (errno) perror_msg("%s", name);
    118     if (len<1) break;
    119     if (line[len-1] == TT.indelim) line[len-1] = 0;
    120 
    121     start = line;
    122 
    123     // Loop through matches in this line
    124     do {
    125       int rc = 0, skip = 0;
    126 
    127       // Handle non-regex matches
    128       if (toys.optflags & FLAG_F) {
    129         struct arg_list *seek, fseek;
    130         char *s = 0;
    131 
    132         for (seek = TT.e; seek; seek = seek->next) {
    133           if (toys.optflags & FLAG_x) {
    134             int i = (toys.optflags & FLAG_i);
    135 
    136             if ((i ? strcasecmp : strcmp)(seek->arg, line)) s = line;
    137           } else if (!*seek->arg) {
    138             seek = &fseek;
    139             fseek.arg = s = line;
    140             break;
    141           }
    142           if (toys.optflags & FLAG_i) s = strnstr(line, seek->arg);
    143           else s = strstr(line, seek->arg);
    144           if (s) break;
    145         }
    146 
    147         if (s) {
    148           matches.rm_so = (s-line);
    149           skip = matches.rm_eo = (s-line)+strlen(seek->arg);
    150         } else rc = 1;
    151       } else {
    152         rc = regexec((regex_t *)toybuf, start, 1, &matches,
    153                      start==line ? 0 : REG_NOTBOL);
    154         skip = matches.rm_eo;
    155       }
    156 
    157       if (toys.optflags & FLAG_x)
    158         if (matches.rm_so || line[matches.rm_eo]) rc = 1;
    159 
    160       if (!rc && (toys.optflags & FLAG_w)) {
    161         char c = 0;
    162 
    163         if ((start+matches.rm_so)!=line) {
    164           c = start[matches.rm_so-1];
    165           if (!isalnum(c) && c != '_') c = 0;
    166         }
    167         if (!c) {
    168           c = start[matches.rm_eo];
    169           if (!isalnum(c) && c != '_') c = 0;
    170         }
    171         if (c) {
    172           start += matches.rm_so+1;
    173 
    174           continue;
    175         }
    176       }
    177 
    178       if (toys.optflags & FLAG_v) {
    179         if (toys.optflags & FLAG_o) {
    180           if (rc) skip = matches.rm_eo = strlen(start);
    181           else if (!matches.rm_so) {
    182             start += skip;
    183             continue;
    184           } else matches.rm_eo = matches.rm_so;
    185         } else {
    186           if (!rc) break;
    187           matches.rm_eo = strlen(start);
    188         }
    189         matches.rm_so = 0;
    190       } else if (rc) break;
    191 
    192       // At least one line we didn't print since match while -ABC active
    193       if (bars) {
    194         xputs(bars);
    195         bars = 0;
    196       }
    197       mmatch++;
    198       TT.found = 1;
    199       if (toys.optflags & FLAG_q) {
    200         toys.exitval = 0;
    201         xexit();
    202       }
    203       if (toys.optflags & FLAG_l) {
    204         xprintf("%s%c", name, TT.outdelim);
    205         free(line);
    206         fclose(file);
    207         return;
    208       }
    209       if (toys.optflags & FLAG_o)
    210         if (matches.rm_eo == matches.rm_so)
    211           break;
    212 
    213       if (!(toys.optflags & FLAG_c)) {
    214         long bcount = 1 + offset + (start-line) +
    215           ((toys.optflags & FLAG_o) ? matches.rm_so : 0);
    216 
    217         if (!(toys.optflags & FLAG_o)) {
    218           while (dlb) {
    219             struct double_list *dl = dlist_pop(&dlb);
    220 
    221             outline(dl->data, '-', name, lcount-before, 0, -1);
    222             free(dl->data);
    223             free(dl);
    224             before--;
    225           }
    226 
    227           outline(line, ':', name, lcount, bcount, -1);
    228           if (TT.a) after = TT.a+1;
    229         } else outline(start+matches.rm_so, ':', name, lcount, bcount,
    230                        matches.rm_eo-matches.rm_so);
    231       }
    232 
    233       start += skip;
    234       if (!(toys.optflags & FLAG_o)) break;
    235     } while (*start);
    236     offset += len;
    237 
    238     if (mmatch) mcount++;
    239     else {
    240       int discard = (after || TT.b);
    241 
    242       if (after && --after) {
    243         outline(line, '-', name, lcount, 0, -1);
    244         discard = 0;
    245       }
    246       if (discard && TT.b) {
    247         dlist_add(&dlb, line);
    248         line = 0;
    249         if (++before>TT.b) {
    250           struct double_list *dl;
    251 
    252           dl = dlist_pop(&dlb);
    253           free(dl->data);
    254           free(dl);
    255           before--;
    256         } else discard = 0;
    257       }
    258       // If we discarded a line while displaying context, show bars before next
    259       // line (but don't show them now in case that was last match in file)
    260       if (discard && mcount) bars = "--";
    261     }
    262     free(line);
    263 
    264     if ((toys.optflags & FLAG_m) && mcount >= TT.m) break;
    265   }
    266 
    267   if (toys.optflags & FLAG_c) outline(0, ':', name, mcount, 0, -1);
    268 
    269   // loopfiles will also close the fd, but this frees an (opaque) struct.
    270   fclose(file);
    271 }
    272 
    273 static void parse_regex(void)
    274 {
    275   struct arg_list *al, *new, *list = NULL;
    276   long len = 0;
    277   char *s, *ss;
    278 
    279   // Add all -f lines to -e list. (Yes, this is leaking allocation context for
    280   // exit to free. Not supporting nofork for this command any time soon.)
    281   al = TT.f ? TT.f : TT.e;
    282   while (al) {
    283     if (TT.f) s = ss = xreadfile(al->arg, 0, 0);
    284     else s = ss = al->arg;
    285 
    286     // Split lines at \n, add individual lines to new list.
    287     do {
    288       ss = strchr(s, '\n');
    289       if (ss) *(ss++) = 0;
    290       new = xmalloc(sizeof(struct arg_list));
    291       new->next = list;
    292       new->arg = s;
    293       list = new;
    294       s = ss;
    295     } while (ss && *s);
    296 
    297     // Advance, when we run out of -f switch to -e.
    298     al = al->next;
    299     if (!al && TT.f) {
    300       TT.f = 0;
    301       al = TT.e;
    302     }
    303   }
    304   TT.e = list;
    305 
    306   if (!(toys.optflags & FLAG_F)) {
    307     char *regstr;
    308     int i;
    309 
    310     // Convert strings to one big regex
    311     for (al = TT.e; al; al = al->next)
    312       len += strlen(al->arg)+1+!(toys.optflags & FLAG_E);
    313 
    314     regstr = s = xmalloc(len);
    315     for (al = TT.e; al; al = al->next) {
    316       s = stpcpy(s, al->arg);
    317       if (!(toys.optflags & FLAG_E)) *(s++) = '\\';
    318       *(s++) = '|';
    319     }
    320     *(s-=(1+!(toys.optflags & FLAG_E))) = 0;
    321 
    322     i = regcomp((regex_t *)toybuf, regstr,
    323                 ((toys.optflags & FLAG_E) ? REG_EXTENDED : 0) |
    324                 ((toys.optflags & FLAG_i) ? REG_ICASE    : 0));
    325 
    326     if (i) {
    327       regerror(i, (regex_t *)toybuf, toybuf+sizeof(regex_t),
    328                sizeof(toybuf)-sizeof(regex_t));
    329       error_exit("bad REGEX: %s", toybuf);
    330     }
    331   }
    332 }
    333 
    334 static int do_grep_r(struct dirtree *new)
    335 {
    336   char *name;
    337 
    338   if (!dirtree_notdotdot(new)) return 0;
    339   if (S_ISDIR(new->st.st_mode)) return DIRTREE_RECURSE;
    340   if (TT.S || TT.M) {
    341     struct arg_list *al;
    342 
    343     for (al = TT.S; al; al = al->next)
    344       if (!fnmatch(al->arg, new->name, 0)) return 0;
    345 
    346     if (TT.M) {
    347       for (al = TT.M; al; al = al->next)
    348         if (!fnmatch(al->arg, new->name, 0)) break;
    349 
    350       if (!al) return 0;
    351     }
    352   }
    353 
    354   // "grep -r onefile" doesn't show filenames, but "grep -r onedir" should.
    355   if (new->parent && !(toys.optflags & FLAG_h)) toys.optflags |= FLAG_H;
    356 
    357   name = dirtree_path(new, 0);
    358   do_grep(openat(dirtree_parentfd(new), new->name, 0), name);
    359   free(name);
    360 
    361   return 0;
    362 }
    363 
    364 void grep_main(void)
    365 {
    366   char **ss = toys.optargs;
    367 
    368   // Grep exits with 2 for errors
    369   toys.exitval = 2;
    370 
    371   if (!TT.a) TT.a = TT.c;
    372   if (!TT.b) TT.b = TT.c;
    373 
    374   TT.indelim = '\n' * !(toys.optflags&FLAG_z);
    375   TT.outdelim = '\n' * !(toys.optflags&FLAG_Z);
    376 
    377   // Handle egrep and fgrep
    378   if (*toys.which->name == 'e') toys.optflags |= FLAG_E;
    379   if (*toys.which->name == 'f') toys.optflags |= FLAG_F;
    380 
    381   if (!TT.e && !TT.f) {
    382     if (!*ss) error_exit("no REGEX");
    383     TT.e = xzalloc(sizeof(struct arg_list));
    384     TT.e->arg = *(ss++);
    385     toys.optc--;
    386   }
    387 
    388   parse_regex();
    389 
    390   if (!(toys.optflags & FLAG_h) && toys.optc>1) toys.optflags |= FLAG_H;
    391 
    392   if (toys.optflags & FLAG_s) {
    393     close(2);
    394     xopen_stdio("/dev/null", O_RDWR);
    395   }
    396 
    397   if (toys.optflags & FLAG_r) {
    398     // Iterate through -r arguments. Use "." as default if none provided.
    399     for (ss = *ss ? ss : (char *[]){".", 0}; *ss; ss++) {
    400       if (!strcmp(*ss, "-")) do_grep(0, *ss);
    401       else dirtree_read(*ss, do_grep_r);
    402     }
    403   } else loopfiles_rw(ss, O_RDONLY|WARN_ONLY, 0, do_grep);
    404   toys.exitval = !TT.found;
    405 }
    406