Home | History | Annotate | Download | only in grep
      1 /*	$NetBSD: util.c,v 1.16 2012/05/06 22:32:05 joerg Exp $	*/
      2 /*	$FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $	*/
      3 /*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
      4 
      5 /*-
      6  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
      7  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
      8  * All rights reserved.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #if HAVE_NBTOOL_CONFIG_H
     33 #include "nbtool_config.h"
     34 #endif
     35 
     36 #include <sys/cdefs.h>
     37 __RCSID("$NetBSD: util.c,v 1.16 2012/05/06 22:32:05 joerg Exp $");
     38 
     39 #include <sys/stat.h>
     40 #include <sys/types.h>
     41 
     42 #include <ctype.h>
     43 #include <err.h>
     44 #include <errno.h>
     45 #include <fnmatch.h>
     46 #include <fts.h>
     47 #include <libgen.h>
     48 #include <stdbool.h>
     49 #include <stdio.h>
     50 #include <stdlib.h>
     51 #include <string.h>
     52 #include <unistd.h>
     53 #include <wchar.h>
     54 #include <wctype.h>
     55 
     56 #include "grep.h"
     57 
     58 static bool	 first, first_global = true;
     59 static unsigned long long since_printed;
     60 
     61 static int	 procline(struct str *l, int);
     62 
     63 bool
     64 file_matching(const char *fname)
     65 {
     66 	char *fname_base, *fname_copy;
     67 	unsigned int i;
     68 	bool ret;
     69 
     70 	ret = finclude ? false : true;
     71 	fname_copy = grep_strdup(fname);
     72 	fname_base = basename(fname_copy);
     73 
     74 	for (i = 0; i < fpatterns; ++i) {
     75 		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
     76 		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
     77 			if (fpattern[i].mode == EXCL_PAT)
     78 				return (false);
     79 			else
     80 				ret = true;
     81 		}
     82 	}
     83 	free(fname_copy);
     84 	return (ret);
     85 }
     86 
     87 static inline bool
     88 dir_matching(const char *dname)
     89 {
     90 	unsigned int i;
     91 	bool ret;
     92 
     93 	ret = dinclude ? false : true;
     94 
     95 	for (i = 0; i < dpatterns; ++i) {
     96 		if (dname != NULL &&
     97 		    fnmatch(dname, dpattern[i].pat, 0) == 0) {
     98 			if (dpattern[i].mode == EXCL_PAT)
     99 				return (false);
    100 			else
    101 				ret = true;
    102 		}
    103 	}
    104 	return (ret);
    105 }
    106 
    107 /*
    108  * Processes a directory when a recursive search is performed with
    109  * the -R option.  Each appropriate file is passed to procfile().
    110  */
    111 int
    112 grep_tree(char **argv)
    113 {
    114 	FTS *fts;
    115 	FTSENT *p;
    116 	char *d, *dir = NULL;
    117 	int c, fts_flags;
    118 	bool ok;
    119 
    120 	c = fts_flags = 0;
    121 
    122 	switch(linkbehave) {
    123 	case LINK_EXPLICIT:
    124 		fts_flags = FTS_COMFOLLOW;
    125 		break;
    126 	case LINK_SKIP:
    127 		fts_flags = FTS_PHYSICAL;
    128 		break;
    129 	default:
    130 		fts_flags = FTS_LOGICAL;
    131 
    132 	}
    133 
    134 	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
    135 
    136 	if (!(fts = fts_open(argv, fts_flags, NULL)))
    137 		err(2, "fts_open");
    138 	while ((p = fts_read(fts)) != NULL) {
    139 		switch (p->fts_info) {
    140 		case FTS_DNR:
    141 			/* FALLTHROUGH */
    142 		case FTS_ERR:
    143 			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
    144 			break;
    145 		case FTS_D:
    146 			/* FALLTHROUGH */
    147 		case FTS_DP:
    148 			break;
    149 		case FTS_DC:
    150 			/* Print a warning for recursive directory loop */
    151 			warnx("warning: %s: recursive directory loop",
    152 				p->fts_path);
    153 			break;
    154 		default:
    155 			/* Check for file exclusion/inclusion */
    156 			ok = true;
    157 			if (dexclude || dinclude) {
    158 				if ((d = strrchr(p->fts_path, '/')) != NULL) {
    159 					dir = grep_malloc(sizeof(char) *
    160 					    (d - p->fts_path + 1));
    161 					memcpy(dir, p->fts_path,
    162 					    d - p->fts_path);
    163 					dir[d - p->fts_path] = '\0';
    164 				}
    165 				ok = dir_matching(dir);
    166 				free(dir);
    167 				dir = NULL;
    168 			}
    169 			if (fexclude || finclude)
    170 				ok &= file_matching(p->fts_path);
    171 
    172 			if (ok)
    173 				c += procfile(p->fts_path);
    174 			break;
    175 		}
    176 	}
    177 
    178 	fts_close(fts);
    179 	return (c);
    180 }
    181 
    182 /*
    183  * Opens a file and processes it.  Each file is processed line-by-line
    184  * passing the lines to procline().
    185  */
    186 int
    187 procfile(const char *fn)
    188 {
    189 	struct file *f;
    190 	struct stat sb;
    191 	struct str ln;
    192 	mode_t s;
    193 	int c, t;
    194 
    195 	if (mflag && (mcount <= 0))
    196 		return (0);
    197 
    198 	if (strcmp(fn, "-") == 0) {
    199 		fn = label != NULL ? label : getstr(1);
    200 		f = grep_open(NULL);
    201 	} else {
    202 		if (!stat(fn, &sb)) {
    203 			/* Check if we need to process the file */
    204 			s = sb.st_mode & S_IFMT;
    205 			if (s == S_IFDIR && dirbehave == DIR_SKIP)
    206 				return (0);
    207 			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
    208 				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
    209 					return (0);
    210 		}
    211 		f = grep_open(fn);
    212 	}
    213 	if (f == NULL) {
    214 		if (!sflag)
    215 			warn("%s", fn);
    216 		if (errno == ENOENT)
    217 			notfound = true;
    218 		return (0);
    219 	}
    220 
    221 	ln.file = grep_malloc(strlen(fn) + 1);
    222 	strcpy(ln.file, fn);
    223 	ln.line_no = 0;
    224 	ln.len = 0;
    225 	tail = 0;
    226 	ln.off = -1;
    227 
    228 	for (first = true, c = 0;  c == 0 || !(lflag || qflag); ) {
    229 		ln.off += ln.len + 1;
    230 		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0)
    231 			break;
    232 		if (ln.len > 0 && ln.dat[ln.len - 1] == line_sep)
    233 			--ln.len;
    234 		ln.line_no++;
    235 
    236 		/* Return if we need to skip a binary file */
    237 		if (f->binary && binbehave == BINFILE_SKIP) {
    238 			grep_close(f);
    239 			free(ln.file);
    240 			free(f);
    241 			return (0);
    242 		}
    243 		/* Process the file line-by-line */
    244 		t = procline(&ln, f->binary);
    245 		c += t;
    246 
    247 		/* Count the matches if we have a match limit */
    248 		if (mflag) {
    249 			mcount -= t;
    250 			if (mcount <= 0)
    251 				break;
    252 		}
    253 	}
    254 	if (Bflag > 0)
    255 		clearqueue();
    256 	grep_close(f);
    257 
    258 	if (cflag) {
    259 		if (!hflag)
    260 			printf("%s:", ln.file);
    261 		printf("%u%c", c, line_sep);
    262 	}
    263 	if (lflag && !qflag && c != 0)
    264 		printf("%s%c", fn, line_sep);
    265 	if (Lflag && !qflag && c == 0)
    266 		printf("%s%c", fn, line_sep);
    267 	if (c && !cflag && !lflag && !Lflag &&
    268 	    binbehave == BINFILE_BIN && f->binary && !qflag)
    269 		printf(getstr(8), fn);
    270 
    271 	free(ln.file);
    272 	free(f);
    273 	return (c);
    274 }
    275 
    276 #define iswword(x)	(iswalnum((x)) || (x) == L'_')
    277 
    278 /*
    279  * Processes a line comparing it with the specified patterns.  Each pattern
    280  * is looped to be compared along with the full string, saving each and every
    281  * match, which is necessary to colorize the output and to count the
    282  * matches.  The matching lines are passed to printline() to display the
    283  * appropriate output.
    284  */
    285 static int
    286 procline(struct str *l, int nottext)
    287 {
    288 	regmatch_t matches[MAX_LINE_MATCHES];
    289 	regmatch_t pmatch;
    290 	size_t st = 0;
    291 	unsigned int i;
    292 	int c = 0, m = 0, r = 0;
    293 
    294 	/* Loop to process the whole line */
    295 	while (st <= l->len) {
    296 		pmatch.rm_so = st;
    297 		pmatch.rm_eo = l->len;
    298 
    299 		/* Loop to compare with all the patterns */
    300 		for (i = 0; i < patterns; i++) {
    301 /*
    302  * XXX: grep_search() is a workaround for speed up and should be
    303  * removed in the future.  See fastgrep.c.
    304  */
    305 			if (fg_pattern[i].pattern) {
    306 				r = grep_search(&fg_pattern[i],
    307 				    (unsigned char *)l->dat,
    308 				    l->len, &pmatch);
    309 				r = (r == 0) ? 0 : REG_NOMATCH;
    310 				st = pmatch.rm_eo;
    311 			} else {
    312 				r = regexec(&r_pattern[i], l->dat, 1,
    313 				    &pmatch, eflags);
    314 				r = (r == 0) ? 0 : REG_NOMATCH;
    315 				st = pmatch.rm_eo;
    316 			}
    317 			if (r == REG_NOMATCH)
    318 				continue;
    319 			/* Check for full match */
    320 			if (xflag &&
    321 			    (pmatch.rm_so != 0 ||
    322 			     (size_t)pmatch.rm_eo != l->len))
    323 				continue;
    324 			/* Check for whole word match */
    325 			if (fg_pattern[i].word && pmatch.rm_so != 0) {
    326 				wint_t wbegin, wend;
    327 
    328 				wbegin = wend = L' ';
    329 				if (pmatch.rm_so != 0 &&
    330 				    sscanf(&l->dat[pmatch.rm_so - 1],
    331 				    "%lc", &wbegin) != 1)
    332 					continue;
    333 				if ((size_t)pmatch.rm_eo != l->len &&
    334 				    sscanf(&l->dat[pmatch.rm_eo],
    335 				    "%lc", &wend) != 1)
    336 					continue;
    337 				if (iswword(wbegin) || iswword(wend))
    338 					continue;
    339 			}
    340 			c = 1;
    341 			if (m < MAX_LINE_MATCHES)
    342 				matches[m++] = pmatch;
    343 			/* matches - skip further patterns */
    344 			if ((color != NULL && !oflag) || qflag || lflag)
    345 				break;
    346 		}
    347 
    348 		if (vflag) {
    349 			c = !c;
    350 			break;
    351 		}
    352 		/* One pass if we are not recording matches */
    353 		if ((color != NULL && !oflag) || qflag || lflag)
    354 			break;
    355 
    356 		if (st == (size_t)pmatch.rm_so)
    357 			break; 	/* No matches */
    358 	}
    359 
    360 	if (c && binbehave == BINFILE_BIN && nottext)
    361 		return (c); /* Binary file */
    362 
    363 	/* Dealing with the context */
    364 	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
    365 		if (c) {
    366 			if ((Aflag || Bflag) && !first_global &&
    367 			    (first || since_printed > Bflag))
    368 				printf("--\n");
    369 			tail = Aflag;
    370 			if (Bflag > 0)
    371 				printqueue();
    372 			printline(l, ':', matches, m);
    373 		} else {
    374 			printline(l, '-', matches, m);
    375 			tail--;
    376 		}
    377 		first = false;
    378 		first_global = false;
    379 		since_printed = 0;
    380 	} else {
    381 		if (Bflag)
    382 			enqueue(l);
    383 		since_printed++;
    384 	}
    385 	return (c);
    386 }
    387 
    388 /*
    389  * Safe malloc() for internal use.
    390  */
    391 void *
    392 grep_malloc(size_t size)
    393 {
    394 	void *ptr;
    395 
    396 	if ((ptr = malloc(size)) == NULL)
    397 		err(2, "malloc");
    398 	return (ptr);
    399 }
    400 
    401 /*
    402  * Safe calloc() for internal use.
    403  */
    404 void *
    405 grep_calloc(size_t nmemb, size_t size)
    406 {
    407 	void *ptr;
    408 
    409 	if ((ptr = calloc(nmemb, size)) == NULL)
    410 		err(2, "calloc");
    411 	return (ptr);
    412 }
    413 
    414 /*
    415  * Safe realloc() for internal use.
    416  */
    417 void *
    418 grep_realloc(void *ptr, size_t size)
    419 {
    420 
    421 	if ((ptr = realloc(ptr, size)) == NULL)
    422 		err(2, "realloc");
    423 	return (ptr);
    424 }
    425 
    426 /*
    427  * Safe strdup() for internal use.
    428  */
    429 char *
    430 grep_strdup(const char *str)
    431 {
    432 	char *ret;
    433 
    434 	if ((ret = strdup(str)) == NULL)
    435 		err(2, "strdup");
    436 	return (ret);
    437 }
    438 
    439 /*
    440  * Prints a matching line according to the command line options.
    441  */
    442 void
    443 printline(struct str *line, int sep, regmatch_t *matches, int m)
    444 {
    445 	size_t a = 0;
    446 	int i, n = 0;
    447 
    448 	if (!hflag) {
    449 		if (nullflag == 0)
    450 			fputs(line->file, stdout);
    451 		else {
    452 			printf("%s", line->file);
    453 			putchar(0);
    454 		}
    455 		++n;
    456 	}
    457 	if (nflag) {
    458 		if (n > 0)
    459 			putchar(sep);
    460 		printf("%d", line->line_no);
    461 		++n;
    462 	}
    463 	if (bflag) {
    464 		if (n > 0)
    465 			putchar(sep);
    466 		printf("%lld", (long long)line->off);
    467 		++n;
    468 	}
    469 	if (n)
    470 		putchar(sep);
    471 	/* --color and -o */
    472 	if ((oflag || color) && m > 0) {
    473 		for (i = 0; i < m; i++) {
    474 			if (!oflag)
    475 				fwrite(line->dat + a, matches[i].rm_so - a, 1,
    476 				    stdout);
    477 			if (color)
    478 				fprintf(stdout, "\33[%sm\33[K", color);
    479 
    480 				fwrite(line->dat + matches[i].rm_so,
    481 				    matches[i].rm_eo - matches[i].rm_so, 1,
    482 				    stdout);
    483 			if (color)
    484 				fprintf(stdout, "\33[m\33[K");
    485 			a = matches[i].rm_eo;
    486 			if (oflag)
    487 				putchar('\n');
    488 		}
    489 		if (!oflag) {
    490 			if (line->len - a > 0)
    491 				fwrite(line->dat + a, line->len - a, 1, stdout);
    492 			putchar(line_sep);
    493 		}
    494 	} else {
    495 		fwrite(line->dat, line->len, 1, stdout);
    496 		putchar(line_sep);
    497 	}
    498 }
    499