1 /* $NetBSD: util.c,v 1.17 2013/01/21 03:24:43 msaitoh Exp $ */ 2 /* $FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $ */ 3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4 5 /*- 6 * Copyright (c) 1999 James Howard and Dag-Erling Codan Smrgrav 7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #if HAVE_NBTOOL_CONFIG_H 33 #include "nbtool_config.h" 34 #endif 35 36 #include <sys/cdefs.h> 37 __RCSID("$NetBSD: util.c,v 1.17 2013/01/21 03:24:43 msaitoh Exp $"); 38 39 #include <sys/stat.h> 40 #include <sys/types.h> 41 42 #include <ctype.h> 43 #include <err.h> 44 #include <errno.h> 45 #include <fnmatch.h> 46 #include <fts.h> 47 #include <libgen.h> 48 #include <stdbool.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <unistd.h> 53 #include <wchar.h> 54 #include <wctype.h> 55 56 #include "grep.h" 57 58 static bool first, first_global = true; 59 static unsigned long long since_printed; 60 61 static int procline(struct str *l, int); 62 63 bool 64 file_matching(const char *fname) 65 { 66 char *fname_base, *fname_copy; 67 unsigned int i; 68 bool ret; 69 70 ret = finclude ? false : true; 71 fname_copy = grep_strdup(fname); 72 fname_base = basename(fname_copy); 73 74 for (i = 0; i < fpatterns; ++i) { 75 if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 76 fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 77 if (fpattern[i].mode == EXCL_PAT) { 78 free(fname_copy); 79 return (false); 80 } else 81 ret = true; 82 } 83 } 84 free(fname_copy); 85 return (ret); 86 } 87 88 static inline bool 89 dir_matching(const char *dname) 90 { 91 unsigned int i; 92 bool ret; 93 94 ret = dinclude ? false : true; 95 96 for (i = 0; i < dpatterns; ++i) { 97 if (dname != NULL && 98 fnmatch(dname, dpattern[i].pat, 0) == 0) { 99 if (dpattern[i].mode == EXCL_PAT) 100 return (false); 101 else 102 ret = true; 103 } 104 } 105 return (ret); 106 } 107 108 /* 109 * Processes a directory when a recursive search is performed with 110 * the -R option. Each appropriate file is passed to procfile(). 111 */ 112 int 113 grep_tree(char **argv) 114 { 115 FTS *fts; 116 FTSENT *p; 117 char *d, *dir = NULL; 118 int c, fts_flags; 119 bool ok; 120 121 c = fts_flags = 0; 122 123 switch(linkbehave) { 124 case LINK_EXPLICIT: 125 fts_flags = FTS_COMFOLLOW; 126 break; 127 case LINK_SKIP: 128 fts_flags = FTS_PHYSICAL; 129 break; 130 default: 131 fts_flags = FTS_LOGICAL; 132 133 } 134 135 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 136 137 if (!(fts = fts_open(argv, fts_flags, NULL))) 138 err(2, "fts_open"); 139 while ((p = fts_read(fts)) != NULL) { 140 switch (p->fts_info) { 141 case FTS_DNR: 142 /* FALLTHROUGH */ 143 case FTS_ERR: 144 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); 145 break; 146 case FTS_D: 147 /* FALLTHROUGH */ 148 case FTS_DP: 149 break; 150 case FTS_DC: 151 /* Print a warning for recursive directory loop */ 152 warnx("warning: %s: recursive directory loop", 153 p->fts_path); 154 break; 155 default: 156 /* Check for file exclusion/inclusion */ 157 ok = true; 158 if (dexclude || dinclude) { 159 if ((d = strrchr(p->fts_path, '/')) != NULL) { 160 dir = grep_malloc(sizeof(char) * 161 (d - p->fts_path + 1)); 162 memcpy(dir, p->fts_path, 163 d - p->fts_path); 164 dir[d - p->fts_path] = '\0'; 165 } 166 ok = dir_matching(dir); 167 free(dir); 168 dir = NULL; 169 } 170 if (fexclude || finclude) 171 ok &= file_matching(p->fts_path); 172 173 if (ok) 174 c += procfile(p->fts_path); 175 break; 176 } 177 } 178 179 fts_close(fts); 180 return (c); 181 } 182 183 /* 184 * Opens a file and processes it. Each file is processed line-by-line 185 * passing the lines to procline(). 186 */ 187 int 188 procfile(const char *fn) 189 { 190 struct file *f; 191 struct stat sb; 192 struct str ln; 193 mode_t s; 194 int c, t; 195 196 if (mflag && (mcount <= 0)) 197 return (0); 198 199 if (strcmp(fn, "-") == 0) { 200 fn = label != NULL ? label : getstr(1); 201 f = grep_open(NULL); 202 } else { 203 if (!stat(fn, &sb)) { 204 /* Check if we need to process the file */ 205 s = sb.st_mode & S_IFMT; 206 if (s == S_IFDIR && dirbehave == DIR_SKIP) 207 return (0); 208 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 209 || s == S_IFSOCK) && devbehave == DEV_SKIP) 210 return (0); 211 } 212 f = grep_open(fn); 213 } 214 if (f == NULL) { 215 if (!sflag) 216 warn("%s", fn); 217 if (errno == ENOENT) 218 notfound = true; 219 return (0); 220 } 221 222 ln.file = grep_malloc(strlen(fn) + 1); 223 strcpy(ln.file, fn); 224 ln.line_no = 0; 225 ln.len = 0; 226 tail = 0; 227 ln.off = -1; 228 229 for (first = true, c = 0; c == 0 || !(lflag || qflag); ) { 230 ln.off += ln.len + 1; 231 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) 232 break; 233 if (ln.len > 0 && ln.dat[ln.len - 1] == line_sep) 234 --ln.len; 235 ln.line_no++; 236 237 /* Return if we need to skip a binary file */ 238 if (f->binary && binbehave == BINFILE_SKIP) { 239 grep_close(f); 240 free(ln.file); 241 free(f); 242 return (0); 243 } 244 /* Process the file line-by-line */ 245 t = procline(&ln, f->binary); 246 c += t; 247 248 /* Count the matches if we have a match limit */ 249 if (mflag) { 250 mcount -= t; 251 if (mcount <= 0) 252 break; 253 } 254 } 255 if (Bflag > 0) 256 clearqueue(); 257 grep_close(f); 258 259 if (cflag) { 260 if (!hflag) 261 printf("%s:", ln.file); 262 printf("%u%c", c, line_sep); 263 } 264 if (lflag && !qflag && c != 0) 265 printf("%s%c", fn, line_sep); 266 if (Lflag && !qflag && c == 0) 267 printf("%s%c", fn, line_sep); 268 if (c && !cflag && !lflag && !Lflag && 269 binbehave == BINFILE_BIN && f->binary && !qflag) 270 printf(getstr(8), fn); 271 272 free(ln.file); 273 free(f); 274 return (c); 275 } 276 277 #define iswword(x) (iswalnum((x)) || (x) == L'_') 278 279 /* 280 * Processes a line comparing it with the specified patterns. Each pattern 281 * is looped to be compared along with the full string, saving each and every 282 * match, which is necessary to colorize the output and to count the 283 * matches. The matching lines are passed to printline() to display the 284 * appropriate output. 285 */ 286 static int 287 procline(struct str *l, int nottext) 288 { 289 regmatch_t matches[MAX_LINE_MATCHES]; 290 regmatch_t pmatch; 291 size_t st = 0; 292 unsigned int i; 293 int c = 0, m = 0, r = 0; 294 295 /* Loop to process the whole line */ 296 while (st <= l->len) { 297 pmatch.rm_so = st; 298 pmatch.rm_eo = l->len; 299 300 /* Loop to compare with all the patterns */ 301 for (i = 0; i < patterns; i++) { 302 /* 303 * XXX: grep_search() is a workaround for speed up and should be 304 * removed in the future. See fastgrep.c. 305 */ 306 if (fg_pattern[i].pattern) { 307 r = grep_search(&fg_pattern[i], 308 (unsigned char *)l->dat, 309 l->len, &pmatch); 310 r = (r == 0) ? 0 : REG_NOMATCH; 311 st = pmatch.rm_eo; 312 } else { 313 r = regexec(&r_pattern[i], l->dat, 1, 314 &pmatch, eflags); 315 r = (r == 0) ? 0 : REG_NOMATCH; 316 st = pmatch.rm_eo; 317 } 318 if (r == REG_NOMATCH) 319 continue; 320 /* Check for full match */ 321 if (xflag && 322 (pmatch.rm_so != 0 || 323 (size_t)pmatch.rm_eo != l->len)) 324 continue; 325 /* Check for whole word match */ 326 if (fg_pattern[i].word && pmatch.rm_so != 0) { 327 wchar_t wbegin, wend; 328 329 wbegin = wend = L' '; 330 if (pmatch.rm_so != 0 && 331 sscanf(&l->dat[pmatch.rm_so - 1], 332 "%lc", &wbegin) != 1) 333 continue; 334 if ((size_t)pmatch.rm_eo != l->len && 335 sscanf(&l->dat[pmatch.rm_eo], 336 "%lc", &wend) != 1) 337 continue; 338 if (iswword(wbegin) || iswword(wend)) 339 continue; 340 } 341 c = 1; 342 if (m < MAX_LINE_MATCHES) 343 matches[m++] = pmatch; 344 /* matches - skip further patterns */ 345 if ((color != NULL && !oflag) || qflag || lflag) 346 break; 347 } 348 349 if (vflag) { 350 c = !c; 351 break; 352 } 353 /* One pass if we are not recording matches */ 354 if ((color != NULL && !oflag) || qflag || lflag) 355 break; 356 357 if (st == (size_t)pmatch.rm_so) 358 break; /* No matches */ 359 } 360 361 if (c && binbehave == BINFILE_BIN && nottext) 362 return (c); /* Binary file */ 363 364 /* Dealing with the context */ 365 if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) { 366 if (c) { 367 if ((Aflag || Bflag) && !first_global && 368 (first || since_printed > Bflag)) 369 printf("--\n"); 370 tail = Aflag; 371 if (Bflag > 0) 372 printqueue(); 373 printline(l, ':', matches, m); 374 } else { 375 printline(l, '-', matches, m); 376 tail--; 377 } 378 first = false; 379 first_global = false; 380 since_printed = 0; 381 } else { 382 if (Bflag) 383 enqueue(l); 384 since_printed++; 385 } 386 return (c); 387 } 388 389 /* 390 * Safe malloc() for internal use. 391 */ 392 void * 393 grep_malloc(size_t size) 394 { 395 void *ptr; 396 397 if ((ptr = malloc(size)) == NULL) 398 err(2, "malloc"); 399 return (ptr); 400 } 401 402 /* 403 * Safe calloc() for internal use. 404 */ 405 void * 406 grep_calloc(size_t nmemb, size_t size) 407 { 408 void *ptr; 409 410 if ((ptr = calloc(nmemb, size)) == NULL) 411 err(2, "calloc"); 412 return (ptr); 413 } 414 415 /* 416 * Safe realloc() for internal use. 417 */ 418 void * 419 grep_realloc(void *ptr, size_t size) 420 { 421 422 if ((ptr = realloc(ptr, size)) == NULL) 423 err(2, "realloc"); 424 return (ptr); 425 } 426 427 /* 428 * Safe strdup() for internal use. 429 */ 430 char * 431 grep_strdup(const char *str) 432 { 433 char *ret; 434 435 if ((ret = strdup(str)) == NULL) 436 err(2, "strdup"); 437 return (ret); 438 } 439 440 /* 441 * Prints a matching line according to the command line options. 442 */ 443 void 444 printline(struct str *line, int sep, regmatch_t *matches, int m) 445 { 446 size_t a = 0; 447 int i, n = 0; 448 449 if (!hflag) { 450 if (nullflag == 0) 451 fputs(line->file, stdout); 452 else { 453 printf("%s", line->file); 454 putchar(0); 455 } 456 ++n; 457 } 458 if (nflag) { 459 if (n > 0) 460 putchar(sep); 461 printf("%d", line->line_no); 462 ++n; 463 } 464 if (bflag) { 465 if (n > 0) 466 putchar(sep); 467 printf("%lld", (long long)line->off); 468 ++n; 469 } 470 if (n) 471 putchar(sep); 472 /* --color and -o */ 473 if ((oflag || color) && m > 0) { 474 for (i = 0; i < m; i++) { 475 if (!oflag) 476 fwrite(line->dat + a, matches[i].rm_so - a, 1, 477 stdout); 478 if (color) 479 fprintf(stdout, "\33[%sm\33[K", color); 480 481 fwrite(line->dat + matches[i].rm_so, 482 matches[i].rm_eo - matches[i].rm_so, 1, 483 stdout); 484 if (color) 485 fprintf(stdout, "\33[m\33[K"); 486 a = matches[i].rm_eo; 487 if (oflag) 488 putchar('\n'); 489 } 490 if (!oflag) { 491 if (line->len - a > 0) 492 fwrite(line->dat + a, line->len - a, 1, stdout); 493 putchar(line_sep); 494 } 495 } else { 496 fwrite(line->dat, line->len, 1, stdout); 497 putchar(line_sep); 498 } 499 } 500