1 /* $NetBSD: grep.c,v 1.4 2011/02/16 01:31:33 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #include "fastmatch.h"
61 * Default messages to use when NLS is disabled or no catalogue
64 const char *errstr[] = {
66 /* 1*/ "(standard input)",
67 /* 2*/ "cannot read bzip2 compressed file",
68 /* 3*/ "unknown %s option",
69 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
70 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
71 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
72 /* 7*/ "\t[--null] [pattern] [file ...]\n",
73 /* 8*/ "Binary file %s matches\n",
74 /* 9*/ "%s (BSD grep) %s\n",
77 /* Flags passed to regcomp() and regexec() */
78 int cflags = REG_NOSUB;
79 int eflags = REG_STARTEND;
81 /* Shortcut for matching all cases like empty regex */
84 /* Searching patterns */
85 unsigned int patterns, pattern_sz;
88 fastmatch_t *fg_pattern;
90 /* Filename exclusion/inclusion patterns */
91 unsigned int fpatterns, fpattern_sz;
92 unsigned int dpatterns, dpattern_sz;
93 struct epat *dpattern, *fpattern;
95 /* For regex errors */
96 char re_error[RE_ERROR_BUF + 1];
98 /* Command-line flags: file-scope state filled in while main() parses options */
99 unsigned long long Aflag; /* -A x: print x lines trailing each match */
100 unsigned long long Bflag; /* -B x: print x lines leading each match */
101 bool Hflag; /* -H: always print file name */
102 bool Lflag; /* -L: only show names of files with no matches */
103 bool bflag; /* -b: show byte offset of each match (long form --byte-offset) */
104 bool cflag; /* -c: only show a count of matching lines */
105 bool hflag; /* -h: don't print filename headers */
106 bool iflag; /* -i: ignore case */
107 bool lflag; /* -l: only show names of files with matches */
108 bool mflag; /* -m x: stop reading the files after x matches */
109 long long mcount; /* count for -m */
110 long long mlimit; /* requested value for -m */
111 bool nflag; /* -n: show line numbers in front of matching lines */
112 bool oflag; /* -o: print only matching part */
113 bool qflag; /* -q: quiet mode (don't output anything) */
114 bool sflag; /* -s: silent mode (ignore errors) */
115 bool vflag; /* -v: only show non-matching lines */
116 bool wflag; /* -w: pattern must start and end on word boundaries */
117 bool xflag; /* -x: pattern must match entire line */
118 bool lbflag; /* --line-buffered */
119 bool nullflag; /* --null */
120 char *label; /* --label */
121 const char *color; /* --color */
122 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
123 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
124 int filebehave = FILE_STDIO; /* -JMXZ or program name: plain, gzip, bzip2, xz or lzma input */
125 int devbehave = DEV_READ; /* -D: handling of devices */
126 int dirbehave = DIR_READ; /* -dRr: handling of directories */
127 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
129 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
130 bool fexclude, finclude; /* --exclude and --include */
133 BIN_OPT = CHAR_MAX + 1,
146 static inline const char *init_color(const char *);
149 bool first = true; /* flag whether we are processing the first match */
150 bool prev; /* flag whether or not the previous line matched */
151 int tail; /* lines left to print */
152 bool file_err; /* file reading error */
155 * Prints usage information and returns 2.
160 fprintf(stderr, getstr(4), getprogname());
161 fprintf(stderr, "%s", getstr(5));
162 fprintf(stderr, "%s", getstr(6));
163 fprintf(stderr, "%s", getstr(7));
167 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
/*
 * Long-option table handed to getopt_long() in main().
 *
 * Every entry has a NULL flag pointer, so getopt_long() returns the value
 * in the fourth field.  Long options that have a short form reuse that
 * letter (e.g. "quiet" and "silent" both yield 'q'); the remaining ones
 * use the *_OPT codes, of which BIN_OPT is defined as CHAR_MAX + 1 so
 * that it cannot collide with a plain character.
 * NOTE(review): the other *_OPT values are not visible here; presumably
 * they are the enumerators following BIN_OPT -- confirm.
 * The all-zero entry at the end terminates the table.
 */
169 struct option long_options[] =
171 {"binary-files", required_argument, NULL, BIN_OPT},
172 {"help", no_argument, NULL, HELP_OPT},
173 {"mmap", no_argument, NULL, MMAP_OPT},
174 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
175 {"label", required_argument, NULL, LABEL_OPT},
176 {"null", no_argument, NULL, NULL_OPT},
177 {"color", optional_argument, NULL, COLOR_OPT},
178 {"colour", optional_argument, NULL, COLOR_OPT},
179 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
180 {"include", required_argument, NULL, R_INCLUDE_OPT},
181 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
182 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
183 {"after-context", required_argument, NULL, 'A'},
184 {"text", no_argument, NULL, 'a'},
185 {"before-context", required_argument, NULL, 'B'},
186 {"byte-offset", no_argument, NULL, 'b'},
187 {"context", optional_argument, NULL, 'C'},
188 {"count", no_argument, NULL, 'c'},
189 {"devices", required_argument, NULL, 'D'},
190 {"directories", required_argument, NULL, 'd'},
191 {"extended-regexp", no_argument, NULL, 'E'},
192 {"regexp", required_argument, NULL, 'e'},
193 {"fixed-strings", no_argument, NULL, 'F'},
194 {"file", required_argument, NULL, 'f'},
195 {"basic-regexp", no_argument, NULL, 'G'},
196 {"no-filename", no_argument, NULL, 'h'},
197 {"with-filename", no_argument, NULL, 'H'},
198 {"ignore-case", no_argument, NULL, 'i'},
199 {"bz2decompress", no_argument, NULL, 'J'},
200 {"files-with-matches", no_argument, NULL, 'l'},
201 {"files-without-match", no_argument, NULL, 'L'},
202 {"max-count", required_argument, NULL, 'm'},
203 {"lzma", no_argument, NULL, 'M'},
204 {"line-number", no_argument, NULL, 'n'},
205 {"only-matching", no_argument, NULL, 'o'},
206 {"quiet", no_argument, NULL, 'q'},
207 {"silent", no_argument, NULL, 'q'},
208 {"recursive", no_argument, NULL, 'r'},
209 {"no-messages", no_argument, NULL, 's'},
210 {"binary", no_argument, NULL, 'U'},
211 {"unix-byte-offsets", no_argument, NULL, 'u'},
212 {"invert-match", no_argument, NULL, 'v'},
213 {"version", no_argument, NULL, 'V'},
214 {"word-regexp", no_argument, NULL, 'w'},
215 {"line-regexp", no_argument, NULL, 'x'},
216 {"xz", no_argument, NULL, 'X'},
217 {"decompress", no_argument, NULL, 'Z'},
218 {NULL, no_argument, NULL, 0}
222 * Adds a searching pattern to the internal array.
225 add_pattern(char *pat, size_t len)
228 /* Do not add further patterns if we already match everything */
232 /* Check if we can do a shortcut */
/* Release every pattern stored so far and shrink the array to a single entry with no text */
235 for (unsigned int i = 0; i < patterns; i++) {
236 free(pattern[i].pat);
238 pattern = grep_realloc(pattern, sizeof(struct pat));
239 pattern[0].pat = NULL;
244 /* Increase size if necessary */
245 if (patterns == pattern_sz) {
247 pattern = grep_realloc(pattern, ++pattern_sz *
/* A trailing newline (e.g. from a line read out of a pattern file) is detected here */
250 if (len > 0 && pat[len - 1] == '\n')
252 /* pat may not be NUL-terminated, so copy exactly len bytes and terminate explicitly */
253 pattern[patterns].pat = grep_malloc(len + 1);
254 memcpy(pattern[patterns].pat, pat, len);
255 pattern[patterns].len = len;
256 pattern[patterns].pat[len] = '\0';
261 * Adds a file include/exclude pattern to the internal array.
264 add_fpattern(const char *pat, int mode)
267 /* Reallocate first when the used count has reached the allocated size */
268 if (fpatterns == fpattern_sz) {
270 fpattern = grep_realloc(fpattern, ++fpattern_sz *
271 sizeof(struct epat));
/* Store a grep_strdup() duplicate of the pattern with its mode (callers pass INCL_PAT or EXCL_PAT) */
273 fpattern[fpatterns].pat = grep_strdup(pat);
274 fpattern[fpatterns].mode = mode;
279 * Adds a directory include/exclude pattern to the internal array.
282 add_dpattern(const char *pat, int mode)
285 /* Reallocate first when the used count has reached the allocated size */
286 if (dpatterns == dpattern_sz) {
288 dpattern = grep_realloc(dpattern, ++dpattern_sz *
289 sizeof(struct epat));
/* Store a grep_strdup() duplicate of the pattern with its mode (callers pass INCL_PAT or EXCL_PAT) */
291 dpattern[dpatterns].pat = grep_strdup(pat);
292 dpattern[dpatterns].mode = mode;
297 * Reads searching patterns from a file and adds them with add_pattern().
300 read_patterns(const char *fn)
307 if ((f = fopen(fn, "r")) == NULL)
309 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
/*
 * Hand every line to add_pattern().  A line that starts with '\n'
 * (an empty line) is passed with length 0; any other line is passed
 * with the length reported by fgetln().
 */
313 while ((line = fgetln(f, &len)) != NULL)
314 add_pattern(line, line[0] == '\n' ? 0 : len);
/*
 * Returns the value of the GREP_COLOR environment variable when it is
 * set and non-empty; otherwise returns the caller-supplied default d.
 */
320 static inline const char *
321 init_color(const char *d)
325 c = getenv("GREP_COLOR");
326 return (c != NULL && c[0] != '\0' ? c : d);
330 main(int argc, char *argv[])
332 char **aargv, **eargv, *eopts;
335 unsigned long long l;
336 unsigned int aargc, eargc, i;
337 int c, lastc, needpattern, newarg, prevoptind;
339 setlocale(LC_ALL, "");
342 catalog = catopen("grep", NL_CAT_LOCALE);
345 /* Check what is the program name of the binary. In this
346 way we can have all the functionalities in one binary
347 without the need of scripting and using ugly hacks. */
349 if (pn[0] == 'b' && pn[1] == 'z') {
350 filebehave = FILE_BZIP;
352 } else if (pn[0] == 'x' && pn[1] == 'z') {
353 filebehave = FILE_XZ;
355 } else if (pn[0] == 'l' && pn[1] == 'z') {
356 filebehave = FILE_LZMA;
358 } else if (pn[0] == 'z') {
359 filebehave = FILE_GZIP;
364 grepbehave = GREP_EXTENDED;
367 grepbehave = GREP_FIXED;
376 eopts = getenv("GREP_OPTIONS");
378 /* support for extra arguments in GREP_OPTIONS */
380 if (eopts != NULL && eopts[0] != '\0') {
383 /* make an estimation of how many extra arguments we have */
384 for (unsigned int j = 0; j < strlen(eopts); j++)
388 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
391 /* parse extra arguments */
392 while ((str = strsep(&eopts, " ")) != NULL)
394 eargv[eargc++] = grep_strdup(str);
396 aargv = (char **)grep_calloc(eargc + argc + 1,
400 for (i = 0; i < eargc; i++)
401 aargv[i + 1] = eargv[i];
402 for (int j = 1; j < argc; j++, i++)
403 aargv[i + 1] = argv[j];
405 aargc = eargc + argc;
411 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
414 case '0': case '1': case '2': case '3': case '4':
415 case '5': case '6': case '7': case '8': case '9':
416 if (newarg || !isdigit(lastc))
418 else if (Aflag > LLONG_MAX / 10) {
422 Aflag = Bflag = (Aflag * 10) + (c - '0');
425 if (optarg == NULL) {
434 l = strtoull(optarg, &ep, 10);
435 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
436 ((errno == EINVAL) && (l == 0)))
438 else if (ep[0] != '\0') {
450 binbehave = BINFILE_TEXT;
459 if (strcasecmp(optarg, "skip") == 0)
460 devbehave = DEV_SKIP;
461 else if (strcasecmp(optarg, "read") == 0)
462 devbehave = DEV_READ;
464 errx(2, getstr(3), "--devices");
467 if (strcasecmp("recurse", optarg) == 0) {
469 dirbehave = DIR_RECURSE;
470 } else if (strcasecmp("skip", optarg) == 0)
471 dirbehave = DIR_SKIP;
472 else if (strcasecmp("read", optarg) == 0)
473 dirbehave = DIR_READ;
475 errx(2, getstr(3), "--directories");
478 grepbehave = GREP_EXTENDED;
483 char *string = optarg;
485 while ((token = strsep(&string, "\n")) != NULL)
486 add_pattern(token, strlen(token));
491 grepbehave = GREP_FIXED;
494 read_patterns(optarg);
498 grepbehave = GREP_BASIC;
508 binbehave = BINFILE_SKIP;
518 err(2, "bzip2 support was disabled at compile-time");
520 filebehave = FILE_BZIP;
533 mlimit = mcount = strtoll(optarg, &ep, 10);
534 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
535 ((errno == EINVAL) && (mcount == 0)))
537 else if (ep[0] != '\0') {
543 filebehave = FILE_LZMA;
549 linkbehave = LINK_EXPLICIT;
553 cflags &= ~REG_NOSUB;
556 linkbehave = LINK_SKIP;
562 linkbehave = LINK_READ;
566 dirbehave = DIR_RECURSE;
573 binbehave = BINFILE_BIN;
577 filebehave = FILE_MMAP;
580 printf(getstr(9), getprogname(), VERSION);
587 cflags &= ~REG_NOSUB;
591 cflags &= ~REG_NOSUB;
594 filebehave = FILE_XZ;
597 filebehave = FILE_GZIP;
600 if (strcasecmp("binary", optarg) == 0)
601 binbehave = BINFILE_BIN;
602 else if (strcasecmp("without-match", optarg) == 0)
603 binbehave = BINFILE_SKIP;
604 else if (strcasecmp("text", optarg) == 0)
605 binbehave = BINFILE_TEXT;
607 errx(2, getstr(3), "--binary-files");
611 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
612 strcasecmp("tty", optarg) == 0 ||
613 strcasecmp("if-tty", optarg) == 0) {
616 term = getenv("TERM");
617 if (isatty(STDOUT_FILENO) && term != NULL &&
618 strcasecmp(term, "dumb") != 0)
619 color = init_color("01;31");
620 } else if (strcasecmp("always", optarg) == 0 ||
621 strcasecmp("yes", optarg) == 0 ||
622 strcasecmp("force", optarg) == 0) {
623 color = init_color("01;31");
624 } else if (strcasecmp("never", optarg) != 0 &&
625 strcasecmp("none", optarg) != 0 &&
626 strcasecmp("no", optarg) != 0)
627 errx(2, getstr(3), "--color");
628 cflags &= ~REG_NOSUB;
641 add_fpattern(optarg, INCL_PAT);
645 add_fpattern(optarg, EXCL_PAT);
649 add_dpattern(optarg, INCL_PAT);
653 add_dpattern(optarg, EXCL_PAT);
660 newarg = optind != prevoptind;
666 /* Empty pattern file matches nothing */
667 if (!needpattern && (patterns == 0))
670 /* Fail if we don't have any pattern */
671 if (aargc == 0 && needpattern)
674 /* Process patterns from command line */
675 if (aargc != 0 && needpattern) {
677 char *string = *aargv;
679 while ((token = strsep(&string, "\n")) != NULL)
680 add_pattern(token, strlen(token));
685 switch (grepbehave) {
689 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
693 cflags |= REG_EXTENDED;
700 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
701 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
703 /* Check if cheating is allowed (always is for fgrep). */
704 for (i = 0; i < patterns; ++i) {
705 if (fastncomp(&fg_pattern[i], pattern[i].pat,
706 pattern[i].len, cflags) != 0) {
707 /* Fall back to full regex library */
708 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
710 regerror(c, &r_pattern[i], re_error,
712 errx(2, "%s", re_error);
720 if ((aargc == 0 || aargc == 1) && !Hflag)
724 exit(!procfile("-"));
726 if (dirbehave == DIR_RECURSE)
727 c = grep_tree(aargv);
729 for (c = 0; aargc--; ++aargv) {
730 if ((finclude || fexclude) && !file_matching(*aargv))
732 c+= procfile(*aargv);
739 /* Exit status: with c nonzero, 0 unless a file error occurred without -q (then 2);
740 with c zero, 2 after a file error and 1 otherwise. */
741 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));