1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #ifndef WITHOUT_FASTMATCH
53 #include "fastmatch.h"
63 * Default messages to use when NLS is disabled or no catalogue
/*
 * Built-in message table.  The numeric comment on each entry appears to be
 * its array index; other code in this file fetches messages by number
 * through getstr(), e.g. getstr(4)..getstr(7) for the usage text and
 * getstr(3) for the "unknown %s option" error.
 * NOTE(review): entry 0 and the closing brace are not visible in this
 * listing, and getstr() is defined elsewhere -- confirm it indexes this
 * array.
 */
66 const char *errstr[] = {
68 /* 1*/ "(standard input)",
69 /* 2*/ "cannot read bzip2 compressed file",
70 /* 3*/ "unknown %s option",
71 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
72 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
73 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
74 /* 7*/ "\t[--null] [pattern] [file ...]\n",
75 /* 8*/ "Binary file %s matches\n",
76 /* 9*/ "%s (BSD grep) %s\n",
77 /* 10*/ "%s (BSD grep, GNU compatible) %s\n",
80 /* Flags passed to regcomp() and regexec() */
/*
 * cflags starts with REG_NOSUB set.  main() clears that bit in several
 * option handlers (cflags &= ~REG_NOSUB) and later ORs in REG_EXTENDED,
 * REG_NOSPEC or REG_LITERAL according to grepbehave.
 */
81 int cflags = REG_NOSUB;
82 int eflags = REG_STARTEND;
84 /* XXX TODO: Get rid of this flag.
85 * matchall is a gross hack that means that an empty pattern was passed to us.
86 * It is a necessary evil at the moment because our regex(3) implementation
87 * does not allow for empty patterns, as supported by POSIX's definition of
88 * grammar for BREs/EREs. When libregex becomes available, it would be wise
89 * to remove this and let regex(3) handle the dirty details of empty patterns.
93 /* Searching patterns */
/*
 * patterns is the number of entries in use in pattern[]; pattern_sz is the
 * number of entries allocated.  add_pattern() grows the array when the two
 * are equal.
 */
94 unsigned int patterns;
95 static unsigned int pattern_sz;
/* One fastmatch_t per pattern; main() allocates it with grep_calloc(). */
98 #ifndef WITHOUT_FASTMATCH
99 fastmatch_t *fg_pattern;
102 /* Filename exclusion/inclusion patterns */
/*
 * Same count/capacity split as above: fpatterns/dpatterns are compared with
 * fpattern_sz/dpattern_sz in add_fpattern()/add_dpattern() to decide when
 * the fpattern[]/dpattern[] arrays must grow.
 */
103 unsigned int fpatterns, dpatterns;
104 static unsigned int fpattern_sz, dpattern_sz;
105 struct epat *dpattern, *fpattern;
107 /* For regex errors */
/* Filled by regerror() in main() and reported through errx(). */
108 char re_error[RE_ERROR_BUF + 1];
110 /* Command-line flags */
111 unsigned long long Aflag; /* -A x: print x lines trailing each match */
112 unsigned long long Bflag; /* -B x: print x lines leading each match */
113 bool Hflag; /* -H: always print file name */
114 bool Lflag; /* -L: only show names of files with no matches */
115 bool bflag; /* -b (--byte-offset): show byte offset of each match, per the long option name -- confirm against the printing code */
116 bool cflag; /* -c: only show a count of matching lines */
117 bool hflag; /* -h: don't print filename headers */
118 bool iflag; /* -i: ignore case */
119 bool lflag; /* -l: only show names of files with matches */
120 bool mflag; /* -m x: stop reading the files after x matches */
121 long long mcount; /* count for -m */
122 long long mlimit; /* requested value for -m */
123 char fileeol; /* indicator for eol */
124 bool nflag; /* -n: show line numbers in front of matching lines */
125 bool oflag; /* -o: print only matching part */
126 bool qflag; /* -q: quiet mode (don't output anything) */
127 bool sflag; /* -s: silent mode (ignore errors) */
128 bool vflag; /* -v: only show non-matching lines */
129 bool wflag; /* -w: pattern must start and end on word boundaries */
130 bool xflag; /* -x: pattern must match entire line */
131 bool lbflag; /* --line-buffered */
132 bool nullflag; /* --null */
133 char *label; /* --label */
134 const char *color; /* --color */
135 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
136 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
137 int filebehave = FILE_STDIO; /* -JMXZ, --mmap and program name: stdio, mmap, gzip, bzip2, xz or lzma input (all assigned in main) */
138 int devbehave = DEV_READ; /* -D: handling of devices */
139 int dirbehave = DIR_READ; /* -dRr: handling of directories */
140 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
142 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
143 bool fexclude, finclude; /* --exclude and --include */
/*
 * Code for the long-only option --binary-files (see long_options[]).  It
 * starts just above CHAR_MAX, so it cannot equal any single-character
 * option value returned by getopt_long().
 * NOTE(review): the enclosing enum and its other members are not visible
 * in this listing.
 */
146 BIN_OPT = CHAR_MAX + 1,
/* Forward declaration; the definition appears further down in this file. */
159 static inline const char *init_color(const char *);
/* Read by main() when it computes the final exit status. */
162 bool file_err; /* file reading error */
165 * Prints usage information and returns 2.
/*
 * Write the four usage lines (messages 4 to 7) to stderr.  Only message 4
 * is used as a format string, with the program name as its argument; the
 * remaining three are printed verbatim through "%s".
 * NOTE(review): the function signature and whatever follows these calls
 * are not visible in this listing.
 */
170 fprintf(stderr, getstr(4), getprogname());
171 fprintf(stderr, "%s", getstr(5));
172 fprintf(stderr, "%s", getstr(6));
173 fprintf(stderr, "%s", getstr(7));
/*
 * Short-option specification handed to getopt_long() in main().  A letter
 * followed by ':' takes an argument.  The digits 0-9 are listed as options
 * so that main() can accumulate a -NUM context count from them.
 */
177 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXyz";
/*
 * Long-option table handed to getopt_long() in main().  Each entry gives
 * the option name, whether it takes an argument, a NULL flag pointer (so
 * getopt_long() returns the last field), and the code to return.  Options
 * with a short equivalent return that letter; long-only options return one
 * of the *_OPT codes.  Some names are aliases for the same code
 * ("color"/"colour", "quiet"/"silent").  The all-zero entry ends the table.
 */
179 static const struct option long_options[] =
181 {"binary-files", required_argument, NULL, BIN_OPT},
182 {"help", no_argument, NULL, HELP_OPT},
183 {"mmap", no_argument, NULL, MMAP_OPT},
184 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
185 {"label", required_argument, NULL, LABEL_OPT},
186 {"null", no_argument, NULL, NULL_OPT},
187 {"color", optional_argument, NULL, COLOR_OPT},
188 {"colour", optional_argument, NULL, COLOR_OPT},
189 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
190 {"include", required_argument, NULL, R_INCLUDE_OPT},
191 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
192 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
193 {"after-context", required_argument, NULL, 'A'},
194 {"text", no_argument, NULL, 'a'},
195 {"before-context", required_argument, NULL, 'B'},
196 {"byte-offset", no_argument, NULL, 'b'},
197 {"context", optional_argument, NULL, 'C'},
198 {"count", no_argument, NULL, 'c'},
199 {"devices", required_argument, NULL, 'D'},
200 {"directories", required_argument, NULL, 'd'},
201 {"extended-regexp", no_argument, NULL, 'E'},
202 {"regexp", required_argument, NULL, 'e'},
203 {"fixed-strings", no_argument, NULL, 'F'},
204 {"file", required_argument, NULL, 'f'},
205 {"basic-regexp", no_argument, NULL, 'G'},
206 {"no-filename", no_argument, NULL, 'h'},
207 {"with-filename", no_argument, NULL, 'H'},
208 {"ignore-case", no_argument, NULL, 'i'},
209 {"bz2decompress", no_argument, NULL, 'J'},
210 {"files-with-matches", no_argument, NULL, 'l'},
211 {"files-without-match", no_argument, NULL, 'L'},
212 {"max-count", required_argument, NULL, 'm'},
213 {"lzma", no_argument, NULL, 'M'},
214 {"line-number", no_argument, NULL, 'n'},
215 {"only-matching", no_argument, NULL, 'o'},
216 {"quiet", no_argument, NULL, 'q'},
217 {"silent", no_argument, NULL, 'q'},
218 {"recursive", no_argument, NULL, 'r'},
219 {"no-messages", no_argument, NULL, 's'},
220 {"binary", no_argument, NULL, 'U'},
221 {"unix-byte-offsets", no_argument, NULL, 'u'},
222 {"invert-match", no_argument, NULL, 'v'},
223 {"version", no_argument, NULL, 'V'},
224 {"word-regexp", no_argument, NULL, 'w'},
225 {"line-regexp", no_argument, NULL, 'x'},
226 {"xz", no_argument, NULL, 'X'},
227 {"null-data", no_argument, NULL, 'z'},
228 {"decompress", no_argument, NULL, 'Z'},
229 {NULL, no_argument, NULL, 0}
233 * Adds a searching pattern to the internal array.
/*
 * add_pattern: store a private, NUL-terminated copy of the pattern pat
 * (len bytes, which need not be NUL-terminated) in the next free slot of
 * the global pattern[] array, together with its length.
 * NOTE(review): the return type, the conditions guarding the early-exit
 * and shortcut paths, the statement guarded by the trailing-newline test
 * and the update of the patterns counter are not visible in this listing.
 */
236 add_pattern(char *pat, size_t len)
239 /* Do not add further patterns if we already match everything */
243 /* Check if we can do a shortcut */
/*
 * Shortcut path: free every stored pattern string, shrink the array to a
 * single element and mark that element's string as NULL.
 */
246 for (unsigned int i = 0; i < patterns; i++) {
247 free(pattern[i].pat);
249 pattern = grep_realloc(pattern, sizeof(struct pat));
250 pattern[0].pat = NULL;
255 /* Increase size if necessary */
/* Capacity grows by one element each time the array is full. */
256 if (patterns == pattern_sz) {
258 pattern = grep_realloc(pattern, ++pattern_sz *
/* Trailing newline test; presumably the hidden statement drops it -- confirm. */
261 if (len > 0 && pat[len - 1] == '\n')
263 /* pat may not be NUL-terminated */
264 pattern[patterns].pat = grep_malloc(len + 1);
265 memcpy(pattern[patterns].pat, pat, len);
266 pattern[patterns].len = len;
267 pattern[patterns].pat[len] = '\0';
272 * Adds a file include/exclude pattern to the internal array.
/*
 * add_fpattern: record a file-name include/exclude pattern.
 *
 * pat  - pattern text; the entry stores the result of grep_strdup(pat).
 * mode - tag stored with the entry; main() passes INCL_PAT or EXCL_PAT.
 *
 * The fpattern[] array grows by one element whenever the count in use
 * (fpatterns) has reached the allocated size (fpattern_sz).
 * NOTE(review): the return type and the update of fpatterns are not
 * visible in this listing.
 */
275 add_fpattern(const char *pat, int mode)
278 /* Increase size if necessary */
279 if (fpatterns == fpattern_sz) {
281 fpattern = grep_realloc(fpattern, ++fpattern_sz *
282 sizeof(struct epat));
284 fpattern[fpatterns].pat = grep_strdup(pat);
285 fpattern[fpatterns].mode = mode;
290 * Adds a directory include/exclude pattern to the internal array.
/*
 * add_dpattern: record a directory include/exclude pattern.
 *
 * pat  - pattern text; the entry stores the result of grep_strdup(pat).
 * mode - tag stored with the entry; main() passes INCL_PAT or EXCL_PAT.
 *
 * The dpattern[] array grows by one element whenever the count in use
 * (dpatterns) has reached the allocated size (dpattern_sz).
 * NOTE(review): the return type and the update of dpatterns are not
 * visible in this listing.
 */
293 add_dpattern(const char *pat, int mode)
296 /* Increase size if necessary */
297 if (dpatterns == dpattern_sz) {
299 dpattern = grep_realloc(dpattern, ++dpattern_sz *
300 sizeof(struct epat));
302 dpattern[dpatterns].pat = grep_strdup(pat);
303 dpattern[dpatterns].mode = mode;
308 * Reads searching patterns from a file and adds them with add_pattern().
/*
 * read_patterns: open the file named fn for reading and hand each of its
 * lines to add_pattern().
 * NOTE(review): the return type, local declarations, error handling and
 * cleanup are not visible in this listing.
 */
311 read_patterns(const char *fn)
319 if ((f = fopen(fn, "r")) == NULL)
/* Special-cased when fstat() fails or fn is a directory (handling not visible). */
321 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
327 while ((rlen = getline(&line, &len, f)) != -1) {
/*
 * A line that starts with a newline (i.e. an empty line) is passed with
 * length 0; any other line is passed with the full length reported by
 * getline().
 */
330 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
/*
 * init_color: choose the colour string to use for highlighting.
 *
 * d - default colour string supplied by the caller.
 *
 * Returns the value of the GREP_COLOR environment variable when it is set
 * and non-empty, otherwise d.
 */
339 static inline const char *
340 init_color(const char *d)
344 c = getenv("GREP_COLOR");
345 return (c != NULL && c[0] != '\0' ? c : d);
/*
 * main: program entry point.
 *
 * As far as the visible statements show, it:
 *   1. picks default behaviours from the leading characters of pn,
 *   2. splits the GREP_OPTIONS environment variable on spaces and places
 *      those words ahead of the command-line arguments,
 *   3. parses options with getopt_long(),
 *   4. collects the pattern(s) and compiles each of them,
 *   5. processes "-", a directory tree, or each remaining operand, and
 *   6. exits with a status derived from c, file_err and qflag.
 * NOTE(review): many original lines (case labels, braces, break statements,
 * some declarations and assignments, including the one that sets pn) are
 * not visible in this listing; the comments below describe only what is
 * shown.
 */
349 main(int argc, char *argv[])
351 char **aargv, **eargv, *eopts;
354 unsigned long long l;
355 unsigned int aargc, eargc, i;
356 int c, lastc, needpattern, newarg, prevoptind;
358 setlocale(LC_ALL, "");
361 catalog = catopen("grep", NL_CAT_LOCALE);
364 /* Check what is the program name of the binary. In this
365 way we can have all the functionalities in one binary
366 without the need of scripting and using ugly hacks. */
368 if (pn[0] == 'b' && pn[1] == 'z') {
369 filebehave = FILE_BZIP;
371 } else if (pn[0] == 'x' && pn[1] == 'z') {
372 filebehave = FILE_XZ;
374 } else if (pn[0] == 'l' && pn[1] == 'z') {
375 filebehave = FILE_LZMA;
377 } else if (pn[0] == 'r') {
378 dirbehave = DIR_RECURSE;
380 } else if (pn[0] == 'z') {
381 filebehave = FILE_GZIP;
386 grepbehave = GREP_EXTENDED;
389 grepbehave = GREP_FIXED;
399 eopts = getenv("GREP_OPTIONS");
401 /* support for extra arguments in GREP_OPTIONS */
403 if (eopts != NULL && eopts[0] != '\0') {
406 /* make an estimation of how many extra arguments we have */
407 for (unsigned int j = 0; j < strlen(eopts); j++)
411 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
414 /* parse extra arguments */
415 while ((str = strsep(&eopts, " ")) != NULL)
417 eargv[eargc++] = grep_strdup(str);
419 aargv = (char **)grep_calloc(eargc + argc + 1,
/*
 * Build the merged vector: the GREP_OPTIONS words fill slots 1..eargc and
 * the original argv[1..] follow.  i is deliberately not reset, so the
 * second loop continues where the first one stopped.
 */
423 for (i = 0; i < eargc; i++)
424 aargv[i + 1] = eargv[i];
425 for (int j = 1; j < argc; j++, i++)
426 aargv[i + 1] = argv[j];
428 aargc = eargc + argc;
434 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
/*
 * -NUM: each digit option extends a decimal count that is stored in both
 * Aflag and Bflag.  The LLONG_MAX / 10 comparison guards the
 * multiplication by ten below.
 */
437 case '0': case '1': case '2': case '3': case '4':
438 case '5': case '6': case '7': case '8': case '9':
439 if (newarg || !isdigit(lastc))
441 else if (Aflag > LLONG_MAX / 10) {
445 Aflag = Bflag = (Aflag * 10) + (c - '0');
448 if (optarg == NULL) {
/*
 * Numeric option argument, parsed as base-10 unsigned.  Both errno and
 * the end pointer ep are checked (the handling itself is not visible).
 */
457 l = strtoull(optarg, &ep, 10);
458 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
459 ((errno == EINVAL) && (l == 0)))
461 else if (ep[0] != '\0') {
473 binbehave = BINFILE_TEXT;
482 if (strcasecmp(optarg, "skip") == 0)
483 devbehave = DEV_SKIP;
484 else if (strcasecmp(optarg, "read") == 0)
485 devbehave = DEV_READ;
487 errx(2, getstr(3), "--devices");
490 if (strcasecmp("recurse", optarg) == 0) {
492 dirbehave = DIR_RECURSE;
493 } else if (strcasecmp("skip", optarg) == 0)
494 dirbehave = DIR_SKIP;
495 else if (strcasecmp("read", optarg) == 0)
496 dirbehave = DIR_READ;
498 errx(2, getstr(3), "--directories");
501 grepbehave = GREP_EXTENDED;
/* A pattern argument may hold several newline-separated patterns. */
506 char *string = optarg;
508 while ((token = strsep(&string, "\n")) != NULL)
509 add_pattern(token, strlen(token));
514 grepbehave = GREP_FIXED;
517 read_patterns(optarg);
521 grepbehave = GREP_BASIC;
531 binbehave = BINFILE_SKIP;
541 err(2, "bzip2 support was disabled at compile-time");
543 filebehave = FILE_BZIP;
/* The -m argument initialises both the running count and the saved limit. */
556 mlimit = mcount = strtoll(optarg, &ep, 10);
557 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
558 ((errno == EINVAL) && (mcount == 0)))
560 else if (ep[0] != '\0') {
566 filebehave = FILE_LZMA;
572 linkbehave = LINK_EXPLICIT;
576 cflags &= ~REG_NOSUB;
579 linkbehave = LINK_SKIP;
585 linkbehave = LINK_READ;
589 dirbehave = DIR_RECURSE;
596 binbehave = BINFILE_BIN;
600 filebehave = FILE_MMAP;
604 printf(getstr(10), getprogname(), VERSION);
606 printf(getstr(9), getprogname(), VERSION);
614 cflags &= ~REG_NOSUB;
618 cflags &= ~REG_NOSUB;
621 filebehave = FILE_XZ;
627 filebehave = FILE_GZIP;
630 if (strcasecmp("binary", optarg) == 0)
631 binbehave = BINFILE_BIN;
632 else if (strcasecmp("without-match", optarg) == 0)
633 binbehave = BINFILE_SKIP;
634 else if (strcasecmp("text", optarg) == 0)
635 binbehave = BINFILE_TEXT;
637 errx(2, getstr(3), "--binary-files");
/*
 * Colour selection.  With no argument, or with "auto", "tty" or "if-tty",
 * colour is enabled only when stdout is a terminal and TERM is set to
 * something other than "dumb".  "always", "yes" and "force" enable it
 * unconditionally.  "never", "none" and "no" are accepted and leave color
 * untouched.  Any other value is reported as an unknown --color option.
 * REG_NOSUB is cleared after the chain.
 */
641 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
642 strcasecmp("tty", optarg) == 0 ||
643 strcasecmp("if-tty", optarg) == 0) {
646 term = getenv("TERM");
647 if (isatty(STDOUT_FILENO) && term != NULL &&
648 strcasecmp(term, "dumb") != 0)
649 color = init_color("01;31");
650 } else if (strcasecmp("always", optarg) == 0 ||
651 strcasecmp("yes", optarg) == 0 ||
652 strcasecmp("force", optarg) == 0) {
653 color = init_color("01;31");
654 } else if (strcasecmp("never", optarg) != 0 &&
655 strcasecmp("none", optarg) != 0 &&
656 strcasecmp("no", optarg) != 0)
657 errx(2, getstr(3), "--color");
658 cflags &= ~REG_NOSUB;
671 add_fpattern(optarg, INCL_PAT);
675 add_fpattern(optarg, EXCL_PAT);
679 add_dpattern(optarg, INCL_PAT);
683 add_dpattern(optarg, EXCL_PAT);
/* newarg records whether getopt_long() moved on to a new argv element. */
690 newarg = optind != prevoptind;
696 /* Empty pattern file matches nothing */
697 if (!needpattern && (patterns == 0))
700 /* Fail if we don't have any pattern */
701 if (aargc == 0 && needpattern)
704 /* Process patterns from command line */
705 if (aargc != 0 && needpattern) {
707 char *string = *aargv;
709 while ((token = strsep(&string, "\n")) != NULL)
710 add_pattern(token, strlen(token));
/* Translate the selected matcher type into regcomp() flags. */
715 switch (grepbehave) {
719 #if defined(REG_NOSPEC)
720 cflags |= REG_NOSPEC;
721 #elif defined(REG_LITERAL)
722 cflags |= REG_LITERAL;
724 errx(2, "literal expressions not supported at compile time");
728 cflags |= REG_EXTENDED;
735 #ifndef WITHOUT_FASTMATCH
736 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
738 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
740 /* Don't process any patterns if we have a blank one */
742 /* Check if cheating is allowed (always is for fgrep). */
743 for (i = 0; i < patterns; ++i) {
744 #ifndef WITHOUT_FASTMATCH
746 * Attempt compilation with fastmatch regex and
747 * fallback to regex(3) if it fails.
749 if (fastncomp(&fg_pattern[i], pattern[i].pat,
750 pattern[i].len, cflags) == 0)
753 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
/* A regcomp() failure is turned into text and is fatal (exit status 2). */
755 regerror(c, &r_pattern[i], re_error,
757 errx(2, "%s", re_error);
765 if ((aargc == 0 || aargc == 1) && !Hflag)
/*
 * No operands left and not recursing: process "-" (presumably standard
 * input) and exit with the logical negation of procfile()'s result.
 */
768 if (aargc == 0 && dirbehave != DIR_RECURSE)
769 exit(!procfile("-"));
771 if (dirbehave == DIR_RECURSE)
772 c = grep_tree(aargv);
/* Otherwise walk the remaining operands, summing procfile() results in c. */
774 for (c = 0; aargc--; ++aargv) {
775 if ((finclude || fexclude) && !file_matching(*aargv))
777 c+= procfile(*aargv);
784 /* Find out the correct return value according to the
785 results and the command line option. */
/*
 * c != 0: exit 0, except that a file error without -q gives 2.
 * c == 0: exit 2 if a file error occurred, otherwise 1.
 */
786 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));