1 /* $NetBSD: grep.c,v 1.4 2011/02/16 01:31:33 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
59 * Default messages to use when NLS is disabled or no catalogue
62 const char *errstr[] = {
64 /* 1*/ "(standard input)",
65 /* 2*/ "cannot read bzip2 compressed file",
66 /* 3*/ "unknown %s option",
67 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
68 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
69 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
70 /* 7*/ "\t[--null] [pattern] [file ...]\n",
71 /* 8*/ "Binary file %s matches\n",
72 /* 9*/ "%s (BSD grep) %s\n",
75 /* Flags passed to regcomp() and regexec() */
76 int cflags = REG_NOSUB;
77 int eflags = REG_STARTEND;
79 /* Shortcut for matching all cases like empty regex */
82 /* Searching patterns */
83 unsigned int patterns, pattern_sz;
86 fastgrep_t *fg_pattern;
88 /* Filename exclusion/inclusion patterns */
89 unsigned int fpatterns, fpattern_sz;
90 unsigned int dpatterns, dpattern_sz;
91 struct epat *dpattern, *fpattern;
93 /* For regex errors */
94 char re_error[RE_ERROR_BUF + 1];
96 /* Command-line flags */
97 unsigned long long Aflag; /* -A x: print x lines trailing each match */
98 unsigned long long Bflag; /* -B x: print x lines leading each match */
99 bool Hflag; /* -H: always print file name */
100 bool Lflag; /* -L: only show names of files with no matches */
101 bool bflag; /* -b (--byte-offset): show byte offsets for each match */
102 bool cflag; /* -c: only show a count of matching lines */
103 bool hflag; /* -h: don't print filename headers */
104 bool iflag; /* -i: ignore case */
105 bool lflag; /* -l: only show names of files with matches */
106 bool mflag; /* -m x: stop reading the files after x matches */
107 unsigned long long mcount; /* count for -m */
108 bool nflag; /* -n: show line numbers in front of matching lines */
109 bool oflag; /* -o: print only matching part */
110 bool qflag; /* -q: quiet mode (don't output anything) */
111 bool sflag; /* -s: silent mode (ignore errors) */
112 bool vflag; /* -v: only show non-matching lines */
113 bool wflag; /* -w: pattern must start and end on word boundaries */
114 bool xflag; /* -x: pattern must match entire line */
115 bool lbflag; /* --line-buffered */
116 bool nullflag; /* --null */
117 char *label; /* --label */
118 const char *color; /* --color */
119 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
120 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
121 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
122 int devbehave = DEV_READ; /* -D: handling of devices */
123 int dirbehave = DIR_READ; /* -dRr: handling of directories */
124 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
126 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
127 bool fexclude, finclude; /* --exclude and --include */
130 BIN_OPT = CHAR_MAX + 1,
143 static inline const char *init_color(const char *);
146 bool first = true; /* flag whether we are processing the first match */
147 bool prev; /* flag whether or not the previous line matched */
148 int tail; /* lines left to print */
149 bool notfound; /* file not found */
151 extern char *__progname;
154 * Prints usage information and returns 2.
/* The usage text is messages 4-7 of errstr[]; print each line exactly once. */
159 fprintf(stderr, getstr(4), __progname);
160 fprintf(stderr, "%s", getstr(5));
162 fprintf(stderr, "%s", getstr(6));
163 fprintf(stderr, "%s", getstr(7));
167 static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
169 struct option long_options[] =
171 {"binary-files", required_argument, NULL, BIN_OPT},
172 {"help", no_argument, NULL, HELP_OPT},
173 {"mmap", no_argument, NULL, MMAP_OPT},
174 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
175 {"label", required_argument, NULL, LABEL_OPT},
176 {"null", no_argument, NULL, NULL_OPT},
177 {"color", optional_argument, NULL, COLOR_OPT},
178 {"colour", optional_argument, NULL, COLOR_OPT},
179 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
180 {"include", required_argument, NULL, R_INCLUDE_OPT},
181 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
182 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
183 {"after-context", required_argument, NULL, 'A'},
184 {"text", no_argument, NULL, 'a'},
185 {"before-context", required_argument, NULL, 'B'},
186 {"byte-offset", no_argument, NULL, 'b'},
187 {"context", optional_argument, NULL, 'C'},
188 {"count", no_argument, NULL, 'c'},
189 {"devices", required_argument, NULL, 'D'},
190 {"directories", required_argument, NULL, 'd'},
191 {"extended-regexp", no_argument, NULL, 'E'},
192 {"regexp", required_argument, NULL, 'e'},
193 {"fixed-strings", no_argument, NULL, 'F'},
194 {"file", required_argument, NULL, 'f'},
195 {"basic-regexp", no_argument, NULL, 'G'},
196 {"no-filename", no_argument, NULL, 'h'},
197 {"with-filename", no_argument, NULL, 'H'},
198 {"ignore-case", no_argument, NULL, 'i'},
199 {"bz2decompress", no_argument, NULL, 'J'},
200 {"files-with-matches", no_argument, NULL, 'l'},
201 {"files-without-match", no_argument, NULL, 'L'},
202 {"max-count", required_argument, NULL, 'm'},
203 {"line-number", no_argument, NULL, 'n'},
204 {"only-matching", no_argument, NULL, 'o'},
205 {"quiet", no_argument, NULL, 'q'},
206 {"silent", no_argument, NULL, 'q'},
207 {"recursive", no_argument, NULL, 'r'},
208 {"no-messages", no_argument, NULL, 's'},
209 {"binary", no_argument, NULL, 'U'},
210 {"unix-byte-offsets", no_argument, NULL, 'u'},
211 {"invert-match", no_argument, NULL, 'v'},
212 {"version", no_argument, NULL, 'V'},
213 {"word-regexp", no_argument, NULL, 'w'},
214 {"line-regexp", no_argument, NULL, 'x'},
215 {"decompress", no_argument, NULL, 'Z'},
216 {NULL, no_argument, NULL, 0}
220 * Adds a searching pattern to the internal array.
223 add_pattern(char *pat, size_t len)
226 /* Check if we can do a shortcut */
227 if (len == 0 || matchall) {
231 /* Increase size if necessary */
232 if (patterns == pattern_sz) {
234 pattern = grep_realloc(pattern, ++pattern_sz *
237 if (len > 0 && pat[len - 1] == '\n')
239 /* pat may not be NUL-terminated */
240 pattern[patterns] = grep_malloc(len + 1);
241 memcpy(pattern[patterns], pat, len);
242 pattern[patterns][len] = '\0';
247 * Adds a file include/exclude pattern to the internal array.
250 add_fpattern(const char *pat, int mode)
253 /* Increase size if necessary */
254 if (fpatterns == fpattern_sz) {
256 fpattern = grep_realloc(fpattern, ++fpattern_sz *
257 sizeof(struct epat));
259 fpattern[fpatterns].pat = grep_strdup(pat);
260 fpattern[fpatterns].mode = mode;
265 * Adds a directory include/exclude pattern to the internal array.
268 add_dpattern(const char *pat, int mode)
271 /* Increase size if necessary */
272 if (dpatterns == dpattern_sz) {
274 dpattern = grep_realloc(dpattern, ++dpattern_sz *
275 sizeof(struct epat));
277 dpattern[dpatterns].pat = grep_strdup(pat);
278 dpattern[dpatterns].mode = mode;
283 * Reads searching patterns from a file and adds them with add_pattern().
286 read_patterns(const char *fn)
292 if ((f = fopen(fn, "r")) == NULL)
294 while ((line = fgetln(f, &len)) != NULL)
295 add_pattern(line, *line == '\n' ? 0 : len);
301 static inline const char *
302 init_color(const char *d)
306 c = getenv("GREP_COLOR");
307 return (c != NULL && c[0] != '\0' ? c : d);
311 main(int argc, char *argv[])
313 char **aargv, **eargv, *eopts;
315 unsigned long long l;
316 unsigned int aargc, eargc, i;
317 int c, lastc, needpattern, newarg, prevoptind;
319 setlocale(LC_ALL, "");
322 catalog = catopen("grep", NL_CAT_LOCALE);
325 /* Check the program name of the binary. This way
326 we can have all the functionality in one binary
327 without the need for scripting or ugly hacks. */
328 switch (__progname[0]) {
330 grepbehave = GREP_EXTENDED;
333 grepbehave = GREP_FIXED;
336 grepbehave = GREP_BASIC;
339 filebehave = FILE_GZIP;
340 switch(__progname[1]) {
342 grepbehave = GREP_EXTENDED;
345 grepbehave = GREP_FIXED;
348 grepbehave = GREP_BASIC;
359 eopts = getenv("GREP_OPTIONS");
361 /* support for extra arguments in GREP_OPTIONS */
363 if (eopts != NULL && eopts[0] != '\0') {
366 /* make an estimation of how many extra arguments we have */
367 for (unsigned int j = 0; j < strlen(eopts); j++)
371 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
374 /* parse extra arguments */
375 while ((str = strsep(&eopts, " ")) != NULL)
377 eargv[eargc++] = grep_strdup(str);
379 aargv = (char **)grep_calloc(eargc + argc + 1,
383 for (i = 0; i < eargc; i++)
384 aargv[i + 1] = eargv[i];
385 for (int j = 1; j < argc; j++, i++)
386 aargv[i + 1] = argv[j];
388 aargc = eargc + argc;
394 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
397 case '0': case '1': case '2': case '3': case '4':
398 case '5': case '6': case '7': case '8': case '9':
399 if (newarg || !isdigit(lastc))
401 else if (Aflag > LLONG_MAX / 10) {
405 Aflag = Bflag = (Aflag * 10) + (c - '0');
408 if (optarg == NULL) {
417 l = strtoull(optarg, &ep, 10);
418 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
419 ((errno == EINVAL) && (l == 0)))
421 else if (ep[0] != '\0') {
433 binbehave = BINFILE_TEXT;
442 if (strcasecmp(optarg, "skip") == 0)
443 devbehave = DEV_SKIP;
444 else if (strcasecmp(optarg, "read") == 0)
445 devbehave = DEV_READ;
447 errx(2, getstr(3), "--devices");
450 if (strcasecmp("recurse", optarg) == 0) {
452 dirbehave = DIR_RECURSE;
453 } else if (strcasecmp("skip", optarg) == 0)
454 dirbehave = DIR_SKIP;
455 else if (strcasecmp("read", optarg) == 0)
456 dirbehave = DIR_READ;
458 errx(2, getstr(3), "--directories");
461 grepbehave = GREP_EXTENDED;
464 add_pattern(optarg, strlen(optarg));
468 grepbehave = GREP_FIXED;
471 read_patterns(optarg);
475 grepbehave = GREP_BASIC;
485 binbehave = BINFILE_SKIP;
493 filebehave = FILE_BZIP;
506 mcount = strtoull(optarg, &ep, 10);
507 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
508 ((errno == EINVAL) && (mcount == 0)))
510 else if (ep[0] != '\0') {
519 linkbehave = LINK_EXPLICIT;
523 cflags &= ~REG_NOSUB;
526 linkbehave = LINK_SKIP;
532 linkbehave = LINK_READ;
536 dirbehave = DIR_RECURSE;
543 binbehave = BINFILE_BIN;
547 /* noop, compatibility */
550 printf(getstr(9), __progname, VERSION);
557 cflags &= ~REG_NOSUB;
561 cflags &= ~REG_NOSUB;
564 filebehave = FILE_GZIP;
567 if (strcasecmp("binary", optarg) == 0)
568 binbehave = BINFILE_BIN;
569 else if (strcasecmp("without-match", optarg) == 0)
570 binbehave = BINFILE_SKIP;
571 else if (strcasecmp("text", optarg) == 0)
572 binbehave = BINFILE_TEXT;
574 errx(2, getstr(3), "--binary-files");
578 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
579 strcasecmp("tty", optarg) == 0 ||
580 strcasecmp("if-tty", optarg) == 0) {
583 term = getenv("TERM");
584 if (isatty(STDOUT_FILENO) && term != NULL &&
585 strcasecmp(term, "dumb") != 0)
586 color = init_color("01;31");
587 } else if (strcasecmp("always", optarg) == 0 ||
588 strcasecmp("yes", optarg) == 0 ||
589 strcasecmp("force", optarg) == 0) {
590 color = init_color("01;31");
591 } else if (strcasecmp("never", optarg) != 0 &&
592 strcasecmp("none", optarg) != 0 &&
593 strcasecmp("no", optarg) != 0)
594 errx(2, getstr(3), "--color");
595 cflags &= ~REG_NOSUB;
608 add_fpattern(optarg, INCL_PAT);
612 add_fpattern(optarg, EXCL_PAT);
616 add_dpattern(optarg, INCL_PAT);
620 add_dpattern(optarg, EXCL_PAT);
627 newarg = optind != prevoptind;
633 /* Fail if we don't have any pattern */
634 if (aargc == 0 && needpattern)
637 /* Process patterns from command line */
638 if (aargc != 0 && needpattern) {
639 add_pattern(*aargv, strlen(*aargv));
644 switch (grepbehave) {
649 cflags |= REG_EXTENDED;
656 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
657 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
659 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
660 * Optimizations should be done there.
662 /* Check if cheating is allowed (always is for fgrep). */
663 if (grepbehave == GREP_FIXED) {
664 for (i = 0; i < patterns; ++i)
665 fgrepcomp(&fg_pattern[i], pattern[i]);
667 for (i = 0; i < patterns; ++i) {
668 if (fastcomp(&fg_pattern[i], pattern[i])) {
669 /* Fall back to full regex library */
670 c = regcomp(&r_pattern[i], pattern[i], cflags);
672 regerror(c, &r_pattern[i], re_error,
674 errx(2, "%s", re_error);
683 if ((aargc == 0 || aargc == 1) && !Hflag)
687 exit(!procfile("-"));
689 if (dirbehave == DIR_RECURSE)
690 c = grep_tree(aargv);
692 for (c = 0; aargc--; ++aargv) {
693 if ((finclude || fexclude) && !file_matching(*aargv))
695 c+= procfile(*aargv);
702 /* Find out the correct return value according to the
703 results and the command line option. */
704 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));