1 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
34 #include <sys/types.h>
57 * Default messages to use when NLS is disabled or no catalogue
60 const char *errstr[] = {
62 /* 1*/ "(standard input)",
63 /* 2*/ "cannot read bzip2 compressed file",
64 /* 3*/ "unknown %s option",
65 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
66 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
67 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
68 /* 7*/ "\t[--null] [pattern] [file ...]\n",
69 /* 8*/ "Binary file %s matches\n",
70 /* 9*/ "%s (BSD grep) %s\n",
73 /* Flags passed to regcomp() and regexec() */
75 int eflags = REG_STARTEND;
77 /* Shortcut for matching all cases like empty regex */
80 /* Searching patterns */
81 unsigned int patterns, pattern_sz;
84 fastgrep_t *fg_pattern;
86 /* Filename exclusion/inclusion patterns */
87 unsigned int fpatterns, fpattern_sz;
88 unsigned int dpatterns, dpattern_sz;
89 struct epat *dpattern, *fpattern;
91 /* For regex errors */
92 char re_error[RE_ERROR_BUF + 1];
94 /* Command-line flags */
95 unsigned long long Aflag; /* -A x: print x lines trailing each match */
96 unsigned long long Bflag; /* -B x: print x lines leading each match */
97 bool Hflag; /* -H: always print file name */
98 bool Lflag; /* -L: only show names of files with no matches */
99 bool bflag; /* -b: show byte offset of each match (long option --byte-offset) */
100 bool cflag; /* -c: only show a count of matching lines */
101 bool hflag; /* -h: don't print filename headers */
102 bool iflag; /* -i: ignore case */
103 bool lflag; /* -l: only show names of files with matches */
104 bool mflag; /* -m x: stop reading the files after x matches */
105 unsigned long long mcount; /* count for -m */
106 bool nflag; /* -n: show line numbers in front of matching lines */
107 bool oflag; /* -o: print only matching part */
108 bool qflag; /* -q: quiet mode (don't output anything) */
109 bool sflag; /* -s: silent mode (ignore errors) */
110 bool vflag; /* -v: only show non-matching lines */
111 bool wflag; /* -w: pattern must start and end on word boundaries */
112 bool xflag; /* -x: pattern must match entire line */
113 bool lbflag; /* --line-buffered */
114 bool nullflag; /* --null */
115 char *label; /* --label */
116 const char *color; /* --color */
117 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
118 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
119 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
120 int devbehave = DEV_READ; /* -D: handling of devices */
121 int dirbehave = DIR_READ; /* -dRr: handling of directories */
122 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
124 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
125 bool fexclude, finclude; /* --exclude and --include */
128 BIN_OPT = CHAR_MAX + 1,
141 static inline const char *init_color(const char *);
144 bool first = true; /* flag whether we are processing the first match */
145 bool prev; /* flag whether or not the previous line matched */
146 int tail; /* lines left to print */
147 bool notfound; /* file not found */
149 extern char *__progname;
152 * Prints usage information and returns 2.
157 fprintf(stderr, getstr(4), __progname); /* message 4: synopsis line, format takes the program name */
158 fprintf(stderr, "%s", getstr(5)); /* messages 5-7: continuation lines, each printed exactly once */
160 fprintf(stderr, "%s", getstr(6));
161 fprintf(stderr, "%s", getstr(7));
165 static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
167 struct option long_options[] =
169 {"binary-files", required_argument, NULL, BIN_OPT},
170 {"help", no_argument, NULL, HELP_OPT},
171 {"mmap", no_argument, NULL, MMAP_OPT},
172 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
173 {"label", required_argument, NULL, LABEL_OPT},
174 {"null", no_argument, NULL, NULL_OPT},
175 {"color", optional_argument, NULL, COLOR_OPT},
176 {"colour", optional_argument, NULL, COLOR_OPT},
177 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
178 {"include", required_argument, NULL, R_INCLUDE_OPT},
179 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
180 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
181 {"after-context", required_argument, NULL, 'A'},
182 {"text", no_argument, NULL, 'a'},
183 {"before-context", required_argument, NULL, 'B'},
184 {"byte-offset", no_argument, NULL, 'b'},
185 {"context", optional_argument, NULL, 'C'},
186 {"count", no_argument, NULL, 'c'},
187 {"devices", required_argument, NULL, 'D'},
188 {"directories", required_argument, NULL, 'd'},
189 {"extended-regexp", no_argument, NULL, 'E'},
190 {"regexp", required_argument, NULL, 'e'},
191 {"fixed-strings", no_argument, NULL, 'F'},
192 {"file", required_argument, NULL, 'f'},
193 {"basic-regexp", no_argument, NULL, 'G'},
194 {"no-filename", no_argument, NULL, 'h'},
195 {"with-filename", no_argument, NULL, 'H'},
196 {"ignore-case", no_argument, NULL, 'i'},
197 {"bz2decompress", no_argument, NULL, 'J'},
198 {"files-with-matches", no_argument, NULL, 'l'},
199 {"files-without-match", no_argument, NULL, 'L'},
200 {"max-count", required_argument, NULL, 'm'},
201 {"line-number", no_argument, NULL, 'n'},
202 {"only-matching", no_argument, NULL, 'o'},
203 {"quiet", no_argument, NULL, 'q'},
204 {"silent", no_argument, NULL, 'q'},
205 {"recursive", no_argument, NULL, 'r'},
206 {"no-messages", no_argument, NULL, 's'},
207 {"binary", no_argument, NULL, 'U'},
208 {"unix-byte-offsets", no_argument, NULL, 'u'},
209 {"invert-match", no_argument, NULL, 'v'},
210 {"version", no_argument, NULL, 'V'},
211 {"word-regexp", no_argument, NULL, 'w'},
212 {"line-regexp", no_argument, NULL, 'x'},
213 {"decompress", no_argument, NULL, 'Z'},
214 {NULL, no_argument, NULL, 0}
218 * Adds a searching pattern to the internal array.
221 add_pattern(char *pat, size_t len)
224 /* Check if we can do a shortcut */
225 if (len == 0 || matchall) {
229 /* Increase size if necessary */
230 if (patterns == pattern_sz) {
232 pattern = grep_realloc(pattern, ++pattern_sz *
235 if (len > 0 && pat[len - 1] == '\n')
237 /* pat may not be NUL-terminated */
238 pattern[patterns] = grep_malloc(len + 1);
239 memcpy(pattern[patterns], pat, len);
240 pattern[patterns][len] = '\0';
245 * Adds a file include/exclude pattern to the internal array.
248 add_fpattern(const char *pat, int mode)
251 /* Increase size if necessary */
252 if (fpatterns == fpattern_sz) {
254 fpattern = grep_realloc(fpattern, ++fpattern_sz *
255 sizeof(struct epat));
257 fpattern[fpatterns].pat = grep_strdup(pat);
258 fpattern[fpatterns].mode = mode;
263 * Adds a directory include/exclude pattern to the internal array.
266 add_dpattern(const char *pat, int mode)
269 /* Increase size if necessary */
270 if (dpatterns == dpattern_sz) {
272 dpattern = grep_realloc(dpattern, ++dpattern_sz *
273 sizeof(struct epat));
275 dpattern[dpatterns].pat = grep_strdup(pat);
276 dpattern[dpatterns].mode = mode;
281 * Reads searching patterns from a file and adds them with add_pattern().
284 read_patterns(const char *fn)
290 if ((f = fopen(fn, "r")) == NULL)
292 while ((line = fgetln(f, &len)) != NULL)
293 add_pattern(line, *line == '\n' ? 0 : len);
299 static inline const char *
300 init_color(const char *d)
304 c = getenv("GREP_COLOR");
305 return (c != NULL ? c : d);
309 main(int argc, char *argv[])
311 char **aargv, **eargv, *eopts;
313 unsigned long long l;
314 unsigned int aargc, eargc, i;
315 int c, lastc, needpattern, newarg, prevoptind;
317 setlocale(LC_ALL, "");
320 catalog = catopen("grep", NL_CAT_LOCALE);
323 /* Check the program name of the binary. This way we can
324 have all the functionality in one binary without the
325 need for scripting or ugly hacks. */
326 switch (__progname[0]) {
328 grepbehave = GREP_EXTENDED;
331 grepbehave = GREP_FIXED;
334 grepbehave = GREP_BASIC;
337 filebehave = FILE_GZIP;
338 switch(__progname[1]) {
340 grepbehave = GREP_EXTENDED;
343 grepbehave = GREP_FIXED;
346 grepbehave = GREP_BASIC;
357 eopts = getenv("GREP_OPTIONS");
359 /* support for extra arguments in GREP_OPTIONS */
364 /* make an estimation of how many extra arguments we have */
365 for (unsigned int j = 0; j < strlen(eopts); j++)
369 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
372 /* parse extra arguments */
373 while ((str = strsep(&eopts, " ")) != NULL)
374 eargv[eargc++] = grep_strdup(str);
376 aargv = (char **)grep_calloc(eargc + argc + 1,
380 for (i = 0; i < eargc; i++)
381 aargv[i + 1] = eargv[i];
382 for (int j = 1; j < argc; j++, i++)
383 aargv[i + 1] = argv[j];
385 aargc = eargc + argc;
391 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
394 case '0': case '1': case '2': case '3': case '4':
395 case '5': case '6': case '7': case '8': case '9':
396 if (newarg || !isdigit(lastc))
398 else if (Aflag > LLONG_MAX / 10) {
402 Aflag = Bflag = (Aflag * 10) + (c - '0');
405 if (optarg == NULL) {
414 l = strtoull(optarg, &ep, 10);
415 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
416 ((errno == EINVAL) && (l == 0)))
418 else if (ep[0] != '\0') {
430 binbehave = BINFILE_TEXT;
439 if (strcasecmp(optarg, "skip") == 0)
440 devbehave = DEV_SKIP;
441 else if (strcasecmp(optarg, "read") == 0)
442 devbehave = DEV_READ;
444 errx(2, getstr(3), "--devices");
447 if (strcasecmp("recurse", optarg) == 0) {
449 dirbehave = DIR_RECURSE;
450 } else if (strcasecmp("skip", optarg) == 0)
451 dirbehave = DIR_SKIP;
452 else if (strcasecmp("read", optarg) == 0)
453 dirbehave = DIR_READ;
455 errx(2, getstr(3), "--directories");
458 grepbehave = GREP_EXTENDED;
461 add_pattern(optarg, strlen(optarg));
465 grepbehave = GREP_FIXED;
468 read_patterns(optarg);
472 grepbehave = GREP_BASIC;
482 binbehave = BINFILE_SKIP;
490 filebehave = FILE_BZIP;
503 mcount = strtoull(optarg, &ep, 10);
504 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
505 ((errno == EINVAL) && (mcount == 0)))
507 else if (ep[0] != '\0') {
516 linkbehave = LINK_EXPLICIT;
522 linkbehave = LINK_SKIP;
528 linkbehave = LINK_READ;
532 dirbehave = DIR_RECURSE;
539 binbehave = BINFILE_BIN;
543 /* noop, compatibility */
546 printf(getstr(9), __progname, VERSION);
558 filebehave = FILE_GZIP;
561 if (strcasecmp("binary", optarg) == 0)
562 binbehave = BINFILE_BIN;
563 else if (strcasecmp("without-match", optarg) == 0)
564 binbehave = BINFILE_SKIP;
565 else if (strcasecmp("text", optarg) == 0)
566 binbehave = BINFILE_TEXT;
568 errx(2, getstr(3), "--binary-files");
572 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
573 strcasecmp("tty", optarg) == 0 ||
574 strcasecmp("if-tty", optarg) == 0) {
577 term = getenv("TERM");
578 if (isatty(STDOUT_FILENO) && term != NULL &&
579 strcasecmp(term, "dumb") != 0)
580 color = init_color("01;31");
581 } else if (strcasecmp("always", optarg) == 0 ||
582 strcasecmp("yes", optarg) == 0 ||
583 strcasecmp("force", optarg) == 0) {
584 color = init_color("01;31");
585 } else if (strcasecmp("never", optarg) != 0 &&
586 strcasecmp("none", optarg) != 0 &&
587 strcasecmp("no", optarg) != 0)
588 errx(2, getstr(3), "--color");
601 add_fpattern(optarg, INCL_PAT);
605 add_fpattern(optarg, EXCL_PAT);
609 add_dpattern(optarg, INCL_PAT);
613 add_dpattern(optarg, EXCL_PAT);
620 newarg = optind != prevoptind;
626 /* Fail if we don't have any pattern */
627 if (aargc == 0 && needpattern)
630 /* Process patterns from command line */
631 if (aargc != 0 && needpattern) {
632 add_pattern(*aargv, strlen(*aargv));
637 switch (grepbehave) {
642 cflags |= REG_EXTENDED;
649 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
650 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
652 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
653 * Optimizations should be done there.
655 /* Check if cheating is allowed (always is for fgrep). */
656 if (grepbehave == GREP_FIXED) {
657 for (i = 0; i < patterns; ++i)
658 fgrepcomp(&fg_pattern[i], pattern[i]);
660 for (i = 0; i < patterns; ++i) {
661 if (fastcomp(&fg_pattern[i], pattern[i])) {
662 /* Fall back to full regex library */
663 c = regcomp(&r_pattern[i], pattern[i], cflags);
665 regerror(c, &r_pattern[i], re_error,
667 errx(2, "%s", re_error);
676 if ((aargc == 0 || aargc == 1) && !Hflag)
680 exit(!procfile("-"));
682 if (dirbehave == DIR_RECURSE)
683 c = grep_tree(aargv);
685 for (c = 0; aargc--; ++aargv) {
686 if ((finclude || fexclude) && !file_matching(*aargv))
688 c+= procfile(*aargv);
695 /* Find out the correct return value according to the
696 results and the command line option. */
697 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));