1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
38 #include <sys/types.h>
56 const char *errstr[] = {
58 /* 1*/ "(standard input)",
59 /* 2*/ "unknown %s option",
60 /* 3*/ "usage: %s [-abcDEFGHhIiLlmnOoPqRSsUVvwxz] [-A num] [-B num] [-C[num]]\n",
61 /* 4*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
62 /* 5*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
63 /* 6*/ "\t[--null] [pattern] [file ...]\n",
64 /* 7*/ "Binary file %s matches\n",
65 /* 8*/ "%s (BSD grep, GNU compatible) %s\n",
68 /* Flags passed to regcomp() and regexec() */
69 int cflags = REG_NOSUB | REG_NEWLINE;
70 int eflags = REG_STARTEND;
72 /* XXX TODO: Get rid of this flag.
73 * matchall is a gross hack that means that an empty pattern was passed to us.
74 * It is a necessary evil at the moment because our regex(3) implementation
75 * does not allow for empty patterns, as supported by POSIX's definition of
76 * grammar for BREs/EREs. When libregex becomes available, it would be wise
77 * to remove this and let regex(3) handle the dirty details of empty patterns.
81 /* Searching patterns */
82 unsigned int patterns;
83 static unsigned int pattern_sz;
87 /* Filename exclusion/inclusion patterns */
88 unsigned int fpatterns, dpatterns;
89 static unsigned int fpattern_sz, dpattern_sz;
90 struct epat *dpattern, *fpattern;
92 /* For regex errors */
93 char re_error[RE_ERROR_BUF + 1];
95 /* Command-line flags */
96 long long Aflag; /* -A x: print x lines trailing each match */
97 long long Bflag; /* -B x: print x lines leading each match */
98 bool Hflag; /* -H: always print file name */
99 bool Lflag; /* -L: only show names of files with no matches */
100 bool bflag; /* -b: show byte offsets for each match */
101 bool cflag; /* -c: only show a count of matching lines */
102 bool hflag; /* -h: don't print filename headers */
103 bool iflag; /* -i: ignore case */
104 bool lflag; /* -l: only show names of files with matches */
105 bool mflag; /* -m x: stop reading the files after x matches */
106 long long mcount; /* count for -m */
107 long long mlimit; /* requested value for -m */
108 char fileeol; /* indicator for eol */
109 bool nflag; /* -n: show line numbers in front of matching lines */
110 bool oflag; /* -o: print only matching part */
111 bool qflag; /* -q: quiet mode (don't output anything) */
112 bool sflag; /* -s: silent mode (ignore errors) */
113 bool vflag; /* -v: only show non-matching lines */
114 bool wflag; /* -w: pattern must start and end on word boundaries */
115 bool xflag; /* -x: pattern must match entire line */
116 bool lbflag; /* --line-buffered */
117 bool nullflag; /* --null */
118 char *label; /* --label */
119 const char *color; /* --color */
120 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
121 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
122 int filebehave = FILE_STDIO;
123 int devbehave = DEV_READ; /* -D: handling of devices */
124 int dirbehave = DIR_READ; /* -dRr: handling of directories */
125 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
127 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
128 bool fexclude, finclude; /* --exclude and --include */
131 BIN_OPT = CHAR_MAX + 1,
144 static inline const char *init_color(const char *);
147 bool file_err; /* file reading error */
150 * Prints usage information and returns 2.
155 fprintf(stderr, errstr[3], getprogname());
156 fprintf(stderr, "%s", errstr[4]);
157 fprintf(stderr, "%s", errstr[5]);
158 fprintf(stderr, "%s", errstr[6]);
162 static const char *optstr = "0123456789A:B:C:D:EFGHILOPSRUVabcd:e:f:hilm:nopqrsuvwxyz";
164 static const struct option long_options[] =
166 {"binary-files", required_argument, NULL, BIN_OPT},
167 {"help", no_argument, NULL, HELP_OPT},
168 {"mmap", no_argument, NULL, MMAP_OPT},
169 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
170 {"label", required_argument, NULL, LABEL_OPT},
171 {"null", no_argument, NULL, NULL_OPT},
172 {"color", optional_argument, NULL, COLOR_OPT},
173 {"colour", optional_argument, NULL, COLOR_OPT},
174 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
175 {"include", required_argument, NULL, R_INCLUDE_OPT},
176 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
177 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
178 {"after-context", required_argument, NULL, 'A'},
179 {"text", no_argument, NULL, 'a'},
180 {"before-context", required_argument, NULL, 'B'},
181 {"byte-offset", no_argument, NULL, 'b'},
182 {"context", optional_argument, NULL, 'C'},
183 {"count", no_argument, NULL, 'c'},
184 {"devices", required_argument, NULL, 'D'},
185 {"directories", required_argument, NULL, 'd'},
186 {"extended-regexp", no_argument, NULL, 'E'},
187 {"regexp", required_argument, NULL, 'e'},
188 {"fixed-strings", no_argument, NULL, 'F'},
189 {"file", required_argument, NULL, 'f'},
190 {"basic-regexp", no_argument, NULL, 'G'},
191 {"no-filename", no_argument, NULL, 'h'},
192 {"with-filename", no_argument, NULL, 'H'},
193 {"ignore-case", no_argument, NULL, 'i'},
194 {"files-with-matches", no_argument, NULL, 'l'},
195 {"files-without-match", no_argument, NULL, 'L'},
196 {"max-count", required_argument, NULL, 'm'},
197 {"line-number", no_argument, NULL, 'n'},
198 {"only-matching", no_argument, NULL, 'o'},
199 {"quiet", no_argument, NULL, 'q'},
200 {"silent", no_argument, NULL, 'q'},
201 {"recursive", no_argument, NULL, 'r'},
202 {"no-messages", no_argument, NULL, 's'},
203 {"binary", no_argument, NULL, 'U'},
204 {"unix-byte-offsets", no_argument, NULL, 'u'},
205 {"invert-match", no_argument, NULL, 'v'},
206 {"version", no_argument, NULL, 'V'},
207 {"word-regexp", no_argument, NULL, 'w'},
208 {"line-regexp", no_argument, NULL, 'x'},
209 {"null-data", no_argument, NULL, 'z'},
210 {NULL, no_argument, NULL, 0}
214 * Adds a searching pattern to the internal array.
217 add_pattern(char *pat, size_t len)
220 /* Check if we can do a shortcut */
225 /* Increase size if necessary */
226 if (patterns == pattern_sz) {
228 pattern = grep_realloc(pattern, ++pattern_sz *
231 if (len > 0 && pat[len - 1] == '\n')
233 /* pat may not be NUL-terminated */
234 pattern[patterns].pat = grep_malloc(len + 1);
235 memcpy(pattern[patterns].pat, pat, len);
236 pattern[patterns].len = len;
237 pattern[patterns].pat[len] = '\0';
242 * Adds a file include/exclude pattern to the internal array.
245 add_fpattern(const char *pat, int mode)
248 /* Increase size if necessary */
249 if (fpatterns == fpattern_sz) {
251 fpattern = grep_realloc(fpattern, ++fpattern_sz *
252 sizeof(struct epat));
254 fpattern[fpatterns].pat = grep_strdup(pat);
255 fpattern[fpatterns].mode = mode;
260 * Adds a directory include/exclude pattern to the internal array.
263 add_dpattern(const char *pat, int mode)
266 /* Increase size if necessary */
267 if (dpatterns == dpattern_sz) {
269 dpattern = grep_realloc(dpattern, ++dpattern_sz *
270 sizeof(struct epat));
272 dpattern[dpatterns].pat = grep_strdup(pat);
273 dpattern[dpatterns].mode = mode;
278 * Reads searching patterns from a file and adds them with add_pattern().
281 read_patterns(const char *fn)
289 if (strcmp(fn, "-") == 0)
291 else if ((f = fopen(fn, "r")) == NULL)
293 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
299 while ((rlen = getline(&line, &len, f)) != -1) {
302 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
308 if (strcmp(fn, "-") != 0)
312 static inline const char *
313 init_color(const char *d)
317 c = getenv("GREP_COLOR");
318 return (c != NULL && c[0] != '\0' ? c : d);
322 main(int argc, char *argv[])
324 char **aargv, **eargv, *eopts;
328 unsigned int aargc, eargc, i;
329 int c, lastc, needpattern, newarg, prevoptind;
332 setlocale(LC_ALL, "");
335 * Check how we've been invoked to determine the behavior we should
336 * exhibit. In this way we can have all the functionalities in one
337 * binary without the need of scripting and using ugly hacks.
342 grepbehave = GREP_EXTENDED;
345 grepbehave = GREP_FIXED;
348 dirbehave = DIR_RECURSE;
359 eopts = getenv("GREP_OPTIONS");
361 /* support for extra arguments in GREP_OPTIONS */
363 if (eopts != NULL && eopts[0] != '\0') {
366 /* make an estimation of how many extra arguments we have */
367 for (unsigned int j = 0; j < strlen(eopts); j++)
371 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
374 /* parse extra arguments */
375 while ((str = strsep(&eopts, " ")) != NULL)
377 eargv[eargc++] = grep_strdup(str);
379 aargv = (char **)grep_calloc(eargc + argc + 1,
383 for (i = 0; i < eargc; i++)
384 aargv[i + 1] = eargv[i];
385 for (int j = 1; j < argc; j++, i++)
386 aargv[i + 1] = argv[j];
388 aargc = eargc + argc;
394 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
397 case '0': case '1': case '2': case '3': case '4':
398 case '5': case '6': case '7': case '8': case '9':
399 if (newarg || !isdigit(lastc))
401 else if (Aflag > LLONG_MAX / 10 - 1) {
406 Aflag = Bflag = (Aflag * 10) + (c - '0');
409 if (optarg == NULL) {
418 l = strtoll(optarg, &ep, 10);
419 if (errno == ERANGE || errno == EINVAL)
421 else if (ep[0] != '\0') {
426 err(2, "context argument must be non-negative");
437 binbehave = BINFILE_TEXT;
446 if (strcasecmp(optarg, "skip") == 0)
447 devbehave = DEV_SKIP;
448 else if (strcasecmp(optarg, "read") == 0)
449 devbehave = DEV_READ;
451 errx(2, errstr[2], "--devices");
454 if (strcasecmp("recurse", optarg) == 0) {
456 dirbehave = DIR_RECURSE;
457 } else if (strcasecmp("skip", optarg) == 0)
458 dirbehave = DIR_SKIP;
459 else if (strcasecmp("read", optarg) == 0)
460 dirbehave = DIR_READ;
462 errx(2, errstr[2], "--directories");
465 grepbehave = GREP_EXTENDED;
470 char *string = optarg;
472 while ((token = strsep(&string, "\n")) != NULL)
473 add_pattern(token, strlen(token));
478 grepbehave = GREP_FIXED;
481 read_patterns(optarg);
485 grepbehave = GREP_BASIC;
495 binbehave = BINFILE_SKIP;
513 mlimit = mcount = strtoll(optarg, &ep, 10);
514 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
515 ((errno == EINVAL) && (mcount == 0)))
517 else if (ep[0] != '\0') {
526 linkbehave = LINK_EXPLICIT;
530 cflags &= ~REG_NOSUB;
533 linkbehave = LINK_SKIP;
539 linkbehave = LINK_READ;
543 dirbehave = DIR_RECURSE;
550 binbehave = BINFILE_BIN;
554 filebehave = FILE_MMAP;
557 printf(errstr[8], getprogname(), VERSION);
564 cflags &= ~REG_NOSUB;
568 cflags &= ~REG_NOSUB;
574 if (strcasecmp("binary", optarg) == 0)
575 binbehave = BINFILE_BIN;
576 else if (strcasecmp("without-match", optarg) == 0)
577 binbehave = BINFILE_SKIP;
578 else if (strcasecmp("text", optarg) == 0)
579 binbehave = BINFILE_TEXT;
581 errx(2, errstr[2], "--binary-files");
585 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
586 strcasecmp("tty", optarg) == 0 ||
587 strcasecmp("if-tty", optarg) == 0) {
590 term = getenv("TERM");
591 if (isatty(STDOUT_FILENO) && term != NULL &&
592 strcasecmp(term, "dumb") != 0)
593 color = init_color("01;31");
594 } else if (strcasecmp("always", optarg) == 0 ||
595 strcasecmp("yes", optarg) == 0 ||
596 strcasecmp("force", optarg) == 0) {
597 color = init_color("01;31");
598 } else if (strcasecmp("never", optarg) != 0 &&
599 strcasecmp("none", optarg) != 0 &&
600 strcasecmp("no", optarg) != 0)
601 errx(2, errstr[2], "--color");
602 cflags &= ~REG_NOSUB;
615 add_fpattern(optarg, INCL_PAT);
619 add_fpattern(optarg, EXCL_PAT);
623 add_dpattern(optarg, INCL_PAT);
627 add_dpattern(optarg, EXCL_PAT);
634 newarg = optind != prevoptind;
640 /* Empty pattern file matches nothing */
641 if (!needpattern && (patterns == 0) && !matchall)
644 /* Fail if we don't have any pattern */
645 if (aargc == 0 && needpattern)
648 /* Process patterns from command line */
649 if (aargc != 0 && needpattern) {
651 char *string = *aargv;
653 while ((token = strsep(&string, "\n")) != NULL)
654 add_pattern(token, strlen(token));
659 switch (grepbehave) {
664 * regex(3) implementations that support fixed-string searches generally
665 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
666 * here. If neither is defined, GREP_FIXED later implies that the
667 * internal literal matcher should be used. Other cflags that have
668 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
669 * similarly added here, and grep.h should be amended to take this into
670 * consideration when defining WITH_INTERNAL_NOSPEC.
672 #if defined(REG_NOSPEC)
673 cflags |= REG_NOSPEC;
674 #elif defined(REG_LITERAL)
675 cflags |= REG_LITERAL;
679 cflags |= REG_EXTENDED;
686 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
688 #ifdef WITH_INTERNAL_NOSPEC
689 if (grepbehave != GREP_FIXED) {
693 /* Check if cheating is allowed (always is for fgrep). */
694 for (i = 0; i < patterns; ++i) {
695 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
697 regerror(c, &r_pattern[i], re_error,
699 errx(2, "%s", re_error);
707 if ((aargc == 0 || aargc == 1) && !Hflag)
712 if (aargc == 0 && dirbehave != DIR_RECURSE)
713 exit(!procfile("-"));
715 if (dirbehave == DIR_RECURSE)
716 matched = grep_tree(aargv);
718 for (matched = false; aargc--; ++aargv) {
719 if ((finclude || fexclude) && !file_matching(*aargv))
721 if (procfile(*aargv))
729 * Calculate the correct return value according to the
730 * results and the command line option.
732 exit(matched ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));