1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
38 #include <sys/types.h>
56 const char *errstr[] = {
58 /* 1*/ "(standard input)",
59 /* 2*/ "unknown %s option",
60 /* 3*/ "usage: %s [-abcDEFGHhIiLlmnOoPqRSsUVvwxz] [-A num] [-B num] [-C[num]]\n",
61 /* 4*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
62 /* 5*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
63 /* 6*/ "\t[--null] [pattern] [file ...]\n",
64 /* 7*/ "Binary file %s matches\n",
65 /* 8*/ "%s (BSD grep) %s\n",
66 /* 9*/ "%s (BSD grep, GNU compatible) %s\n",
69 /* Flags passed to regcomp() and regexec() */
70 int cflags = REG_NOSUB | REG_NEWLINE;
71 int eflags = REG_STARTEND;
73 /* XXX TODO: Get rid of this flag.
74 * matchall is a gross hack that means that an empty pattern was passed to us.
75 * It is a necessary evil at the moment because our regex(3) implementation
76 * does not allow for empty patterns, as supported by POSIX's definition of
77 * grammar for BREs/EREs. When libregex becomes available, it would be wise
78 * to remove this and let regex(3) handle the dirty details of empty patterns.
82 /* Searching patterns */
83 unsigned int patterns;
84 static unsigned int pattern_sz;
88 /* Filename exclusion/inclusion patterns */
89 unsigned int fpatterns, dpatterns;
90 static unsigned int fpattern_sz, dpattern_sz;
91 struct epat *dpattern, *fpattern;
93 /* For regex errors */
94 char re_error[RE_ERROR_BUF + 1];
96 /* Command-line flags and the behaviour selectors set from them */
97 long long Aflag; /* -A x: print x lines trailing each match */
98 long long Bflag; /* -B x: print x lines leading each match */
99 bool Hflag; /* -H: always print file name */
100 bool Lflag; /* -L: only show names of files with no matches */
101 bool bflag; /* -b: show byte offset for each match (long option --byte-offset) */
102 bool cflag; /* -c: only show a count of matching lines */
103 bool hflag; /* -h: don't print filename headers */
104 bool iflag; /* -i: ignore case */
105 bool lflag; /* -l: only show names of files with matches */
106 bool mflag; /* -m x: stop reading the files after x matches */
107 long long mcount; /* count for -m */
108 long long mlimit; /* requested value for -m */
109 char fileeol; /* indicator for eol */
110 bool nflag; /* -n: show line numbers in front of matching lines */
111 bool oflag; /* -o: print only matching part */
112 bool qflag; /* -q: quiet mode (don't output anything) */
113 bool sflag; /* -s: silent mode (ignore errors) */
114 bool vflag; /* -v: only show non-matching lines */
115 bool wflag; /* -w: pattern must start and end on word boundaries */
116 bool xflag; /* -x: pattern must match entire line */
117 bool lbflag; /* --line-buffered */
118 bool nullflag; /* --null */
119 char *label; /* --label */
120 const char *color; /* --color */
121 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
122 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
123 int filebehave = FILE_STDIO; /* how input files are read; main() may switch this to FILE_MMAP */
124 int devbehave = DEV_READ; /* -D: handling of devices */
125 int dirbehave = DIR_READ; /* -dRr: handling of directories */
126 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
128 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
129 bool fexclude, finclude; /* --exclude and --include */
132 BIN_OPT = CHAR_MAX + 1,
145 static inline const char *init_color(const char *);
148 bool file_err; /* file reading error */
151 * Prints usage information and returns 2.
156 fprintf(stderr, errstr[3], getprogname());
157 fprintf(stderr, "%s", errstr[4]);
158 fprintf(stderr, "%s", errstr[5]);
159 fprintf(stderr, "%s", errstr[6]);
163 static const char *optstr = "0123456789A:B:C:D:EFGHILOPSRUVabcd:e:f:hilm:nopqrsuvwxyz";
165 static const struct option long_options[] =
167 {"binary-files", required_argument, NULL, BIN_OPT},
168 {"help", no_argument, NULL, HELP_OPT},
169 {"mmap", no_argument, NULL, MMAP_OPT},
170 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
171 {"label", required_argument, NULL, LABEL_OPT},
172 {"null", no_argument, NULL, NULL_OPT},
173 {"color", optional_argument, NULL, COLOR_OPT},
174 {"colour", optional_argument, NULL, COLOR_OPT},
175 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
176 {"include", required_argument, NULL, R_INCLUDE_OPT},
177 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
178 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
179 {"after-context", required_argument, NULL, 'A'},
180 {"text", no_argument, NULL, 'a'},
181 {"before-context", required_argument, NULL, 'B'},
182 {"byte-offset", no_argument, NULL, 'b'},
183 {"context", optional_argument, NULL, 'C'},
184 {"count", no_argument, NULL, 'c'},
185 {"devices", required_argument, NULL, 'D'},
186 {"directories", required_argument, NULL, 'd'},
187 {"extended-regexp", no_argument, NULL, 'E'},
188 {"regexp", required_argument, NULL, 'e'},
189 {"fixed-strings", no_argument, NULL, 'F'},
190 {"file", required_argument, NULL, 'f'},
191 {"basic-regexp", no_argument, NULL, 'G'},
192 {"no-filename", no_argument, NULL, 'h'},
193 {"with-filename", no_argument, NULL, 'H'},
194 {"ignore-case", no_argument, NULL, 'i'},
195 {"files-with-matches", no_argument, NULL, 'l'},
196 {"files-without-match", no_argument, NULL, 'L'},
197 {"max-count", required_argument, NULL, 'm'},
198 {"line-number", no_argument, NULL, 'n'},
199 {"only-matching", no_argument, NULL, 'o'},
200 {"quiet", no_argument, NULL, 'q'},
201 {"silent", no_argument, NULL, 'q'},
202 {"recursive", no_argument, NULL, 'r'},
203 {"no-messages", no_argument, NULL, 's'},
204 {"binary", no_argument, NULL, 'U'},
205 {"unix-byte-offsets", no_argument, NULL, 'u'},
206 {"invert-match", no_argument, NULL, 'v'},
207 {"version", no_argument, NULL, 'V'},
208 {"word-regexp", no_argument, NULL, 'w'},
209 {"line-regexp", no_argument, NULL, 'x'},
210 {"null-data", no_argument, NULL, 'z'},
211 {NULL, no_argument, NULL, 0}
215 * Adds a searching pattern to the internal array.
218 add_pattern(char *pat, size_t len)
221 /* Check if we can do a shortcut */
226 /* Increase size if necessary */
227 if (patterns == pattern_sz) {
229 pattern = grep_realloc(pattern, ++pattern_sz *
232 if (len > 0 && pat[len - 1] == '\n')
234 /* pat may not be NUL-terminated */
235 pattern[patterns].pat = grep_malloc(len + 1);
236 memcpy(pattern[patterns].pat, pat, len);
237 pattern[patterns].len = len;
238 pattern[patterns].pat[len] = '\0';
243 * Adds a file include/exclude pattern to the internal array.
246 add_fpattern(const char *pat, int mode)
249 /* Increase size if necessary */
250 if (fpatterns == fpattern_sz) {
252 fpattern = grep_realloc(fpattern, ++fpattern_sz *
253 sizeof(struct epat));
255 fpattern[fpatterns].pat = grep_strdup(pat);
256 fpattern[fpatterns].mode = mode;
261 * Adds a directory include/exclude pattern to the internal array.
264 add_dpattern(const char *pat, int mode)
267 /* Increase size if necessary */
268 if (dpatterns == dpattern_sz) {
270 dpattern = grep_realloc(dpattern, ++dpattern_sz *
271 sizeof(struct epat));
273 dpattern[dpatterns].pat = grep_strdup(pat);
274 dpattern[dpatterns].mode = mode;
279 * Reads searching patterns from a file and adds them with add_pattern().
282 read_patterns(const char *fn)
290 if (strcmp(fn, "-") == 0)
292 else if ((f = fopen(fn, "r")) == NULL)
294 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
300 while ((rlen = getline(&line, &len, f)) != -1) {
303 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
309 if (strcmp(fn, "-") != 0)
313 static inline const char *
314 init_color(const char *d)
318 c = getenv("GREP_COLOR");
319 return (c != NULL && c[0] != '\0' ? c : d);
323 main(int argc, char *argv[])
325 char **aargv, **eargv, *eopts;
329 unsigned int aargc, eargc, i;
330 int c, lastc, needpattern, newarg, prevoptind;
333 setlocale(LC_ALL, "");
336 * Check how we've been invoked to determine the behavior we should
337 * exhibit. In this way we can have all the functionalities in one
338 * binary without the need of scripting and using ugly hacks.
343 grepbehave = GREP_EXTENDED;
346 grepbehave = GREP_FIXED;
349 dirbehave = DIR_RECURSE;
360 eopts = getenv("GREP_OPTIONS");
362 /* support for extra arguments in GREP_OPTIONS */
364 if (eopts != NULL && eopts[0] != '\0') {
367 /* make an estimation of how many extra arguments we have */
368 for (unsigned int j = 0; j < strlen(eopts); j++)
372 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
375 /* parse extra arguments */
376 while ((str = strsep(&eopts, " ")) != NULL)
378 eargv[eargc++] = grep_strdup(str);
380 aargv = (char **)grep_calloc(eargc + argc + 1,
384 for (i = 0; i < eargc; i++)
385 aargv[i + 1] = eargv[i];
386 for (int j = 1; j < argc; j++, i++)
387 aargv[i + 1] = argv[j];
389 aargc = eargc + argc;
395 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
398 case '0': case '1': case '2': case '3': case '4':
399 case '5': case '6': case '7': case '8': case '9':
400 if (newarg || !isdigit(lastc))
402 else if (Aflag > LLONG_MAX / 10 - 1) {
407 Aflag = Bflag = (Aflag * 10) + (c - '0');
410 if (optarg == NULL) {
419 l = strtoll(optarg, &ep, 10);
420 if (errno == ERANGE || errno == EINVAL)
422 else if (ep[0] != '\0') {
427 err(2, "context argument must be non-negative");
438 binbehave = BINFILE_TEXT;
447 if (strcasecmp(optarg, "skip") == 0)
448 devbehave = DEV_SKIP;
449 else if (strcasecmp(optarg, "read") == 0)
450 devbehave = DEV_READ;
452 errx(2, errstr[2], "--devices");
455 if (strcasecmp("recurse", optarg) == 0) {
457 dirbehave = DIR_RECURSE;
458 } else if (strcasecmp("skip", optarg) == 0)
459 dirbehave = DIR_SKIP;
460 else if (strcasecmp("read", optarg) == 0)
461 dirbehave = DIR_READ;
463 errx(2, errstr[2], "--directories");
466 grepbehave = GREP_EXTENDED;
471 char *string = optarg;
473 while ((token = strsep(&string, "\n")) != NULL)
474 add_pattern(token, strlen(token));
479 grepbehave = GREP_FIXED;
482 read_patterns(optarg);
486 grepbehave = GREP_BASIC;
496 binbehave = BINFILE_SKIP;
514 mlimit = mcount = strtoll(optarg, &ep, 10);
515 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
516 ((errno == EINVAL) && (mcount == 0)))
518 else if (ep[0] != '\0') {
527 linkbehave = LINK_EXPLICIT;
531 cflags &= ~REG_NOSUB;
534 linkbehave = LINK_SKIP;
540 linkbehave = LINK_READ;
544 dirbehave = DIR_RECURSE;
551 binbehave = BINFILE_BIN;
555 filebehave = FILE_MMAP;
558 #ifdef WITH_GNU_COMPAT
559 printf(errstr[9], getprogname(), VERSION);
561 printf(errstr[8], getprogname(), VERSION);
569 cflags &= ~REG_NOSUB;
573 cflags &= ~REG_NOSUB;
579 if (strcasecmp("binary", optarg) == 0)
580 binbehave = BINFILE_BIN;
581 else if (strcasecmp("without-match", optarg) == 0)
582 binbehave = BINFILE_SKIP;
583 else if (strcasecmp("text", optarg) == 0)
584 binbehave = BINFILE_TEXT;
586 errx(2, errstr[2], "--binary-files");
590 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
591 strcasecmp("tty", optarg) == 0 ||
592 strcasecmp("if-tty", optarg) == 0) {
595 term = getenv("TERM");
596 if (isatty(STDOUT_FILENO) && term != NULL &&
597 strcasecmp(term, "dumb") != 0)
598 color = init_color("01;31");
599 } else if (strcasecmp("always", optarg) == 0 ||
600 strcasecmp("yes", optarg) == 0 ||
601 strcasecmp("force", optarg) == 0) {
602 color = init_color("01;31");
603 } else if (strcasecmp("never", optarg) != 0 &&
604 strcasecmp("none", optarg) != 0 &&
605 strcasecmp("no", optarg) != 0)
606 errx(2, errstr[2], "--color");
607 cflags &= ~REG_NOSUB;
620 add_fpattern(optarg, INCL_PAT);
624 add_fpattern(optarg, EXCL_PAT);
628 add_dpattern(optarg, INCL_PAT);
632 add_dpattern(optarg, EXCL_PAT);
639 newarg = optind != prevoptind;
645 /* Empty pattern file matches nothing */
646 if (!needpattern && (patterns == 0) && !matchall)
649 /* Fail if we don't have any pattern */
650 if (aargc == 0 && needpattern)
653 /* Process patterns from command line */
654 if (aargc != 0 && needpattern) {
656 char *string = *aargv;
658 while ((token = strsep(&string, "\n")) != NULL)
659 add_pattern(token, strlen(token));
664 switch (grepbehave) {
669 * regex(3) implementations that support fixed-string searches generally
670 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
671 * here. If neither is defined, GREP_FIXED later implies that the
672 * internal literal matcher should be used. Other cflags that have
673 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
674 * similarly added here, and grep.h should be amended to take this into
675 * consideration when defining WITH_INTERNAL_NOSPEC.
677 #if defined(REG_NOSPEC)
678 cflags |= REG_NOSPEC;
679 #elif defined(REG_LITERAL)
680 cflags |= REG_LITERAL;
684 cflags |= REG_EXTENDED;
691 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
693 #ifdef WITH_INTERNAL_NOSPEC
694 if (grepbehave != GREP_FIXED) {
698 /* Check if cheating is allowed (always is for fgrep). */
699 for (i = 0; i < patterns; ++i) {
700 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
702 regerror(c, &r_pattern[i], re_error,
704 errx(2, "%s", re_error);
712 if ((aargc == 0 || aargc == 1) && !Hflag)
715 if (aargc == 0 && dirbehave != DIR_RECURSE)
716 exit(!procfile("-"));
718 if (dirbehave == DIR_RECURSE)
719 matched = grep_tree(aargv);
721 for (matched = false; aargc--; ++aargv) {
722 if ((finclude || fexclude) && !file_matching(*aargv))
724 if (procfile(*aargv))
732 * Calculate the correct return value according to the
733 * results and the command line option.
735 exit(matched ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));