1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
38 #include <sys/types.h>
56 const char *errstr[] = {
58 /* 1*/ "(standard input)",
59 /* 2*/ "unknown %s option",
60 /* 3*/ "usage: %s [-abcDEFGHhIiLlmnOoPqRSsUVvwxz] [-A num] [-B num] [-C[num]]\n",
61 /* 4*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
62 /* 5*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
63 /* 6*/ "\t[--null] [pattern] [file ...]\n",
64 /* 7*/ "Binary file %s matches\n",
65 /* 8*/ "%s (BSD grep) %s\n",
66 /* 9*/ "%s (BSD grep, GNU compatible) %s\n",
69 /* Flags passed to regcomp() and regexec() */
70 int cflags = REG_NOSUB | REG_NEWLINE;
71 int eflags = REG_STARTEND;
73 /* XXX TODO: Get rid of this flag.
74 * matchall is a gross hack that means that an empty pattern was passed to us.
75 * It is a necessary evil at the moment because our regex(3) implementation
76 * does not allow for empty patterns, as supported by POSIX's definition of
77 * grammar for BREs/EREs. When libregex becomes available, it would be wise
78 * to remove this and let regex(3) handle the dirty details of empty patterns.
82 /* Searching patterns */
83 unsigned int patterns;
84 static unsigned int pattern_sz;
88 /* Filename exclusion/inclusion patterns */
89 unsigned int fpatterns, dpatterns;
90 static unsigned int fpattern_sz, dpattern_sz;
91 struct epat *dpattern, *fpattern;
93 /* For regex errors */
94 char re_error[RE_ERROR_BUF + 1];
96 /* Command-line flags */
97 long long Aflag; /* -A x: print x lines trailing each match */
98 long long Bflag; /* -B x: print x lines leading each match */
99 bool Hflag; /* -H: always print file name */
100 bool Lflag; /* -L: only show names of files with no matches */
101 bool bflag; /* -b: show byte offset for each match (--byte-offset) */
102 bool cflag; /* -c: only show a count of matching lines */
103 bool hflag; /* -h: don't print filename headers */
104 bool iflag; /* -i: ignore case */
105 bool lflag; /* -l: only show names of files with matches */
106 bool mflag; /* -m x: stop reading the files after x matches */
107 long long mcount; /* count for -m */
108 long long mlimit; /* requested value for -m */
109 char fileeol; /* indicator for eol */
110 bool nflag; /* -n: show line numbers in front of matching lines */
111 bool oflag; /* -o: print only matching part */
112 bool qflag; /* -q: quiet mode (don't output anything) */
113 bool sflag; /* -s: silent mode (ignore errors) */
114 bool vflag; /* -v: only show non-matching lines */
115 bool wflag; /* -w: pattern must start and end on word boundaries */
116 bool xflag; /* -x: pattern must match entire line */
117 bool lbflag; /* --line-buffered */
118 bool nullflag; /* --null */
119 char *label; /* --label */
120 const char *color; /* --color */
121 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
122 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
123 int filebehave = FILE_STDIO;
124 int devbehave = DEV_READ; /* -D: handling of devices */
125 int dirbehave = DIR_READ; /* -dRr: handling of directories */
126 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
128 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
129 bool fexclude, finclude; /* --exclude and --include */
132 BIN_OPT = CHAR_MAX + 1,
145 static inline const char *init_color(const char *);
148 bool file_err; /* file reading error */
151 * Prints usage information and returns 2.
156 fprintf(stderr, errstr[3], getprogname());
157 fprintf(stderr, "%s", errstr[4]);
158 fprintf(stderr, "%s", errstr[5]);
159 fprintf(stderr, "%s", errstr[6]);
163 static const char *optstr = "0123456789A:B:C:D:EFGHILOPSRUVabcd:e:f:hilm:nopqrsuvwxyz";
165 static const struct option long_options[] =
167 {"binary-files", required_argument, NULL, BIN_OPT},
168 {"help", no_argument, NULL, HELP_OPT},
169 {"mmap", no_argument, NULL, MMAP_OPT},
170 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
171 {"label", required_argument, NULL, LABEL_OPT},
172 {"null", no_argument, NULL, NULL_OPT},
173 {"color", optional_argument, NULL, COLOR_OPT},
174 {"colour", optional_argument, NULL, COLOR_OPT},
175 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
176 {"include", required_argument, NULL, R_INCLUDE_OPT},
177 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
178 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
179 {"after-context", required_argument, NULL, 'A'},
180 {"text", no_argument, NULL, 'a'},
181 {"before-context", required_argument, NULL, 'B'},
182 {"byte-offset", no_argument, NULL, 'b'},
183 {"context", optional_argument, NULL, 'C'},
184 {"count", no_argument, NULL, 'c'},
185 {"devices", required_argument, NULL, 'D'},
186 {"directories", required_argument, NULL, 'd'},
187 {"extended-regexp", no_argument, NULL, 'E'},
188 {"regexp", required_argument, NULL, 'e'},
189 {"fixed-strings", no_argument, NULL, 'F'},
190 {"file", required_argument, NULL, 'f'},
191 {"basic-regexp", no_argument, NULL, 'G'},
192 {"no-filename", no_argument, NULL, 'h'},
193 {"with-filename", no_argument, NULL, 'H'},
194 {"ignore-case", no_argument, NULL, 'i'},
195 {"files-with-matches", no_argument, NULL, 'l'},
196 {"files-without-match", no_argument, NULL, 'L'},
197 {"max-count", required_argument, NULL, 'm'},
198 {"line-number", no_argument, NULL, 'n'},
199 {"only-matching", no_argument, NULL, 'o'},
200 {"quiet", no_argument, NULL, 'q'},
201 {"silent", no_argument, NULL, 'q'},
202 {"recursive", no_argument, NULL, 'r'},
203 {"no-messages", no_argument, NULL, 's'},
204 {"binary", no_argument, NULL, 'U'},
205 {"unix-byte-offsets", no_argument, NULL, 'u'},
206 {"invert-match", no_argument, NULL, 'v'},
207 {"version", no_argument, NULL, 'V'},
208 {"word-regexp", no_argument, NULL, 'w'},
209 {"line-regexp", no_argument, NULL, 'x'},
210 {"null-data", no_argument, NULL, 'z'},
211 {NULL, no_argument, NULL, 0}
215 * Adds a searching pattern to the internal array.
218 add_pattern(char *pat, size_t len)
221 /* Do not add further patterns if we already match everything */
225 /* Check if we can do a shortcut */
228 for (unsigned int i = 0; i < patterns; i++) {
229 free(pattern[i].pat);
231 pattern = grep_realloc(pattern, sizeof(struct pat));
232 pattern[0].pat = NULL;
237 /* Increase size if necessary */
238 if (patterns == pattern_sz) {
240 pattern = grep_realloc(pattern, ++pattern_sz *
243 if (len > 0 && pat[len - 1] == '\n')
245 /* pat may not be NUL-terminated */
246 pattern[patterns].pat = grep_malloc(len + 1);
247 memcpy(pattern[patterns].pat, pat, len);
248 pattern[patterns].len = len;
249 pattern[patterns].pat[len] = '\0';
254 * Adds a file include/exclude pattern to the internal array.
257 add_fpattern(const char *pat, int mode)
260 /* Increase size if necessary */
261 if (fpatterns == fpattern_sz) {
263 fpattern = grep_realloc(fpattern, ++fpattern_sz *
264 sizeof(struct epat));
266 fpattern[fpatterns].pat = grep_strdup(pat);
267 fpattern[fpatterns].mode = mode;
272 * Adds a directory include/exclude pattern to the internal array.
275 add_dpattern(const char *pat, int mode)
278 /* Increase size if necessary */
279 if (dpatterns == dpattern_sz) {
281 dpattern = grep_realloc(dpattern, ++dpattern_sz *
282 sizeof(struct epat));
284 dpattern[dpatterns].pat = grep_strdup(pat);
285 dpattern[dpatterns].mode = mode;
290 * Reads searching patterns from a file and adds them with add_pattern().
293 read_patterns(const char *fn)
301 if (strcmp(fn, "-") == 0)
303 else if ((f = fopen(fn, "r")) == NULL)
305 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
311 while ((rlen = getline(&line, &len, f)) != -1) {
314 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
320 if (strcmp(fn, "-") != 0)
324 static inline const char *
325 init_color(const char *d)
329 c = getenv("GREP_COLOR");
330 return (c != NULL && c[0] != '\0' ? c : d);
334 main(int argc, char *argv[])
336 char **aargv, **eargv, *eopts;
340 unsigned int aargc, eargc, i;
341 int c, lastc, needpattern, newarg, prevoptind;
344 setlocale(LC_ALL, "");
346 /* Check the program name of the binary. This
347 way we can have all the functionalities in one binary
348 without the need for scripting or ugly hacks. */
351 dirbehave = DIR_RECURSE;
356 grepbehave = GREP_EXTENDED;
359 grepbehave = GREP_FIXED;
369 eopts = getenv("GREP_OPTIONS");
371 /* support for extra arguments in GREP_OPTIONS */
373 if (eopts != NULL && eopts[0] != '\0') {
376 /* make an estimation of how many extra arguments we have */
377 for (unsigned int j = 0; j < strlen(eopts); j++)
381 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
384 /* parse extra arguments */
385 while ((str = strsep(&eopts, " ")) != NULL)
387 eargv[eargc++] = grep_strdup(str);
389 aargv = (char **)grep_calloc(eargc + argc + 1,
393 for (i = 0; i < eargc; i++)
394 aargv[i + 1] = eargv[i];
395 for (int j = 1; j < argc; j++, i++)
396 aargv[i + 1] = argv[j];
398 aargc = eargc + argc;
404 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
407 case '0': case '1': case '2': case '3': case '4':
408 case '5': case '6': case '7': case '8': case '9':
409 if (newarg || !isdigit(lastc))
411 else if (Aflag > LLONG_MAX / 10 - 1) {
416 Aflag = Bflag = (Aflag * 10) + (c - '0');
419 if (optarg == NULL) {
428 l = strtoll(optarg, &ep, 10);
429 if (errno == ERANGE || errno == EINVAL)
431 else if (ep[0] != '\0') {
436 err(2, "context argument must be non-negative");
447 binbehave = BINFILE_TEXT;
456 if (strcasecmp(optarg, "skip") == 0)
457 devbehave = DEV_SKIP;
458 else if (strcasecmp(optarg, "read") == 0)
459 devbehave = DEV_READ;
461 errx(2, errstr[2], "--devices");
464 if (strcasecmp("recurse", optarg) == 0) {
466 dirbehave = DIR_RECURSE;
467 } else if (strcasecmp("skip", optarg) == 0)
468 dirbehave = DIR_SKIP;
469 else if (strcasecmp("read", optarg) == 0)
470 dirbehave = DIR_READ;
472 errx(2, errstr[2], "--directories");
475 grepbehave = GREP_EXTENDED;
480 char *string = optarg;
482 while ((token = strsep(&string, "\n")) != NULL)
483 add_pattern(token, strlen(token));
488 grepbehave = GREP_FIXED;
491 read_patterns(optarg);
495 grepbehave = GREP_BASIC;
505 binbehave = BINFILE_SKIP;
523 mlimit = mcount = strtoll(optarg, &ep, 10);
524 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
525 ((errno == EINVAL) && (mcount == 0)))
527 else if (ep[0] != '\0') {
536 linkbehave = LINK_EXPLICIT;
540 cflags &= ~REG_NOSUB;
543 linkbehave = LINK_SKIP;
549 linkbehave = LINK_READ;
553 dirbehave = DIR_RECURSE;
560 binbehave = BINFILE_BIN;
564 filebehave = FILE_MMAP;
568 printf(errstr[9], getprogname(), VERSION);
570 printf(errstr[8], getprogname(), VERSION);
578 cflags &= ~REG_NOSUB;
582 cflags &= ~REG_NOSUB;
588 if (strcasecmp("binary", optarg) == 0)
589 binbehave = BINFILE_BIN;
590 else if (strcasecmp("without-match", optarg) == 0)
591 binbehave = BINFILE_SKIP;
592 else if (strcasecmp("text", optarg) == 0)
593 binbehave = BINFILE_TEXT;
595 errx(2, errstr[2], "--binary-files");
599 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
600 strcasecmp("tty", optarg) == 0 ||
601 strcasecmp("if-tty", optarg) == 0) {
604 term = getenv("TERM");
605 if (isatty(STDOUT_FILENO) && term != NULL &&
606 strcasecmp(term, "dumb") != 0)
607 color = init_color("01;31");
608 } else if (strcasecmp("always", optarg) == 0 ||
609 strcasecmp("yes", optarg) == 0 ||
610 strcasecmp("force", optarg) == 0) {
611 color = init_color("01;31");
612 } else if (strcasecmp("never", optarg) != 0 &&
613 strcasecmp("none", optarg) != 0 &&
614 strcasecmp("no", optarg) != 0)
615 errx(2, errstr[2], "--color");
616 cflags &= ~REG_NOSUB;
629 add_fpattern(optarg, INCL_PAT);
633 add_fpattern(optarg, EXCL_PAT);
637 add_dpattern(optarg, INCL_PAT);
641 add_dpattern(optarg, EXCL_PAT);
648 newarg = optind != prevoptind;
654 /* Empty pattern file matches nothing */
655 if (!needpattern && (patterns == 0))
658 /* Fail if we don't have any pattern */
659 if (aargc == 0 && needpattern)
662 /* Process patterns from command line */
663 if (aargc != 0 && needpattern) {
665 char *string = *aargv;
667 while ((token = strsep(&string, "\n")) != NULL)
668 add_pattern(token, strlen(token));
673 switch (grepbehave) {
678 * regex(3) implementations that support fixed-string searches generally
679 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
680 * here. If neither is defined, GREP_FIXED later implies that the
681 * internal literal matcher should be used. Other cflags that have
682 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
683 * similarly added here, and grep.h should be amended to take this into
684 * consideration when defining WITH_INTERNAL_NOSPEC.
686 #if defined(REG_NOSPEC)
687 cflags |= REG_NOSPEC;
688 #elif defined(REG_LITERAL)
689 cflags |= REG_LITERAL;
693 cflags |= REG_EXTENDED;
700 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
702 /* Don't process any patterns if we have a blank one */
703 #ifdef WITH_INTERNAL_NOSPEC
704 if (!matchall && grepbehave != GREP_FIXED) {
708 /* Check if cheating is allowed (always is for fgrep). */
709 for (i = 0; i < patterns; ++i) {
710 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
712 regerror(c, &r_pattern[i], re_error,
714 errx(2, "%s", re_error);
722 if ((aargc == 0 || aargc == 1) && !Hflag)
725 if (aargc == 0 && dirbehave != DIR_RECURSE)
726 exit(!procfile("-"));
728 if (dirbehave == DIR_RECURSE)
729 matched = grep_tree(aargv);
731 for (matched = false; aargc--; ++aargv) {
732 if ((finclude || fexclude) && !file_matching(*aargv))
734 if (procfile(*aargv))
738 /* Find out the correct return value according to the
739 results and the command line option. */
740 exit(matched ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));