1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
38 #include <sys/types.h>
62 * Default messages to use when NLS is disabled or no catalogue
65 const char *errstr[] = {
67 /* 1*/ "(standard input)",
68 /* 2*/ "unknown %s option",
69 /* 3*/ "usage: %s [-abcDEFGHhIiLlmnOoPqRSsUVvwxz] [-A num] [-B num] [-C[num]]\n",
70 /* 4*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
71 /* 5*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
72 /* 6*/ "\t[--null] [pattern] [file ...]\n",
73 /* 7*/ "Binary file %s matches\n",
74 /* 8*/ "%s (BSD grep) %s\n",
75 /* 9*/ "%s (BSD grep, GNU compatible) %s\n",
78 /* Flags passed to regcomp() and regexec() */
79 int cflags = REG_NOSUB | REG_NEWLINE;
80 int eflags = REG_STARTEND;
82 /* XXX TODO: Get rid of this flag.
83 * matchall is a gross hack that means that an empty pattern was passed to us.
84 * It is a necessary evil at the moment because our regex(3) implementation
85 * does not allow for empty patterns, as supported by POSIX's definition of
86 * grammar for BREs/EREs. When libregex becomes available, it would be wise
87 * to remove this and let regex(3) handle the dirty details of empty patterns.
91 /* Searching patterns */
92 unsigned int patterns;
93 static unsigned int pattern_sz;
97 /* Filename exclusion/inclusion patterns */
98 unsigned int fpatterns, dpatterns;
99 static unsigned int fpattern_sz, dpattern_sz;
100 struct epat *dpattern, *fpattern;
102 /* For regex errors */
103 char re_error[RE_ERROR_BUF + 1];
105 /* Command-line flags */
106 long long Aflag; /* -A x: print x lines trailing each match */
107 long long Bflag; /* -B x: print x lines leading each match */
108 bool Hflag; /* -H: always print file name */
109 bool Lflag; /* -L: only show names of files with no matches */
110 bool bflag; /* -b: show byte offsets for each match */
111 bool cflag; /* -c: only show a count of matching lines */
112 bool hflag; /* -h: don't print filename headers */
113 bool iflag; /* -i: ignore case */
114 bool lflag; /* -l: only show names of files with matches */
115 bool mflag; /* -m x: stop reading the files after x matches */
116 long long mcount; /* count for -m */
117 long long mlimit; /* requested value for -m */
118 char fileeol; /* indicator for eol */
119 bool nflag; /* -n: show line numbers in front of matching lines */
120 bool oflag; /* -o: print only matching part */
121 bool qflag; /* -q: quiet mode (don't output anything) */
122 bool sflag; /* -s: silent mode (ignore errors) */
123 bool vflag; /* -v: only show non-matching lines */
124 bool wflag; /* -w: pattern must start and end on word boundaries */
125 bool xflag; /* -x: pattern must match entire line */
126 bool lbflag; /* --line-buffered */
127 bool nullflag; /* --null */
128 char *label; /* --label */
129 const char *color; /* --color */
130 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
131 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
132 int filebehave = FILE_STDIO; /* file access method: FILE_STDIO or FILE_MMAP */
133 int devbehave = DEV_READ; /* -D: handling of devices */
134 int dirbehave = DIR_READ; /* -dRr: handling of directories */
135 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
137 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
138 bool fexclude, finclude; /* --exclude and --include */
141 BIN_OPT = CHAR_MAX + 1,
154 static inline const char *init_color(const char *);
157 bool file_err; /* file reading error */
160 * Prints usage information and returns 2.
165 fprintf(stderr, getstr(3), getprogname());
166 fprintf(stderr, "%s", getstr(4));
167 fprintf(stderr, "%s", getstr(5));
168 fprintf(stderr, "%s", getstr(6));
172 static const char *optstr = "0123456789A:B:C:D:EFGHILOPSRUVabcd:e:f:hilm:nopqrsuvwxyz";
174 static const struct option long_options[] =
176 {"binary-files", required_argument, NULL, BIN_OPT},
177 {"help", no_argument, NULL, HELP_OPT},
178 {"mmap", no_argument, NULL, MMAP_OPT},
179 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
180 {"label", required_argument, NULL, LABEL_OPT},
181 {"null", no_argument, NULL, NULL_OPT},
182 {"color", optional_argument, NULL, COLOR_OPT},
183 {"colour", optional_argument, NULL, COLOR_OPT},
184 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
185 {"include", required_argument, NULL, R_INCLUDE_OPT},
186 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
187 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
188 {"after-context", required_argument, NULL, 'A'},
189 {"text", no_argument, NULL, 'a'},
190 {"before-context", required_argument, NULL, 'B'},
191 {"byte-offset", no_argument, NULL, 'b'},
192 {"context", optional_argument, NULL, 'C'},
193 {"count", no_argument, NULL, 'c'},
194 {"devices", required_argument, NULL, 'D'},
195 {"directories", required_argument, NULL, 'd'},
196 {"extended-regexp", no_argument, NULL, 'E'},
197 {"regexp", required_argument, NULL, 'e'},
198 {"fixed-strings", no_argument, NULL, 'F'},
199 {"file", required_argument, NULL, 'f'},
200 {"basic-regexp", no_argument, NULL, 'G'},
201 {"no-filename", no_argument, NULL, 'h'},
202 {"with-filename", no_argument, NULL, 'H'},
203 {"ignore-case", no_argument, NULL, 'i'},
204 {"files-with-matches", no_argument, NULL, 'l'},
205 {"files-without-match", no_argument, NULL, 'L'},
206 {"max-count", required_argument, NULL, 'm'},
207 {"line-number", no_argument, NULL, 'n'},
208 {"only-matching", no_argument, NULL, 'o'},
209 {"quiet", no_argument, NULL, 'q'},
210 {"silent", no_argument, NULL, 'q'},
211 {"recursive", no_argument, NULL, 'r'},
212 {"no-messages", no_argument, NULL, 's'},
213 {"binary", no_argument, NULL, 'U'},
214 {"unix-byte-offsets", no_argument, NULL, 'u'},
215 {"invert-match", no_argument, NULL, 'v'},
216 {"version", no_argument, NULL, 'V'},
217 {"word-regexp", no_argument, NULL, 'w'},
218 {"line-regexp", no_argument, NULL, 'x'},
219 {"null-data", no_argument, NULL, 'z'},
220 {NULL, no_argument, NULL, 0}
224 * Adds a searching pattern to the internal array.
227 add_pattern(char *pat, size_t len)
230 /* Do not add further patterns if we already match everything */
234 /* Check if we can do a shortcut */
237 for (unsigned int i = 0; i < patterns; i++) {
238 free(pattern[i].pat);
240 pattern = grep_realloc(pattern, sizeof(struct pat));
241 pattern[0].pat = NULL;
246 /* Increase size if necessary */
247 if (patterns == pattern_sz) {
249 pattern = grep_realloc(pattern, ++pattern_sz *
252 if (len > 0 && pat[len - 1] == '\n')
254 /* pat may not be NUL-terminated */
255 pattern[patterns].pat = grep_malloc(len + 1);
256 memcpy(pattern[patterns].pat, pat, len);
257 pattern[patterns].len = len;
258 pattern[patterns].pat[len] = '\0';
263 * Adds a file include/exclude pattern to the internal array.
266 add_fpattern(const char *pat, int mode)
269 /* Increase size if necessary */
270 if (fpatterns == fpattern_sz) {
272 fpattern = grep_realloc(fpattern, ++fpattern_sz *
273 sizeof(struct epat));
275 fpattern[fpatterns].pat = grep_strdup(pat);
276 fpattern[fpatterns].mode = mode;
281 * Adds a directory include/exclude pattern to the internal array.
284 add_dpattern(const char *pat, int mode)
287 /* Increase size if necessary */
288 if (dpatterns == dpattern_sz) {
290 dpattern = grep_realloc(dpattern, ++dpattern_sz *
291 sizeof(struct epat));
293 dpattern[dpatterns].pat = grep_strdup(pat);
294 dpattern[dpatterns].mode = mode;
299 * Reads searching patterns from a file and adds them with add_pattern().
302 read_patterns(const char *fn)
310 if (strcmp(fn, "-") == 0)
312 else if ((f = fopen(fn, "r")) == NULL)
314 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
320 while ((rlen = getline(&line, &len, f)) != -1) {
323 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
329 if (strcmp(fn, "-") != 0)
333 static inline const char *
334 init_color(const char *d)
338 c = getenv("GREP_COLOR");
339 return (c != NULL && c[0] != '\0' ? c : d);
343 main(int argc, char *argv[])
345 char **aargv, **eargv, *eopts;
349 unsigned int aargc, eargc, i;
350 int c, lastc, needpattern, newarg, prevoptind;
352 setlocale(LC_ALL, "");
355 catalog = catopen("grep", NL_CAT_LOCALE);
358 /* Check what is the program name of the binary. In this
359 way we can have all the functionalities in one binary
360 without the need of scripting and using ugly hacks. */
363 dirbehave = DIR_RECURSE;
368 grepbehave = GREP_EXTENDED;
371 grepbehave = GREP_FIXED;
381 eopts = getenv("GREP_OPTIONS");
383 /* support for extra arguments in GREP_OPTIONS */
385 if (eopts != NULL && eopts[0] != '\0') {
388 /* make an estimation of how many extra arguments we have */
389 for (unsigned int j = 0; j < strlen(eopts); j++)
393 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
396 /* parse extra arguments */
397 while ((str = strsep(&eopts, " ")) != NULL)
399 eargv[eargc++] = grep_strdup(str);
401 aargv = (char **)grep_calloc(eargc + argc + 1,
405 for (i = 0; i < eargc; i++)
406 aargv[i + 1] = eargv[i];
407 for (int j = 1; j < argc; j++, i++)
408 aargv[i + 1] = argv[j];
410 aargc = eargc + argc;
416 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
419 case '0': case '1': case '2': case '3': case '4':
420 case '5': case '6': case '7': case '8': case '9':
421 if (newarg || !isdigit(lastc))
423 else if (Aflag > LLONG_MAX / 10 - 1) {
428 Aflag = Bflag = (Aflag * 10) + (c - '0');
431 if (optarg == NULL) {
440 l = strtoll(optarg, &ep, 10);
441 if (errno == ERANGE || errno == EINVAL)
443 else if (ep[0] != '\0') {
448 err(2, "context argument must be non-negative");
459 binbehave = BINFILE_TEXT;
468 if (strcasecmp(optarg, "skip") == 0)
469 devbehave = DEV_SKIP;
470 else if (strcasecmp(optarg, "read") == 0)
471 devbehave = DEV_READ;
473 errx(2, getstr(2), "--devices");
476 if (strcasecmp("recurse", optarg) == 0) {
478 dirbehave = DIR_RECURSE;
479 } else if (strcasecmp("skip", optarg) == 0)
480 dirbehave = DIR_SKIP;
481 else if (strcasecmp("read", optarg) == 0)
482 dirbehave = DIR_READ;
484 errx(2, getstr(2), "--directories");
487 grepbehave = GREP_EXTENDED;
492 char *string = optarg;
494 while ((token = strsep(&string, "\n")) != NULL)
495 add_pattern(token, strlen(token));
500 grepbehave = GREP_FIXED;
503 read_patterns(optarg);
507 grepbehave = GREP_BASIC;
517 binbehave = BINFILE_SKIP;
535 mlimit = mcount = strtoll(optarg, &ep, 10);
536 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
537 ((errno == EINVAL) && (mcount == 0)))
539 else if (ep[0] != '\0') {
548 linkbehave = LINK_EXPLICIT;
552 cflags &= ~REG_NOSUB;
555 linkbehave = LINK_SKIP;
561 linkbehave = LINK_READ;
565 dirbehave = DIR_RECURSE;
572 binbehave = BINFILE_BIN;
576 filebehave = FILE_MMAP;
580 printf(getstr(9), getprogname(), VERSION);
582 printf(getstr(8), getprogname(), VERSION);
590 cflags &= ~REG_NOSUB;
594 cflags &= ~REG_NOSUB;
600 if (strcasecmp("binary", optarg) == 0)
601 binbehave = BINFILE_BIN;
602 else if (strcasecmp("without-match", optarg) == 0)
603 binbehave = BINFILE_SKIP;
604 else if (strcasecmp("text", optarg) == 0)
605 binbehave = BINFILE_TEXT;
607 errx(2, getstr(2), "--binary-files");
611 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
612 strcasecmp("tty", optarg) == 0 ||
613 strcasecmp("if-tty", optarg) == 0) {
616 term = getenv("TERM");
617 if (isatty(STDOUT_FILENO) && term != NULL &&
618 strcasecmp(term, "dumb") != 0)
619 color = init_color("01;31");
620 } else if (strcasecmp("always", optarg) == 0 ||
621 strcasecmp("yes", optarg) == 0 ||
622 strcasecmp("force", optarg) == 0) {
623 color = init_color("01;31");
624 } else if (strcasecmp("never", optarg) != 0 &&
625 strcasecmp("none", optarg) != 0 &&
626 strcasecmp("no", optarg) != 0)
627 errx(2, getstr(2), "--color");
628 cflags &= ~REG_NOSUB;
641 add_fpattern(optarg, INCL_PAT);
645 add_fpattern(optarg, EXCL_PAT);
649 add_dpattern(optarg, INCL_PAT);
653 add_dpattern(optarg, EXCL_PAT);
660 newarg = optind != prevoptind;
666 /* Empty pattern file matches nothing */
667 if (!needpattern && (patterns == 0))
670 /* Fail if we don't have any pattern */
671 if (aargc == 0 && needpattern)
674 /* Process patterns from command line */
675 if (aargc != 0 && needpattern) {
677 char *string = *aargv;
679 while ((token = strsep(&string, "\n")) != NULL)
680 add_pattern(token, strlen(token));
685 switch (grepbehave) {
690 * regex(3) implementations that support fixed-string searches generally
691 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
692 * here. If neither is defined, GREP_FIXED later implies that the
693 * internal literal matcher should be used. Other cflags that have
694 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
695 * similarly added here, and grep.h should be amended to take this into
696 * consideration when defining WITH_INTERNAL_NOSPEC.
698 #if defined(REG_NOSPEC)
699 cflags |= REG_NOSPEC;
700 #elif defined(REG_LITERAL)
701 cflags |= REG_LITERAL;
705 cflags |= REG_EXTENDED;
712 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
714 /* Don't process any patterns if we have a blank one */
715 #ifdef WITH_INTERNAL_NOSPEC
716 if (!matchall && grepbehave != GREP_FIXED) {
720 /* Check if cheating is allowed (always is for fgrep). */
721 for (i = 0; i < patterns; ++i) {
722 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
724 regerror(c, &r_pattern[i], re_error,
726 errx(2, "%s", re_error);
734 if ((aargc == 0 || aargc == 1) && !Hflag)
737 if (aargc == 0 && dirbehave != DIR_RECURSE)
738 exit(!procfile("-"));
740 if (dirbehave == DIR_RECURSE)
741 c = grep_tree(aargv);
743 for (c = 0; aargc--; ++aargv) {
744 if ((finclude || fexclude) && !file_matching(*aargv))
746 c+= procfile(*aargv);
753 /* Find out the correct return value according to the
754 results and the command line option. */
755 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));