1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
38 #include <sys/types.h>
55 #ifndef WITHOUT_FASTMATCH
56 #include "fastmatch.h"
66 * Default messages to use when NLS is disabled or no catalogue
69 const char *errstr[] = {
71 /* 1*/ "(standard input)",
72 /* 2*/ "cannot read bzip2 compressed file",
73 /* 3*/ "unknown %s option",
74 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
75 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
76 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
77 /* 7*/ "\t[--null] [pattern] [file ...]\n",
78 /* 8*/ "Binary file %s matches\n",
79 /* 9*/ "%s (BSD grep) %s\n",
80 /* 10*/ "%s (BSD grep, GNU compatible) %s\n",
83 /* Flags passed to regcomp() and regexec() */
84 int cflags = REG_NOSUB | REG_NEWLINE;
85 int eflags = REG_STARTEND;
87 /* XXX TODO: Get rid of this flag.
88 * matchall is a gross hack that means that an empty pattern was passed to us.
89 * It is a necessary evil at the moment because our regex(3) implementation
90 * does not allow for empty patterns, as supported by POSIX's definition of
91 * grammar for BREs/EREs. When libregex becomes available, it would be wise
92 * to remove this and let regex(3) handle the dirty details of empty patterns.
96 /* Searching patterns */
97 unsigned int patterns;
98 static unsigned int pattern_sz;
101 #ifndef WITHOUT_FASTMATCH
102 fastmatch_t *fg_pattern;
105 /* Filename exclusion/inclusion patterns */
106 unsigned int fpatterns, dpatterns;
107 static unsigned int fpattern_sz, dpattern_sz;
108 struct epat *dpattern, *fpattern;
110 /* For regex errors */
111 char re_error[RE_ERROR_BUF + 1];
113 /* Command-line flags */
114 long long Aflag; /* -A x: print x lines trailing each match */
115 long long Bflag; /* -B x: print x lines leading each match */
116 bool Hflag; /* -H: always print file name */
117 bool Lflag; /* -L: only show names of files with no matches */
118 bool bflag; /* -b: show block numbers for each match */
119 bool cflag; /* -c: only show a count of matching lines */
120 bool hflag; /* -h: don't print filename headers */
121 bool iflag; /* -i: ignore case */
122 bool lflag; /* -l: only show names of files with matches */
123 bool mflag; /* -m x: stop reading the files after x matches */
124 long long mcount; /* count for -m */
125 long long mlimit; /* requested value for -m */
126 char fileeol; /* indicator for eol */
127 bool nflag; /* -n: show line numbers in front of matching lines */
128 bool oflag; /* -o: print only matching part */
129 bool qflag; /* -q: quiet mode (don't output anything) */
130 bool sflag; /* -s: silent mode (ignore errors) */
131 bool vflag; /* -v: only show non-matching lines */
132 bool wflag; /* -w: pattern must start and end on word boundaries */
133 bool xflag; /* -x: pattern must match entire line */
134 bool lbflag; /* --line-buffered */
135 bool nullflag; /* --null */
136 char *label; /* --label */
137 const char *color; /* --color */
138 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
139 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
140 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
141 int devbehave = DEV_READ; /* -D: handling of devices */
142 int dirbehave = DIR_READ; /* -dRr: handling of directories */
143 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
145 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
146 bool fexclude, finclude; /* --exclude and --include */
149 BIN_OPT = CHAR_MAX + 1,
162 static inline const char *init_color(const char *);
165 bool file_err; /* file reading error */
168 * Prints usage information and returns 2.
173 fprintf(stderr, getstr(4), getprogname());
174 fprintf(stderr, "%s", getstr(5));
175 fprintf(stderr, "%s", getstr(6));
176 fprintf(stderr, "%s", getstr(7));
180 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXyz";
182 static const struct option long_options[] =
184 {"binary-files", required_argument, NULL, BIN_OPT},
185 {"help", no_argument, NULL, HELP_OPT},
186 {"mmap", no_argument, NULL, MMAP_OPT},
187 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
188 {"label", required_argument, NULL, LABEL_OPT},
189 {"null", no_argument, NULL, NULL_OPT},
190 {"color", optional_argument, NULL, COLOR_OPT},
191 {"colour", optional_argument, NULL, COLOR_OPT},
192 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
193 {"include", required_argument, NULL, R_INCLUDE_OPT},
194 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
195 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
196 {"after-context", required_argument, NULL, 'A'},
197 {"text", no_argument, NULL, 'a'},
198 {"before-context", required_argument, NULL, 'B'},
199 {"byte-offset", no_argument, NULL, 'b'},
200 {"context", optional_argument, NULL, 'C'},
201 {"count", no_argument, NULL, 'c'},
202 {"devices", required_argument, NULL, 'D'},
203 {"directories", required_argument, NULL, 'd'},
204 {"extended-regexp", no_argument, NULL, 'E'},
205 {"regexp", required_argument, NULL, 'e'},
206 {"fixed-strings", no_argument, NULL, 'F'},
207 {"file", required_argument, NULL, 'f'},
208 {"basic-regexp", no_argument, NULL, 'G'},
209 {"no-filename", no_argument, NULL, 'h'},
210 {"with-filename", no_argument, NULL, 'H'},
211 {"ignore-case", no_argument, NULL, 'i'},
212 {"bz2decompress", no_argument, NULL, 'J'},
213 {"files-with-matches", no_argument, NULL, 'l'},
214 {"files-without-match", no_argument, NULL, 'L'},
215 {"max-count", required_argument, NULL, 'm'},
216 {"lzma", no_argument, NULL, 'M'},
217 {"line-number", no_argument, NULL, 'n'},
218 {"only-matching", no_argument, NULL, 'o'},
219 {"quiet", no_argument, NULL, 'q'},
220 {"silent", no_argument, NULL, 'q'},
221 {"recursive", no_argument, NULL, 'r'},
222 {"no-messages", no_argument, NULL, 's'},
223 {"binary", no_argument, NULL, 'U'},
224 {"unix-byte-offsets", no_argument, NULL, 'u'},
225 {"invert-match", no_argument, NULL, 'v'},
226 {"version", no_argument, NULL, 'V'},
227 {"word-regexp", no_argument, NULL, 'w'},
228 {"line-regexp", no_argument, NULL, 'x'},
229 {"xz", no_argument, NULL, 'X'},
230 {"null-data", no_argument, NULL, 'z'},
231 {"decompress", no_argument, NULL, 'Z'},
232 {NULL, no_argument, NULL, 0}
236 * Adds a searching pattern to the internal array.
239 add_pattern(char *pat, size_t len)
242 /* Do not add further patterns if we already match everything */
246 /* Check if we can do a shortcut */
249 for (unsigned int i = 0; i < patterns; i++) {
250 free(pattern[i].pat);
252 pattern = grep_realloc(pattern, sizeof(struct pat));
253 pattern[0].pat = NULL;
258 /* Increase size if necessary */
259 if (patterns == pattern_sz) {
261 pattern = grep_realloc(pattern, ++pattern_sz *
264 if (len > 0 && pat[len - 1] == '\n')
266 /* pat may not be NUL-terminated */
267 pattern[patterns].pat = grep_malloc(len + 1);
268 memcpy(pattern[patterns].pat, pat, len);
269 pattern[patterns].len = len;
270 pattern[patterns].pat[len] = '\0';
275 * Adds a file include/exclude pattern to the internal array.
278 add_fpattern(const char *pat, int mode)
281 /* Increase size if necessary */
282 if (fpatterns == fpattern_sz) {
284 fpattern = grep_realloc(fpattern, ++fpattern_sz *
285 sizeof(struct epat));
287 fpattern[fpatterns].pat = grep_strdup(pat);
288 fpattern[fpatterns].mode = mode;
293 * Adds a directory include/exclude pattern to the internal array.
296 add_dpattern(const char *pat, int mode)
299 /* Increase size if necessary */
300 if (dpatterns == dpattern_sz) {
302 dpattern = grep_realloc(dpattern, ++dpattern_sz *
303 sizeof(struct epat));
305 dpattern[dpatterns].pat = grep_strdup(pat);
306 dpattern[dpatterns].mode = mode;
311 * Reads searching patterns from a file and adds them with add_pattern().
314 read_patterns(const char *fn)
322 if ((f = fopen(fn, "r")) == NULL)
324 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
330 while ((rlen = getline(&line, &len, f)) != -1) {
333 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
342 static inline const char *
343 init_color(const char *d)
347 c = getenv("GREP_COLOR");
348 return (c != NULL && c[0] != '\0' ? c : d);
352 main(int argc, char *argv[])
354 char **aargv, **eargv, *eopts;
358 unsigned int aargc, eargc, i;
359 int c, lastc, needpattern, newarg, prevoptind;
361 setlocale(LC_ALL, "");
364 catalog = catopen("grep", NL_CAT_LOCALE);
367 /* Check what is the program name of the binary. In this
368 way we can have all the functionalities in one binary
369 without the need of scripting and using ugly hacks. */
371 if (pn[0] == 'b' && pn[1] == 'z') {
372 filebehave = FILE_BZIP;
374 } else if (pn[0] == 'x' && pn[1] == 'z') {
375 filebehave = FILE_XZ;
377 } else if (pn[0] == 'l' && pn[1] == 'z') {
378 filebehave = FILE_LZMA;
380 } else if (pn[0] == 'r') {
381 dirbehave = DIR_RECURSE;
383 } else if (pn[0] == 'z') {
384 filebehave = FILE_GZIP;
389 grepbehave = GREP_EXTENDED;
392 grepbehave = GREP_FIXED;
402 eopts = getenv("GREP_OPTIONS");
404 /* support for extra arguments in GREP_OPTIONS */
406 if (eopts != NULL && eopts[0] != '\0') {
409 /* make an estimation of how many extra arguments we have */
410 for (unsigned int j = 0; j < strlen(eopts); j++)
414 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
417 /* parse extra arguments */
418 while ((str = strsep(&eopts, " ")) != NULL)
420 eargv[eargc++] = grep_strdup(str);
422 aargv = (char **)grep_calloc(eargc + argc + 1,
426 for (i = 0; i < eargc; i++)
427 aargv[i + 1] = eargv[i];
428 for (int j = 1; j < argc; j++, i++)
429 aargv[i + 1] = argv[j];
431 aargc = eargc + argc;
437 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
440 case '0': case '1': case '2': case '3': case '4':
441 case '5': case '6': case '7': case '8': case '9':
442 if (newarg || !isdigit(lastc))
444 else if (Aflag > LLONG_MAX / 10 - 1) {
449 Aflag = Bflag = (Aflag * 10) + (c - '0');
452 if (optarg == NULL) {
461 l = strtoll(optarg, &ep, 10);
462 if (errno == ERANGE || errno == EINVAL)
464 else if (ep[0] != '\0') {
469 err(2, "context argument must be non-negative");
480 binbehave = BINFILE_TEXT;
489 if (strcasecmp(optarg, "skip") == 0)
490 devbehave = DEV_SKIP;
491 else if (strcasecmp(optarg, "read") == 0)
492 devbehave = DEV_READ;
494 errx(2, getstr(3), "--devices");
497 if (strcasecmp("recurse", optarg) == 0) {
499 dirbehave = DIR_RECURSE;
500 } else if (strcasecmp("skip", optarg) == 0)
501 dirbehave = DIR_SKIP;
502 else if (strcasecmp("read", optarg) == 0)
503 dirbehave = DIR_READ;
505 errx(2, getstr(3), "--directories");
508 grepbehave = GREP_EXTENDED;
513 char *string = optarg;
515 while ((token = strsep(&string, "\n")) != NULL)
516 add_pattern(token, strlen(token));
521 grepbehave = GREP_FIXED;
524 read_patterns(optarg);
528 grepbehave = GREP_BASIC;
538 binbehave = BINFILE_SKIP;
548 err(2, "bzip2 support was disabled at compile-time");
550 filebehave = FILE_BZIP;
563 mlimit = mcount = strtoll(optarg, &ep, 10);
564 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
565 ((errno == EINVAL) && (mcount == 0)))
567 else if (ep[0] != '\0') {
573 filebehave = FILE_LZMA;
579 linkbehave = LINK_EXPLICIT;
583 cflags &= ~REG_NOSUB;
586 linkbehave = LINK_SKIP;
592 linkbehave = LINK_READ;
596 dirbehave = DIR_RECURSE;
603 binbehave = BINFILE_BIN;
607 filebehave = FILE_MMAP;
611 printf(getstr(10), getprogname(), VERSION);
613 printf(getstr(9), getprogname(), VERSION);
621 cflags &= ~REG_NOSUB;
625 cflags &= ~REG_NOSUB;
628 filebehave = FILE_XZ;
634 filebehave = FILE_GZIP;
637 if (strcasecmp("binary", optarg) == 0)
638 binbehave = BINFILE_BIN;
639 else if (strcasecmp("without-match", optarg) == 0)
640 binbehave = BINFILE_SKIP;
641 else if (strcasecmp("text", optarg) == 0)
642 binbehave = BINFILE_TEXT;
644 errx(2, getstr(3), "--binary-files");
648 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
649 strcasecmp("tty", optarg) == 0 ||
650 strcasecmp("if-tty", optarg) == 0) {
653 term = getenv("TERM");
654 if (isatty(STDOUT_FILENO) && term != NULL &&
655 strcasecmp(term, "dumb") != 0)
656 color = init_color("01;31");
657 } else if (strcasecmp("always", optarg) == 0 ||
658 strcasecmp("yes", optarg) == 0 ||
659 strcasecmp("force", optarg) == 0) {
660 color = init_color("01;31");
661 } else if (strcasecmp("never", optarg) != 0 &&
662 strcasecmp("none", optarg) != 0 &&
663 strcasecmp("no", optarg) != 0)
664 errx(2, getstr(3), "--color");
665 cflags &= ~REG_NOSUB;
678 add_fpattern(optarg, INCL_PAT);
682 add_fpattern(optarg, EXCL_PAT);
686 add_dpattern(optarg, INCL_PAT);
690 add_dpattern(optarg, EXCL_PAT);
697 newarg = optind != prevoptind;
703 /* Empty pattern file matches nothing */
704 if (!needpattern && (patterns == 0))
707 /* Fail if we don't have any pattern */
708 if (aargc == 0 && needpattern)
711 /* Process patterns from command line */
712 if (aargc != 0 && needpattern) {
714 char *string = *aargv;
716 while ((token = strsep(&string, "\n")) != NULL)
717 add_pattern(token, strlen(token));
722 switch (grepbehave) {
727 * regex(3) implementations that support fixed-string searches generally
728 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
729 * here. If neither are defined, GREP_FIXED later implies that the
730 * internal literal matcher should be used. Other cflags that have
731 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
732 * similarly added here, and grep.h should be amended to take this into
733 * consideration when defining WITH_INTERNAL_NOSPEC.
735 #if defined(REG_NOSPEC)
736 cflags |= REG_NOSPEC;
737 #elif defined(REG_LITERAL)
738 cflags |= REG_LITERAL;
742 cflags |= REG_EXTENDED;
749 #ifndef WITHOUT_FASTMATCH
750 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
752 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
754 /* Don't process any patterns if we have a blank one */
755 #ifdef WITH_INTERNAL_NOSPEC
756 if (!matchall && grepbehave != GREP_FIXED) {
760 /* Check if cheating is allowed (always is for fgrep). */
761 for (i = 0; i < patterns; ++i) {
762 #ifndef WITHOUT_FASTMATCH
764 * Attempt compilation with fastmatch regex and
765 * fallback to regex(3) if it fails.
767 if (fastncomp(&fg_pattern[i], pattern[i].pat,
768 pattern[i].len, cflags) == 0)
771 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
773 regerror(c, &r_pattern[i], re_error,
775 errx(2, "%s", re_error);
783 if ((aargc == 0 || aargc == 1) && !Hflag)
786 if (aargc == 0 && dirbehave != DIR_RECURSE)
787 exit(!procfile("-"));
789 if (dirbehave == DIR_RECURSE)
790 c = grep_tree(aargv);
792 for (c = 0; aargc--; ++aargv) {
793 if ((finclude || fexclude) && !file_matching(*aargv))
795 c+= procfile(*aargv);
802 /* Find out the correct return value according to the
803 results and the command line option. */
804 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));