1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
38 #include <sys/types.h>
54 #ifndef WITHOUT_FASTMATCH
55 #include "fastmatch.h"
65 * Default messages to use when NLS is disabled or no catalogue
68 const char *errstr[] = {
70 /* 1*/ "(standard input)",
71 /* 2*/ "cannot read bzip2 compressed file",
72 /* 3*/ "unknown %s option",
73 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
74 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
75 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
76 /* 7*/ "\t[--null] [pattern] [file ...]\n",
77 /* 8*/ "Binary file %s matches\n",
78 /* 9*/ "%s (BSD grep) %s\n",
79 /* 10*/ "%s (BSD grep, GNU compatible) %s\n",
82 /* Flags passed to regcomp() and regexec() */
83 int cflags = REG_NOSUB | REG_NEWLINE;
84 int eflags = REG_STARTEND;
86 /* XXX TODO: Get rid of this flag.
87 * matchall is a gross hack that means that an empty pattern was passed to us.
88 * It is a necessary evil at the moment because our regex(3) implementation
89 * does not allow for empty patterns, as supported by POSIX's definition of
90 * grammar for BREs/EREs. When libregex becomes available, it would be wise
91 * to remove this and let regex(3) handle the dirty details of empty patterns.
95 /* Searching patterns */
96 unsigned int patterns;
97 static unsigned int pattern_sz;
100 #ifndef WITHOUT_FASTMATCH
101 fastmatch_t *fg_pattern;
104 /* Filename exclusion/inclusion patterns */
105 unsigned int fpatterns, dpatterns;
106 static unsigned int fpattern_sz, dpattern_sz;
107 struct epat *dpattern, *fpattern;
109 /* For regex errors */
110 char re_error[RE_ERROR_BUF + 1];
112 /* Command-line flags and the behaviour selectors derived from them */
113 long long Aflag; /* -A x: print x lines trailing each match */
114 long long Bflag; /* -B x: print x lines leading each match */
115 bool Hflag; /* -H: always print file name */
116 bool Lflag; /* -L: only show names of files with no matches */
117 bool bflag; /* -b: show byte offset for each match (long option --byte-offset) */
118 bool cflag; /* -c: only show a count of matching lines */
119 bool hflag; /* -h: don't print filename headers */
120 bool iflag; /* -i: ignore case */
121 bool lflag; /* -l: only show names of files with matches */
122 bool mflag; /* -m x: stop reading the files after x matches */
123 long long mcount; /* count for -m */
124 long long mlimit; /* requested value for -m */
125 char fileeol; /* indicator for eol */
126 bool nflag; /* -n: show line numbers in front of matching lines */
127 bool oflag; /* -o: print only matching part */
128 bool qflag; /* -q: quiet mode (don't output anything) */
129 bool sflag; /* -s: silent mode (ignore errors) */
130 bool vflag; /* -v: only show non-matching lines */
131 bool wflag; /* -w: pattern must start and end on word boundaries */
132 bool xflag; /* -x: pattern must match entire line */
133 bool lbflag; /* --line-buffered */
134 bool nullflag; /* --null */
135 char *label; /* --label */
136 const char *color; /* --color */
137 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
138 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
139 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
140 int devbehave = DEV_READ; /* -D: handling of devices */
141 int dirbehave = DIR_READ; /* -dRr: handling of directories */
142 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
144 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
145 bool fexclude, finclude; /* --exclude and --include */
148 BIN_OPT = CHAR_MAX + 1,
161 static inline const char *init_color(const char *);
164 bool file_err; /* file reading error */
167 * Prints usage information and returns 2.
172 fprintf(stderr, getstr(4), getprogname());
173 fprintf(stderr, "%s", getstr(5));
174 fprintf(stderr, "%s", getstr(6));
175 fprintf(stderr, "%s", getstr(7));
179 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXyz";
181 static const struct option long_options[] =
183 {"binary-files", required_argument, NULL, BIN_OPT},
184 {"help", no_argument, NULL, HELP_OPT},
185 {"mmap", no_argument, NULL, MMAP_OPT},
186 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
187 {"label", required_argument, NULL, LABEL_OPT},
188 {"null", no_argument, NULL, NULL_OPT},
189 {"color", optional_argument, NULL, COLOR_OPT},
190 {"colour", optional_argument, NULL, COLOR_OPT},
191 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
192 {"include", required_argument, NULL, R_INCLUDE_OPT},
193 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
194 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
195 {"after-context", required_argument, NULL, 'A'},
196 {"text", no_argument, NULL, 'a'},
197 {"before-context", required_argument, NULL, 'B'},
198 {"byte-offset", no_argument, NULL, 'b'},
199 {"context", optional_argument, NULL, 'C'},
200 {"count", no_argument, NULL, 'c'},
201 {"devices", required_argument, NULL, 'D'},
202 {"directories", required_argument, NULL, 'd'},
203 {"extended-regexp", no_argument, NULL, 'E'},
204 {"regexp", required_argument, NULL, 'e'},
205 {"fixed-strings", no_argument, NULL, 'F'},
206 {"file", required_argument, NULL, 'f'},
207 {"basic-regexp", no_argument, NULL, 'G'},
208 {"no-filename", no_argument, NULL, 'h'},
209 {"with-filename", no_argument, NULL, 'H'},
210 {"ignore-case", no_argument, NULL, 'i'},
211 {"bz2decompress", no_argument, NULL, 'J'},
212 {"files-with-matches", no_argument, NULL, 'l'},
213 {"files-without-match", no_argument, NULL, 'L'},
214 {"max-count", required_argument, NULL, 'm'},
215 {"lzma", no_argument, NULL, 'M'},
216 {"line-number", no_argument, NULL, 'n'},
217 {"only-matching", no_argument, NULL, 'o'},
218 {"quiet", no_argument, NULL, 'q'},
219 {"silent", no_argument, NULL, 'q'},
220 {"recursive", no_argument, NULL, 'r'},
221 {"no-messages", no_argument, NULL, 's'},
222 {"binary", no_argument, NULL, 'U'},
223 {"unix-byte-offsets", no_argument, NULL, 'u'},
224 {"invert-match", no_argument, NULL, 'v'},
225 {"version", no_argument, NULL, 'V'},
226 {"word-regexp", no_argument, NULL, 'w'},
227 {"line-regexp", no_argument, NULL, 'x'},
228 {"xz", no_argument, NULL, 'X'},
229 {"null-data", no_argument, NULL, 'z'},
230 {"decompress", no_argument, NULL, 'Z'},
231 {NULL, no_argument, NULL, 0}
235 * Adds a searching pattern to the internal array.
238 add_pattern(char *pat, size_t len)
241 /* Do not add further patterns if we already match everything */
245 /* Check if we can do a shortcut */
248 for (unsigned int i = 0; i < patterns; i++) {
249 free(pattern[i].pat);
251 pattern = grep_realloc(pattern, sizeof(struct pat));
252 pattern[0].pat = NULL;
257 /* Increase size if necessary */
258 if (patterns == pattern_sz) {
260 pattern = grep_realloc(pattern, ++pattern_sz *
263 if (len > 0 && pat[len - 1] == '\n')
265 /* pat may not be NUL-terminated */
266 pattern[patterns].pat = grep_malloc(len + 1);
267 memcpy(pattern[patterns].pat, pat, len);
268 pattern[patterns].len = len;
269 pattern[patterns].pat[len] = '\0';
274 * Adds a file include/exclude pattern to the internal array.
277 add_fpattern(const char *pat, int mode)
280 /* Increase size if necessary */
281 if (fpatterns == fpattern_sz) {
283 fpattern = grep_realloc(fpattern, ++fpattern_sz *
284 sizeof(struct epat));
286 fpattern[fpatterns].pat = grep_strdup(pat);
287 fpattern[fpatterns].mode = mode;
292 * Adds a directory include/exclude pattern to the internal array.
295 add_dpattern(const char *pat, int mode)
298 /* Increase size if necessary */
299 if (dpatterns == dpattern_sz) {
301 dpattern = grep_realloc(dpattern, ++dpattern_sz *
302 sizeof(struct epat));
304 dpattern[dpatterns].pat = grep_strdup(pat);
305 dpattern[dpatterns].mode = mode;
310 * Reads searching patterns from a file and adds them with add_pattern().
313 read_patterns(const char *fn)
321 if ((f = fopen(fn, "r")) == NULL)
323 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
329 while ((rlen = getline(&line, &len, f)) != -1) {
332 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
341 static inline const char *
342 init_color(const char *d)
346 c = getenv("GREP_COLOR");
347 return (c != NULL && c[0] != '\0' ? c : d);
351 main(int argc, char *argv[])
353 char **aargv, **eargv, *eopts;
357 unsigned int aargc, eargc, i;
358 int c, lastc, needpattern, newarg, prevoptind;
360 setlocale(LC_ALL, "");
363 catalog = catopen("grep", NL_CAT_LOCALE);
366 /* Check what is the program name of the binary. In this
367 way we can have all the functionalities in one binary
368 without the need of scripting and using ugly hacks. */
370 if (pn[0] == 'b' && pn[1] == 'z') {
371 filebehave = FILE_BZIP;
373 } else if (pn[0] == 'x' && pn[1] == 'z') {
374 filebehave = FILE_XZ;
376 } else if (pn[0] == 'l' && pn[1] == 'z') {
377 filebehave = FILE_LZMA;
379 } else if (pn[0] == 'r') {
380 dirbehave = DIR_RECURSE;
382 } else if (pn[0] == 'z') {
383 filebehave = FILE_GZIP;
388 grepbehave = GREP_EXTENDED;
391 grepbehave = GREP_FIXED;
401 eopts = getenv("GREP_OPTIONS");
403 /* support for extra arguments in GREP_OPTIONS */
405 if (eopts != NULL && eopts[0] != '\0') {
408 /* make an estimation of how many extra arguments we have */
409 for (unsigned int j = 0; j < strlen(eopts); j++)
413 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
416 /* parse extra arguments */
417 while ((str = strsep(&eopts, " ")) != NULL)
419 eargv[eargc++] = grep_strdup(str);
421 aargv = (char **)grep_calloc(eargc + argc + 1,
425 for (i = 0; i < eargc; i++)
426 aargv[i + 1] = eargv[i];
427 for (int j = 1; j < argc; j++, i++)
428 aargv[i + 1] = argv[j];
430 aargc = eargc + argc;
436 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
439 case '0': case '1': case '2': case '3': case '4':
440 case '5': case '6': case '7': case '8': case '9':
441 if (newarg || !isdigit(lastc))
443 else if (Aflag > LLONG_MAX / 10 - 1) {
448 Aflag = Bflag = (Aflag * 10) + (c - '0');
451 if (optarg == NULL) {
460 l = strtoll(optarg, &ep, 10);
461 if (errno == ERANGE || errno == EINVAL)
463 else if (ep[0] != '\0') {
468 err(2, "context argument must be non-negative");
479 binbehave = BINFILE_TEXT;
488 if (strcasecmp(optarg, "skip") == 0)
489 devbehave = DEV_SKIP;
490 else if (strcasecmp(optarg, "read") == 0)
491 devbehave = DEV_READ;
493 errx(2, getstr(3), "--devices");
496 if (strcasecmp("recurse", optarg) == 0) {
498 dirbehave = DIR_RECURSE;
499 } else if (strcasecmp("skip", optarg) == 0)
500 dirbehave = DIR_SKIP;
501 else if (strcasecmp("read", optarg) == 0)
502 dirbehave = DIR_READ;
504 errx(2, getstr(3), "--directories");
507 grepbehave = GREP_EXTENDED;
512 char *string = optarg;
514 while ((token = strsep(&string, "\n")) != NULL)
515 add_pattern(token, strlen(token));
520 grepbehave = GREP_FIXED;
523 read_patterns(optarg);
527 grepbehave = GREP_BASIC;
537 binbehave = BINFILE_SKIP;
547 err(2, "bzip2 support was disabled at compile-time");
549 filebehave = FILE_BZIP;
562 mlimit = mcount = strtoll(optarg, &ep, 10);
563 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
564 ((errno == EINVAL) && (mcount == 0)))
566 else if (ep[0] != '\0') {
572 filebehave = FILE_LZMA;
578 linkbehave = LINK_EXPLICIT;
582 cflags &= ~REG_NOSUB;
585 linkbehave = LINK_SKIP;
591 linkbehave = LINK_READ;
595 dirbehave = DIR_RECURSE;
602 binbehave = BINFILE_BIN;
606 filebehave = FILE_MMAP;
610 printf(getstr(10), getprogname(), VERSION);
612 printf(getstr(9), getprogname(), VERSION);
620 cflags &= ~REG_NOSUB;
624 cflags &= ~REG_NOSUB;
627 filebehave = FILE_XZ;
633 filebehave = FILE_GZIP;
636 if (strcasecmp("binary", optarg) == 0)
637 binbehave = BINFILE_BIN;
638 else if (strcasecmp("without-match", optarg) == 0)
639 binbehave = BINFILE_SKIP;
640 else if (strcasecmp("text", optarg) == 0)
641 binbehave = BINFILE_TEXT;
643 errx(2, getstr(3), "--binary-files");
647 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
648 strcasecmp("tty", optarg) == 0 ||
649 strcasecmp("if-tty", optarg) == 0) {
652 term = getenv("TERM");
653 if (isatty(STDOUT_FILENO) && term != NULL &&
654 strcasecmp(term, "dumb") != 0)
655 color = init_color("01;31");
656 } else if (strcasecmp("always", optarg) == 0 ||
657 strcasecmp("yes", optarg) == 0 ||
658 strcasecmp("force", optarg) == 0) {
659 color = init_color("01;31");
660 } else if (strcasecmp("never", optarg) != 0 &&
661 strcasecmp("none", optarg) != 0 &&
662 strcasecmp("no", optarg) != 0)
663 errx(2, getstr(3), "--color");
664 cflags &= ~REG_NOSUB;
677 add_fpattern(optarg, INCL_PAT);
681 add_fpattern(optarg, EXCL_PAT);
685 add_dpattern(optarg, INCL_PAT);
689 add_dpattern(optarg, EXCL_PAT);
696 newarg = optind != prevoptind;
702 /* Empty pattern file matches nothing */
703 if (!needpattern && (patterns == 0))
706 /* Fail if we don't have any pattern */
707 if (aargc == 0 && needpattern)
710 /* Process patterns from command line */
711 if (aargc != 0 && needpattern) {
713 char *string = *aargv;
715 while ((token = strsep(&string, "\n")) != NULL)
716 add_pattern(token, strlen(token));
721 switch (grepbehave) {
726 * regex(3) implementations that support fixed-string searches generally
727 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
728 * here. If neither are defined, GREP_FIXED later implies that the
729 * internal literal matcher should be used. Other cflags that have
730 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
731 * similarly added here, and grep.h should be amended to take this into
732 * consideration when defining WITH_INTERNAL_NOSPEC.
734 #if defined(REG_NOSPEC)
735 cflags |= REG_NOSPEC;
736 #elif defined(REG_LITERAL)
737 cflags |= REG_LITERAL;
741 cflags |= REG_EXTENDED;
748 #ifndef WITHOUT_FASTMATCH
749 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
751 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
753 /* Don't process any patterns if we have a blank one */
754 #ifdef WITH_INTERNAL_NOSPEC
755 if (!matchall && grepbehave != GREP_FIXED) {
759 /* Check if cheating is allowed (always is for fgrep). */
760 for (i = 0; i < patterns; ++i) {
761 #ifndef WITHOUT_FASTMATCH
763 * Attempt compilation with fastmatch regex and
764 * fallback to regex(3) if it fails.
766 if (fastncomp(&fg_pattern[i], pattern[i].pat,
767 pattern[i].len, cflags) == 0)
770 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
772 regerror(c, &r_pattern[i], re_error,
774 errx(2, "%s", re_error);
782 if ((aargc == 0 || aargc == 1) && !Hflag)
785 if (aargc == 0 && dirbehave != DIR_RECURSE)
786 exit(!procfile("-"));
788 if (dirbehave == DIR_RECURSE)
789 c = grep_tree(aargv);
791 for (c = 0; aargc--; ++aargv) {
792 if ((finclude || fexclude) && !file_matching(*aargv))
794 c+= procfile(*aargv);
801 /* Find out the correct return value according to the
802 results and the command line option. */
803 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));