1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #ifndef WITHOUT_FASTMATCH
53 #include "fastmatch.h"
63 * Default messages to use when NLS is disabled or no catalogue
/*
 * Built-in message table. The numeric comments give the index of each entry.
 * Entries 4-7 are the pieces of the usage text printed by the getstr(4)..
 * getstr(7) calls further down; entry 4 contains a %s conversion that
 * receives the program name. Entry 9 is the version banner format.
 * NOTE(review): the entry for index 0 and the closing brace of the
 * initializer are not visible in this listing.
 */
66 const char *errstr[] = {
68 /* 1*/ "(standard input)",
69 /* 2*/ "cannot read bzip2 compressed file",
70 /* 3*/ "unknown %s option",
71 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
72 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
73 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
74 /* 7*/ "\t[--null] [pattern] [file ...]\n",
75 /* 8*/ "Binary file %s matches\n",
76 /* 9*/ "%s (BSD grep) %s\n",
/*
 * File-scope state shared by the option parser and the search code.
 * NOTE(review): the embedded line numbers skip values, so some declarations
 * that belong to this section are not visible in this listing.
 */
79 /* Flags passed to regcomp() and regexec() */
/*
 * cflags is handed to fastncomp() and regcomp() when the patterns are
 * compiled in main(); REG_NOSUB is the default and is cleared again by
 * several option cases in main() (cflags &= ~REG_NOSUB).
 */
80 int cflags = REG_NOSUB;
81 int eflags = REG_STARTEND;
83 /* Shortcut for matching all cases like empty regex */
86 /* Searching patterns */
/*
 * patterns is the number of stored search patterns; pattern_sz is the
 * allocated capacity of the pattern array (add_pattern() grows the array
 * when the two are equal).
 */
87 unsigned int patterns;
88 static unsigned int pattern_sz;
91 #ifndef WITHOUT_FASTMATCH
/* One fastmatch slot per pattern, allocated in main() with grep_calloc(). */
92 fastmatch_t *fg_pattern;
95 /* Filename exclusion/inclusion patterns */
/*
 * fpatterns/dpatterns index the next free slot of fpattern/dpattern;
 * fpattern_sz/dpattern_sz are the matching capacities (see add_fpattern()
 * and add_dpattern()).
 */
96 unsigned int fpatterns, dpatterns;
97 static unsigned int fpattern_sz, dpattern_sz;
98 struct epat *dpattern, *fpattern;
100 /* For regex errors */
/* Filled by regerror() in main() when regcomp() fails. */
101 char re_error[RE_ERROR_BUF + 1];
103 /* Command-line flags */
104 unsigned long long Aflag; /* -A x: print x lines trailing each match */
105 unsigned long long Bflag; /* -B x: print x lines leading each match */
106 bool Hflag; /* -H: always print file name */
107 bool Lflag; /* -L: only show names of files with no matches */
108 bool bflag; /* -b: show block numbers for each match */
109 bool cflag; /* -c: only show a count of matching lines */
110 bool hflag; /* -h: don't print filename headers */
111 bool iflag; /* -i: ignore case */
112 bool lflag; /* -l: only show names of files with matches */
113 bool mflag; /* -m x: stop reading the files after x matches */
114 long long mcount; /* count for -m */
115 long long mlimit; /* requested value for -m */
116 char fileeol; /* indicator for eol */
117 bool nflag; /* -n: show line numbers in front of matching lines */
118 bool oflag; /* -o: print only matching part */
119 bool qflag; /* -q: quiet mode (don't output anything) */
120 bool sflag; /* -s: silent mode (ignore errors) */
121 bool vflag; /* -v: only show non-matching lines */
122 bool wflag; /* -w: pattern must start and end on word boundaries */
123 bool xflag; /* -x: pattern must match entire line */
124 bool lbflag; /* --line-buffered */
125 bool nullflag; /* --null */
126 char *label; /* --label */
127 const char *color; /* --color */
128 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
129 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
130 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
131 int devbehave = DEV_READ; /* -D: handling of devices */
132 int dirbehave = DIR_READ; /* -dRr: handling of directories */
133 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
135 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
136 bool fexclude, finclude; /* --exclude and --include */
/*
 * BIN_OPT is the val used for --binary-files in long_options. It is one past
 * CHAR_MAX, i.e. outside the range of every single-character option code.
 * NOTE(review): the enclosing enum and its other members are not visible here.
 */
139 BIN_OPT = CHAR_MAX + 1,
/* Forward declaration; the definition appears after read_patterns(). */
152 static inline const char *init_color(const char *);
155 bool first = true; /* flag whether we are processing the first match */
156 bool prev; /* flag whether or not the previous line matched */
157 int tail; /* lines left to print */
158 bool file_err; /* file reading error */
161 * Prints usage information and returns 2.
/*
 * All four pieces go to stderr. getstr(4) is used directly as the format
 * string and is given getprogname() as its argument; getstr(5)..getstr(7)
 * are emitted unchanged through a "%s" format.
 * NOTE(review): presumably getstr(n) resolves to errstr[n] or its message
 * catalogue translation; the definition is not in this file section.
 */
166 fprintf(stderr, getstr(4), getprogname());
167 fprintf(stderr, "%s", getstr(5));
168 fprintf(stderr, "%s", getstr(6));
169 fprintf(stderr, "%s", getstr(7));
/*
 * Short option specification passed to getopt_long() in main(). In getopt
 * syntax a letter followed by a colon takes an argument. The digits 0-9 are
 * listed as options of their own; main() accumulates them into Aflag/Bflag.
 */
173 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXyz";
/*
 * Long option table passed to getopt_long() in main(). Fields per entry:
 * name, argument requirement, flag pointer, val. The flag pointer is NULL in
 * every entry, so getopt_long() reports a match by returning val. Entries
 * whose val is a character share the code of that short option; the *_OPT
 * values are codes for options that exist only in long form.
 */
175 static const struct option long_options[] =
177 {"binary-files", required_argument, NULL, BIN_OPT},
178 {"help", no_argument, NULL, HELP_OPT},
179 {"mmap", no_argument, NULL, MMAP_OPT},
180 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
181 {"label", required_argument, NULL, LABEL_OPT},
182 {"null", no_argument, NULL, NULL_OPT},
/* Both spellings map to the same option code. */
183 {"color", optional_argument, NULL, COLOR_OPT},
184 {"colour", optional_argument, NULL, COLOR_OPT},
185 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
186 {"include", required_argument, NULL, R_INCLUDE_OPT},
187 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
188 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
189 {"after-context", required_argument, NULL, 'A'},
190 {"text", no_argument, NULL, 'a'},
191 {"before-context", required_argument, NULL, 'B'},
192 {"byte-offset", no_argument, NULL, 'b'},
193 {"context", optional_argument, NULL, 'C'},
194 {"count", no_argument, NULL, 'c'},
195 {"devices", required_argument, NULL, 'D'},
196 {"directories", required_argument, NULL, 'd'},
197 {"extended-regexp", no_argument, NULL, 'E'},
198 {"regexp", required_argument, NULL, 'e'},
199 {"fixed-strings", no_argument, NULL, 'F'},
200 {"file", required_argument, NULL, 'f'},
201 {"basic-regexp", no_argument, NULL, 'G'},
202 {"no-filename", no_argument, NULL, 'h'},
203 {"with-filename", no_argument, NULL, 'H'},
204 {"ignore-case", no_argument, NULL, 'i'},
205 {"bz2decompress", no_argument, NULL, 'J'},
206 {"files-with-matches", no_argument, NULL, 'l'},
207 {"files-without-match", no_argument, NULL, 'L'},
208 {"max-count", required_argument, NULL, 'm'},
209 {"lzma", no_argument, NULL, 'M'},
210 {"line-number", no_argument, NULL, 'n'},
211 {"only-matching", no_argument, NULL, 'o'},
/* --quiet and --silent are synonyms; both yield the q option code. */
212 {"quiet", no_argument, NULL, 'q'},
213 {"silent", no_argument, NULL, 'q'},
214 {"recursive", no_argument, NULL, 'r'},
215 {"no-messages", no_argument, NULL, 's'},
216 {"binary", no_argument, NULL, 'U'},
217 {"unix-byte-offsets", no_argument, NULL, 'u'},
218 {"invert-match", no_argument, NULL, 'v'},
219 {"version", no_argument, NULL, 'V'},
220 {"word-regexp", no_argument, NULL, 'w'},
221 {"line-regexp", no_argument, NULL, 'x'},
222 {"xz", no_argument, NULL, 'X'},
223 {"null-data", no_argument, NULL, 'z'},
224 {"decompress", no_argument, NULL, 'Z'},
/* Terminating entry with a NULL name and zero val marks the end of the table. */
225 {NULL, no_argument, NULL, 0}
229 * Adds a searching pattern to the internal array.
232 add_pattern(char *pat, size_t len)
/*
 * pat: pattern bytes, not required to be NUL-terminated.
 * len: number of bytes of pat to use.
 * A private, NUL-terminated copy is stored in pattern[patterns] together
 * with its length.
 * NOTE(review): the embedded line numbers show that several statements of
 * this function (including the conditions guarding the shortcut path) are
 * not visible in this listing.
 */
235 /* Do not add further patterns if we already match everything */
239 /* Check if we can do a shortcut */
/*
 * Shortcut path: every stored pattern string is freed, the array is shrunk
 * to a single struct pat, and that slot's pat pointer is set to NULL.
 */
242 for (unsigned int i = 0; i < patterns; i++) {
243 free(pattern[i].pat);
245 pattern = grep_realloc(pattern, sizeof(struct pat));
246 pattern[0].pat = NULL;
251 /* Increase size if necessary */
252 if (patterns == pattern_sz) {
254 pattern = grep_realloc(pattern, ++pattern_sz *
/*
 * A trailing newline in the input is detected here; presumably it is then
 * dropped from len, but the statement that follows is not visible.
 */
257 if (len > 0 && pat[len - 1] == '\n')
259 /* pat may not be NUL-terminated */
/* len + 1 bytes leave room for the terminator written below. */
260 pattern[patterns].pat = grep_malloc(len + 1);
261 memcpy(pattern[patterns].pat, pat, len);
262 pattern[patterns].len = len;
263 pattern[patterns].pat[len] = '\0';
268 * Adds a file include/exclude pattern to the internal array.
271 add_fpattern(const char *pat, int mode)
/*
 * pat: pattern text; a copy made with grep_strdup() is stored.
 * mode: stored unchanged in the new slot; main() passes INCL_PAT or EXCL_PAT.
 * NOTE(review): the statement that advances fpatterns is not visible in
 * this listing.
 */
274 /* Increase size if necessary */
/* The array is full when the next free index equals the capacity. */
275 if (fpatterns == fpattern_sz) {
277 fpattern = grep_realloc(fpattern, ++fpattern_sz *
278 sizeof(struct epat));
280 fpattern[fpatterns].pat = grep_strdup(pat);
281 fpattern[fpatterns].mode = mode;
286 * Adds a directory include/exclude pattern to the internal array.
289 add_dpattern(const char *pat, int mode)
/*
 * Directory counterpart of add_fpattern(): same steps, applied to
 * dpattern/dpatterns/dpattern_sz.
 * pat: pattern text; a copy made with grep_strdup() is stored.
 * mode: stored unchanged in the new slot; main() passes INCL_PAT or EXCL_PAT.
 * NOTE(review): the statement that advances dpatterns is not visible in
 * this listing.
 */
292 /* Increase size if necessary */
/* The array is full when the next free index equals the capacity. */
293 if (dpatterns == dpattern_sz) {
295 dpattern = grep_realloc(dpattern, ++dpattern_sz *
296 sizeof(struct epat));
298 dpattern[dpatterns].pat = grep_strdup(pat);
299 dpattern[dpatterns].mode = mode;
304 * Reads searching patterns from a file and adds them with add_pattern().
307 read_patterns(const char *fn)
/*
 * fn: path of the pattern file, opened read-only in text mode.
 * NOTE(review): local declarations, the error branches and the cleanup code
 * are not visible in this listing.
 */
315 if ((f = fopen(fn, "r")) == NULL)
/* A failing fstat() and a directory are handled by the same branch. */
317 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
/* getline() returns -1 at end of input or on error, which ends the loop. */
323 while ((rlen = getline(&line, &len, f)) != -1) {
/*
 * A line that starts with a newline is passed with length 0; any other line
 * is passed with the full length reported by getline().
 */
326 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
335 static inline const char *
336 init_color(const char *d)
/*
 * Chooses the colour string: returns the value of the GREP_COLOR environment
 * variable when it is set and non-empty, otherwise the default d supplied by
 * the caller. The returned pointer is either the getenv() result or d itself;
 * nothing is copied.
 */
340 c = getenv("GREP_COLOR");
341 return (c != NULL && c[0] != '\0' ? c : d);
345 main(int argc, char *argv[])
/*
 * Program entry point. Visible steps: set the locale, choose defaults from
 * the program name, merge GREP_OPTIONS with argv, parse the options with
 * getopt_long(), collect and compile the patterns, search the inputs, and
 * exit with a status derived from the match count and file errors.
 * NOTE(review): the embedded line numbers show that many original lines
 * (case labels, braces, some statements) are absent from this listing.
 */
347 char **aargv, **eargv, *eopts;
350 unsigned long long l;
351 unsigned int aargc, eargc, i;
352 int c, lastc, needpattern, newarg, prevoptind;
354 setlocale(LC_ALL, "");
357 catalog = catopen("grep", NL_CAT_LOCALE);
360 /* Check what is the program name of the binary. In this
361 way we can have all the functionalities in one binary
362 without the need of scripting and using ugly hacks. */
/*
 * Defaults keyed on the leading characters of pn: "bz" selects bzip2 input,
 * "xz" selects xz, "lz" selects lzma, "r" selects recursive directory
 * handling and "z" selects gzip. The two-character prefixes are tested
 * before the single "z".
 * NOTE(review): the assignment of pn is not visible in this listing.
 */
364 if (pn[0] == 'b' && pn[1] == 'z') {
365 filebehave = FILE_BZIP;
367 } else if (pn[0] == 'x' && pn[1] == 'z') {
368 filebehave = FILE_XZ;
370 } else if (pn[0] == 'l' && pn[1] == 'z') {
371 filebehave = FILE_LZMA;
373 } else if (pn[0] == 'r') {
374 dirbehave = DIR_RECURSE;
376 } else if (pn[0] == 'z') {
377 filebehave = FILE_GZIP;
382 grepbehave = GREP_EXTENDED;
385 grepbehave = GREP_FIXED;
395 eopts = getenv("GREP_OPTIONS");
397 /* support for extra arguments in GREP_OPTIONS */
399 if (eopts != NULL && eopts[0] != '\0') {
402 /* make an estimation of how many extra arguments we have */
/*
 * NOTE(review): strlen(eopts) is evaluated again on every iteration; if the
 * loop body (not visible here) does not modify eopts, the length could be
 * computed once before the loop.
 */
403 for (unsigned int j = 0; j < strlen(eopts); j++)
407 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
410 /* parse extra arguments */
/* strsep() cuts eopts at each space; pieces are copied with grep_strdup(). */
411 while ((str = strsep(&eopts, " ")) != NULL)
413 eargv[eargc++] = grep_strdup(str);
/*
 * Build the merged argument vector: slots 1..eargc receive the GREP_OPTIONS
 * arguments, followed by argv[1] through argv[argc - 1]. Slot 0 is not
 * assigned in the visible lines.
 */
415 aargv = (char **)grep_calloc(eargc + argc + 1,
419 for (i = 0; i < eargc; i++)
420 aargv[i + 1] = eargv[i];
421 for (int j = 1; j < argc; j++, i++)
422 aargv[i + 1] = argv[j];
424 aargc = eargc + argc;
/* Option loop: c receives each option code returned by getopt_long(). */
430 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
/*
 * Digit options build a multi-digit count that is stored in both Aflag and
 * Bflag. A digit that begins a new argument, or that does not follow another
 * digit, takes the first branch (its body is not visible); otherwise the
 * running value is compared with LLONG_MAX / 10 before it is multiplied by
 * ten below.
 */
433 case '0': case '1': case '2': case '3': case '4':
434 case '5': case '6': case '7': case '8': case '9':
435 if (newarg || !isdigit(lastc))
437 else if (Aflag > LLONG_MAX / 10) {
441 Aflag = Bflag = (Aflag * 10) + (c - '0');
444 if (optarg == NULL) {
/*
 * Numeric option argument parsed as unsigned decimal. The checks that follow
 * look for a range or invalid-input error reported through errno, and for
 * trailing characters after the number (ep not at the terminating NUL).
 */
453 l = strtoull(optarg, &ep, 10);
454 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
455 ((errno == EINVAL) && (l == 0)))
457 else if (ep[0] != '\0') {
469 binbehave = BINFILE_TEXT;
/* Keyword argument compared case-insensitively; errx() reports message 3. */
478 if (strcasecmp(optarg, "skip") == 0)
479 devbehave = DEV_SKIP;
480 else if (strcasecmp(optarg, "read") == 0)
481 devbehave = DEV_READ;
483 errx(2, getstr(3), "--devices");
/* Same scheme for the directory action: recurse, skip or read. */
486 if (strcasecmp("recurse", optarg) == 0) {
488 dirbehave = DIR_RECURSE;
489 } else if (strcasecmp("skip", optarg) == 0)
490 dirbehave = DIR_SKIP;
491 else if (strcasecmp("read", optarg) == 0)
492 dirbehave = DIR_READ;
494 errx(2, getstr(3), "--directories");
497 grepbehave = GREP_EXTENDED;
/* The option argument is cut at newlines; each piece becomes one pattern. */
502 char *string = optarg;
504 while ((token = strsep(&string, "\n")) != NULL)
505 add_pattern(token, strlen(token));
510 grepbehave = GREP_FIXED;
513 read_patterns(optarg);
517 grepbehave = GREP_BASIC;
527 binbehave = BINFILE_SKIP;
/*
 * NOTE(review): err() appends the text for the current errno value to the
 * message; this message does not describe an errno failure, so errx() may
 * have been intended. Confirm before changing.
 */
537 err(2, "bzip2 support was disabled at compile-time");
539 filebehave = FILE_BZIP;
/* The parsed value is stored in both mlimit and mcount. */
552 mlimit = mcount = strtoll(optarg, &ep, 10);
553 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
554 ((errno == EINVAL) && (mcount == 0)))
556 else if (ep[0] != '\0') {
562 filebehave = FILE_LZMA;
568 linkbehave = LINK_EXPLICIT;
572 cflags &= ~REG_NOSUB;
575 linkbehave = LINK_SKIP;
581 linkbehave = LINK_READ;
585 dirbehave = DIR_RECURSE;
592 binbehave = BINFILE_BIN;
596 filebehave = FILE_MMAP;
599 printf(getstr(9), getprogname(), VERSION);
606 cflags &= ~REG_NOSUB;
610 cflags &= ~REG_NOSUB;
613 filebehave = FILE_XZ;
619 filebehave = FILE_GZIP;
/* Keyword argument for the binary file mode: binary, without-match or text. */
622 if (strcasecmp("binary", optarg) == 0)
623 binbehave = BINFILE_BIN;
624 else if (strcasecmp("without-match", optarg) == 0)
625 binbehave = BINFILE_SKIP;
626 else if (strcasecmp("text", optarg) == 0)
627 binbehave = BINFILE_TEXT;
629 errx(2, getstr(3), "--binary-files");
/*
 * Colour selection. With no argument, or with auto, tty or if-tty, colour is
 * enabled only when stdout is a terminal and TERM is set to something other
 * than dumb. With always, yes or force it is enabled unconditionally. The
 * values never, none and no are accepted and leave color unchanged; any
 * other value is rejected with message 3. REG_NOSUB is cleared afterwards.
 */
633 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
634 strcasecmp("tty", optarg) == 0 ||
635 strcasecmp("if-tty", optarg) == 0) {
638 term = getenv("TERM");
639 if (isatty(STDOUT_FILENO) && term != NULL &&
640 strcasecmp(term, "dumb") != 0)
641 color = init_color("01;31");
642 } else if (strcasecmp("always", optarg) == 0 ||
643 strcasecmp("yes", optarg) == 0 ||
644 strcasecmp("force", optarg) == 0) {
645 color = init_color("01;31");
646 } else if (strcasecmp("never", optarg) != 0 &&
647 strcasecmp("none", optarg) != 0 &&
648 strcasecmp("no", optarg) != 0)
649 errx(2, getstr(3), "--color");
650 cflags &= ~REG_NOSUB;
663 add_fpattern(optarg, INCL_PAT);
667 add_fpattern(optarg, EXCL_PAT);
671 add_dpattern(optarg, INCL_PAT);
675 add_dpattern(optarg, EXCL_PAT);
/*
 * newarg is true when optind differs from prevoptind; the digit option
 * handling above reads it to decide whether a digit starts a new number.
 */
682 newarg = optind != prevoptind;
688 /* Empty pattern file matches nothing */
689 if (!needpattern && (patterns == 0))
692 /* Fail if we don't have any pattern */
693 if (aargc == 0 && needpattern)
696 /* Process patterns from command line */
697 if (aargc != 0 && needpattern) {
/* *aargv is cut at newlines and every piece is added as its own pattern. */
699 char *string = *aargv;
701 while ((token = strsep(&string, "\n")) != NULL)
702 add_pattern(token, strlen(token));
707 switch (grepbehave) {
711 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
715 cflags |= REG_EXTENDED;
722 #ifndef WITHOUT_FASTMATCH
/* One zero-initialised compiled slot per stored pattern. */
723 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
725 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
727 /* Check if cheating is allowed (always is for fgrep). */
728 for (i = 0; i < patterns; ++i) {
729 #ifndef WITHOUT_FASTMATCH
730 /* Attempt compilation with fastmatch regex and fallback to
731 regex(3) if it fails. */
732 if (fastncomp(&fg_pattern[i], pattern[i].pat,
733 pattern[i].len, cflags) == 0)
/*
 * The regcomp() result is kept in c; the error path formats the message
 * with regerror() into re_error and exits through errx(2).
 */
736 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
738 regerror(c, &r_pattern[i], re_error,
740 errx(2, "%s", re_error);
747 if ((aargc == 0 || aargc == 1) && !Hflag)
/*
 * No operands left and not recursive: process the single input named "-"
 * (presumably standard input) and exit with the negation of its result.
 */
750 if (aargc == 0 && dirbehave != DIR_RECURSE)
751 exit(!procfile("-"));
753 if (dirbehave == DIR_RECURSE)
754 c = grep_tree(aargv);
/*
 * Otherwise c accumulates the procfile() results over all operands. When an
 * include or exclude file pattern is active, operands that fail
 * file_matching() take a separate branch (its body is not visible).
 */
756 for (c = 0; aargc--; ++aargv) {
757 if ((finclude || fexclude) && !file_matching(*aargv))
759 c+= procfile(*aargv);
766 /* Find out the correct return value according to the
767 results and the command line option. */
/*
 * Exit status: with c non-zero the status is 0, except that a file error
 * without qflag yields 2. With c zero the status is 2 after a file error
 * and 1 otherwise.
 */
768 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));