1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #include "fastmatch.h"
61 * Default messages to use when NLS is disabled or no catalogue
64 const char *errstr[] = {
66 /* 1*/ "(standard input)",
67 /* 2*/ "cannot read bzip2 compressed file",
68 /* 3*/ "unknown %s option",
69 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
70 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
71 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
72 /* 7*/ "\t[--null] [pattern] [file ...]\n",
73 /* 8*/ "Binary file %s matches\n",
74 /* 9*/ "%s (BSD grep) %s\n",
77 /* Flags passed to regcomp() and regexec() */
78 int cflags = REG_NOSUB;
79 int eflags = REG_STARTEND;
81 /* Shortcut for matching all cases like empty regex */
84 /* Searching patterns */
85 unsigned int patterns;
86 static unsigned int pattern_sz;
89 fastmatch_t *fg_pattern;
91 /* Filename exclusion/inclusion patterns */
92 unsigned int fpatterns, dpatterns;
93 static unsigned int fpattern_sz, dpattern_sz;
94 struct epat *dpattern, *fpattern;
96 /* For regex errors */
97 char re_error[RE_ERROR_BUF + 1];
99 /* Command-line flags */
100 unsigned long long Aflag; /* -A x: print x lines trailing each match */
101 unsigned long long Bflag; /* -B x: print x lines leading each match */
102 bool Hflag; /* -H: always print file name */
103 bool Lflag; /* -L: only show names of files with no matches */
104 bool bflag; /* -b: show block numbers for each match */
105 bool cflag; /* -c: only show a count of matching lines */
106 bool hflag; /* -h: don't print filename headers */
107 bool iflag; /* -i: ignore case */
108 bool lflag; /* -l: only show names of files with matches */
109 bool mflag; /* -m x: stop reading the files after x matches */
110 long long mcount; /* count for -m */
111 long long mlimit; /* requested value for -m */
112 bool nflag; /* -n: show line numbers in front of matching lines */
113 bool oflag; /* -o: print only matching part */
114 bool qflag; /* -q: quiet mode (don't output anything) */
115 bool sflag; /* -s: silent mode (ignore errors) */
116 bool vflag; /* -v: only show non-matching lines */
117 bool wflag; /* -w: pattern must start and end on word boundaries */
118 bool xflag; /* -x: pattern must match entire line */
119 bool lbflag; /* --line-buffered */
120 bool nullflag; /* --null */
121 char *label; /* --label */
122 const char *color; /* --color */
123 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
124 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
125 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
126 int devbehave = DEV_READ; /* -D: handling of devices */
127 int dirbehave = DIR_READ; /* -dRr: handling of directories */
128 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
130 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
131 bool fexclude, finclude; /* --exclude and --include */
134 BIN_OPT = CHAR_MAX + 1,
147 static inline const char *init_color(const char *);
150 bool first = true; /* flag whether we are processing the first match */
151 bool prev; /* flag whether or not the previous line matched */
152 int tail; /* lines left to print */
153 bool file_err; /* file reading error */
156 * Prints usage information and returns 2.
161 fprintf(stderr, getstr(4), getprogname());
162 fprintf(stderr, "%s", getstr(5));
163 fprintf(stderr, "%s", getstr(6));
164 fprintf(stderr, "%s", getstr(7));
168 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
170 static const struct option long_options[] =
172 {"binary-files", required_argument, NULL, BIN_OPT},
173 {"help", no_argument, NULL, HELP_OPT},
174 {"mmap", no_argument, NULL, MMAP_OPT},
175 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
176 {"label", required_argument, NULL, LABEL_OPT},
177 {"null", no_argument, NULL, NULL_OPT},
178 {"color", optional_argument, NULL, COLOR_OPT},
179 {"colour", optional_argument, NULL, COLOR_OPT},
180 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
181 {"include", required_argument, NULL, R_INCLUDE_OPT},
182 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
183 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
184 {"after-context", required_argument, NULL, 'A'},
185 {"text", no_argument, NULL, 'a'},
186 {"before-context", required_argument, NULL, 'B'},
187 {"byte-offset", no_argument, NULL, 'b'},
188 {"context", optional_argument, NULL, 'C'},
189 {"count", no_argument, NULL, 'c'},
190 {"devices", required_argument, NULL, 'D'},
191 {"directories", required_argument, NULL, 'd'},
192 {"extended-regexp", no_argument, NULL, 'E'},
193 {"regexp", required_argument, NULL, 'e'},
194 {"fixed-strings", no_argument, NULL, 'F'},
195 {"file", required_argument, NULL, 'f'},
196 {"basic-regexp", no_argument, NULL, 'G'},
197 {"no-filename", no_argument, NULL, 'h'},
198 {"with-filename", no_argument, NULL, 'H'},
199 {"ignore-case", no_argument, NULL, 'i'},
200 {"bz2decompress", no_argument, NULL, 'J'},
201 {"files-with-matches", no_argument, NULL, 'l'},
202 {"files-without-match", no_argument, NULL, 'L'},
203 {"max-count", required_argument, NULL, 'm'},
204 {"lzma", no_argument, NULL, 'M'},
205 {"line-number", no_argument, NULL, 'n'},
206 {"only-matching", no_argument, NULL, 'o'},
207 {"quiet", no_argument, NULL, 'q'},
208 {"silent", no_argument, NULL, 'q'},
209 {"recursive", no_argument, NULL, 'r'},
210 {"no-messages", no_argument, NULL, 's'},
211 {"binary", no_argument, NULL, 'U'},
212 {"unix-byte-offsets", no_argument, NULL, 'u'},
213 {"invert-match", no_argument, NULL, 'v'},
214 {"version", no_argument, NULL, 'V'},
215 {"word-regexp", no_argument, NULL, 'w'},
216 {"line-regexp", no_argument, NULL, 'x'},
217 {"xz", no_argument, NULL, 'X'},
218 {"decompress", no_argument, NULL, 'Z'},
219 {NULL, no_argument, NULL, 0}
223 * Adds a searching pattern to the internal array.
226 add_pattern(char *pat, size_t len)
229 /* Do not add further patterns if we already match everything */
233 /* Check if we can do a shortcut */
236 for (unsigned int i = 0; i < patterns; i++) {
237 free(pattern[i].pat);
239 pattern = grep_realloc(pattern, sizeof(struct pat));
240 pattern[0].pat = NULL;
245 /* Increase size if necessary */
246 if (patterns == pattern_sz) {
248 pattern = grep_realloc(pattern, ++pattern_sz *
251 if (len > 0 && pat[len - 1] == '\n')
253 /* pat may not be NUL-terminated */
254 pattern[patterns].pat = grep_malloc(len + 1);
255 memcpy(pattern[patterns].pat, pat, len);
256 pattern[patterns].len = len;
257 pattern[patterns].pat[len] = '\0';
262 * Adds a file include/exclude pattern to the internal array.
265 add_fpattern(const char *pat, int mode)
268 /* Increase size if necessary */
269 if (fpatterns == fpattern_sz) {
271 fpattern = grep_realloc(fpattern, ++fpattern_sz *
272 sizeof(struct epat));
274 fpattern[fpatterns].pat = grep_strdup(pat);
275 fpattern[fpatterns].mode = mode;
280 * Adds a directory include/exclude pattern to the internal array.
283 add_dpattern(const char *pat, int mode)
286 /* Increase size if necessary */
287 if (dpatterns == dpattern_sz) {
289 dpattern = grep_realloc(dpattern, ++dpattern_sz *
290 sizeof(struct epat));
292 dpattern[dpatterns].pat = grep_strdup(pat);
293 dpattern[dpatterns].mode = mode;
298 * Reads searching patterns from a file and adds them with add_pattern().
301 read_patterns(const char *fn)
309 if ((f = fopen(fn, "r")) == NULL)
311 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
317 while ((rlen = getline(&line, &len, f)) != -1)
318 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
325 static inline const char *
326 init_color(const char *d)
330 c = getenv("GREP_COLOR");
331 return (c != NULL && c[0] != '\0' ? c : d);
335 main(int argc, char *argv[])
337 char **aargv, **eargv, *eopts;
340 unsigned long long l;
341 unsigned int aargc, eargc, i;
342 int c, lastc, needpattern, newarg, prevoptind;
344 setlocale(LC_ALL, "");
347 catalog = catopen("grep", NL_CAT_LOCALE);
350 /* Check what is the program name of the binary. In this
351 way we can have all the functionalities in one binary
352 without the need of scripting and using ugly hacks. */
354 if (pn[0] == 'b' && pn[1] == 'z') {
355 filebehave = FILE_BZIP;
357 } else if (pn[0] == 'x' && pn[1] == 'z') {
358 filebehave = FILE_XZ;
360 } else if (pn[0] == 'l' && pn[1] == 'z') {
361 filebehave = FILE_LZMA;
363 } else if (pn[0] == 'z') {
364 filebehave = FILE_GZIP;
369 grepbehave = GREP_EXTENDED;
372 grepbehave = GREP_FIXED;
381 eopts = getenv("GREP_OPTIONS");
383 /* support for extra arguments in GREP_OPTIONS */
385 if (eopts != NULL && eopts[0] != '\0') {
388 /* make an estimation of how many extra arguments we have */
389 for (unsigned int j = 0; j < strlen(eopts); j++)
393 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
396 /* parse extra arguments */
397 while ((str = strsep(&eopts, " ")) != NULL)
399 eargv[eargc++] = grep_strdup(str);
401 aargv = (char **)grep_calloc(eargc + argc + 1,
405 for (i = 0; i < eargc; i++)
406 aargv[i + 1] = eargv[i];
407 for (int j = 1; j < argc; j++, i++)
408 aargv[i + 1] = argv[j];
410 aargc = eargc + argc;
416 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
419 case '0': case '1': case '2': case '3': case '4':
420 case '5': case '6': case '7': case '8': case '9':
421 if (newarg || !isdigit(lastc))
423 else if (Aflag > LLONG_MAX / 10) {
427 Aflag = Bflag = (Aflag * 10) + (c - '0');
430 if (optarg == NULL) {
439 l = strtoull(optarg, &ep, 10);
440 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
441 ((errno == EINVAL) && (l == 0)))
443 else if (ep[0] != '\0') {
455 binbehave = BINFILE_TEXT;
464 if (strcasecmp(optarg, "skip") == 0)
465 devbehave = DEV_SKIP;
466 else if (strcasecmp(optarg, "read") == 0)
467 devbehave = DEV_READ;
469 errx(2, getstr(3), "--devices");
472 if (strcasecmp("recurse", optarg) == 0) {
474 dirbehave = DIR_RECURSE;
475 } else if (strcasecmp("skip", optarg) == 0)
476 dirbehave = DIR_SKIP;
477 else if (strcasecmp("read", optarg) == 0)
478 dirbehave = DIR_READ;
480 errx(2, getstr(3), "--directories");
483 grepbehave = GREP_EXTENDED;
488 char *string = optarg;
490 while ((token = strsep(&string, "\n")) != NULL)
491 add_pattern(token, strlen(token));
496 grepbehave = GREP_FIXED;
499 read_patterns(optarg);
503 grepbehave = GREP_BASIC;
513 binbehave = BINFILE_SKIP;
523 err(2, "bzip2 support was disabled at compile-time");
525 filebehave = FILE_BZIP;
538 mlimit = mcount = strtoll(optarg, &ep, 10);
539 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
540 ((errno == EINVAL) && (mcount == 0)))
542 else if (ep[0] != '\0') {
548 filebehave = FILE_LZMA;
554 linkbehave = LINK_EXPLICIT;
558 cflags &= ~REG_NOSUB;
561 linkbehave = LINK_SKIP;
567 linkbehave = LINK_READ;
571 dirbehave = DIR_RECURSE;
578 binbehave = BINFILE_BIN;
582 filebehave = FILE_MMAP;
585 printf(getstr(9), getprogname(), VERSION);
592 cflags &= ~REG_NOSUB;
596 cflags &= ~REG_NOSUB;
599 filebehave = FILE_XZ;
602 filebehave = FILE_GZIP;
605 if (strcasecmp("binary", optarg) == 0)
606 binbehave = BINFILE_BIN;
607 else if (strcasecmp("without-match", optarg) == 0)
608 binbehave = BINFILE_SKIP;
609 else if (strcasecmp("text", optarg) == 0)
610 binbehave = BINFILE_TEXT;
612 errx(2, getstr(3), "--binary-files");
616 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
617 strcasecmp("tty", optarg) == 0 ||
618 strcasecmp("if-tty", optarg) == 0) {
621 term = getenv("TERM");
622 if (isatty(STDOUT_FILENO) && term != NULL &&
623 strcasecmp(term, "dumb") != 0)
624 color = init_color("01;31");
625 } else if (strcasecmp("always", optarg) == 0 ||
626 strcasecmp("yes", optarg) == 0 ||
627 strcasecmp("force", optarg) == 0) {
628 color = init_color("01;31");
629 } else if (strcasecmp("never", optarg) != 0 &&
630 strcasecmp("none", optarg) != 0 &&
631 strcasecmp("no", optarg) != 0)
632 errx(2, getstr(3), "--color");
633 cflags &= ~REG_NOSUB;
646 add_fpattern(optarg, INCL_PAT);
650 add_fpattern(optarg, EXCL_PAT);
654 add_dpattern(optarg, INCL_PAT);
658 add_dpattern(optarg, EXCL_PAT);
665 newarg = optind != prevoptind;
671 /* Empty pattern file matches nothing */
672 if (!needpattern && (patterns == 0))
675 /* Fail if we don't have any pattern */
676 if (aargc == 0 && needpattern)
679 /* Process patterns from command line */
680 if (aargc != 0 && needpattern) {
682 char *string = *aargv;
684 while ((token = strsep(&string, "\n")) != NULL)
685 add_pattern(token, strlen(token));
690 switch (grepbehave) {
694 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
698 cflags |= REG_EXTENDED;
705 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
706 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
708 /* Check if cheating is allowed (always is for fgrep). */
709 for (i = 0; i < patterns; ++i) {
710 if (fastncomp(&fg_pattern[i], pattern[i].pat,
711 pattern[i].len, cflags) != 0) {
712 /* Fall back to full regex library */
713 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
715 regerror(c, &r_pattern[i], re_error,
717 errx(2, "%s", re_error);
725 if ((aargc == 0 || aargc == 1) && !Hflag)
729 exit(!procfile("-"));
731 if (dirbehave == DIR_RECURSE)
732 c = grep_tree(aargv);
734 for (c = 0; aargc--; ++aargv) {
735 if ((finclude || fexclude) && !file_matching(*aargv))
737 c+= procfile(*aargv);
744 /* Find out the correct return value according to the
745 results and the command line option. */
746 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));