1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #include "fastmatch.h"
61 * Default messages to use when NLS is disabled or no catalogue
64 const char *errstr[] = {
66 /* 1*/ "(standard input)",
67 /* 2*/ "cannot read bzip2 compressed file",
68 /* 3*/ "unknown %s option",
69 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
70 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
71 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
72 /* 7*/ "\t[--null] [pattern] [file ...]\n",
73 /* 8*/ "Binary file %s matches\n",
74 /* 9*/ "%s (BSD grep) %s\n",
77 /* Flags passed to regcomp() and regexec() */
78 int cflags = REG_NOSUB;
79 int eflags = REG_STARTEND;
81 /* Shortcut for matching all cases like empty regex */
84 /* Searching patterns */
85 unsigned int patterns;
86 static unsigned int pattern_sz;
89 fastmatch_t *fg_pattern;
91 /* Filename exclusion/inclusion patterns */
92 unsigned int fpatterns, dpatterns;
93 static unsigned int fpattern_sz, dpattern_sz;
94 struct epat *dpattern, *fpattern;
96 /* For regex errors */
97 char re_error[RE_ERROR_BUF + 1];
99 /* Command-line flags */
100 unsigned long long Aflag; /* -A x: print x lines trailing each match */
101 unsigned long long Bflag; /* -B x: print x lines leading each match */
102 bool Hflag; /* -H: always print file name */
103 bool Lflag; /* -L: only show names of files with no matches */
104 bool bflag; /* -b: show block numbers for each match */
105 bool cflag; /* -c: only show a count of matching lines */
106 bool hflag; /* -h: don't print filename headers */
107 bool iflag; /* -i: ignore case */
108 bool lflag; /* -l: only show names of files with matches */
109 bool mflag; /* -m x: stop reading the files after x matches */
110 long long mcount; /* count for -m */
111 long long mlimit; /* requested value for -m */
112 bool nflag; /* -n: show line numbers in front of matching lines */
113 bool oflag; /* -o: print only matching part */
114 bool qflag; /* -q: quiet mode (don't output anything) */
115 bool sflag; /* -s: silent mode (ignore errors) */
116 bool vflag; /* -v: only show non-matching lines */
117 bool wflag; /* -w: pattern must start and end on word boundaries */
118 bool xflag; /* -x: pattern must match entire line */
119 bool lbflag; /* --line-buffered */
120 bool nullflag; /* --null */
121 char *label; /* --label */
122 const char *color; /* --color */
123 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
124 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
125 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
126 int devbehave = DEV_READ; /* -D: handling of devices */
127 int dirbehave = DIR_READ; /* -dRr: handling of directories */
128 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
130 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
131 bool fexclude, finclude; /* --exclude and --include */
134 BIN_OPT = CHAR_MAX + 1,
147 static inline const char *init_color(const char *);
150 bool first = true; /* flag whether we are processing the first match */
151 bool prev; /* flag whether or not the previous line matched */
152 int tail; /* lines left to print */
153 bool file_err; /* file reading error */
156 * Prints usage information and returns 2.
161 fprintf(stderr, getstr(4), getprogname());
162 fprintf(stderr, "%s", getstr(5));
163 fprintf(stderr, "%s", getstr(6));
164 fprintf(stderr, "%s", getstr(7));
168 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
170 static const struct option long_options[] =
172 {"binary-files", required_argument, NULL, BIN_OPT},
173 {"help", no_argument, NULL, HELP_OPT},
174 {"mmap", no_argument, NULL, MMAP_OPT},
175 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
176 {"label", required_argument, NULL, LABEL_OPT},
177 {"null", no_argument, NULL, NULL_OPT},
178 {"color", optional_argument, NULL, COLOR_OPT},
179 {"colour", optional_argument, NULL, COLOR_OPT},
180 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
181 {"include", required_argument, NULL, R_INCLUDE_OPT},
182 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
183 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
184 {"after-context", required_argument, NULL, 'A'},
185 {"text", no_argument, NULL, 'a'},
186 {"before-context", required_argument, NULL, 'B'},
187 {"byte-offset", no_argument, NULL, 'b'},
188 {"context", optional_argument, NULL, 'C'},
189 {"count", no_argument, NULL, 'c'},
190 {"devices", required_argument, NULL, 'D'},
191 {"directories", required_argument, NULL, 'd'},
192 {"extended-regexp", no_argument, NULL, 'E'},
193 {"regexp", required_argument, NULL, 'e'},
194 {"fixed-strings", no_argument, NULL, 'F'},
195 {"file", required_argument, NULL, 'f'},
196 {"basic-regexp", no_argument, NULL, 'G'},
197 {"no-filename", no_argument, NULL, 'h'},
198 {"with-filename", no_argument, NULL, 'H'},
199 {"ignore-case", no_argument, NULL, 'i'},
200 {"bz2decompress", no_argument, NULL, 'J'},
201 {"files-with-matches", no_argument, NULL, 'l'},
202 {"files-without-match", no_argument, NULL, 'L'},
203 {"max-count", required_argument, NULL, 'm'},
204 {"lzma", no_argument, NULL, 'M'},
205 {"line-number", no_argument, NULL, 'n'},
206 {"only-matching", no_argument, NULL, 'o'},
207 {"quiet", no_argument, NULL, 'q'},
208 {"silent", no_argument, NULL, 'q'},
209 {"recursive", no_argument, NULL, 'r'},
210 {"no-messages", no_argument, NULL, 's'},
211 {"binary", no_argument, NULL, 'U'},
212 {"unix-byte-offsets", no_argument, NULL, 'u'},
213 {"invert-match", no_argument, NULL, 'v'},
214 {"version", no_argument, NULL, 'V'},
215 {"word-regexp", no_argument, NULL, 'w'},
216 {"line-regexp", no_argument, NULL, 'x'},
217 {"xz", no_argument, NULL, 'X'},
218 {"decompress", no_argument, NULL, 'Z'},
219 {NULL, no_argument, NULL, 0}
223 * Adds a searching pattern to the internal array.
226 add_pattern(char *pat, size_t len)
/*
 * Stores a private, explicitly NUL-terminated copy of the len-byte pattern
 * pat in pattern[patterns] and records its length there.  The pattern[]
 * array is reallocated first when patterns has reached pattern_sz.
 * NOTE(review): only part of this function is visible in this view; the
 * conditions guarding the shortcut handling are not shown.
 */
229 /* Do not add further patterns if we already match everything */
233 /* Check if we can do a shortcut */
/* Visible shortcut handling: every stored pattern string is freed, the
   array is resized to a single entry and that entry's pat is cleared. */
236 for (unsigned int i = 0; i < patterns; i++) {
237 free(pattern[i].pat);
239 pattern = grep_realloc(pattern, sizeof(struct pat));
240 pattern[0].pat = NULL;
245 /* Increase size if necessary */
246 if (patterns == pattern_sz) {
248 pattern = grep_realloc(pattern, ++pattern_sz *
251 if (len > 0 && pat[len - 1] == '\n')
253 /* pat may not be NUL-terminated */
/* Allocate len + 1 bytes, copy exactly len bytes and terminate the copy. */
254 pattern[patterns].pat = grep_malloc(len + 1);
255 memcpy(pattern[patterns].pat, pat, len);
256 pattern[patterns].len = len;
257 pattern[patterns].pat[len] = '\0';
262 * Adds a file include/exclude pattern to the internal array.
265 add_fpattern(const char *pat, int mode)
/*
 * Records a file-name pattern: a duplicate of pat (made with grep_strdup)
 * and the given mode are written to fpattern[fpatterns].  The fpattern[]
 * array is reallocated first when fpatterns has reached fpattern_sz.
 * NOTE(review): only part of this function is visible in this view.
 */
268 /* Increase size if necessary */
269 if (fpatterns == fpattern_sz) {
271 fpattern = grep_realloc(fpattern, ++fpattern_sz *
272 sizeof(struct epat));
/* Fill the slot at index fpatterns with the copied text and its mode. */
274 fpattern[fpatterns].pat = grep_strdup(pat);
275 fpattern[fpatterns].mode = mode;
280 * Adds a directory include/exclude pattern to the internal array.
283 add_dpattern(const char *pat, int mode)
/*
 * Records a directory-name pattern: a duplicate of pat (made with
 * grep_strdup) and the given mode are written to dpattern[dpatterns].
 * The dpattern[] array is reallocated first when dpatterns has reached
 * dpattern_sz.
 * NOTE(review): only part of this function is visible in this view.
 */
286 /* Increase size if necessary */
287 if (dpatterns == dpattern_sz) {
289 dpattern = grep_realloc(dpattern, ++dpattern_sz *
290 sizeof(struct epat));
/* Fill the slot at index dpatterns with the copied text and its mode. */
292 dpattern[dpatterns].pat = grep_strdup(pat);
293 dpattern[dpatterns].mode = mode;
298 * Reads searching patterns from a file and adds them with add_pattern().
301 read_patterns(const char *fn)
/*
 * Opens fn for reading, checks it with fstat() (failure or a directory is
 * tested for explicitly) and passes each line obtained from getline() to
 * add_pattern().
 * NOTE(review): only part of this function is visible in this view; the
 * error actions taken by the two checks below are not shown.
 */
309 if ((f = fopen(fn, "r")) == NULL)
311 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
317 while ((rlen = getline(&line, &len, f)) != -1) {
/* A line that begins with a newline (an empty line) is passed with
   length 0; any other line is passed with its full getline() length. */
320 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
/*
 * Returns the value of the GREP_COLOR environment variable when it is set
 * and non-empty; otherwise returns the caller-supplied default d.
 */
329 static inline const char *
330 init_color(const char *d)
334 c = getenv("GREP_COLOR");
335 return (c != NULL && c[0] != '\0' ? c : d);
339 main(int argc, char *argv[])
/*
 * Program entry point.
 * NOTE(review): many lines of this function (case labels, braces, several
 * statements) are not visible in this view; the comments below describe
 * only what the visible statements do.
 *
 * Visible stages: locale/catalog setup, defaults chosen from the program
 * name, merging of GREP_OPTIONS with the command line, option parsing with
 * getopt_long(), pattern collection and compilation, file processing, and
 * computation of the exit status.
 */
341 char **aargv, **eargv, *eopts;
344 unsigned long long l;
345 unsigned int aargc, eargc, i;
346 int c, lastc, needpattern, newarg, prevoptind;
348 setlocale(LC_ALL, "");
351 catalog = catopen("grep", NL_CAT_LOCALE);
354 /* Check what is the program name of the binary. In this
355 way we can have all the functionalities in one binary
356 without the need of scripting and using ugly hacks. */
358 if (pn[0] == 'b' && pn[1] == 'z') {
359 filebehave = FILE_BZIP;
361 } else if (pn[0] == 'x' && pn[1] == 'z') {
362 filebehave = FILE_XZ;
364 } else if (pn[0] == 'l' && pn[1] == 'z') {
365 filebehave = FILE_LZMA;
367 } else if (pn[0] == 'r') {
368 dirbehave = DIR_RECURSE;
370 } else if (pn[0] == 'z') {
371 filebehave = FILE_GZIP;
376 grepbehave = GREP_EXTENDED;
379 grepbehave = GREP_FIXED;
/* Extra options may be supplied through the GREP_OPTIONS environment
   variable; it is ignored when unset or empty. */
388 eopts = getenv("GREP_OPTIONS");
390 /* support for extra arguments in GREP_OPTIONS */
392 if (eopts != NULL && eopts[0] != '\0') {
395 /* make an estimation of how many extra arguments we have */
/* NOTE(review): strlen(eopts) is re-evaluated on every iteration of this
   loop; the length could be computed once before the loop. */
396 for (unsigned int j = 0; j < strlen(eopts); j++)
400 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
403 /* parse extra arguments */
404 while ((str = strsep(&eopts, " ")) != NULL)
406 eargv[eargc++] = grep_strdup(str);
/* Build the merged vector: the extra arguments are placed from index 1,
   followed by the original argv[1..argc-1]. */
408 aargv = (char **)grep_calloc(eargc + argc + 1,
412 for (i = 0; i < eargc; i++)
413 aargv[i + 1] = eargv[i];
414 for (int j = 1; j < argc; j++, i++)
415 aargv[i + 1] = argv[j];
417 aargc = eargc + argc;
/* Option loop over the merged argument vector. */
423 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
426 case '0': case '1': case '2': case '3': case '4':
427 case '5': case '6': case '7': case '8': case '9':
/* Bare digits accumulate a decimal count that is stored in both Aflag and
   Bflag; Aflag is checked against LLONG_MAX / 10 before it is multiplied. */
428 if (newarg || !isdigit(lastc))
430 else if (Aflag > LLONG_MAX / 10) {
434 Aflag = Bflag = (Aflag * 10) + (c - '0');
437 if (optarg == NULL) {
/* Parse the numeric option argument: conversion problems are detected
   through errno, trailing characters through ep. */
446 l = strtoull(optarg, &ep, 10);
447 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
448 ((errno == EINVAL) && (l == 0)))
450 else if (ep[0] != '\0') {
462 binbehave = BINFILE_TEXT;
/* Device handling keyword, compared case-insensitively. */
471 if (strcasecmp(optarg, "skip") == 0)
472 devbehave = DEV_SKIP;
473 else if (strcasecmp(optarg, "read") == 0)
474 devbehave = DEV_READ;
476 errx(2, getstr(3), "--devices");
/* Directory handling keyword, compared case-insensitively. */
479 if (strcasecmp("recurse", optarg) == 0) {
481 dirbehave = DIR_RECURSE;
482 } else if (strcasecmp("skip", optarg) == 0)
483 dirbehave = DIR_SKIP;
484 else if (strcasecmp("read", optarg) == 0)
485 dirbehave = DIR_READ;
487 errx(2, getstr(3), "--directories");
490 grepbehave = GREP_EXTENDED;
/* The option argument may hold several patterns separated by newlines;
   each piece is added as its own pattern. */
495 char *string = optarg;
497 while ((token = strsep(&string, "\n")) != NULL)
498 add_pattern(token, strlen(token));
503 grepbehave = GREP_FIXED;
506 read_patterns(optarg);
510 grepbehave = GREP_BASIC;
520 binbehave = BINFILE_SKIP;
530 err(2, "bzip2 support was disabled at compile-time");
532 filebehave = FILE_BZIP;
/* Parse the match limit; mlimit and mcount both start at the parsed value. */
545 mlimit = mcount = strtoll(optarg, &ep, 10);
546 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
547 ((errno == EINVAL) && (mcount == 0)))
549 else if (ep[0] != '\0') {
555 filebehave = FILE_LZMA;
561 linkbehave = LINK_EXPLICIT;
565 cflags &= ~REG_NOSUB;
568 linkbehave = LINK_SKIP;
574 linkbehave = LINK_READ;
578 dirbehave = DIR_RECURSE;
585 binbehave = BINFILE_BIN;
589 filebehave = FILE_MMAP;
592 printf(getstr(9), getprogname(), VERSION);
599 cflags &= ~REG_NOSUB;
603 cflags &= ~REG_NOSUB;
606 filebehave = FILE_XZ;
609 filebehave = FILE_GZIP;
/* Binary-file handling keyword, compared case-insensitively. */
612 if (strcasecmp("binary", optarg) == 0)
613 binbehave = BINFILE_BIN;
614 else if (strcasecmp("without-match", optarg) == 0)
615 binbehave = BINFILE_SKIP;
616 else if (strcasecmp("text", optarg) == 0)
617 binbehave = BINFILE_TEXT;
619 errx(2, getstr(3), "--binary-files");
/* Colour selection: no argument or auto/tty/if-tty enables colour only
   when stdout is a terminal and TERM is set and is not "dumb";
   always/yes/force enables it unconditionally; any value other than
   never/none/no is rejected.  REG_NOSUB is cleared afterwards. */
623 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
624 strcasecmp("tty", optarg) == 0 ||
625 strcasecmp("if-tty", optarg) == 0) {
628 term = getenv("TERM");
629 if (isatty(STDOUT_FILENO) && term != NULL &&
630 strcasecmp(term, "dumb") != 0)
631 color = init_color("01;31");
632 } else if (strcasecmp("always", optarg) == 0 ||
633 strcasecmp("yes", optarg) == 0 ||
634 strcasecmp("force", optarg) == 0) {
635 color = init_color("01;31");
636 } else if (strcasecmp("never", optarg) != 0 &&
637 strcasecmp("none", optarg) != 0 &&
638 strcasecmp("no", optarg) != 0)
639 errx(2, getstr(3), "--color");
640 cflags &= ~REG_NOSUB;
653 add_fpattern(optarg, INCL_PAT);
657 add_fpattern(optarg, EXCL_PAT);
661 add_dpattern(optarg, INCL_PAT);
665 add_dpattern(optarg, EXCL_PAT);
/* newarg is true when optind differs from prevoptind. */
672 newarg = optind != prevoptind;
678 /* Empty pattern file matches nothing */
679 if (!needpattern && (patterns == 0))
682 /* Fail if we don't have any pattern */
683 if (aargc == 0 && needpattern)
686 /* Process patterns from command line */
687 if (aargc != 0 && needpattern) {
689 char *string = *aargv;
691 while ((token = strsep(&string, "\n")) != NULL)
692 add_pattern(token, strlen(token));
697 switch (grepbehave) {
701 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
705 cflags |= REG_EXTENDED;
/* One fg_pattern entry and one r_pattern entry is allocated per pattern. */
712 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
713 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
715 /* Check if cheating is allowed (always is for fgrep). */
716 for (i = 0; i < patterns; ++i) {
717 if (fastncomp(&fg_pattern[i], pattern[i].pat,
718 pattern[i].len, cflags) != 0) {
719 /* Fall back to full regex library */
720 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
722 regerror(c, &r_pattern[i], re_error,
724 errx(2, "%s", re_error);
732 if ((aargc == 0 || aargc == 1) && !Hflag)
736 exit(!procfile("-"));
738 if (dirbehave == DIR_RECURSE)
739 c = grep_tree(aargv);
/* Per-argument processing: c accumulates the procfile() results, and
   file_matching() is consulted only when finclude or fexclude is set. */
741 for (c = 0; aargc--; ++aargv) {
742 if ((finclude || fexclude) && !file_matching(*aargv))
744 c+= procfile(*aargv);
751 /* Find out the correct return value according to the
752 results and the command line option. */
/* c != 0: exit 0, or 2 when file_err is set and qflag is not.
   c == 0: exit 2 when file_err is set, otherwise 1. */
753 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));