1 /* $NetBSD: grep.c,v 1.4 2011/02/16 01:31:33 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #include "fastmatch.h"
61 * Default messages to use when NLS is disabled or no catalogue
64 const char *errstr[] = {
66 /* 1*/ "(standard input)",
67 /* 2*/ "cannot read bzip2 compressed file",
68 /* 3*/ "unknown %s option",
69 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
70 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
71 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
72 /* 7*/ "\t[--null] [pattern] [file ...]\n",
73 /* 8*/ "Binary file %s matches\n",
74 /* 9*/ "%s (BSD grep) %s\n",
77 /* Flags passed to regcomp() and regexec() */
78 int cflags = REG_NOSUB;
79 int eflags = REG_STARTEND;
81 /* Shortcut for matching all cases like empty regex */
84 /* Searching patterns */
85 unsigned int patterns, pattern_sz;
88 fastmatch_t *fg_pattern;
90 /* Filename exclusion/inclusion patterns */
91 unsigned int fpatterns, fpattern_sz;
92 unsigned int dpatterns, dpattern_sz;
93 struct epat *dpattern, *fpattern;
95 /* For regex errors */
96 char re_error[RE_ERROR_BUF + 1];
98 /* Command-line flags */
99 unsigned long long Aflag; /* -A x: print x lines trailing each match */
100 unsigned long long Bflag; /* -B x: print x lines leading each match */
101 bool Hflag; /* -H: always print file name */
102 bool Lflag; /* -L: only show names of files with no matches */
103 bool bflag; /* -b: show the byte offset of each match (--byte-offset) */
104 bool cflag; /* -c: only show a count of matching lines */
105 bool hflag; /* -h: don't print filename headers */
106 bool iflag; /* -i: ignore case */
107 bool lflag; /* -l: only show names of files with matches */
108 bool mflag; /* -m x: stop reading the files after x matches */
109 long long mcount; /* count for -m */
110 bool nflag; /* -n: show line numbers in front of matching lines */
111 bool oflag; /* -o: print only matching part */
112 bool qflag; /* -q: quiet mode (don't output anything) */
113 bool sflag; /* -s: silent mode (--no-messages) */
114 bool vflag; /* -v: only show non-matching lines */
115 bool wflag; /* -w: pattern must start and end on word boundaries */
116 bool xflag; /* -x: pattern must match entire line */
117 bool lbflag; /* --line-buffered */
118 bool nullflag; /* --null */
119 char *label; /* --label */
120 const char *color; /* --color */
121 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
122 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
123 int filebehave = FILE_STDIO; /* -JMXZ: normal, gzip, bzip2, lzma or xz file */
124 int devbehave = DEV_READ; /* -D: handling of devices */
125 int dirbehave = DIR_READ; /* -dRr: handling of directories */
126 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
128 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
129 bool fexclude, finclude; /* --exclude and --include */
132 BIN_OPT = CHAR_MAX + 1,
145 static inline const char *init_color(const char *);
148 bool first = true; /* flag whether we are processing the first match */
149 bool prev; /* flag whether or not the previous line matched */
150 int tail; /* lines left to print */
151 bool notfound; /* file not found */
154 * Prints usage information and returns 2.
159 fprintf(stderr, getstr(4), getprogname()); /* synopsis line; %s is the program name */
160 fprintf(stderr, "%s", getstr(5)); /* messages 5..7 are continuation lines, each printed once */
162 fprintf(stderr, "%s", getstr(6));
163 fprintf(stderr, "%s", getstr(7));
167 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
169 struct option long_options[] =
171 {"binary-files", required_argument, NULL, BIN_OPT},
172 {"help", no_argument, NULL, HELP_OPT},
173 {"mmap", no_argument, NULL, MMAP_OPT},
174 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
175 {"label", required_argument, NULL, LABEL_OPT},
176 {"null", no_argument, NULL, NULL_OPT},
177 {"color", optional_argument, NULL, COLOR_OPT},
178 {"colour", optional_argument, NULL, COLOR_OPT},
179 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
180 {"include", required_argument, NULL, R_INCLUDE_OPT},
181 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
182 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
183 {"after-context", required_argument, NULL, 'A'},
184 {"text", no_argument, NULL, 'a'},
185 {"before-context", required_argument, NULL, 'B'},
186 {"byte-offset", no_argument, NULL, 'b'},
187 {"context", optional_argument, NULL, 'C'},
188 {"count", no_argument, NULL, 'c'},
189 {"devices", required_argument, NULL, 'D'},
190 {"directories", required_argument, NULL, 'd'},
191 {"extended-regexp", no_argument, NULL, 'E'},
192 {"regexp", required_argument, NULL, 'e'},
193 {"fixed-strings", no_argument, NULL, 'F'},
194 {"file", required_argument, NULL, 'f'},
195 {"basic-regexp", no_argument, NULL, 'G'},
196 {"no-filename", no_argument, NULL, 'h'},
197 {"with-filename", no_argument, NULL, 'H'},
198 {"ignore-case", no_argument, NULL, 'i'},
199 {"bz2decompress", no_argument, NULL, 'J'},
200 {"files-with-matches", no_argument, NULL, 'l'},
201 {"files-without-match", no_argument, NULL, 'L'},
202 {"max-count", required_argument, NULL, 'm'},
203 {"lzma", no_argument, NULL, 'M'},
204 {"line-number", no_argument, NULL, 'n'},
205 {"only-matching", no_argument, NULL, 'o'},
206 {"quiet", no_argument, NULL, 'q'},
207 {"silent", no_argument, NULL, 'q'},
208 {"recursive", no_argument, NULL, 'r'},
209 {"no-messages", no_argument, NULL, 's'},
210 {"binary", no_argument, NULL, 'U'},
211 {"unix-byte-offsets", no_argument, NULL, 'u'},
212 {"invert-match", no_argument, NULL, 'v'},
213 {"version", no_argument, NULL, 'V'},
214 {"word-regexp", no_argument, NULL, 'w'},
215 {"line-regexp", no_argument, NULL, 'x'},
216 {"xz", no_argument, NULL, 'X'},
217 {"decompress", no_argument, NULL, 'Z'},
218 {NULL, no_argument, NULL, 0}
222 * Adds a searching pattern to the internal array.
225 add_pattern(char *pat, size_t len)
228 /* Do not add further patterns if we already match everything */
232 /* Check if we can do a shortcut */
235 for (unsigned int i = 0; i < patterns; i++) {
236 free(pattern[i].pat);
238 pattern = grep_realloc(pattern, sizeof(struct pat));
239 pattern[0].pat = NULL;
244 /* Increase size if necessary */
245 if (patterns == pattern_sz) {
247 pattern = grep_realloc(pattern, ++pattern_sz *
250 if (len > 0 && pat[len - 1] == '\n')
252 /* pat may not be NUL-terminated */
253 pattern[patterns].pat = grep_malloc(len + 1);
254 memcpy(pattern[patterns].pat, pat, len);
255 pattern[patterns].len = len;
256 pattern[patterns].pat[len] = '\0';
261 * Adds a file include/exclude pattern to the internal array.
264 add_fpattern(const char *pat, int mode)
267 /* Increase size if necessary */
268 if (fpatterns == fpattern_sz) {
270 fpattern = grep_realloc(fpattern, ++fpattern_sz *
271 sizeof(struct epat));
273 fpattern[fpatterns].pat = grep_strdup(pat);
274 fpattern[fpatterns].mode = mode;
279 * Adds a directory include/exclude pattern to the internal array.
282 add_dpattern(const char *pat, int mode)
285 /* Increase size if necessary */
286 if (dpatterns == dpattern_sz) {
288 dpattern = grep_realloc(dpattern, ++dpattern_sz *
289 sizeof(struct epat));
291 dpattern[dpatterns].pat = grep_strdup(pat);
292 dpattern[dpatterns].mode = mode;
297 * Reads searching patterns from a file and adds them with add_pattern().
300 read_patterns(const char *fn)
307 if ((f = fopen(fn, "r")) == NULL)
309 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
313 while ((line = fgetln(f, &len)) != NULL)
314 add_pattern(line, line[0] == '\n' ? 0 : len);
320 static inline const char *
321 init_color(const char *d)
325 c = getenv("GREP_COLOR");
326 return (c != NULL && c[0] != '\0' ? c : d);
330 main(int argc, char *argv[])
332 char **aargv, **eargv, *eopts;
335 unsigned long long l;
336 unsigned int aargc, eargc, i;
337 int c, lastc, needpattern, newarg, prevoptind;
339 setlocale(LC_ALL, "");
342 catalog = catopen("grep", NL_CAT_LOCALE);
345 /* Check the name the binary was invoked under. In this
346 way we can have all the functionality in one binary
347 without the need for scripting or ugly hacks. */
349 if (pn[0] == 'b' && pn[1] == 'z') {
350 filebehave = FILE_BZIP;
352 } else if (pn[0] == 'x' && pn[1] == 'z') {
353 filebehave = FILE_XZ;
355 } else if (pn[0] == 'l' && pn[1] == 'z') {
356 filebehave = FILE_LZMA;
358 } else if (pn[0] == 'z') {
359 filebehave = FILE_GZIP;
364 grepbehave = GREP_EXTENDED;
367 grepbehave = GREP_FIXED;
376 eopts = getenv("GREP_OPTIONS");
378 /* support for extra arguments in GREP_OPTIONS */
380 if (eopts != NULL && eopts[0] != '\0') {
383 /* make an estimation of how many extra arguments we have */
384 for (unsigned int j = 0; j < strlen(eopts); j++)
388 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
391 /* parse extra arguments */
392 while ((str = strsep(&eopts, " ")) != NULL)
394 eargv[eargc++] = grep_strdup(str);
396 aargv = (char **)grep_calloc(eargc + argc + 1,
400 for (i = 0; i < eargc; i++)
401 aargv[i + 1] = eargv[i];
402 for (int j = 1; j < argc; j++, i++)
403 aargv[i + 1] = argv[j];
405 aargc = eargc + argc;
411 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
414 case '0': case '1': case '2': case '3': case '4':
415 case '5': case '6': case '7': case '8': case '9':
416 if (newarg || !isdigit(lastc))
418 else if (Aflag > LLONG_MAX / 10) {
422 Aflag = Bflag = (Aflag * 10) + (c - '0');
425 if (optarg == NULL) {
434 l = strtoull(optarg, &ep, 10);
435 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
436 ((errno == EINVAL) && (l == 0)))
438 else if (ep[0] != '\0') {
450 binbehave = BINFILE_TEXT;
459 if (strcasecmp(optarg, "skip") == 0)
460 devbehave = DEV_SKIP;
461 else if (strcasecmp(optarg, "read") == 0)
462 devbehave = DEV_READ;
464 errx(2, getstr(3), "--devices");
467 if (strcasecmp("recurse", optarg) == 0) {
469 dirbehave = DIR_RECURSE;
470 } else if (strcasecmp("skip", optarg) == 0)
471 dirbehave = DIR_SKIP;
472 else if (strcasecmp("read", optarg) == 0)
473 dirbehave = DIR_READ;
475 errx(2, getstr(3), "--directories");
478 grepbehave = GREP_EXTENDED;
481 add_pattern(optarg, strlen(optarg));
485 grepbehave = GREP_FIXED;
488 read_patterns(optarg);
492 grepbehave = GREP_BASIC;
502 binbehave = BINFILE_SKIP;
512 errx(2, "bzip2 support was disabled at compile-time"); /* errx, not err: no system call failed, so no errno text should be appended */
514 filebehave = FILE_BZIP;
527 mcount = strtoll(optarg, &ep, 10);
528 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
529 ((errno == EINVAL) && (mcount == 0)))
531 else if (ep[0] != '\0') {
537 filebehave = FILE_LZMA;
543 linkbehave = LINK_EXPLICIT;
547 cflags &= ~REG_NOSUB;
550 linkbehave = LINK_SKIP;
556 linkbehave = LINK_READ;
560 dirbehave = DIR_RECURSE;
567 binbehave = BINFILE_BIN;
571 filebehave = FILE_MMAP;
574 printf(getstr(9), getprogname(), VERSION);
581 cflags &= ~REG_NOSUB;
585 cflags &= ~REG_NOSUB;
588 filebehave = FILE_XZ;
591 filebehave = FILE_GZIP;
594 if (strcasecmp("binary", optarg) == 0)
595 binbehave = BINFILE_BIN;
596 else if (strcasecmp("without-match", optarg) == 0)
597 binbehave = BINFILE_SKIP;
598 else if (strcasecmp("text", optarg) == 0)
599 binbehave = BINFILE_TEXT;
601 errx(2, getstr(3), "--binary-files");
605 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
606 strcasecmp("tty", optarg) == 0 ||
607 strcasecmp("if-tty", optarg) == 0) {
610 term = getenv("TERM");
611 if (isatty(STDOUT_FILENO) && term != NULL &&
612 strcasecmp(term, "dumb") != 0)
613 color = init_color("01;31");
614 } else if (strcasecmp("always", optarg) == 0 ||
615 strcasecmp("yes", optarg) == 0 ||
616 strcasecmp("force", optarg) == 0) {
617 color = init_color("01;31");
618 } else if (strcasecmp("never", optarg) != 0 &&
619 strcasecmp("none", optarg) != 0 &&
620 strcasecmp("no", optarg) != 0)
621 errx(2, getstr(3), "--color");
622 cflags &= ~REG_NOSUB;
635 add_fpattern(optarg, INCL_PAT);
639 add_fpattern(optarg, EXCL_PAT);
643 add_dpattern(optarg, INCL_PAT);
647 add_dpattern(optarg, EXCL_PAT);
654 newarg = optind != prevoptind;
660 /* Empty pattern file matches nothing */
661 if (!needpattern && (patterns == 0))
664 /* Fail if we don't have any pattern */
665 if (aargc == 0 && needpattern)
668 /* Process patterns from command line */
669 if (aargc != 0 && needpattern) {
670 add_pattern(*aargv, strlen(*aargv));
675 switch (grepbehave) {
679 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
683 cflags |= REG_EXTENDED;
690 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
691 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
693 /* Check if cheating is allowed (always is for fgrep). */
694 for (i = 0; i < patterns; ++i) {
695 if (fastncomp(&fg_pattern[i], pattern[i].pat,
696 pattern[i].len, cflags) != 0) {
697 /* Fall back to full regex library */
698 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
700 regerror(c, &r_pattern[i], re_error,
702 errx(2, "%s", re_error);
710 if ((aargc == 0 || aargc == 1) && !Hflag)
714 exit(!procfile("-"));
716 if (dirbehave == DIR_RECURSE)
717 c = grep_tree(aargv);
719 for (c = 0; aargc--; ++aargv) {
720 if ((finclude || fexclude) && !file_matching(*aargv))
722 c+= procfile(*aargv);
729 /* Find out the correct return value according to the
730 results and the command line option. */
731 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));