1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
53 #include "fastmatch.h"
62 * Default messages to use when NLS is disabled or no catalogue
/* Table of message strings; the number in the comment beside each entry is
   its index in the array.
   NOTE(review): presumably these are what getstr(n) falls back to (usage()
   and main() call getstr(3..9)) -- confirm against the getstr definition. */
65 const char *errstr[] = {
67 /* 1*/ "(standard input)",
68 /* 2*/ "cannot read bzip2 compressed file",
69 /* 3*/ "unknown %s option",
/* Entries 4-7 together form the usage text; only entry 4 contains a %s. */
70 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
71 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
72 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
73 /* 7*/ "\t[--null] [pattern] [file ...]\n",
74 /* 8*/ "Binary file %s matches\n",
/* Entry 9 takes two %s arguments (main() passes getprogname() and VERSION). */
75 /* 9*/ "%s (BSD grep) %s\n",
/* File-scope state. The *behave variables and cflags below are assigned
   during option handling in main(). */
78 /* Flags passed to regcomp() and regexec() */
/* cflags starts out as REG_NOSUB; main() clears that bit for several options. */
79 int cflags = REG_NOSUB;
80 int eflags = REG_STARTEND;
82 /* Shortcut for matching all cases like empty regex */
85 /* Searching patterns */
/* patterns is the number of stored patterns; pattern_sz is the capacity it is
   compared against in add_pattern() before the array is grown. */
86 unsigned int patterns;
87 static unsigned int pattern_sz;
/* One fastmatch_t per pattern is allocated in main() (grep_calloc(patterns, ...)). */
90 fastmatch_t *fg_pattern;
92 /* Filename exclusion/inclusion patterns */
/* Counts, capacities and arrays used by add_fpattern() / add_dpattern(). */
93 unsigned int fpatterns, dpatterns;
94 static unsigned int fpattern_sz, dpattern_sz;
95 struct epat *dpattern, *fpattern;
97 /* For regex errors */
98 char re_error[RE_ERROR_BUF + 1];
100 /* Command-line flags */
101 unsigned long long Aflag; /* -A x: print x lines trailing each match */
102 unsigned long long Bflag; /* -B x: print x lines leading each match */
103 bool Hflag; /* -H: always print file name */
104 bool Lflag; /* -L: only show names of files with no matches */
105 bool bflag; /* -b: show byte offset for each match (long form is "byte-offset"; earlier wording said "block numbers" -- TODO confirm) */
106 bool cflag; /* -c: only show a count of matching lines */
107 bool hflag; /* -h: don't print filename headers */
108 bool iflag; /* -i: ignore case */
109 bool lflag; /* -l: only show names of files with matches */
110 bool mflag; /* -m x: stop reading the files after x matches */
111 long long mcount; /* count for -m */
112 long long mlimit; /* requested value for -m */
113 bool nflag; /* -n: show line numbers in front of matching lines */
114 bool oflag; /* -o: print only matching part */
115 bool qflag; /* -q: quiet mode (don't output anything) */
116 bool sflag; /* -s: silent mode (ignore errors) */
117 bool vflag; /* -v: only show non-matching lines */
118 bool wflag; /* -w: pattern must start and end on word boundaries */
119 bool xflag; /* -x: pattern must match entire line */
120 bool lbflag; /* --line-buffered */
121 bool nullflag; /* --null */
122 char *label; /* --label */
123 const char *color; /* --color */
124 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
125 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
126 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
127 int devbehave = DEV_READ; /* -D: handling of devices */
128 int dirbehave = DIR_READ; /* -dRr: handling of directories */
129 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
131 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
132 bool fexclude, finclude; /* --exclude and --include */
/* Fragment of the enum of long-only option codes: the first code is placed
   just above CHAR_MAX, i.e. outside the range of single-character options. */
135 BIN_OPT = CHAR_MAX + 1,
/* Forward declaration; init_color() is defined further down in this file. */
148 static inline const char *init_color(const char *);
151 bool first = true; /* flag whether we are processing the first match */
152 bool prev; /* flag whether or not the previous line matched */
153 int tail; /* lines left to print */
154 bool file_err; /* file reading error */
157 * Prints usage information and returns 2.
/* Message 4 is used directly as the format string and receives the program
   name; messages 5-7 are printed verbatim through "%s". All output goes to
   stderr. */
162 fprintf(stderr, getstr(4), getprogname());
163 fprintf(stderr, "%s", getstr(5));
164 fprintf(stderr, "%s", getstr(6));
165 fprintf(stderr, "%s", getstr(7));
/* Short-option specification handed to getopt_long() in main(). A ':' after
   a letter marks an option that takes an argument. The digits 0-9 are listed
   as options of their own; main() handles them in a dedicated case group. */
169 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
/* Long-option table for getopt_long(). The flag member is NULL in every
   entry, and the last member is either a single-character option or one of
   the *_OPT codes. */
171 static const struct option long_options[] =
173 {"binary-files", required_argument, NULL, BIN_OPT},
174 {"help", no_argument, NULL, HELP_OPT},
175 {"mmap", no_argument, NULL, MMAP_OPT},
176 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
177 {"label", required_argument, NULL, LABEL_OPT},
178 {"null", no_argument, NULL, NULL_OPT},
/* Both spellings map to the same code. */
179 {"color", optional_argument, NULL, COLOR_OPT},
180 {"colour", optional_argument, NULL, COLOR_OPT},
181 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
182 {"include", required_argument, NULL, R_INCLUDE_OPT},
183 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
184 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
185 {"after-context", required_argument, NULL, 'A'},
186 {"text", no_argument, NULL, 'a'},
187 {"before-context", required_argument, NULL, 'B'},
188 {"byte-offset", no_argument, NULL, 'b'},
189 {"context", optional_argument, NULL, 'C'},
190 {"count", no_argument, NULL, 'c'},
191 {"devices", required_argument, NULL, 'D'},
192 {"directories", required_argument, NULL, 'd'},
193 {"extended-regexp", no_argument, NULL, 'E'},
194 {"regexp", required_argument, NULL, 'e'},
195 {"fixed-strings", no_argument, NULL, 'F'},
196 {"file", required_argument, NULL, 'f'},
197 {"basic-regexp", no_argument, NULL, 'G'},
198 {"no-filename", no_argument, NULL, 'h'},
199 {"with-filename", no_argument, NULL, 'H'},
200 {"ignore-case", no_argument, NULL, 'i'},
201 {"bz2decompress", no_argument, NULL, 'J'},
202 {"files-with-matches", no_argument, NULL, 'l'},
203 {"files-without-match", no_argument, NULL, 'L'},
204 {"max-count", required_argument, NULL, 'm'},
205 {"lzma", no_argument, NULL, 'M'},
206 {"line-number", no_argument, NULL, 'n'},
207 {"only-matching", no_argument, NULL, 'o'},
/* "quiet" and "silent" are synonyms: both map to 'q'. */
208 {"quiet", no_argument, NULL, 'q'},
209 {"silent", no_argument, NULL, 'q'},
210 {"recursive", no_argument, NULL, 'r'},
211 {"no-messages", no_argument, NULL, 's'},
212 {"binary", no_argument, NULL, 'U'},
213 {"unix-byte-offsets", no_argument, NULL, 'u'},
214 {"invert-match", no_argument, NULL, 'v'},
215 {"version", no_argument, NULL, 'V'},
216 {"word-regexp", no_argument, NULL, 'w'},
217 {"line-regexp", no_argument, NULL, 'x'},
218 {"xz", no_argument, NULL, 'X'},
219 {"decompress", no_argument, NULL, 'Z'},
/* All-zero terminator entry required by getopt_long(). */
220 {NULL, no_argument, NULL, 0}
224 * Adds a searching pattern to the internal array.
/* pat: pattern bytes (not required to be NUL-terminated); len: number of
   bytes to take from pat. The stored copy is always NUL-terminated. */
227 add_pattern(char *pat, size_t len)
230 /* Do not add further pattern if we already match everything */
234 /* Check if we can do a shortcut */
/* Shortcut path: release every stored pattern string, shrink the array to a
   single element and leave that element without a pattern string. */
237 for (unsigned int i = 0; i < patterns; i++) {
238 free(pattern[i].pat);
240 pattern = grep_realloc(pattern, sizeof(struct pat));
241 pattern[0].pat = NULL;
246 /* Increase size if necessary */
/* The array is full when the count equals the capacity; the capacity counter
   is pre-incremented as part of the new size expression. */
247 if (patterns == pattern_sz) {
249 pattern = grep_realloc(pattern, ++pattern_sz *
/* Detects a trailing newline in pat; the statement guarded by this test is
   not visible in this listing. */
252 if (len > 0 && pat[len - 1] == '\n')
254 /* pat may not be NUL-terminated */
/* Allocate len + 1 bytes so the copy can be terminated explicitly. */
255 pattern[patterns].pat = grep_malloc(len + 1);
256 memcpy(pattern[patterns].pat, pat, len);
257 pattern[patterns].len = len;
258 pattern[patterns].pat[len] = '\0';
263 * Adds a file include/exclude pattern to the internal array.
/* pat: pattern text, duplicated with grep_strdup() so the array keeps its own
   copy; mode: stored as given (main() passes INCL_PAT or EXCL_PAT). */
266 add_fpattern(const char *pat, int mode)
269 /* Increase size if necessary */
/* Full when count equals capacity; capacity is pre-incremented inside the
   size expression passed to grep_realloc(). */
270 if (fpatterns == fpattern_sz) {
272 fpattern = grep_realloc(fpattern, ++fpattern_sz *
273 sizeof(struct epat));
/* Fill the slot at index fpatterns. */
275 fpattern[fpatterns].pat = grep_strdup(pat);
276 fpattern[fpatterns].mode = mode;
281 * Adds a directory include/exclude pattern to the internal array.
/* pat: pattern text, duplicated with grep_strdup() so the array keeps its own
   copy; mode: stored as given (main() passes INCL_PAT or EXCL_PAT). */
284 add_dpattern(const char *pat, int mode)
287 /* Increase size if necessary */
/* Full when count equals capacity; capacity is pre-incremented inside the
   size expression passed to grep_realloc(). */
288 if (dpatterns == dpattern_sz) {
290 dpattern = grep_realloc(dpattern, ++dpattern_sz *
291 sizeof(struct epat));
/* Fill the slot at index dpatterns. */
293 dpattern[dpatterns].pat = grep_strdup(pat);
294 dpattern[dpatterns].mode = mode;
299 * Reads searching patterns from a file and adds them with add_pattern().
/* fn: path of the pattern file, opened read-only with fopen(). */
302 read_patterns(const char *fn)
/* The handling of an open failure is not visible in this listing. */
310 if ((f = fopen(fn, "r")) == NULL)
/* An fstat() failure or a directory is treated as a special case; the body of
   this branch is not visible in this listing. */
312 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
/* One pattern per input line. A line that begins with '\n' (an empty line) is
   passed with length 0; any other line is passed with the length getline()
   reported. */
318 while ((rlen = getline(&line, &len, f)) != -1)
319 add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen);
/* Chooses the color string: the value of the GREP_COLOR environment variable
   when it is set and non-empty, otherwise the caller-supplied default d. */
326 static inline const char *
327 init_color(const char *d)
331 c = getenv("GREP_COLOR");
332 return (c != NULL && c[0] != '\0' ? c : d);
/* Program entry point. Visible steps: set the locale, pick defaults from the
   program name, merge GREP_OPTIONS with the command line, parse options with
   getopt_long(), collect and compile the patterns, search the inputs, and
   exit with a status derived from the match count and file errors. */
336 main(int argc, char *argv[])
338 char **aargv, **eargv, *eopts;
341 unsigned long long l;
342 unsigned int aargc, eargc, i;
343 int c, lastc, needpattern, newarg, prevoptind;
345 setlocale(LC_ALL, "");
348 catalog = catopen("grep", NL_CAT_LOCALE);
351 /* Check what is the program name of the binary. In this
352 way we can have all the functionalities in one binary
353 without the need of scripting and using ugly hacks. */
/* Name prefix selects the input mode: "bz" -> FILE_BZIP, "xz" -> FILE_XZ,
   "lz" -> FILE_LZMA, any other leading 'z' -> FILE_GZIP. */
355 if (pn[0] == 'b' && pn[1] == 'z') {
356 filebehave = FILE_BZIP;
358 } else if (pn[0] == 'x' && pn[1] == 'z') {
359 filebehave = FILE_XZ;
361 } else if (pn[0] == 'l' && pn[1] == 'z') {
362 filebehave = FILE_LZMA;
364 } else if (pn[0] == 'z') {
365 filebehave = FILE_GZIP;
370 grepbehave = GREP_EXTENDED;
373 grepbehave = GREP_FIXED;
382 eopts = getenv("GREP_OPTIONS");
384 /* support for extra arguments in GREP_OPTIONS */
386 if (eopts != NULL && eopts[0] != '\0') {
389 /* make an estimation of how many extra arguments we have */
/* NOTE(review): strlen(eopts) is re-evaluated on every iteration of this
   loop condition; it could be computed once. */
390 for (unsigned int j = 0; j < strlen(eopts); j++)
394 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
397 /* parse extra arguments */
/* Split on single spaces; each kept word is duplicated into eargv. */
398 while ((str = strsep(&eopts, " ")) != NULL)
400 eargv[eargc++] = grep_strdup(str);
402 aargv = (char **)grep_calloc(eargc + argc + 1,
/* Build the combined vector: slots 1..eargc take the GREP_OPTIONS words, and
   the real command-line arguments (argv[1] onward) follow them. */
406 for (i = 0; i < eargc; i++)
407 aargv[i + 1] = eargv[i];
408 for (int j = 1; j < argc; j++, i++)
409 aargv[i + 1] = argv[j];
411 aargc = eargc + argc;
417 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
/* Digit options: consecutive digits are accumulated into one number that is
   stored in both Aflag and Bflag. */
420 case '0': case '1': case '2': case '3': case '4':
421 case '5': case '6': case '7': case '8': case '9':
422 if (newarg || !isdigit(lastc))
/* Guards the multiplication by 10 below against exceeding LLONG_MAX. */
424 else if (Aflag > LLONG_MAX / 10) {
428 Aflag = Bflag = (Aflag * 10) + (c - '0');
431 if (optarg == NULL) {
/* Numeric argument: parsed as base-10 unsigned; range/format errors and
   trailing characters after the number are tested for separately. */
440 l = strtoull(optarg, &ep, 10);
441 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
442 ((errno == EINVAL) && (l == 0)))
444 else if (ep[0] != '\0') {
456 binbehave = BINFILE_TEXT;
/* --devices accepts "skip" or "read" (case-insensitive); anything else is
   reported with message 3 and exit status 2. */
465 if (strcasecmp(optarg, "skip") == 0)
466 devbehave = DEV_SKIP;
467 else if (strcasecmp(optarg, "read") == 0)
468 devbehave = DEV_READ;
470 errx(2, getstr(3), "--devices");
/* --directories accepts "recurse", "skip" or "read" (case-insensitive). */
473 if (strcasecmp("recurse", optarg) == 0) {
475 dirbehave = DIR_RECURSE;
476 } else if (strcasecmp("skip", optarg) == 0)
477 dirbehave = DIR_SKIP;
478 else if (strcasecmp("read", optarg) == 0)
479 dirbehave = DIR_READ;
481 errx(2, getstr(3), "--directories");
484 grepbehave = GREP_EXTENDED;
/* The option argument may hold several newline-separated patterns; each
   piece is added on its own. */
489 char *string = optarg;
491 while ((token = strsep(&string, "\n")) != NULL)
492 add_pattern(token, strlen(token));
497 grepbehave = GREP_FIXED;
500 read_patterns(optarg);
504 grepbehave = GREP_BASIC;
514 binbehave = BINFILE_SKIP;
/* NOTE(review): err() appends strerror(errno), but no failing call is visible
   before this line; errx() may be what is intended -- confirm. */
524 err(2, "bzip2 support was disabled at compile-time");
526 filebehave = FILE_BZIP;
/* -m argument: parsed as base-10 signed and stored in both mlimit and mcount. */
539 mlimit = mcount = strtoll(optarg, &ep, 10);
540 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
541 ((errno == EINVAL) && (mcount == 0)))
543 else if (ep[0] != '\0') {
549 filebehave = FILE_LZMA;
555 linkbehave = LINK_EXPLICIT;
559 cflags &= ~REG_NOSUB;
562 linkbehave = LINK_SKIP;
568 linkbehave = LINK_READ;
572 dirbehave = DIR_RECURSE;
579 binbehave = BINFILE_BIN;
583 filebehave = FILE_MMAP;
/* NOTE(review): the format string comes from getstr(9) rather than a literal;
   it is expected to contain exactly two %s conversions. */
586 printf(getstr(9), getprogname(), VERSION);
593 cflags &= ~REG_NOSUB;
597 cflags &= ~REG_NOSUB;
600 filebehave = FILE_XZ;
603 filebehave = FILE_GZIP;
/* --binary-files accepts "binary", "without-match" or "text"
   (case-insensitive). */
606 if (strcasecmp("binary", optarg) == 0)
607 binbehave = BINFILE_BIN;
608 else if (strcasecmp("without-match", optarg) == 0)
609 binbehave = BINFILE_SKIP;
610 else if (strcasecmp("text", optarg) == 0)
611 binbehave = BINFILE_TEXT;
613 errx(2, getstr(3), "--binary-files");
/* --color: with no argument, or "auto"/"tty"/"if-tty", color is enabled only
   when stdout is a terminal and TERM is set to something other than "dumb";
   "always"/"yes"/"force" enable it unconditionally; "never"/"none"/"no" are
   accepted without enabling it; any other value is an error. */
617 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
618 strcasecmp("tty", optarg) == 0 ||
619 strcasecmp("if-tty", optarg) == 0) {
622 term = getenv("TERM");
623 if (isatty(STDOUT_FILENO) && term != NULL &&
624 strcasecmp(term, "dumb") != 0)
625 color = init_color("01;31");
626 } else if (strcasecmp("always", optarg) == 0 ||
627 strcasecmp("yes", optarg) == 0 ||
628 strcasecmp("force", optarg) == 0) {
629 color = init_color("01;31");
630 } else if (strcasecmp("never", optarg) != 0 &&
631 strcasecmp("none", optarg) != 0 &&
632 strcasecmp("no", optarg) != 0)
633 errx(2, getstr(3), "--color");
634 cflags &= ~REG_NOSUB;
647 add_fpattern(optarg, INCL_PAT);
651 add_fpattern(optarg, EXCL_PAT);
655 add_dpattern(optarg, INCL_PAT);
659 add_dpattern(optarg, EXCL_PAT);
/* Records whether getopt_long() moved on to a different argv element; the
   digit-option case above reads this. */
666 newarg = optind != prevoptind;
672 /* Empty pattern file matches nothing */
673 if (!needpattern && (patterns == 0))
676 /* Fail if we don't have any pattern */
677 if (aargc == 0 && needpattern)
680 /* Process patterns from command line */
/* The first remaining argument is taken as the pattern text and split on
   newlines into separate patterns. */
681 if (aargc != 0 && needpattern) {
683 char *string = *aargv;
685 while ((token = strsep(&string, "\n")) != NULL)
686 add_pattern(token, strlen(token));
691 switch (grepbehave) {
695 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
699 cflags |= REG_EXTENDED;
/* One fast-matcher slot and one regex slot per pattern, zero-initialized. */
706 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
707 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
709 /* Check if cheating is allowed (always is for fgrep). */
710 for (i = 0; i < patterns; ++i) {
711 if (fastncomp(&fg_pattern[i], pattern[i].pat,
712 pattern[i].len, cflags) != 0) {
713 /* Fall back to full regex library */
714 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
/* A compile failure is turned into text with regerror() and reported with
   exit status 2. */
716 regerror(c, &r_pattern[i], re_error,
718 errx(2, "%s", re_error);
/* Condition: at most one operand left and -H not given. */
726 if ((aargc == 0 || aargc == 1) && !Hflag)
/* "-" is handed to procfile() here; the exit status is the logical negation
   of its result. */
730 exit(!procfile("-"));
732 if (dirbehave == DIR_RECURSE)
733 c = grep_tree(aargv);
/* Non-recursive case: procfile() results are summed into c. When include or
   exclude filters are active, operands that fail file_matching() take a
   branch whose body is not visible in this listing. */
735 for (c = 0; aargc--; ++aargv) {
736 if ((finclude || fexclude) && !file_matching(*aargv))
738 c+= procfile(*aargv);
745 /* Find out the correct return value according to the
746 results and the command line option. */
/* c != 0: exit 0, except exit 2 when a file error occurred and -q is not set.
   c == 0: exit 2 on a file error, otherwise exit 1. */
747 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));