1 /* $NetBSD: grep.c,v 1.4 2011/02/16 01:31:33 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #include "fastmatch.h"
61 * Default messages to use when NLS is disabled or no catalogue
64 const char *errstr[] = {
/*
 * The number in the comment in front of each string is its index.  The
 * getstr(n) calls in this file use matching indices: getstr(3) is given
 * one string argument, getstr(4) the program name, getstr(9) the program
 * name and VERSION.  NOTE(review): getstr() itself is defined outside
 * this listing -- presumably it falls back to this table when no message
 * catalogue is available; confirm.
 */
66 /* 1*/ "(standard input)",
67 /* 2*/ "cannot read bzip2 compressed file",
68 /* 3*/ "unknown %s option",
69 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
70 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
71 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
72 /* 7*/ "\t[--null] [pattern] [file ...]\n",
73 /* 8*/ "Binary file %s matches\n",
74 /* 9*/ "%s (BSD grep) %s\n",
77 /* Flags passed to regcomp() and regexec() */
/*
 * cflags starts out with REG_NOSUB set; main() clears that bit again
 * (cflags &= ~REG_NOSUB) in several option branches.
 */
78 int cflags = REG_NOSUB;
79 int eflags = REG_STARTEND;
81 /* Shortcut for matching all cases like empty regex */
84 /* Searching patterns */
/*
 * patterns is the number of entries in use and pattern_sz the allocated
 * capacity: add_pattern() reallocates when patterns == pattern_sz.
 */
85 unsigned int patterns, pattern_sz;
88 fastmatch_t *fg_pattern;
90 /* Filename exclusion/inclusion patterns */
/*
 * Used-count / capacity pairs for the fpattern and dpattern arrays;
 * add_fpattern() and add_dpattern() reallocate when the pair is equal.
 */
91 unsigned int fpatterns, fpattern_sz;
92 unsigned int dpatterns, dpattern_sz;
93 struct epat *dpattern, *fpattern;
95 /* For regex errors */
96 char re_error[RE_ERROR_BUF + 1];
98 /* Command-line flags */
99 unsigned long long Aflag; /* -A x: print x lines trailing each match */
100 unsigned long long Bflag; /* -B x: print x lines leading each match */
101 bool Hflag; /* -H: always print file name */
102 bool Lflag; /* -L: only show names of files with no matches */
103 bool bflag; /* -b: show block numbers for each match */
104 bool cflag; /* -c: only show a count of matching lines */
105 bool hflag; /* -h: don't print filename headers */
106 bool iflag; /* -i: ignore case */
107 bool lflag; /* -l: only show names of files with matches */
108 bool mflag; /* -m x: stop reading the files after x matches */
109 long long mcount; /* count for -m */
110 bool nflag; /* -n: show line numbers in front of matching lines */
111 bool oflag; /* -o: print only matching part */
112 bool qflag; /* -q: quiet mode (don't output anything) */
113 bool sflag; /* -s: silent mode (ignore errors) */
114 bool vflag; /* -v: only show non-matching lines */
115 bool wflag; /* -w: pattern must start and end on word boundaries */
116 bool xflag; /* -x: pattern must match entire line */
117 bool lbflag; /* --line-buffered */
118 bool nullflag; /* --null */
119 char *label; /* --label */
/* Left unset unless --color/--colour enables it; then set via init_color(). */
120 const char *color; /* --color */
121 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
122 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
123 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
124 int devbehave = DEV_READ; /* -D: handling of devices */
125 int dirbehave = DIR_READ; /* -dRr: handling of directories */
126 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
128 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
129 bool fexclude, finclude; /* --exclude and --include */
/*
 * Option code for --binary-files (see long_options[]).  Starting at
 * CHAR_MAX + 1 keeps it distinct from every single-character option
 * value.  NOTE(review): the other long-only codes used in long_options[]
 * (HELP_OPT, MMAP_OPT, ...) are not visible in this listing; presumably
 * they are the enumerators that follow this one.
 */
132 BIN_OPT = CHAR_MAX + 1,
145 static inline const char *init_color(const char *);
148 bool first = true; /* flag whether we are processing the first match */
149 bool prev; /* flag whether or not the previous line matched */
150 int tail; /* lines left to print */
151 bool file_err; /* file reading error */
154 * Prints usage information and returns 2.
/*
 * Message 4 is used directly as the format string and receives the
 * program name.  Messages 5-7 are passed as the argument of a fixed "%s"
 * format, so their text is never interpreted as a format.
 */
159 fprintf(stderr, getstr(4), getprogname());
160 fprintf(stderr, "%s", getstr(5));
161 fprintf(stderr, "%s", getstr(6));
162 fprintf(stderr, "%s", getstr(7));
/*
 * Short option string handed to getopt_long() in main().  A ':' after a
 * letter marks an option that takes an argument.  The digits 0-9 are
 * listed as options of their own; main() accumulates them into a number.
 */
166 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
/*
 * Long option table for getopt_long().  The flag field is NULL in every
 * entry, so the last field is the value returned for that option: either
 * the equivalent short option character or a long-only *_OPT code.
 * "color"/"colour" and "quiet"/"silent" are aliases sharing one value.
 * The all-NULL/zero entry terminates the table.
 */
168 struct option long_options[] =
170 {"binary-files", required_argument, NULL, BIN_OPT},
171 {"help", no_argument, NULL, HELP_OPT},
172 {"mmap", no_argument, NULL, MMAP_OPT},
173 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
174 {"label", required_argument, NULL, LABEL_OPT},
175 {"null", no_argument, NULL, NULL_OPT},
176 {"color", optional_argument, NULL, COLOR_OPT},
177 {"colour", optional_argument, NULL, COLOR_OPT},
178 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
179 {"include", required_argument, NULL, R_INCLUDE_OPT},
180 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
181 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
182 {"after-context", required_argument, NULL, 'A'},
183 {"text", no_argument, NULL, 'a'},
184 {"before-context", required_argument, NULL, 'B'},
185 {"byte-offset", no_argument, NULL, 'b'},
186 {"context", optional_argument, NULL, 'C'},
187 {"count", no_argument, NULL, 'c'},
188 {"devices", required_argument, NULL, 'D'},
189 {"directories", required_argument, NULL, 'd'},
190 {"extended-regexp", no_argument, NULL, 'E'},
191 {"regexp", required_argument, NULL, 'e'},
192 {"fixed-strings", no_argument, NULL, 'F'},
193 {"file", required_argument, NULL, 'f'},
194 {"basic-regexp", no_argument, NULL, 'G'},
195 {"no-filename", no_argument, NULL, 'h'},
196 {"with-filename", no_argument, NULL, 'H'},
197 {"ignore-case", no_argument, NULL, 'i'},
198 {"bz2decompress", no_argument, NULL, 'J'},
199 {"files-with-matches", no_argument, NULL, 'l'},
200 {"files-without-match", no_argument, NULL, 'L'},
201 {"max-count", required_argument, NULL, 'm'},
202 {"lzma", no_argument, NULL, 'M'},
203 {"line-number", no_argument, NULL, 'n'},
204 {"only-matching", no_argument, NULL, 'o'},
205 {"quiet", no_argument, NULL, 'q'},
206 {"silent", no_argument, NULL, 'q'},
207 {"recursive", no_argument, NULL, 'r'},
208 {"no-messages", no_argument, NULL, 's'},
209 {"binary", no_argument, NULL, 'U'},
210 {"unix-byte-offsets", no_argument, NULL, 'u'},
211 {"invert-match", no_argument, NULL, 'v'},
212 {"version", no_argument, NULL, 'V'},
213 {"word-regexp", no_argument, NULL, 'w'},
214 {"line-regexp", no_argument, NULL, 'x'},
215 {"xz", no_argument, NULL, 'X'},
216 {"decompress", no_argument, NULL, 'Z'},
217 {NULL, no_argument, NULL, 0}
221 * Adds a searching pattern to the internal array.
224 add_pattern(char *pat, size_t len)
/*
 * pat: pattern text, len bytes long; it need not be NUL-terminated.
 * len: number of bytes of pat to use.
 * The text is copied into a freshly allocated buffer, so pat itself is
 * not retained.
 * NOTE(review): several statements of this function are missing from
 * this listing, including the conditions that guard the two early paths
 * below; the comments describe only the lines that are visible.
 */
227 /* Do not add further patterns if we already match everything */
231 /* Check if we can do a shortcut */
/*
 * Shortcut path: free every stored pattern string, shrink the array to
 * a single struct pat and clear that entry's text pointer.
 */
234 for (unsigned int i = 0; i < patterns; i++) {
235 free(pattern[i].pat);
237 pattern = grep_realloc(pattern, sizeof(struct pat));
238 pattern[0].pat = NULL;
243 /* Increase size if necessary */
244 if (patterns == pattern_sz) {
246 pattern = grep_realloc(pattern, ++pattern_sz *
/*
 * Tests for a trailing newline.  NOTE(review): the statement controlled
 * by this condition is not visible here -- presumably it excludes the
 * newline from len; confirm.
 */
249 if (len > 0 && pat[len - 1] == '\n')
251 /* pat may not be NUL-terminated */
/* Store a private, NUL-terminated copy of len bytes and record len. */
252 pattern[patterns].pat = grep_malloc(len + 1);
253 memcpy(pattern[patterns].pat, pat, len);
254 pattern[patterns].len = len;
255 pattern[patterns].pat[len] = '\0';
260 * Adds a file include/exclude pattern to the internal array.
263 add_fpattern(const char *pat, int mode)
/*
 * pat:  file name pattern; a copy made with grep_strdup() is stored.
 * mode: stored next to the pattern; main() passes INCL_PAT or EXCL_PAT.
 * The new entry is written at index fpatterns.  NOTE(review): the update
 * of fpatterns itself is not visible in this listing.
 */
266 /* Increase size if necessary */
267 if (fpatterns == fpattern_sz) {
269 fpattern = grep_realloc(fpattern, ++fpattern_sz *
270 sizeof(struct epat));
272 fpattern[fpatterns].pat = grep_strdup(pat);
273 fpattern[fpatterns].mode = mode;
278 * Adds a directory include/exclude pattern to the internal array.
281 add_dpattern(const char *pat, int mode)
/*
 * pat:  directory name pattern; a copy made with grep_strdup() is stored.
 * mode: stored next to the pattern; main() passes INCL_PAT or EXCL_PAT.
 * The new entry is written at index dpatterns.  NOTE(review): the update
 * of dpatterns itself is not visible in this listing.
 */
284 /* Increase size if necessary */
285 if (dpatterns == dpattern_sz) {
287 dpattern = grep_realloc(dpattern, ++dpattern_sz *
288 sizeof(struct epat));
290 dpattern[dpatterns].pat = grep_strdup(pat);
291 dpattern[dpatterns].mode = mode;
296 * Reads searching patterns from a file and adds them with add_pattern().
299 read_patterns(const char *fn)
/*
 * fn: path of the pattern file; it is opened for reading in text mode.
 * A failed fopen(), a failed fstat() and a path that is a directory are
 * each tested for below.  NOTE(review): the statements executed in those
 * cases are not visible in this listing.
 * Every line returned by fgetln() is handed to add_pattern(); a line
 * whose first character is '\n' (an empty line) is passed with length 0,
 * any other line with the length reported by fgetln().
 */
306 if ((f = fopen(fn, "r")) == NULL)
308 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
312 while ((line = fgetln(f, &len)) != NULL)
313 add_pattern(line, line[0] == '\n' ? 0 : len);
319 static inline const char *
320 init_color(const char *d)
/*
 * d: colour specification to fall back on.
 * Returns the value of the GREP_COLOR environment variable when it is
 * set and non-empty, otherwise d.
 */
324 c = getenv("GREP_COLOR");
325 return (c != NULL && c[0] != '\0' ? c : d);
329 main(int argc, char *argv[])
/*
 * Program entry point: selects the behaviour from the program name,
 * merges GREP_OPTIONS with the command line, parses options, compiles
 * the patterns and processes the input files.
 * NOTE(review): many statements (case labels, braces, short assignments)
 * are missing from this listing; the comments added below describe only
 * the lines that are visible.
 */
331 char **aargv, **eargv, *eopts;
334 unsigned long long l;
335 unsigned int aargc, eargc, i;
336 int c, lastc, needpattern, newarg, prevoptind;
338 setlocale(LC_ALL, "");
341 catalog = catopen("grep", NL_CAT_LOCALE);
344 /* Check what is the program name of the binary. In this
345 way we can have all the functionalities in one binary
346 without the need of scripting and using ugly hacks. */
/*
 * A "bz", "xz" or "lz" prefix selects FILE_BZIP, FILE_XZ or FILE_LZMA;
 * a leading 'z' selects FILE_GZIP.  NOTE(review): the assignment of pn
 * is not visible in this listing.
 */
348 if (pn[0] == 'b' && pn[1] == 'z') {
349 filebehave = FILE_BZIP;
351 } else if (pn[0] == 'x' && pn[1] == 'z') {
352 filebehave = FILE_XZ;
354 } else if (pn[0] == 'l' && pn[1] == 'z') {
355 filebehave = FILE_LZMA;
357 } else if (pn[0] == 'z') {
358 filebehave = FILE_GZIP;
363 grepbehave = GREP_EXTENDED;
366 grepbehave = GREP_FIXED;
375 eopts = getenv("GREP_OPTIONS");
377 /* support for extra arguments in GREP_OPTIONS */
379 if (eopts != NULL && eopts[0] != '\0') {
382 /* make an estimation of how many extra arguments we have */
/* NOTE(review): strlen(eopts) is re-evaluated on every iteration. */
383 for (unsigned int j = 0; j < strlen(eopts); j++)
387 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
390 /* parse extra arguments */
/*
 * strsep() splits on single spaces and operates directly on the string
 * obtained from getenv() above; stored tokens are grep_strdup() copies.
 */
391 while ((str = strsep(&eopts, " ")) != NULL)
393 eargv[eargc++] = grep_strdup(str);
395 aargv = (char **)grep_calloc(eargc + argc + 1,
/*
 * aargv[1..eargc] take the GREP_OPTIONS arguments and the slots after
 * them take argv[1..argc-1]; i keeps counting across both loops.
 */
399 for (i = 0; i < eargc; i++)
400 aargv[i + 1] = eargv[i];
401 for (int j = 1; j < argc; j++, i++)
402 aargv[i + 1] = argv[j];
404 aargc = eargc + argc;
410 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
/*
 * Digit options: successive digits are accumulated as a decimal number
 * that is stored in both Aflag and Bflag; the LLONG_MAX / 10 test is
 * made before the multiplication by 10.
 */
413 case '0': case '1': case '2': case '3': case '4':
414 case '5': case '6': case '7': case '8': case '9':
415 if (newarg || !isdigit(lastc))
417 else if (Aflag > LLONG_MAX / 10) {
421 Aflag = Bflag = (Aflag * 10) + (c - '0');
424 if (optarg == NULL) {
/* ep[0] != '\0' below detects trailing characters after the number. */
433 l = strtoull(optarg, &ep, 10);
434 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
435 ((errno == EINVAL) && (l == 0)))
437 else if (ep[0] != '\0') {
449 binbehave = BINFILE_TEXT;
/* --devices values are compared case-insensitively: "skip" or "read". */
458 if (strcasecmp(optarg, "skip") == 0)
459 devbehave = DEV_SKIP;
460 else if (strcasecmp(optarg, "read") == 0)
461 devbehave = DEV_READ;
463 errx(2, getstr(3), "--devices");
/* --directories values: "recurse", "skip" or "read" (case-insensitive). */
466 if (strcasecmp("recurse", optarg) == 0) {
468 dirbehave = DIR_RECURSE;
469 } else if (strcasecmp("skip", optarg) == 0)
470 dirbehave = DIR_SKIP;
471 else if (strcasecmp("read", optarg) == 0)
472 dirbehave = DIR_READ;
474 errx(2, getstr(3), "--directories");
477 grepbehave = GREP_EXTENDED;
480 add_pattern(optarg, strlen(optarg));
484 grepbehave = GREP_FIXED;
487 read_patterns(optarg);
491 grepbehave = GREP_BASIC;
501 binbehave = BINFILE_SKIP;
511 err(2, "bzip2 support was disabled at compile-time");
513 filebehave = FILE_BZIP;
526 mcount = strtoll(optarg, &ep, 10);
527 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
528 ((errno == EINVAL) && (mcount == 0)))
530 else if (ep[0] != '\0') {
536 filebehave = FILE_LZMA;
542 linkbehave = LINK_EXPLICIT;
546 cflags &= ~REG_NOSUB;
549 linkbehave = LINK_SKIP;
555 linkbehave = LINK_READ;
559 dirbehave = DIR_RECURSE;
566 binbehave = BINFILE_BIN;
570 filebehave = FILE_MMAP;
573 printf(getstr(9), getprogname(), VERSION);
580 cflags &= ~REG_NOSUB;
584 cflags &= ~REG_NOSUB;
587 filebehave = FILE_XZ;
590 filebehave = FILE_GZIP;
/* --binary-files values: "binary", "without-match" or "text". */
593 if (strcasecmp("binary", optarg) == 0)
594 binbehave = BINFILE_BIN;
595 else if (strcasecmp("without-match", optarg) == 0)
596 binbehave = BINFILE_SKIP;
597 else if (strcasecmp("text", optarg) == 0)
598 binbehave = BINFILE_TEXT;
600 errx(2, getstr(3), "--binary-files");
/*
 * --color[=when]: with no argument, "auto", "tty" or "if-tty", colour is
 * set only when stdout is a terminal and TERM is set to something other
 * than "dumb".  "always", "yes" and "force" set it unconditionally.
 * "never", "none" and "no" are accepted and leave it unchanged; any
 * other value is reported as an unknown --color option.
 */
604 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
605 strcasecmp("tty", optarg) == 0 ||
606 strcasecmp("if-tty", optarg) == 0) {
609 term = getenv("TERM");
610 if (isatty(STDOUT_FILENO) && term != NULL &&
611 strcasecmp(term, "dumb") != 0)
612 color = init_color("01;31");
613 } else if (strcasecmp("always", optarg) == 0 ||
614 strcasecmp("yes", optarg) == 0 ||
615 strcasecmp("force", optarg) == 0) {
616 color = init_color("01;31");
617 } else if (strcasecmp("never", optarg) != 0 &&
618 strcasecmp("none", optarg) != 0 &&
619 strcasecmp("no", optarg) != 0)
620 errx(2, getstr(3), "--color");
621 cflags &= ~REG_NOSUB;
634 add_fpattern(optarg, INCL_PAT);
638 add_fpattern(optarg, EXCL_PAT);
642 add_dpattern(optarg, INCL_PAT);
646 add_dpattern(optarg, EXCL_PAT);
653 newarg = optind != prevoptind;
659 /* Empty pattern file matches nothing */
660 if (!needpattern && (patterns == 0))
663 /* Fail if we don't have any pattern */
664 if (aargc == 0 && needpattern)
667 /* Process patterns from command line */
668 if (aargc != 0 && needpattern) {
669 add_pattern(*aargv, strlen(*aargv));
674 switch (grepbehave) {
678 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
682 cflags |= REG_EXTENDED;
/* One fast-matcher slot and one regex slot are allocated per pattern. */
689 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
690 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
692 /* Check if cheating is allowed (always is for fgrep). */
693 for (i = 0; i < patterns; ++i) {
694 if (fastncomp(&fg_pattern[i], pattern[i].pat,
695 pattern[i].len, cflags) != 0) {
696 /* Fall back to full regex library */
697 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
/* The regerror() text is reported through errx() with exit status 2. */
699 regerror(c, &r_pattern[i], re_error,
701 errx(2, "%s", re_error);
709 if ((aargc == 0 || aargc == 1) && !Hflag)
713 exit(!procfile("-"));
715 if (dirbehave == DIR_RECURSE)
716 c = grep_tree(aargv);
/* c accumulates the procfile() results over the remaining arguments. */
718 for (c = 0; aargc--; ++aargv) {
719 if ((finclude || fexclude) && !file_matching(*aargv))
721 c+= procfile(*aargv);
728 /* Find out the correct return value according to the
729 results and the command line option. */
/*
 * c non-zero: exit 0, except 2 when file_err is set and qflag is not.
 * c zero:     exit 2 when file_err is set, otherwise 1.
 */
730 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));