1 /* $NetBSD: grep.c,v 1.4 2011/02/16 01:31:33 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 #include <sys/types.h>
52 #include "fastmatch.h"
61 * Default messages to use when NLS is disabled or no catalogue
64 const char *errstr[] = {
/* Message table addressed by index; the numeric tag on each entry is its
   index.  Code in this file fetches entries with getstr(N), e.g. getstr(3)
   as the errx() format for bad option values and getstr(4)..getstr(7) for
   the usage text.  NOTE(review): getstr() is defined elsewhere; presumably
   it falls back to this table when no catalogue is open -- confirm. */
66 /* 1*/ "(standard input)",
67 /* 2*/ "cannot read bzip2 compressed file",
68 /* 3*/ "unknown %s option",
69 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
70 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
71 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
72 /* 7*/ "\t[--null] [pattern] [file ...]\n",
73 /* 8*/ "Binary file %s matches\n",
74 /* 9*/ "%s (BSD grep) %s\n",
77 /* Flags passed to regcomp() and regexec() */
78 int cflags = REG_NOSUB;
/* main() clears REG_NOSUB from cflags for several options and ORs in
   REG_EXTENDED while selecting the regex type. */
79 int eflags = REG_STARTEND;
81 /* Shortcut for matching all cases like empty regex */
84 /* Searching patterns */
85 unsigned int patterns;
/* patterns counts the entries in use in pattern[]; pattern_sz is compared
   against it in add_pattern() to decide when the array must grow. */
86 static unsigned int pattern_sz;
/* One fastmatch_t per pattern; allocated in main() with
   grep_calloc(patterns, ...) and filled in by fastncomp(). */
89 fastmatch_t *fg_pattern;
91 /* Filename exclusion/inclusion patterns */
92 unsigned int fpatterns, dpatterns;
/* fpattern_sz / dpattern_sz play the same role for fpattern[] / dpattern[]
   as pattern_sz does for pattern[]. */
93 static unsigned int fpattern_sz, dpattern_sz;
94 struct epat *dpattern, *fpattern;
96 /* For regex errors */
97 char re_error[RE_ERROR_BUF + 1];
99 /* Command-line flags */
100 unsigned long long Aflag; /* -A x: print x lines trailing each match */
101 unsigned long long Bflag; /* -B x: print x lines leading each match */
102 bool Hflag; /* -H: always print file name */
103 bool Lflag; /* -L: only show names of files with no matches */
104 bool bflag; /* -b (--byte-offset): show block numbers for each match; NOTE(review): the long option name suggests byte offsets -- confirm */
105 bool cflag; /* -c: only show a count of matching lines */
106 bool hflag; /* -h: don't print filename headers */
107 bool iflag; /* -i: ignore case */
108 bool lflag; /* -l: only show names of files with matches */
109 bool mflag; /* -m x: stop reading the files after x matches */
110 long long mcount; /* count for -m */
111 long long mlimit; /* requested value for -m */
112 bool nflag; /* -n: show line numbers in front of matching lines */
113 bool oflag; /* -o: print only matching part */
114 bool qflag; /* -q: quiet mode (don't output anything) */
115 bool sflag; /* -s: silent mode (ignore errors) */
116 bool vflag; /* -v: only show non-matching lines */
117 bool wflag; /* -w: pattern must start and end on word boundaries */
118 bool xflag; /* -x: pattern must match entire line */
119 bool lbflag; /* --line-buffered */
120 bool nullflag; /* --null */
121 char *label; /* --label */
122 const char *color; /* --color */
123 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
124 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
125 int filebehave = FILE_STDIO; /* -JMXZ: normal, gzip, bzip2, lzma or xz file; main() also assigns FILE_MMAP */
126 int devbehave = DEV_READ; /* -D: handling of devices */
127 int dirbehave = DIR_READ; /* -dRr: handling of directories */
128 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
130 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
131 bool fexclude, finclude; /* --exclude and --include */
134 BIN_OPT = CHAR_MAX + 1,
/* Codes for long-only options start above CHAR_MAX, so they cannot collide
   with any short option character returned by getopt_long(). */
147 static inline const char *init_color(const char *);
/* init_color(): returns the GREP_COLOR environment value when it is set and
   non-empty, otherwise the default passed in. */
150 bool first = true; /* flag whether we are processing the first match */
151 bool prev; /* flag whether or not the previous line matched */
152 int tail; /* lines left to print */
153 bool file_err; /* file reading error */
156 * Prints usage information and returns 2.
/* Message 4 carries a %s for the program name, so it is used directly as
   the format string; messages 5-7 are printed verbatim through "%s". */
161 fprintf(stderr, getstr(4), getprogname());
162 fprintf(stderr, "%s", getstr(5));
163 fprintf(stderr, "%s", getstr(6));
164 fprintf(stderr, "%s", getstr(7));
/* Short option string for getopt_long(): a trailing ':' marks an option
   that takes an argument.  The digits 0-9 are listed as options so that a
   numeric context argument can be accumulated digit by digit in main(). */
168 static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy";
170 static const struct option long_options[] =
/* Long option table.  Every entry has a NULL flag pointer, so getopt_long()
   returns the last field: either the equivalent short option character or
   one of the *_OPT codes for options that have no short form.  "colour" is
   an alias of "color" and "silent" an alias of "quiet".  The all-zero entry
   terminates the table. */
172 {"binary-files", required_argument, NULL, BIN_OPT},
173 {"help", no_argument, NULL, HELP_OPT},
174 {"mmap", no_argument, NULL, MMAP_OPT},
175 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
176 {"label", required_argument, NULL, LABEL_OPT},
177 {"null", no_argument, NULL, NULL_OPT},
178 {"color", optional_argument, NULL, COLOR_OPT},
179 {"colour", optional_argument, NULL, COLOR_OPT},
180 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
181 {"include", required_argument, NULL, R_INCLUDE_OPT},
182 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
183 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
184 {"after-context", required_argument, NULL, 'A'},
185 {"text", no_argument, NULL, 'a'},
186 {"before-context", required_argument, NULL, 'B'},
187 {"byte-offset", no_argument, NULL, 'b'},
188 {"context", optional_argument, NULL, 'C'},
189 {"count", no_argument, NULL, 'c'},
190 {"devices", required_argument, NULL, 'D'},
191 {"directories", required_argument, NULL, 'd'},
192 {"extended-regexp", no_argument, NULL, 'E'},
193 {"regexp", required_argument, NULL, 'e'},
194 {"fixed-strings", no_argument, NULL, 'F'},
195 {"file", required_argument, NULL, 'f'},
196 {"basic-regexp", no_argument, NULL, 'G'},
197 {"no-filename", no_argument, NULL, 'h'},
198 {"with-filename", no_argument, NULL, 'H'},
199 {"ignore-case", no_argument, NULL, 'i'},
200 {"bz2decompress", no_argument, NULL, 'J'},
201 {"files-with-matches", no_argument, NULL, 'l'},
202 {"files-without-match", no_argument, NULL, 'L'},
203 {"max-count", required_argument, NULL, 'm'},
204 {"lzma", no_argument, NULL, 'M'},
205 {"line-number", no_argument, NULL, 'n'},
206 {"only-matching", no_argument, NULL, 'o'},
207 {"quiet", no_argument, NULL, 'q'},
208 {"silent", no_argument, NULL, 'q'},
209 {"recursive", no_argument, NULL, 'r'},
210 {"no-messages", no_argument, NULL, 's'},
211 {"binary", no_argument, NULL, 'U'},
212 {"unix-byte-offsets", no_argument, NULL, 'u'},
213 {"invert-match", no_argument, NULL, 'v'},
214 {"version", no_argument, NULL, 'V'},
215 {"word-regexp", no_argument, NULL, 'w'},
216 {"line-regexp", no_argument, NULL, 'x'},
217 {"xz", no_argument, NULL, 'X'},
218 {"decompress", no_argument, NULL, 'Z'},
219 {NULL, no_argument, NULL, 0}
223 * Adds a searching pattern to the internal array.
226 add_pattern(char *pat, size_t len)
/* pat: pattern text, not necessarily NUL-terminated; len: number of bytes
   of pat to use.  The bytes are copied, so the caller keeps ownership of
   pat. */
229 /* Do not add further patterns if we already match everything */
233 /* Check if we can do a shortcut */
236 for (unsigned int i = 0; i < patterns; i++) {
237 free(pattern[i].pat);
/* Shortcut path: every stored pattern text has been released; the array is
   cut down to a single slot whose text pointer is NULL. */
239 pattern = grep_realloc(pattern, sizeof(struct pat));
240 pattern[0].pat = NULL;
245 /* Increase size if necessary */
246 if (patterns == pattern_sz) {
248 pattern = grep_realloc(pattern, ++pattern_sz *
/* A newline as the last byte of pat is special-cased before the copy. */
251 if (len > 0 && pat[len - 1] == '\n')
253 /* pat may not be NUL-terminated */
/* Store a private, NUL-terminated copy of the first len bytes together
   with its length. */
254 pattern[patterns].pat = grep_malloc(len + 1);
255 memcpy(pattern[patterns].pat, pat, len);
256 pattern[patterns].len = len;
257 pattern[patterns].pat[len] = '\0';
262 * Adds a file include/exclude pattern to the internal array.
265 add_fpattern(const char *pat, int mode)
/* pat: file name pattern, duplicated with grep_strdup(); mode: stored
   unchanged alongside it (main() passes INCL_PAT or EXCL_PAT). */
268 /* Increase size if necessary */
269 if (fpatterns == fpattern_sz) {
271 fpattern = grep_realloc(fpattern, ++fpattern_sz *
272 sizeof(struct epat));
/* Fill the slot at index fpatterns with the copy and its mode. */
274 fpattern[fpatterns].pat = grep_strdup(pat);
275 fpattern[fpatterns].mode = mode;
280 * Adds a directory include/exclude pattern to the internal array.
283 add_dpattern(const char *pat, int mode)
/* pat: directory name pattern, duplicated with grep_strdup(); mode: stored
   unchanged alongside it (main() passes INCL_PAT or EXCL_PAT). */
286 /* Increase size if necessary */
287 if (dpatterns == dpattern_sz) {
289 dpattern = grep_realloc(dpattern, ++dpattern_sz *
290 sizeof(struct epat));
/* Fill the slot at index dpatterns with the copy and its mode. */
292 dpattern[dpatterns].pat = grep_strdup(pat);
293 dpattern[dpatterns].mode = mode;
298 * Reads searching patterns from a file and adds them with add_pattern().
301 read_patterns(const char *fn)
/* fn: path of the pattern file; it is opened read-only in text mode. */
308 if ((f = fopen(fn, "r")) == NULL)
/* A failed fstat() and a path that names a directory are handled by the
   same branch. */
310 if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) {
/* fgetln() yields each line with its length and without NUL termination.
   A line that starts with a newline (an empty line) is handed to
   add_pattern() with length 0. */
314 while ((line = fgetln(f, &len)) != NULL)
315 add_pattern(line, line[0] == '\n' ? 0 : len);
321 static inline const char *
322 init_color(const char *d)
/* d: default colour string.  Returns the value of the GREP_COLOR
   environment variable when it is set and non-empty, otherwise d. */
326 c = getenv("GREP_COLOR");
327 return (c != NULL && c[0] != '\0' ? c : d);
331 main(int argc, char *argv[])
/* Program entry point: selects behaviour from the program name, merges
   GREP_OPTIONS with the command line, parses options, compiles the
   patterns, processes the inputs and exits with a status derived from the
   match count and file_err. */
333 char **aargv, **eargv, *eopts;
336 unsigned long long l;
337 unsigned int aargc, eargc, i;
338 int c, lastc, needpattern, newarg, prevoptind;
340 setlocale(LC_ALL, "");
343 catalog = catopen("grep", NL_CAT_LOCALE);
346 /* Check what is the program name of the binary. In this
347 way we can have all the functionalities in one binary
348 without the need of scripting and using ugly hacks. */
/* A leading "bz", "xz", "lz" or "z" in pn selects the matching
   decompression mode; the two-letter prefixes are tested before the bare
   "z". */
350 if (pn[0] == 'b' && pn[1] == 'z') {
351 filebehave = FILE_BZIP;
353 } else if (pn[0] == 'x' && pn[1] == 'z') {
354 filebehave = FILE_XZ;
356 } else if (pn[0] == 'l' && pn[1] == 'z') {
357 filebehave = FILE_LZMA;
359 } else if (pn[0] == 'z') {
360 filebehave = FILE_GZIP;
365 grepbehave = GREP_EXTENDED;
368 grepbehave = GREP_FIXED;
377 eopts = getenv("GREP_OPTIONS");
379 /* support for extra arguments in GREP_OPTIONS */
381 if (eopts != NULL && eopts[0] != '\0') {
384 /* make an estimation of how many extra arguments we have */
/* NOTE(review): strlen(eopts) is re-evaluated on every iteration of this
   loop; hoisting it would avoid the repeated scan. */
385 for (unsigned int j = 0; j < strlen(eopts); j++)
389 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
392 /* parse extra arguments */
/* strsep() splits eopts in place at each single space; every token kept is
   duplicated into eargv. */
393 while ((str = strsep(&eopts, " ")) != NULL)
395 eargv[eargc++] = grep_strdup(str);
397 aargv = (char **)grep_calloc(eargc + argc + 1,
/* Build the merged vector: the GREP_OPTIONS arguments go in first,
   starting at index 1, followed by argv[1..argc-1]. */
401 for (i = 0; i < eargc; i++)
402 aargv[i + 1] = eargv[i];
403 for (int j = 1; j < argc; j++, i++)
404 aargv[i + 1] = argv[j];
406 aargc = eargc + argc;
412 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
/* Digit options: consecutive digits are accumulated as a decimal number
   into both Aflag and Bflag.  NOTE(review): Aflag is unsigned long long
   but the overflow guard compares against LLONG_MAX / 10 -- confirm this
   bound is intended. */
415 case '0': case '1': case '2': case '3': case '4':
416 case '5': case '6': case '7': case '8': case '9':
417 if (newarg || !isdigit(lastc))
419 else if (Aflag > LLONG_MAX / 10) {
423 Aflag = Bflag = (Aflag * 10) + (c - '0');
426 if (optarg == NULL) {
/* Parse the numeric argument; range and conversion failures are detected
   through errno, trailing characters through ep. */
435 l = strtoull(optarg, &ep, 10);
436 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
437 ((errno == EINVAL) && (l == 0)))
439 else if (ep[0] != '\0') {
451 binbehave = BINFILE_TEXT;
/* --devices accepts "skip" or "read", compared case-insensitively. */
460 if (strcasecmp(optarg, "skip") == 0)
461 devbehave = DEV_SKIP;
462 else if (strcasecmp(optarg, "read") == 0)
463 devbehave = DEV_READ;
465 errx(2, getstr(3), "--devices");
/* --directories accepts "recurse", "skip" or "read", compared
   case-insensitively. */
468 if (strcasecmp("recurse", optarg) == 0) {
470 dirbehave = DIR_RECURSE;
471 } else if (strcasecmp("skip", optarg) == 0)
472 dirbehave = DIR_SKIP;
473 else if (strcasecmp("read", optarg) == 0)
474 dirbehave = DIR_READ;
476 errx(2, getstr(3), "--directories");
479 grepbehave = GREP_EXTENDED;
/* The option argument may hold several newline-separated patterns; each
   piece is added separately.  strsep() modifies optarg in place. */
484 char *string = optarg;
486 while ((token = strsep(&string, "\n")) != NULL)
487 add_pattern(token, strlen(token));
492 grepbehave = GREP_FIXED;
495 read_patterns(optarg);
499 grepbehave = GREP_BASIC;
509 binbehave = BINFILE_SKIP;
/* NOTE(review): err() also appends strerror(errno) to the message; errx()
   may be what is intended for this compile-time condition -- confirm. */
519 err(2, "bzip2 support was disabled at compile-time");
521 filebehave = FILE_BZIP;
/* The parsed count is stored in both mlimit and mcount. */
534 mlimit = mcount = strtoll(optarg, &ep, 10);
535 if (((errno == ERANGE) && (mcount == LLONG_MAX)) ||
536 ((errno == EINVAL) && (mcount == 0)))
538 else if (ep[0] != '\0') {
544 filebehave = FILE_LZMA;
550 linkbehave = LINK_EXPLICIT;
554 cflags &= ~REG_NOSUB;
557 linkbehave = LINK_SKIP;
563 linkbehave = LINK_READ;
567 dirbehave = DIR_RECURSE;
574 binbehave = BINFILE_BIN;
578 filebehave = FILE_MMAP;
/* Message 9 is used as the printf format; it expects the program name and
   the version string. */
581 printf(getstr(9), getprogname(), VERSION);
588 cflags &= ~REG_NOSUB;
592 cflags &= ~REG_NOSUB;
595 filebehave = FILE_XZ;
598 filebehave = FILE_GZIP;
/* --binary-files accepts "binary", "without-match" or "text", compared
   case-insensitively. */
601 if (strcasecmp("binary", optarg) == 0)
602 binbehave = BINFILE_BIN;
603 else if (strcasecmp("without-match", optarg) == 0)
604 binbehave = BINFILE_SKIP;
605 else if (strcasecmp("text", optarg) == 0)
606 binbehave = BINFILE_TEXT;
608 errx(2, getstr(3), "--binary-files");
/* --color: with no argument, or "auto"/"tty"/"if-tty", colour is enabled
   only when stdout is a terminal and TERM is set to something other than
   "dumb"; "always"/"yes"/"force" enable it unconditionally;
   "never"/"none"/"no" leave color untouched; any other value is an
   error. */
612 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
613 strcasecmp("tty", optarg) == 0 ||
614 strcasecmp("if-tty", optarg) == 0) {
617 term = getenv("TERM");
618 if (isatty(STDOUT_FILENO) && term != NULL &&
619 strcasecmp(term, "dumb") != 0)
620 color = init_color("01;31");
621 } else if (strcasecmp("always", optarg) == 0 ||
622 strcasecmp("yes", optarg) == 0 ||
623 strcasecmp("force", optarg) == 0) {
624 color = init_color("01;31");
625 } else if (strcasecmp("never", optarg) != 0 &&
626 strcasecmp("none", optarg) != 0 &&
627 strcasecmp("no", optarg) != 0)
628 errx(2, getstr(3), "--color");
629 cflags &= ~REG_NOSUB;
642 add_fpattern(optarg, INCL_PAT);
646 add_fpattern(optarg, EXCL_PAT);
650 add_dpattern(optarg, INCL_PAT);
654 add_dpattern(optarg, EXCL_PAT);
/* newarg records whether getopt_long() moved on to a new argv element; the
   digit-option handling above uses it to tell where a number starts. */
661 newarg = optind != prevoptind;
667 /* Empty pattern file matches nothing */
668 if (!needpattern && (patterns == 0))
671 /* Fail if we don't have any pattern */
672 if (aargc == 0 && needpattern)
675 /* Process patterns from command line */
676 if (aargc != 0 && needpattern) {
/* The first remaining argument is the pattern; it may contain several
   newline-separated patterns, each added on its own. */
678 char *string = *aargv;
680 while ((token = strsep(&string, "\n")) != NULL)
681 add_pattern(token, strlen(token));
686 switch (grepbehave) {
690 /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */
694 cflags |= REG_EXTENDED;
/* One fast-matcher slot and one regex slot per pattern, zero-initialised. */
701 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
702 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
704 /* Check if cheating is allowed (always is for fgrep). */
705 for (i = 0; i < patterns; ++i) {
706 if (fastncomp(&fg_pattern[i], pattern[i].pat,
707 pattern[i].len, cflags) != 0) {
708 /* Fall back to full regex library */
709 c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
/* On a regcomp() failure the message is formatted into re_error and the
   program exits with status 2. */
711 regerror(c, &r_pattern[i], re_error,
713 errx(2, "%s", re_error);
721 if ((aargc == 0 || aargc == 1) && !Hflag)
/* "-" is the name passed to procfile() here; the exit status is the
   logical negation of its return value. */
725 exit(!procfile("-"));
727 if (dirbehave == DIR_RECURSE)
728 c = grep_tree(aargv);
/* Non-recursive case: c accumulates the procfile() results over the
   remaining arguments; the file_matching() test is applied only when
   --include or --exclude was given. */
730 for (c = 0; aargc--; ++aargv) {
731 if ((finclude || fexclude) && !file_matching(*aargv))
733 c+= procfile(*aargv);
740 /* Find out the correct return value according to the
741 results and the command line option. */
/* c != 0: exit 0, except 2 when file_err is set and -q is not;
   c == 0: exit 2 when file_err is set, otherwise 1. */
742 exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1));