1 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
34 #include <sys/types.h>
57 * Default messages to use when NLS is disabled or no catalogue
60 const char *errstr[] = { /* default message table; the number in front of each entry matches the getstr(n) calls in this file (getstr is defined elsewhere -- confirm the mapping) */
62 /* 1*/ "(standard input)",
63 /* 2*/ "cannot read bzip2 compressed file",
64 /* 3*/ "unknown --color option", /* requested by main() when the --color argument is not recognised */
65 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n", /* 4-7: the usage text; 4 is a format that takes the program name */
66 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
67 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
68 /* 7*/ "\t[--null] [pattern] [file ...]\n",
69 /* 8*/ "unknown --binary-files option", /* requested by main() when the --binary-files argument is not recognised */
70 /* 9*/ "Binary file %s matches\n",
71 /*10*/ "%s (BSD grep) %s\n", /* version banner format: program name, then VERSION */
74 /* Flags passed to regcomp() and regexec() */
76 int eflags = REG_STARTEND;
78 /* Shortcut for matching all cases like empty regex */
81 /* Searching patterns */
82 unsigned int patterns, pattern_sz; /* patterns: entries stored in pattern[]; pattern_sz: allocated slots (add_pattern grows the array when the two are equal) */
85 fastgrep_t *fg_pattern; /* one entry per pattern; allocated in main() and filled by fgrepcomp() or fastcomp() */
87 /* Filename exclusion/inclusion patterns */
88 unsigned int epatterns, epattern_sz; /* same bookkeeping for epattern[]: index of the next entry and allocated slots (see add_epattern) */
89 struct epat *epattern; /* array whose entries carry pat, type and mode */
91 /* For regex errors */
92 char re_error[RE_ERROR_BUF + 1]; /* receives the regerror() text that main() hands to errx() */
94 /* Command-line flags (file-scope state, written while main() parses the options) */
95 unsigned long long Aflag; /* -A x: print x lines trailing each match; bare digit options set Aflag and Bflag together */
96 unsigned long long Bflag; /* -B x: print x lines leading each match */
97 bool Hflag; /* -H: always print file name */
98 bool Lflag; /* -L: only show names of files with no matches (main() sets qflag along with it) */
99 bool bflag; /* -b: show the byte offset of each match (long option --byte-offset) */
100 bool cflag; /* -c: only show a count of matching lines */
101 bool hflag; /* -h: don't print filename headers */
102 bool iflag; /* -i: ignore case */
103 bool lflag; /* -l: only show names of files with matches (main() sets qflag along with it) */
104 bool mflag; /* -m x: stop reading the files after x matches */
105 unsigned long long mcount; /* count for -m, parsed with strtoull() in base 10 */
106 bool nflag; /* -n: show line numbers in front of matching lines */
107 bool oflag; /* -o: print only matching part */
108 bool qflag; /* -q: quiet mode (don't output anything); also reachable as --quiet and --silent */
109 bool sflag; /* -s: silent mode (ignore errors) */
110 bool vflag; /* -v: only show non-matching lines */
111 bool wflag; /* -w: pattern must start and end on word boundaries */
112 bool xflag; /* -x: pattern must match entire line */
113 bool lbflag; /* --line-buffered */
114 bool nullflag; /* --null */
115 bool exclflag; /* --exclude */
116 char *label; /* --label (takes an argument) */
117 char *color; /* --color: color string taken from GREP_COLOR, or the built-in 01;31 */
118 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex; main() also derives it from the program name */
119 int binbehave = BINFILE_BIN; /* -aIU and --binary-files: handling of binary files */
120 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
121 int devbehave = DEV_GREP; /* -D: handling of devices */
122 int dirbehave = DIR_GREP; /* -dRr: handling of directories */
123 int linkbehave = LINK_GREP; /* -OpS: handling of symlinks */
126 BIN_OPT = CHAR_MAX + 1,
140 bool first = true; /* flag whether we are processing the first match */
141 bool prev; /* flag whether or not the previous line matched */
142 int tail; /* lines left to print */
143 bool notfound; /* file not found; main() folds this into the exit status (2 in most combinations) */
145 extern char *__progname; /* program name; main() inspects its first characters to choose the default mode */
148 * Prints usage information and returns 2.
153 fprintf(stderr, getstr(4), __progname); /* message 4 is a format that takes the program name */
154 fprintf(stderr, "%s", getstr(5)); /* each continuation line (5, 6, 7) is printed exactly once; the duplicated getstr(5) call was removed */
156 fprintf(stderr, "%s", getstr(6));
157 fprintf(stderr, "%s", getstr(7));
161 static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy"; /* short options for getopt_long(); a letter followed by ':' takes an argument, and the digits are options too */
163 struct option long_options[] = /* long options; each maps either to a short option letter or to one of the _OPT codes */
165 {"binary-files", required_argument, NULL, BIN_OPT},
166 {"help", no_argument, NULL, HELP_OPT},
167 {"mmap", no_argument, NULL, MMAP_OPT},
168 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
169 {"label", required_argument, NULL, LABEL_OPT},
170 {"null", no_argument, NULL, NULL_OPT},
171 {"color", optional_argument, NULL, COLOR_OPT},
172 {"colour", optional_argument, NULL, COLOR_OPT}, /* alternate spelling, same code as --color */
173 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
174 {"include", required_argument, NULL, R_INCLUDE_OPT},
175 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
176 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
177 {"after-context", required_argument, NULL, 'A'},
178 {"text", no_argument, NULL, 'a'},
179 {"before-context", required_argument, NULL, 'B'},
180 {"byte-offset", no_argument, NULL, 'b'},
181 {"context", optional_argument, NULL, 'C'},
182 {"count", no_argument, NULL, 'c'},
183 {"devices", required_argument, NULL, 'D'},
184 {"directories", required_argument, NULL, 'd'},
185 {"extended-regexp", no_argument, NULL, 'E'},
186 {"regexp", required_argument, NULL, 'e'},
187 {"fixed-strings", no_argument, NULL, 'F'},
188 {"file", required_argument, NULL, 'f'},
189 {"basic-regexp", no_argument, NULL, 'G'},
190 {"no-filename", no_argument, NULL, 'h'},
191 {"with-filename", no_argument, NULL, 'H'},
192 {"ignore-case", no_argument, NULL, 'i'},
193 {"bz2decompress", no_argument, NULL, 'J'},
194 {"files-with-matches", no_argument, NULL, 'l'},
195 {"files-without-match", no_argument, NULL, 'L'},
196 {"max-count", required_argument, NULL, 'm'},
197 {"line-number", no_argument, NULL, 'n'},
198 {"only-matching", no_argument, NULL, 'o'},
199 {"quiet", no_argument, NULL, 'q'},
200 {"silent", no_argument, NULL, 'q'}, /* alias of --quiet */
201 {"recursive", no_argument, NULL, 'r'},
202 {"no-messages", no_argument, NULL, 's'},
203 {"binary", no_argument, NULL, 'U'},
204 {"unix-byte-offsets", no_argument, NULL, 'u'},
205 {"invert-match", no_argument, NULL, 'v'},
206 {"version", no_argument, NULL, 'V'},
207 {"word-regexp", no_argument, NULL, 'w'},
208 {"line-regexp", no_argument, NULL, 'x'},
209 {"decompress", no_argument, NULL, 'Z'},
210 {NULL, no_argument, NULL, 0} /* all-zero terminator entry required by getopt_long() */
214 * Adds a searching pattern to the internal array.
217 add_pattern(char *pat, size_t len) /* pat: pattern bytes (need not be NUL-terminated); len: number of bytes to take from pat */
220 /* Shortcut: taken for an empty pattern, or once matchall is already set (the branch body is not shown here) */
221 if (len == 0 || matchall) {
225 /* Increase size if necessary: the array is full when the count equals the allocated size */
226 if (patterns == pattern_sz) {
228 pattern = grep_realloc(pattern, ++pattern_sz *
231 if (len > 0 && pat[len - 1] == '\n') /* detects a trailing newline; presumably dropped by the statement that follows -- confirm */
233 /* pat may not be NUL-terminated, so copy exactly len bytes and terminate the copy explicitly */
234 pattern[patterns] = grep_malloc(len + 1);
235 memcpy(pattern[patterns], pat, len);
236 pattern[patterns][len] = '\0';
241 * Adds an include/exclude pattern to the internal array.
244 add_epattern(char *pat, size_t len, int type, int mode) /* pat/len: pattern bytes and their count; type and mode are stored unchanged in the new entry */
247 /* Increase size if necessary: the array is full when the count equals the allocated size */
248 if (epatterns == epattern_sz) {
250 epattern = grep_realloc(epattern, ++epattern_sz *
251 sizeof(struct epat));
253 if (len > 0 && pat[len - 1] == '\n') /* detects a trailing newline; presumably dropped by the statement that follows -- confirm */
255 epattern[epatterns].pat = grep_malloc(len + 1); /* private copy: len bytes plus a terminating NUL */
256 memcpy(epattern[epatterns].pat, pat, len);
257 epattern[epatterns].pat[len] = '\0';
258 epattern[epatterns].type = type;
259 epattern[epatterns].mode = mode;
264 * Reads searching patterns from a file and adds them with add_pattern().
267 read_patterns(const char *fn) /* fn: path of the file holding one pattern per line */
273 if ((f = fopen(fn, "r")) == NULL) /* open for reading; the failure branch is not shown here */
275 while ((line = fgetln(f, &len)) != NULL) /* each pass yields one line and its length in len */
276 add_pattern(line, *line == '\n' ? 0 : len); /* a line that starts with a newline is passed with length 0, which add_pattern() treats as its empty-pattern shortcut */
283 main(int argc, char *argv[]) /* entry point; GREP_OPTIONS words and argv are merged into aargc/aargv before option parsing */
285 char **aargv, **eargv, *eopts;
287 unsigned long long l;
288 unsigned int aargc, eargc, i;
289 int c, lastc, needpattern, newarg, prevoptind;
291 setlocale(LC_ALL, ""); /* adopt the locale from the environment */
294 catalog = catopen("grep", NL_CAT_LOCALE); /* open the message catalog named grep */
297 /* Check what is the program name of the binary. In this
298 way we can have all the functionalities in one binary
299 without the need of scripting and using ugly hacks. */
300 switch (__progname[0]) { /* the first character of the program name selects the default mode */
302 grepbehave = GREP_EXTENDED;
305 grepbehave = GREP_FIXED;
308 grepbehave = GREP_BASIC;
311 filebehave = FILE_GZIP; /* this branch also switches the input to gzip handling... */
312 switch(__progname[1]) { /* ...and lets the second character pick the regex type */
314 grepbehave = GREP_EXTENDED;
317 grepbehave = GREP_FIXED;
320 grepbehave = GREP_BASIC;
331 eopts = getenv("GREP_OPTIONS"); /* extra options may be supplied through the environment */
337 for(i = 0; i < strlen(eopts); i++) /* walks eopts byte by byte; strlen() is re-evaluated on every pass */
341 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
343 str = strtok(eopts, " "); /* split eopts in place on spaces */
347 eargv[++eargc] = (char *)grep_malloc(sizeof(char) *
349 strlcpy(eargv[eargc], str, strlen(str) + 1); /* copy the token including its terminating NUL */
350 str = strtok(NULL, " ");
352 eargv[++eargc] = NULL; /* NULL-terminate the vector */
354 aargv = (char **)grep_calloc(eargc + argc + 1,
358 for(i = 1; i < eargc; i++)
360 for(int j = 1; j < argc; j++)
361 aargv[i++] = argv[j]; /* the real command-line arguments follow, continuing at index i */
363 aargc = eargc + argc - 1;
370 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
373 case '0': case '1': case '2': case '3': case '4':
374 case '5': case '6': case '7': case '8': case '9':
375 if (newarg || !isdigit(lastc)) /* this digit does not continue a run of digits within the same argv element */
377 else if (Aflag > LLONG_MAX / 10) { /* guard before the accumulated value is multiplied by 10 */
381 Aflag = Bflag = (Aflag * 10) + (c - '0'); /* append this digit; both context counts receive the same value */
384 if (optarg == NULL) {
393 l = strtoull(optarg, &ep, 10); /* parse the numeric argument in base 10 */
394 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
395 ((errno == EINVAL) && (l == 0)))
397 else if (ep[0] != '\0') { /* characters left over after the number */
409 binbehave = BINFILE_TEXT;
418 if (strcmp(optarg, "skip") == 0)
419 devbehave = DEV_SKIP;
422 if (strcmp("recurse", optarg) == 0) {
424 dirbehave = DIR_RECURSE;
425 } else if (strcmp("skip", optarg) == 0)
426 dirbehave = DIR_SKIP;
427 else if (strcmp("read", optarg) != 0) { /* anything other than recurse, skip or read lands here */
433 grepbehave = GREP_EXTENDED;
436 add_pattern(optarg, strlen(optarg));
440 grepbehave = GREP_FIXED;
443 read_patterns(optarg);
447 grepbehave = GREP_BASIC;
457 binbehave = BINFILE_SKIP;
465 filebehave = FILE_BZIP;
469 Lflag = qflag = true; /* Lflag is set together with quiet mode */
473 lflag = qflag = true; /* lflag is set together with quiet mode */
478 mcount = strtoull(optarg, &ep, 10); /* parse the count in base 10 */
479 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
480 ((errno == EINVAL) && (mcount == 0)))
482 else if (ep[0] != '\0') { /* characters left over after the number */
491 linkbehave = LINK_EXPLICIT;
497 linkbehave = LINK_SKIP;
503 linkbehave = LINK_GREP;
507 dirbehave = DIR_RECURSE;
514 binbehave = BINFILE_BIN;
518 /* noop, compatibility */
521 printf(getstr(10), __progname, VERSION); /* message 10 is used as the format for the version banner */
533 filebehave = FILE_GZIP;
536 if (strcmp("binary", optarg) == 0)
537 binbehave = BINFILE_BIN;
538 else if (strcmp("without-match", optarg) == 0)
539 binbehave = BINFILE_SKIP;
540 else if (strcmp("text", optarg) == 0)
541 binbehave = BINFILE_TEXT;
543 errx(2, "%s", getstr(8)); /* unrecognised value: exit with status 2 */
546 if (optarg == NULL || strcmp("auto", optarg) == 0 ||
547 strcmp("always", optarg) == 0 ) {
548 color = getenv("GREP_COLOR"); /* the color string comes from the environment first */
550 color = grep_malloc(sizeof(char) * 6); /* room for 01;31 plus the terminating NUL; presumably the fallback when GREP_COLOR is unset (the guarding line is not shown) */
551 strcpy(color, "01;31");
553 } else if (strcmp("never", optarg) == 0)
556 errx(2, "%s", getstr(3)); /* unrecognised value: exit with status 2 */
569 add_epattern(basename(optarg), strlen(basename(optarg)),
574 add_epattern(basename(optarg), strlen(basename(optarg)),
579 add_epattern(basename(optarg), strlen(basename(optarg)),
584 add_epattern(basename(optarg), strlen(basename(optarg)),
592 newarg = optind != prevoptind; /* true when getopt_long() has moved on to another argv element */
598 /* Fail if we don't have any pattern */
599 if (aargc == 0 && needpattern)
602 /* Process patterns from command line */
603 if (aargc != 0 && needpattern) {
604 add_pattern(*aargv, strlen(*aargv)); /* the first remaining argument is taken as the pattern */
609 switch (grepbehave) {
614 cflags |= REG_EXTENDED;
621 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); /* one slot per pattern */
622 r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); /* one slot per pattern */
624 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
625 * Optimizations should be done there.
627 /* Check if cheating is allowed (always is for fgrep). */
628 if (grepbehave == GREP_FIXED) {
629 for (i = 0; i < patterns; ++i)
630 fgrepcomp(&fg_pattern[i], pattern[i]);
632 for (i = 0; i < patterns; ++i) {
633 if (fastcomp(&fg_pattern[i], pattern[i])) { /* a non-zero return sends this pattern to regcomp() instead */
634 /* Fall back to full regex library */
635 c = regcomp(&r_pattern[i], pattern[i], cflags);
637 regerror(c, &r_pattern[i], re_error,
639 errx(2, "%s", re_error); /* report the regerror() text and exit with status 2 */
648 if ((aargc == 0 || aargc == 1) && !Hflag)
652 exit(!procfile("-")); /* exit status 0 when procfile() returns non-zero, 1 otherwise */
654 if (dirbehave == DIR_RECURSE)
655 c = grep_tree(aargv);
657 for (c = 0; aargc--; ++aargv)
658 c+= procfile(*aargv); /* accumulate the per-file results */
664 /* Find out the correct return value according to the
665 results and the command line option. */
666 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1)); /* c non-zero: 0, except 2 when notfound is set and qflag is off; c zero: 1, or 2 when notfound is set */