]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/grep/util.c
Add BSD grep to the base system and make it our default grep.
[FreeBSD/FreeBSD.git] / usr.bin / grep / util.c
1 /*      $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $  */
2
3 /*-
4  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/stat.h>
34 #include <sys/types.h>
35
36 #include <ctype.h>
37 #include <err.h>
38 #include <errno.h>
39 #include <fnmatch.h>
40 #include <fts.h>
41 #include <libgen.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 #include <wchar.h>
47 #include <wctype.h>
48
49 #include "grep.h"
50
/*
 * Count of lines handed to enqueue() since it was last reset; procfile()
 * zeroes it per file and procline() zeroes it after printing a match.
 */
static int	 linesqueued;

/*
 * Matches one line against the patterns and prints it when appropriate.
 * `nottext' carries the binary flag of the file the line came from.
 */
static int	 procline(struct str *l, int nottext);
53
54 /*
55  * Processes a directory when a recursive search is performed with
56  * the -R option.  Each appropriate file is passed to procfile().
57  */
58 int
59 grep_tree(char **argv)
60 {
61         FTS *fts;
62         FTSENT *p;
63         char *d, *dir;
64         unsigned int i;
65         int c, fts_flags;
66         bool ok;
67
68         c = fts_flags = 0;
69
70         switch(linkbehave) {
71         case LINK_EXPLICIT:
72                 fts_flags = FTS_COMFOLLOW;
73                 break;
74         case LINK_SKIP:
75                 fts_flags = FTS_PHYSICAL;
76                 break;
77         default:
78                 fts_flags = FTS_LOGICAL;
79                         
80         }
81
82         fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
83
84         if (!(fts = fts_open(argv, fts_flags, NULL)))
85                 err(2, NULL);
86         while ((p = fts_read(fts)) != NULL) {
87                 switch (p->fts_info) {
88                 case FTS_DNR:
89                         /* FALLTHROUGH */
90                 case FTS_ERR:
91                         errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
92                         break;
93                 case FTS_D:
94                         /* FALLTHROUGH */
95                 case FTS_DP:
96                         break;
97                 case FTS_DC:
98                         /* Print a warning for recursive directory loop */
99                         warnx("warning: %s: recursive directory loop",
100                                 p->fts_path);
101                         break;
102                 default:
103                         /* Check for file exclusion/inclusion */
104                         ok = true;
105                         if (exclflag) {
106                                 d = strrchr(p->fts_path, '/');
107                                 dir = grep_malloc(sizeof(char) *
108                                     (d - p->fts_path + 2));
109                                 strlcpy(dir, p->fts_path,
110                                     (d - p->fts_path + 1));
111                                 for (i = 0; i < epatterns; ++i) {
112                                         switch(epattern[i].type) {
113                                         case FILE_PAT:
114                                                 if (fnmatch(epattern[i].pat,
115                                                     basename(p->fts_path), 0) == 0)
116                                                         ok = epattern[i].mode != EXCL_PAT;
117                                                 break;
118                                         case DIR_PAT:
119                                                 if (strstr(dir,
120                                                     epattern[i].pat) != NULL)
121                                                         ok = epattern[i].mode != EXCL_PAT;
122                                                 break;
123                                         }
124                                 }
125                         free(dir);
126                         }
127
128                         if (ok)
129                                 c += procfile(p->fts_path);
130                         break;
131                 }
132         }
133
134         return (c);
135 }
136
137 /*
138  * Opens a file and processes it.  Each file is processed line-by-line
139  * passing the lines to procline().
140  */
141 int
142 procfile(const char *fn)
143 {
144         struct file *f;
145         struct stat sb;
146         struct str ln;
147         mode_t s;
148         int c, t;
149
150         if (mflag && (mcount <= 0))
151                 return (0);
152
153         if (strcmp(fn, "-") == 0) {
154                 fn = label != NULL ? label : getstr(1);
155                 f = grep_stdin_open();
156         } else {
157                 if (!stat(fn, &sb)) {
158                         /* Check if we need to process the file */
159                         s = sb.st_mode & S_IFMT;
160                         if (s == S_IFDIR && dirbehave == DIR_SKIP)
161                                 return (0);
162                         if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
163                                 || s == S_IFSOCK) && devbehave == DEV_SKIP)
164                                         return (0);
165                 }
166                 f = grep_open(fn);
167         }
168         if (f == NULL) {
169                 if (!sflag)
170                         warn("%s", fn);
171                 if (errno == ENOENT)
172                         notfound = true;
173                 return (0);
174         }
175
176         ln.file = grep_malloc(strlen(fn) + 1);
177         strcpy(ln.file, fn);
178         ln.line_no = 0;
179         ln.len = 0;
180         linesqueued = 0;
181         tail = 0;
182         ln.off = -1;
183
184         for (c = 0;  c == 0 || !(lflag || qflag); ) {
185                 ln.off += ln.len + 1;
186                 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) {
187                         if (ln.line_no == 0 && matchall)
188                                 exit(0);
189                         else
190                                 break;
191                 }
192                 if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
193                         --ln.len;
194                 ln.line_no++;
195
196                 /* Return if we need to skip a binary file */
197                 if (f->binary && binbehave == BINFILE_SKIP) {
198                         grep_close(f);
199                         free(f);
200                         return (0);
201                 }
202                 /* Process the file line-by-line */
203                 if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
204                         enqueue(&ln);
205                         linesqueued++;
206                 }
207                 c += t;
208
209                 /* Count the matches if we have a match limit */
210                 if (mflag) {
211                         mcount -= t;
212                         if (mcount <= 0)
213                                 break;
214                 }
215         }
216         if (Bflag > 0)
217                 clearqueue();
218         grep_close(f);
219
220         if (cflag) {
221                 if (!hflag)
222                         printf("%s:", ln.file);
223                 printf("%u\n", c);
224         }
225         if (lflag && c != 0)
226                 printf("%s\n", fn);
227         if (Lflag && c == 0)
228                 printf("%s\n", fn);
229         if (c && !cflag && !lflag && !Lflag &&
230             binbehave == BINFILE_BIN && f->binary && !qflag)
231                 printf(getstr(9), fn);
232
233         free(f);
234         return (c);
235 }
236
/*
 * Reports whether x is a word constituent: a character for which
 * iswalnum() is true, or an underscore.  A function rather than a macro
 * so that the argument is evaluated exactly once.
 */
static inline int
iswword(wint_t x)
{

	return (iswalnum(x) || x == L'_');
}
238
239 /*
240  * Processes a line comparing it with the specified patterns.  Each pattern
241  * is looped to be compared along with the full string, saving each and every
242  * match, which is necessary to colorize the output and to count the
243  * matches.  The matching lines are passed to printline() to display the
244  * appropriate output.
245  */
246 static int
247 procline(struct str *l, int nottext)
248 {
249         regmatch_t matches[MAX_LINE_MATCHES];
250         regmatch_t pmatch;
251         size_t st = 0;
252         unsigned int i;
253         int c = 0, m = 0, r = 0;
254
255         if (!matchall) {
256                 /* Loop to process the whole line */
257                 while (st <= l->len) {
258                         pmatch.rm_so = st;
259                         pmatch.rm_eo = l->len;
260
261                         /* Loop to compare with all the patterns */
262                         for (i = 0; i < patterns; i++) {
263 /*
264  * XXX: grep_search() is a workaround for speed up and should be
265  * removed in the future.  See fastgrep.c.
266  */
267                                 if (fg_pattern[i].pattern) {
268                                         r = grep_search(&fg_pattern[i],
269                                             (unsigned char *)l->dat,
270                                             l->len, &pmatch);
271                                         r = (r == 0) ? 0 : REG_NOMATCH;
272                                         st = pmatch.rm_eo;
273                                 } else {
274                                         r = regexec(&r_pattern[i], l->dat, 1,
275                                             &pmatch, eflags);
276                                         r = (r == 0) ? 0 : REG_NOMATCH;
277                                         st = pmatch.rm_eo;
278                                 }
279                                 if (r == REG_NOMATCH)
280                                         continue;
281                                 /* Check for full match */
282                                 if (r == 0 && xflag)
283                                         if (pmatch.rm_so != 0 ||
284                                             (size_t)pmatch.rm_eo != l->len)
285                                                 r = REG_NOMATCH;
286                                 /* Check for whole word match */
287                                 if (r == 0 && wflag && pmatch.rm_so != 0 &&
288                                     (size_t)pmatch.rm_eo != l->len) {
289                                         wchar_t *wbegin;
290                                         wint_t wend;
291                                         size_t size;
292
293                                         size = mbstowcs(NULL, l->dat,
294                                             pmatch.rm_so);
295
296                                         if (size == ((size_t) - 1))
297                                                 r = REG_NOMATCH;
298                                         else {
299                                                 wbegin = grep_malloc(size);
300                                                 if (mbstowcs(wbegin, l->dat,
301                                                     pmatch.rm_so) == ((size_t) - 1))
302                                                         r = REG_NOMATCH;
303                                                 else if (sscanf(&l->dat[pmatch.rm_eo],
304                                                     "%lc", &wend) != 1)
305                                                         r = REG_NOMATCH;
306                                                 else if (iswword(wbegin[wcslen(wbegin)]) ||
307                                                     iswword(wend))
308                                                         r = REG_NOMATCH;
309                                                 free(wbegin);
310                                         }
311                                 }
312                                 if (r == 0) {
313                                         if (m == 0)
314                                                 c++;
315                                         if (m < MAX_LINE_MATCHES)
316                                                 matches[m++] = pmatch;
317                                         /* matches - skip further patterns */
318                                         break;
319                                 }
320                         }
321
322                         if (vflag) {
323                                 c = !c;
324                                 break;
325                         }
326                         /* One pass if we are not recording matches */
327                         if (!oflag && !color)
328                                 break;
329
330                         if (st == (size_t)pmatch.rm_so)
331                                 break;  /* No matches */
332                 }
333         } else
334                 c = !vflag;
335
336         if (c && binbehave == BINFILE_BIN && nottext)
337                 return (c); /* Binary file */
338
339         /* Dealing with the context */
340         if ((tail || c) && !cflag && !qflag) {
341                 if (c) {
342                         if (!first && !prev && !tail && Aflag)
343                                 printf("--\n");
344                         tail = Aflag;
345                         if (Bflag > 0) {
346                                 if (!first && !prev)
347                                         printf("--\n");
348                                 printqueue();
349                         }
350                         linesqueued = 0;
351                         printline(l, ':', matches, m);
352                 } else {
353                         printline(l, '-', matches, m);
354                         tail--;
355                 }
356         }
357
358         if (c) {
359                 prev = true;
360                 first = false;
361         } else
362                 prev = false;
363
364         return (c);
365 }
366
/*
 * malloc() wrapper that never returns NULL: when the allocation fails
 * the process is terminated through err() with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
379
/*
 * calloc() wrapper that never returns NULL: when the allocation fails
 * the process is terminated through err() with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
392
/*
 * realloc() wrapper that never returns NULL: when the resize fails the
 * process is terminated through err() with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *grown = realloc(ptr, size);

	if (grown == NULL)
		err(2, "realloc");
	return (grown);
}
404
405 /*
406  * Prints a matching line according to the command line options.
407  */
408 void
409 printline(struct str *line, int sep, regmatch_t *matches, int m)
410 {
411         size_t a = 0;
412         int i, n = 0;
413
414         if (!hflag) {
415                 if (nullflag == 0)
416                         fputs(line->file, stdout);
417                 else {
418                         printf("%s", line->file);
419                         putchar(0);
420                 }
421                 ++n;
422         }
423         if (nflag) {
424                 if (n > 0)
425                         putchar(sep);
426                 printf("%d", line->line_no);
427                 ++n;
428         }
429         if (bflag) {
430                 if (n > 0)
431                         putchar(sep);
432                 printf("%lld", (long long)line->off);
433                 ++n;
434         }
435         if (n)
436                 putchar(sep);
437         /* --color and -o */
438         if ((oflag || color) && m > 0) {
439                 for (i = 0; i < m; i++) {
440                         if (!oflag)
441                                 fwrite(line->dat + a, matches[i].rm_so - a, 1,
442                                     stdout);
443                         if (color) 
444                                 fprintf(stdout, "\33[%sm\33[K", color);
445
446                                 fwrite(line->dat + matches[i].rm_so, 
447                                     matches[i].rm_eo - matches[i].rm_so, 1,
448                                     stdout);
449                         if (color) 
450                                 fprintf(stdout, "\33[m\33[K");
451                         a = matches[i].rm_eo;
452                         if (oflag)
453                                 putchar('\n');
454                 }
455                 if (!oflag) {
456                         if (line->len - a > 0)
457                                 fwrite(line->dat + a, line->len - a, 1, stdout);
458                         putchar('\n');
459                 }
460         } else {
461                 fwrite(line->dat, line->len, 1, stdout);
462                 putchar('\n');
463         }
464 }