1 /* search.c - searching subroutines using dfa, kwset and regex for grep.
2 Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 /* Written August 1992 by Mike Haertel. */
24 # define _GNU_SOURCE 1
30 #include <sys/types.h>
31 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
32 /* We can handle multibyte strings. */
48 #ifdef HAVE_LANGINFO_CODESET
49 # include <langinfo.h>
52 #define NCHAR (UCHAR_MAX + 1)
54 /* For -w, we also consider _ to be word constituent. */
55 #define WCHAR(C) (ISALNUM(C) || (C) == '_')
57 /* DFA compiled regexp. */
58 static struct dfa dfa;
60 /* The Regex compiled patterns. */
61 static struct patterns
63 /* Regex compiled regexp. */
64 struct re_pattern_buffer regexbuf;
65 struct re_registers regs; /* This is here on account of a BRAIN-DEAD
66 Q@#%!# library interface in regex.c. */
69 struct patterns *patterns;
72 /* KWset compiled pattern. For Ecompile and Gcompile, we compile
73 a list of strings, at least one of which is known to occur in
74 any string matching the regexp. */
77 /* Number of compiled fixed strings known to exactly match the regexp.
78 If kwsexec returns < kwset_exact_matches, then we don't need to
79 call the regexp matcher at all. */
80 static int kwset_exact_matches;
82 /* UTF-8 encoding allows some optimizations that we can't otherwise
83 assume in a multibyte encoding. */
84 static int using_utf8;
/* Forward declarations for this file's static helpers (kwsinit, kwsmusts)
   and for the per-matcher compile/execute routines.  Each *compile routine
   takes a pattern and its size in bytes; each *execute routine takes a
   buffer, its size, a pointer through which the match size is stored, and
   an "exact" flag, and returns a size_t.
   NOTE(review): PARAMS is defined outside this view -- presumably it hides
   the prototype from pre-ANSI compilers; confirm before changing.  */
86 static void kwsinit PARAMS ((void));
87 static void kwsmusts PARAMS ((void));
88 static void Gcompile PARAMS ((char const *, size_t));
89 static void Ecompile PARAMS ((char const *, size_t));
90 static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
91 static void Fcompile PARAMS ((char const *, size_t));
92 static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
93 static void Pcompile PARAMS ((char const *, size_t ));
94 static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
99 #ifdef HAVE_LANGINFO_CODESET
100 if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
106 dfaerror (char const *mesg)
114 static char trans[NCHAR];
118 for (i = 0; i < NCHAR; ++i)
119 trans[i] = TOLOWER (i);
121 if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
122 error (2, 0, _("memory exhausted"));
125 /* If the DFA turns out to have some set of fixed strings one of
126 which must occur in the match, then we build a kwset matcher
127 to find those strings, and thus quickly filter out impossible
132 struct dfamust const *dm;
138 /* First, we compile in the substrings known to be exact
139 matches. The kwset matcher will return the index
140 of the matching string that it chooses. */
141 for (dm = dfa.musts; dm; dm = dm->next)
145 ++kwset_exact_matches;
146 if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
149 /* Now, we compile the substrings that will require
150 the use of the regexp matcher. */
151 for (dm = dfa.musts; dm; dm = dm->next)
155 if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
158 if ((err = kwsprep (kwset)) != 0)
164 Gcompile (char const *pattern, size_t size)
169 char const *motif = pattern;
172 re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
173 dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
175 /* For GNU regex compiler we have to pass the patterns separately to detect
176 errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]";
177 GNU regex should have raised a syntax error. The same for backref, where
178 the backref should have been local to each pattern. */
182 sep = memchr (motif, '\n', total);
195 patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
196 if (patterns == NULL)
197 error (2, errno, _("memory exhausted"));
199 patterns[pcount] = patterns0;
201 if ((err = re_compile_pattern (motif, len,
202 &(patterns[pcount].regexbuf))) != 0)
207 } while (sep && total != 0);
209 /* In the match_words and match_lines cases, we use a different pattern
210 for the DFA matcher that will quickly throw out cases that won't work.
211 Then if DFA succeeds we do some hairy stuff using the regex matcher
212 to decide whether the match should really count. */
213 if (match_words || match_lines)
215 /* In the whole-word case, we use the pattern:
216 \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]\|$\).
217 In the whole-line case, we use the pattern:
218 ^\(userpattern\)$. */
220 static char const line_beg[] = "^\\(";
221 static char const line_end[] = "\\)$";
222 static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
223 static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
224 char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
226 strcpy (n, match_lines ? line_beg : word_beg);
228 memcpy (n + i, pattern, size);
230 strcpy (n + i, match_lines ? line_end : word_end);
236 dfacomp (pattern, size, &dfa, 1);
241 Ecompile (char const *pattern, size_t size)
246 char const *motif = pattern;
249 if (strcmp (matcher, "awk") == 0)
251 re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
252 dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
256 re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
257 dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
260 /* For GNU regex compiler we have to pass the patterns separately to detect
261 errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]";
262 GNU regex should have raised a syntax error. The same for backref, where
263 the backref should have been local to each pattern. */
267 sep = memchr (motif, '\n', total);
280 patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
281 if (patterns == NULL)
282 error (2, errno, _("memory exhausted"));
283 patterns[pcount] = patterns0;
285 if ((err = re_compile_pattern (motif, len,
286 &(patterns[pcount].regexbuf))) != 0)
291 } while (sep && total != 0);
293 /* In the match_words and match_lines cases, we use a different pattern
294 for the DFA matcher that will quickly throw out cases that won't work.
295 Then if DFA succeeds we do some hairy stuff using the regex matcher
296 to decide whether the match should really count. */
297 if (match_words || match_lines)
299 /* In the whole-word case, we use the pattern:
300 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
301 In the whole-line case, we use the pattern:
304 static char const line_beg[] = "^(";
305 static char const line_end[] = ")$";
306 static char const word_beg[] = "(^|[^[:alnum:]_])(";
307 static char const word_end[] = ")([^[:alnum:]_]|$)";
308 char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
310 strcpy (n, match_lines ? line_beg : word_beg);
312 memcpy (n + i, pattern, size);
314 strcpy (n + i, match_lines ? line_end : word_end);
320 dfacomp (pattern, size, &dfa, 1);
325 EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
327 register char const *buflim, *beg, *end;
330 ptrdiff_t start, len;
331 struct kwsmatch kwsm;
334 static int use_dfa_checked = 0;
336 const char *last_char = NULL;
337 int mb_cur_max = MB_CUR_MAX;
339 memset (&mbs, '\0', sizeof (mbstate_t));
340 #endif /* MBS_SUPPORT */
342 if (!use_dfa_checked)
344 char *grep_use_dfa = getenv ("GREP_USE_DFA");
348 /* Turn off DFA when processing multibyte input. */
349 use_dfa = (MB_CUR_MAX == 1);
352 #endif /* MBS_SUPPORT */
356 use_dfa = atoi (grep_use_dfa);
364 for (beg = end = buf; end < buflim; beg = end)
370 /* Find a possible match using the KWset matcher. */
372 size_t bytes_left = 0;
373 #endif /* MBS_SUPPORT */
376 /* kwsexec doesn't work with match_icase and multibyte input. */
377 if (match_icase && mb_cur_max > 1)
381 #endif /* MBS_SUPPORT */
382 offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
383 if (offset == (size_t) -1)
386 if (mb_cur_max > 1 && !using_utf8)
391 size_t mlen = mbrlen (beg, bytes_left, &mbs);
394 if (mlen == (size_t) -1 || mlen == 0)
396 /* Incomplete character: treat as single-byte. */
397 memset (&mbs, '\0', sizeof (mbstate_t));
403 if (mlen == (size_t) -2)
404 /* Offset points inside multibyte character:
413 #endif /* MBS_SUPPORT */
415 /* Narrow down to the line containing the candidate, and
416 run it through DFA. */
417 end = memchr(beg, eol, buflim - beg);
420 if (mb_cur_max > 1 && bytes_left)
422 #endif /* MBS_SUPPORT */
423 while (beg > buf && beg[-1] != eol)
427 !(match_icase && mb_cur_max > 1) &&
428 #endif /* MBS_SUPPORT */
429 (kwsm.index < kwset_exact_matches))
430 goto success_in_beg_and_end;
432 dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
437 /* No good fixed strings; start with DFA. */
439 size_t bytes_left = 0;
440 #endif /* MBS_SUPPORT */
443 offset = dfaexec (&dfa, beg, buflim - beg, &backref);
444 if (offset == (size_t) -1)
446 /* Narrow down to the line we've found. */
448 if (mb_cur_max > 1 && !using_utf8)
453 size_t mlen = mbrlen (beg, bytes_left, &mbs);
456 if (mlen == (size_t) -1 || mlen == 0)
458 /* Incomplete character: treat as single-byte. */
459 memset (&mbs, '\0', sizeof (mbstate_t));
465 if (mlen == (size_t) -2)
466 /* Offset points inside multibyte character:
475 #endif /* MBS_SUPPORT */
477 end = memchr (beg, eol, buflim - beg);
480 if (mb_cur_max > 1 && bytes_left)
482 #endif /* MBS_SUPPORT */
483 while (beg > buf && beg[-1] != eol)
486 /* Successful, no backreferences encountered! */
487 if (use_dfa && !backref)
488 goto success_in_beg_and_end;
493 /* If we've made it to this point, this means DFA has seen
494 a probable match, and we need to run it through Regex. */
495 for (i = 0; i < pcount; i++)
497 patterns[i].regexbuf.not_eol = 0;
498 if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
500 end - beg - 1, &(patterns[i].regs))))
502 len = patterns[i].regs.end[0] - start;
503 if (exact && !match_words)
504 goto success_in_start_and_len;
505 if ((!match_lines && !match_words)
506 || (match_lines && len == end - beg - 1))
507 goto success_in_beg_and_end;
508 /* If -w, check if the match aligns with word boundaries.
509 We do this iteratively because:
510 (a) the line may contain more than one occurrence of the
512 (b) Several alternatives in the pattern might be valid at a
513 given point, and we may need to consider a shorter one to
514 find a word boundary. */
531 /* Locate the start of the multibyte character
532 before the match position (== beg + start). */
535 /* UTF-8 is a special case: scan backwards
536 until we find a 7-bit character or a
540 && (unsigned char) *s >= 0x80
541 && (unsigned char) *s <= 0xbf)
546 /* Scan forwards to find the start of the
547 last complete character before the
549 size_t bytes_left = start - 1;
551 while (bytes_left > 0)
553 mr = mbrlen (s, bytes_left, &mbs);
554 if (mr == (size_t) -1 || mr == 0)
556 memset (&mbs, '\0', sizeof (mbs));
561 if (mr == (size_t) -2)
563 memset (&mbs, '\0', sizeof (mbs));
570 mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
571 if (mr == (size_t) -2 || mr == (size_t) -1 ||
574 memset (&mbs, '\0', sizeof (mbstate_t));
577 else if (!(iswalnum (pwc) || pwc == L'_')
578 && mr == beg + start - s)
582 #endif /* MBS_SUPPORT */
583 if (!WCHAR ((unsigned char) beg[start - 1]))
590 if (start + len == end - beg - 1)
600 mr = mbtowc (&nwc, beg + start + len,
601 end - beg - start - len - 1);
604 memset (&mbs, '\0', sizeof (mbstate_t));
607 else if (!iswalnum (nwc) && nwc != L'_')
611 #endif /* MBS_SUPPORT */
612 if (!WCHAR ((unsigned char) beg[start + len]))
619 /* Returns the whole line. */
620 goto success_in_beg_and_end;
622 /* Returns just this word match. */
623 goto success_in_start_and_len;
628 /* Try a shorter length anchored at the same place. */
630 patterns[i].regexbuf.not_eol = 1;
631 len = re_match (&(patterns[i].regexbuf), beg,
633 &(patterns[i].regs));
637 /* Try looking further on. */
638 if (start == end - beg - 1)
641 patterns[i].regexbuf.not_eol = 0;
642 start = re_search (&(patterns[i].regexbuf), beg,
644 start, end - beg - 1 - start,
645 &(patterns[i].regs));
646 len = patterns[i].regs.end[0] - start;
650 } /* for Regex patterns. */
651 } /* for (beg = end ..) */
656 success_in_beg_and_end:
661 success_in_start_and_len:
667 static int f_i_multibyte; /* whether we're using the new -Fi MB method */
671 size_t count, maxlen;
672 unsigned char *match;
677 Fcompile (char const *pattern, size_t size)
679 int mb_cur_max = MB_CUR_MAX;
680 char const *beg, *lim, *err;
684 /* Support -F -i for UTF-8 input. */
685 if (match_icase && mb_cur_max > 1)
688 wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
689 const char *patternend = pattern;
691 kwset_t fimb_kwset = NULL;
693 wchar_t *wcbeg, *wclim;
694 size_t allocated = 0;
696 memset (&mbs, '\0', sizeof (mbs));
697 # ifdef __GNU_LIBRARY__
698 wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
699 if (patternend != pattern + size)
700 wcsize = (size_t) -1;
703 char *patterncopy = xmalloc (size + 1);
705 memcpy (patterncopy, pattern, size);
706 patterncopy[size] = '\0';
707 patternend = patterncopy;
708 wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
709 if (patternend != patterncopy + size)
710 wcsize = (size_t) -1;
720 kwsfree (fimb_kwset);
721 free (Fimb.patterns);
722 Fimb.patterns = NULL;
726 if (!(fimb_kwset = kwsalloc (NULL)))
727 error (2, 0, _("memory exhausted"));
729 starts = xmalloc (mb_cur_max * 3);
736 if (Fimb.count >= allocated)
742 Fimb.patterns = xrealloc (Fimb.patterns,
743 sizeof (wchar_t *) * allocated);
745 Fimb.patterns[Fimb.count++] = wcbeg;
747 wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
748 *wclim = towlower (*wclim);
750 wclen = wclim - wcbeg;
751 if (wclen > Fimb.maxlen)
757 if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
761 for (i = 0; i < (1 << wclen); i++)
766 for (j = 0; j < wclen; ++j)
768 wchar_t wc = wcbeg[j];
780 if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
783 if (wclim < wcpattern + wcsize)
787 while (wcbeg < wcpattern + wcsize);
791 Fimb.match = xmalloc (Fimb.count);
792 if ((err = kwsprep (kwset)) != 0)
797 #endif /* MBS_SUPPORT */
804 for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
806 if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
808 if (lim < pattern + size)
812 while (beg < pattern + size);
814 if ((err = kwsprep (kwset)) != 0)
820 Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
822 size_t len, letter, i;
828 assert (match_icase && f_i_multibyte == 1);
829 assert (MB_CUR_MAX > 1);
831 memset (&mbs, '\0', sizeof (mbs));
832 memset (Fimb.match, '\1', Fimb.count);
835 while (patterns_left && len <= size)
842 c = mbrtowc (&wc, buf + len, size - len, &mbs);
854 for (i = 0; i < Fimb.count; i++)
858 if (Fimb.patterns[i][letter] == L'\0')
862 if (!exact && !match_words)
866 /* For -w or exact look for longest match. */
868 Fimb.match[i] = '\0';
873 if (Fimb.patterns[i][letter] == wc)
876 Fimb.match[i] = '\0';
886 #endif /* MBS_SUPPORT */
889 Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
891 register char const *beg, *try, *end;
894 struct kwsmatch kwsmatch;
897 int mb_cur_max = MB_CUR_MAX;
899 memset (&mbs, '\0', sizeof (mbstate_t));
900 const char *last_char = NULL;
901 #endif /* MBS_SUPPORT */
903 for (beg = buf; beg <= buf + size; ++beg)
906 offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
908 if (offset == (size_t) -1)
911 if (mb_cur_max > 1 && !using_utf8)
913 size_t bytes_left = offset;
916 size_t mlen = mbrlen (beg, bytes_left, &mbs);
919 if (mlen == (size_t) -1 || mlen == 0)
921 /* Incomplete character: treat as single-byte. */
922 memset (&mbs, '\0', sizeof (mbstate_t));
928 if (mlen == (size_t) -2)
929 /* Offset points inside multibyte character: no good. */
940 #endif /* MBS_SUPPORT */
943 /* For f_i_multibyte, the string at beg now matches first 3 chars of
944 one of the search strings (less if there are shorter search strings).
945 See if this is a real match. */
947 && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
949 #endif /* MBS_SUPPORT */
950 len = kwsmatch.size[0];
951 if (exact && !match_words)
952 goto success_in_beg_and_len;
955 if (beg > buf && beg[-1] != eol)
957 if (beg + len < buf + size && beg[len] != eol)
961 else if (match_words)
979 && (unsigned char) *s >= 0x80
980 && (unsigned char) *s <= 0xbf)
985 mr = mbtowc (&pwc, s, beg - s);
987 memset (&mbs, '\0', sizeof (mbstate_t));
988 else if ((iswalnum (pwc) || pwc == L'_')
989 && mr == (int) (beg - s))
993 #endif /* MBS_SUPPORT */
994 if (WCHAR ((unsigned char) beg[-1]))
1003 mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
1006 memset (&mbs, '\0', sizeof (mbstate_t));
1009 else if (!iswalnum (nwc) && nwc != L'_')
1013 #endif /* MBS_SUPPORT */
1014 if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
1019 /* Returns the whole line now we know there's a word match. */
1022 /* Returns just this word match. */
1023 goto success_in_beg_and_len;
1027 /* Try a shorter length anchored at the same place. */
1029 offset = kwsexec (kwset, beg, len, &kwsmatch);
1032 goto next_char; /* Try a different anchor. */
1034 if (mb_cur_max > 1 && !using_utf8)
1036 size_t bytes_left = offset;
1039 size_t mlen = mbrlen (beg, bytes_left, &mbs);
1042 if (mlen == (size_t) -1 || mlen == 0)
1044 /* Incomplete character: treat as single-byte. */
1045 memset (&mbs, '\0', sizeof (mbstate_t));
1051 if (mlen == (size_t) -2)
1053 /* Offset points inside multibyte character:
1064 memset (&mbs, '\0', sizeof (mbstate_t));
1065 goto next_char; /* Try a different anchor. */
1069 #endif /* MBS_SUPPORT */
1072 /* The string at beg now matches first 3 chars of one of
1073 the search strings (less if there are shorter search
1074 strings). See if this is a real match. */
1076 && Fimbexec (beg, len - offset, &kwsmatch.size[0],
1079 #endif /* MBS_SUPPORT */
1080 len = kwsmatch.size[0];
1088 /* Advance to next character. For MB_CUR_MAX == 1 case this is handled
1094 unsigned char c = *beg;
1111 size_t l = mbrlen (beg, buf + size - beg, &mbs);
1117 memset (&mbs, '\0', sizeof (mbstate_t));
1120 #endif /* MBS_SUPPORT */
1128 if (mb_cur_max > 1 && !using_utf8)
1131 while (end < buf + size)
1133 size_t mlen = mbrlen (end, buf + size - end, &mbs);
1134 if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
1136 memset (&mbs, '\0', sizeof (mbstate_t));
1139 if (mlen == 1 && *end == eol)
1146 #endif /* MBS_SUPPORT */
1147 end = memchr (beg + len, eol, (buf + size) - (beg + len));
1150 while (buf < beg && beg[-1] != eol)
1155 success_in_beg_and_len:
1161 /* Compiled internal form of a Perl regular expression. */
1164 /* Additional information about the pattern. */
1165 static pcre_extra *extra;
1169 Pcompile (char const *pattern, size_t size)
1172 error (2, 0, _("The -P option is not supported"));
1176 char *re = xmalloc (4 * size + 7);
1177 int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
1178 char const *patlim = pattern + size;
1183 /* FIXME: Remove this restriction. */
1184 if (eolbyte != '\n')
1185 error (2, 0, _("The -P and -z options cannot be combined"));
1194 /* The PCRE interface doesn't allow NUL bytes in the pattern, so
1195 replace each NUL byte in the pattern with the four characters
1196 "\000", removing a preceding backslash if there are an odd
1197 number of backslashes before the NUL.
1199 FIXME: This method does not work with some multibyte character
1200 encodings, notably Shift-JIS, where a multibyte character can end
1201 in a backslash byte. */
1202 for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
1204 memcpy (n, p, pnul - p);
1206 for (p = pnul; pattern < p && p[-1] == '\\'; p--)
1208 n -= (pnul - p) & 1;
1209 strcpy (n, "\\000");
1213 memcpy (n, p, patlim - p);
1221 cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
1225 extra = pcre_study (cre, 0, &ep);
1234 Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
1240 /* This array must have at least two elements; everything after that
1241 is just for performance improvement in pcre_exec. */
1244 int e = pcre_exec (cre, extra, buf, size, 0, 0,
1245 sub, sizeof sub / sizeof *sub);
1251 case PCRE_ERROR_NOMATCH:
1254 case PCRE_ERROR_NOMEMORY:
1255 error (2, 0, _("Memory exhausted"));
1263 /* Narrow down to the line we've found. */
1264 char const *beg = buf + sub[0];
1265 char const *end = buf + sub[1];
1266 char const *buflim = buf + size;
1270 end = memchr (end, eol, buflim - end);
1272 while (buf < beg && beg[-1] != eol)
1276 *match_size = end - beg;
1282 struct matcher const matchers[] = {
1283 { "default", Gcompile, EGexecute },
1284 { "grep", Gcompile, EGexecute },
1285 { "egrep", Ecompile, EGexecute },
1286 { "awk", Ecompile, EGexecute },
1287 { "fgrep", Fcompile, Fexecute },
1288 { "perl", Pcompile, Pexecute },