1 /* search.c - searching subroutines using dfa, kwset and regex for grep.
2 Copyright 1992, 1998, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 /* Written August 1992 by Mike Haertel. */
24 # define _GNU_SOURCE 1
30 #include <sys/types.h>
31 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
32 /* We can handle multibyte strings. */
48 #ifdef HAVE_LANGINFO_CODESET
49 # include <langinfo.h>
52 #define NCHAR (UCHAR_MAX + 1)
54 /* For -w, we also consider _ to be word constituent. */
55 #define WCHAR(C) (ISALNUM(C) || (C) == '_')
57 /* DFA compiled regexp. */
58 static struct dfa dfa;
60 /* The Regex compiled patterns. */
61 static struct patterns
63 /* Regex compiled regexp. */
64 struct re_pattern_buffer regexbuf;
65 struct re_registers regs; /* This is here on account of a BRAIN-DEAD
66 Q@#%!# library interface in regex.c. */
69 struct patterns *patterns;
72 /* KWset compiled pattern. For Ecompile and Gcompile, we compile
73 a list of strings, at least one of which is known to occur in
74 any string matching the regexp. */
77 /* Number of compiled fixed strings known to exactly match the regexp.
78 If kwsexec returns < kwset_exact_matches, then we don't need to
79 call the regexp matcher at all. */
80 static int kwset_exact_matches;
82 /* UTF-8 encoding allows some optimizations that we can't otherwise
83 assume in a multibyte encoding. */
84 static int using_utf8;
86 static void kwsinit PARAMS ((void));
87 static void kwsmusts PARAMS ((void));
88 static void Gcompile PARAMS ((char const *, size_t));
89 static void Ecompile PARAMS ((char const *, size_t));
90 static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
91 static void Fcompile PARAMS ((char const *, size_t));
92 static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
93 static void Pcompile PARAMS ((char const *, size_t ));
94 static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
99 #ifdef HAVE_LANGINFO_CODESET
100 if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0)
106 dfaerror (char const *mesg)
114 static char trans[NCHAR];
118 for (i = 0; i < NCHAR; ++i)
119 trans[i] = TOLOWER (i);
121 if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
122 error (2, 0, _("memory exhausted"));
125 /* If the DFA turns out to have some set of fixed strings one of
126 which must occur in the match, then we build a kwset matcher
127 to find those strings, and thus quickly filter out impossible
132 struct dfamust const *dm;
138 /* First, we compile in the substrings known to be exact
139 matches. The kwset matcher will return the index
140 of the matching string that it chooses. */
141 for (dm = dfa.musts; dm; dm = dm->next)
145 ++kwset_exact_matches;
146 if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
149 /* Now, we compile the substrings that will require
150 the use of the regexp matcher. */
151 for (dm = dfa.musts; dm; dm = dm->next)
155 if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
158 if ((err = kwsprep (kwset)) != 0)
164 Gcompile (char const *pattern, size_t size)
169 char const *motif = pattern;
172 re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0));
173 dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
175 /* For GNU regex compiler we have to pass the patterns separately to detect
176 errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]"
177 GNU regex should have raised a syntax error. The same for backref, where
178 the backref should have been local to each pattern. */
182 sep = memchr (motif, '\n', total);
195 patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
196 if (patterns == NULL)
197 error (2, errno, _("memory exhausted"));
199 patterns[pcount] = patterns0;
201 if ((err = re_compile_pattern (motif, len,
202 &(patterns[pcount].regexbuf))) != 0)
207 } while (sep && total != 0);
209 /* In the match_words and match_lines cases, we use a different pattern
210 for the DFA matcher that will quickly throw out cases that won't work.
211 Then if DFA succeeds we do some hairy stuff using the regex matcher
212 to decide whether the match should really count. */
213 if (match_words || match_lines)
215 /* In the whole-word case, we use the pattern:
216 \(^\|[^[:alnum:]_]\)\(userpattern\)\([^[:alnum:]_]\|$\).
217 In the whole-line case, we use the pattern:
218 ^\(userpattern\)$. */
220 static char const line_beg[] = "^\\(";
221 static char const line_end[] = "\\)$";
222 static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
223 static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
224 char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
226 strcpy (n, match_lines ? line_beg : word_beg);
228 memcpy (n + i, pattern, size);
230 strcpy (n + i, match_lines ? line_end : word_end);
236 dfacomp (pattern, size, &dfa, 1);
241 Ecompile (char const *pattern, size_t size)
246 char const *motif = pattern;
249 if (strcmp (matcher, "awk") == 0)
251 re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0));
252 dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
256 re_set_syntax (RE_SYNTAX_POSIX_EGREP | (match_icase ? RE_ICASE : 0));
257 dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
260 /* For GNU regex compiler we have to pass the patterns separately to detect
261 errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]"
262 GNU regex should have raised a syntax error. The same for backref, where
263 the backref should have been local to each pattern. */
267 sep = memchr (motif, '\n', total);
280 patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
281 if (patterns == NULL)
282 error (2, errno, _("memory exhausted"));
283 patterns[pcount] = patterns0;
285 if ((err = re_compile_pattern (motif, len,
286 &(patterns[pcount].regexbuf))) != 0)
291 } while (sep && total != 0);
293 /* In the match_words and match_lines cases, we use a different pattern
294 for the DFA matcher that will quickly throw out cases that won't work.
295 Then if DFA succeeds we do some hairy stuff using the regex matcher
296 to decide whether the match should really count. */
297 if (match_words || match_lines)
299 /* In the whole-word case, we use the pattern:
300 (^|[^[:alnum:]_])(userpattern)([^[:alnum:]_]|$).
301 In the whole-line case, we use the pattern:
304 static char const line_beg[] = "^(";
305 static char const line_end[] = ")$";
306 static char const word_beg[] = "(^|[^[:alnum:]_])(";
307 static char const word_end[] = ")([^[:alnum:]_]|$)";
308 char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
310 strcpy (n, match_lines ? line_beg : word_beg);
312 memcpy (n + i, pattern, size);
314 strcpy (n + i, match_lines ? line_end : word_end);
320 dfacomp (pattern, size, &dfa, 1);
325 EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
327 register char const *buflim, *beg, *end;
329 int backref, start, len;
330 struct kwsmatch kwsm;
333 static int use_dfa_checked = 0;
335 const char *last_char = NULL;
336 int mb_cur_max = MB_CUR_MAX;
338 memset (&mbs, '\0', sizeof (mbstate_t));
339 #endif /* MBS_SUPPORT */
341 if (!use_dfa_checked)
343 char *grep_use_dfa = getenv ("GREP_USE_DFA");
347 /* Turn off DFA when processing multibyte input. */
348 use_dfa = (MB_CUR_MAX == 1);
351 #endif /* MBS_SUPPORT */
355 use_dfa = atoi (grep_use_dfa);
363 for (beg = end = buf; end < buflim; beg = end)
369 /* Find a possible match using the KWset matcher. */
371 size_t bytes_left = 0;
372 #endif /* MBS_SUPPORT */
375 /* kwsexec doesn't work with match_icase and multibyte input. */
376 if (match_icase && mb_cur_max > 1)
380 #endif /* MBS_SUPPORT */
381 offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
382 if (offset == (size_t) -1)
385 if (mb_cur_max > 1 && !using_utf8)
390 size_t mlen = mbrlen (beg, bytes_left, &mbs);
393 if (mlen == (size_t) -1 || mlen == 0)
395 /* Invalid sequence or NUL character: treat as single-byte. */
396 memset (&mbs, '\0', sizeof (mbstate_t));
402 if (mlen == (size_t) -2)
403 /* Offset points inside multibyte character:
412 #endif /* MBS_SUPPORT */
414 /* Narrow down to the line containing the candidate, and
415 run it through DFA. */
416 end = memchr(beg, eol, buflim - beg);
419 if (mb_cur_max > 1 && bytes_left)
421 #endif /* MBS_SUPPORT */
422 while (beg > buf && beg[-1] != eol)
426 !(match_icase && mb_cur_max > 1) &&
427 #endif /* MBS_SUPPORT */
428 (kwsm.index < kwset_exact_matches))
429 goto success_in_beg_and_end;
431 dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
436 /* No good fixed strings; start with DFA. */
438 size_t bytes_left = 0;
439 #endif /* MBS_SUPPORT */
442 offset = dfaexec (&dfa, beg, buflim - beg, &backref);
443 if (offset == (size_t) -1)
445 /* Narrow down to the line we've found. */
447 if (mb_cur_max > 1 && !using_utf8)
452 size_t mlen = mbrlen (beg, bytes_left, &mbs);
455 if (mlen == (size_t) -1 || mlen == 0)
457 /* Invalid sequence or NUL character: treat as single-byte. */
458 memset (&mbs, '\0', sizeof (mbstate_t));
464 if (mlen == (size_t) -2)
465 /* Offset points inside multibyte character:
474 #endif /* MBS_SUPPORT */
476 end = memchr (beg, eol, buflim - beg);
479 if (mb_cur_max > 1 && bytes_left)
481 #endif /* MBS_SUPPORT */
482 while (beg > buf && beg[-1] != eol)
485 /* Successful, no backreferences encountered! */
486 if (use_dfa && !backref)
487 goto success_in_beg_and_end;
492 /* If we've made it to this point, this means DFA has seen
493 a probable match, and we need to run it through Regex. */
494 for (i = 0; i < pcount; i++)
496 patterns[i].regexbuf.not_eol = 0;
497 if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
499 end - beg - 1, &(patterns[i].regs))))
501 len = patterns[i].regs.end[0] - start;
502 if (exact && !match_words)
503 goto success_in_start_and_len;
504 if ((!match_lines && !match_words)
505 || (match_lines && len == end - beg - 1))
506 goto success_in_beg_and_end;
507 /* If -w, check if the match aligns with word boundaries.
508 We do this iteratively because:
509 (a) the line may contain more than one occurrence of the
511 (b) Several alternatives in the pattern might be valid at a
512 given point, and we may need to consider a shorter one to
513 find a word boundary. */
530 /* Locate the start of the multibyte character
531 before the match position (== beg + start). */
534 /* UTF-8 is a special case: scan backwards
535 until we find a 7-bit character or a
539 && (unsigned char) *s >= 0x80
540 && (unsigned char) *s <= 0xbf)
545 /* Scan forwards to find the start of the
546 last complete character before the
548 size_t bytes_left = start - 1;
550 while (bytes_left > 0)
552 mr = mbrlen (s, bytes_left, &mbs);
553 if (mr == (size_t) -1 || mr == 0)
555 memset (&mbs, '\0', sizeof (mbs));
560 if (mr == (size_t) -2)
562 memset (&mbs, '\0', sizeof (mbs));
569 mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
570 if (mr == (size_t) -2 || mr == (size_t) -1 ||
573 memset (&mbs, '\0', sizeof (mbstate_t));
576 else if (!(iswalnum (pwc) || pwc == L'_')
577 && mr == beg + start - s)
581 #endif /* MBS_SUPPORT */
582 if (!WCHAR ((unsigned char) beg[start - 1]))
589 if (start + len == end - beg - 1)
599 mr = mbtowc (&nwc, beg + start + len,
600 end - beg - start - len - 1);
603 memset (&mbs, '\0', sizeof (mbstate_t));
606 else if (!iswalnum (nwc) && nwc != L'_')
610 #endif /* MBS_SUPPORT */
611 if (!WCHAR ((unsigned char) beg[start + len]))
618 /* Returns the whole line. */
619 goto success_in_beg_and_end;
621 /* Returns just this word match. */
622 goto success_in_start_and_len;
627 /* Try a shorter length anchored at the same place. */
629 patterns[i].regexbuf.not_eol = 1;
630 len = re_match (&(patterns[i].regexbuf), beg,
632 &(patterns[i].regs));
636 /* Try looking further on. */
637 if (start == end - beg - 1)
640 patterns[i].regexbuf.not_eol = 0;
641 start = re_search (&(patterns[i].regexbuf), beg,
643 start, end - beg - 1 - start,
644 &(patterns[i].regs));
645 len = patterns[i].regs.end[0] - start;
649 } /* for Regex patterns. */
650 } /* for (beg = end ..) */
655 success_in_beg_and_end:
660 success_in_start_and_len:
666 static int f_i_multibyte; /* whether we're using the new -Fi MB method */
670 size_t count, maxlen;
671 unsigned char *match;
676 Fcompile (char const *pattern, size_t size)
678 int mb_cur_max = MB_CUR_MAX;
679 char const *beg, *lim, *err;
683 /* Support -F -i for UTF-8 input. */
684 if (match_icase && mb_cur_max > 1)
687 wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t));
688 const char *patternend = pattern;
690 kwset_t fimb_kwset = NULL;
692 wchar_t *wcbeg, *wclim;
693 size_t allocated = 0;
695 memset (&mbs, '\0', sizeof (mbs));
696 # ifdef __GNU_LIBRARY__
697 wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs);
698 if (patternend != pattern + size)
699 wcsize = (size_t) -1;
702 char *patterncopy = xmalloc (size + 1);
704 memcpy (patterncopy, pattern, size);
705 patterncopy[size] = '\0';
706 patternend = patterncopy;
707 wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs);
708 if (patternend != patterncopy + size)
709 wcsize = (size_t) -1;
719 kwsfree (fimb_kwset);
720 free (Fimb.patterns);
721 Fimb.patterns = NULL;
725 if (!(fimb_kwset = kwsalloc (NULL)))
726 error (2, 0, _("memory exhausted"));
728 starts = xmalloc (mb_cur_max * 3);
735 if (Fimb.count >= allocated)
741 Fimb.patterns = xrealloc (Fimb.patterns,
742 sizeof (wchar_t *) * allocated);
744 Fimb.patterns[Fimb.count++] = wcbeg;
746 wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim)
747 *wclim = towlower (*wclim);
749 wclen = wclim - wcbeg;
750 if (wclen > Fimb.maxlen)
756 if ((err = kwsincr (fimb_kwset, "", 0)) != 0)
760 for (i = 0; i < (1 << wclen); i++)
765 for (j = 0; j < wclen; ++j)
767 wchar_t wc = wcbeg[j];
779 if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0)
782 if (wclim < wcpattern + wcsize)
786 while (wcbeg < wcpattern + wcsize);
790 Fimb.match = xmalloc (Fimb.count);
791 if ((err = kwsprep (kwset)) != 0)
796 #endif /* MBS_SUPPORT */
803 for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
805 if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
807 if (lim < pattern + size)
811 while (beg < pattern + size);
813 if ((err = kwsprep (kwset)) != 0)
819 Fimbexec (const char *buf, size_t size, size_t *plen, int exact)
821 size_t len, letter, i;
827 assert (match_icase && f_i_multibyte == 1);
828 assert (MB_CUR_MAX > 1);
830 memset (&mbs, '\0', sizeof (mbs));
831 memset (Fimb.match, '\1', Fimb.count);
834 while (patterns_left && len <= size)
841 c = mbrtowc (&wc, buf + len, size - len, &mbs);
853 for (i = 0; i < Fimb.count; i++)
857 if (Fimb.patterns[i][letter] == L'\0')
861 if (!exact && !match_words)
865 /* For -w or exact look for longest match. */
867 Fimb.match[i] = '\0';
872 if (Fimb.patterns[i][letter] == wc)
875 Fimb.match[i] = '\0';
885 #endif /* MBS_SUPPORT */
888 Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
890 register char const *beg, *try, *end;
893 struct kwsmatch kwsmatch;
896 int mb_cur_max = MB_CUR_MAX;
898 memset (&mbs, '\0', sizeof (mbstate_t));
899 const char *last_char = NULL;
900 #endif /* MBS_SUPPORT */
902 for (beg = buf; beg <= buf + size; ++beg)
905 offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
907 if (offset == (size_t) -1)
910 if (mb_cur_max > 1 && !using_utf8)
912 size_t bytes_left = offset;
915 size_t mlen = mbrlen (beg, bytes_left, &mbs);
918 if (mlen == (size_t) -1 || mlen == 0)
920 /* Invalid sequence or NUL character: treat as single-byte. */
921 memset (&mbs, '\0', sizeof (mbstate_t));
927 if (mlen == (size_t) -2)
928 /* Offset points inside multibyte character: no good. */
939 #endif /* MBS_SUPPORT */
942 /* For f_i_multibyte, the string at beg now matches first 3 chars of
943 one of the search strings (less if there are shorter search strings).
944 See if this is a real match. */
946 && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], exact))
948 #endif /* MBS_SUPPORT */
949 len = kwsmatch.size[0];
950 if (exact && !match_words)
951 goto success_in_beg_and_len;
954 if (beg > buf && beg[-1] != eol)
956 if (beg + len < buf + size && beg[len] != eol)
960 else if (match_words)
978 && (unsigned char) *s >= 0x80
979 && (unsigned char) *s <= 0xbf)
984 mr = mbtowc (&pwc, s, beg - s);
986 memset (&mbs, '\0', sizeof (mbstate_t));
987 else if ((iswalnum (pwc) || pwc == L'_')
988 && mr == (int) (beg - s))
992 #endif /* MBS_SUPPORT */
993 if (WCHAR ((unsigned char) beg[-1]))
1002 mr = mbtowc (&nwc, beg + len, buf + size - beg - len);
1005 memset (&mbs, '\0', sizeof (mbstate_t));
1008 else if (!iswalnum (nwc) && nwc != L'_')
1012 #endif /* MBS_SUPPORT */
1013 if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len]))
1018 /* Returns the whole line now we know there's a word match. */
1021 /* Returns just this word match. */
1022 goto success_in_beg_and_len;
1026 /* Try a shorter length anchored at the same place. */
1028 offset = kwsexec (kwset, beg, len, &kwsmatch);
1031 goto next_char; /* Try a different anchor. */
1033 if (mb_cur_max > 1 && !using_utf8)
1035 size_t bytes_left = offset;
1038 size_t mlen = mbrlen (beg, bytes_left, &mbs);
1041 if (mlen == (size_t) -1 || mlen == 0)
1043 /* Invalid sequence or NUL character: treat as single-byte. */
1044 memset (&mbs, '\0', sizeof (mbstate_t));
1050 if (mlen == (size_t) -2)
1052 /* Offset points inside multibyte character:
1063 memset (&mbs, '\0', sizeof (mbstate_t));
1064 goto next_char; /* Try a different anchor. */
1068 #endif /* MBS_SUPPORT */
1071 /* The string at beg now matches first 3 chars of one of
1072 the search strings (less if there are shorter search
1073 strings). See if this is a real match. */
1075 && Fimbexec (beg, len - offset, &kwsmatch.size[0],
1078 #endif /* MBS_SUPPORT */
1079 len = kwsmatch.size[0];
1087 /* Advance to next character. For MB_CUR_MAX == 1 case this is handled
1093 unsigned char c = *beg;
1110 size_t l = mbrlen (beg, buf + size - beg, &mbs);
1116 memset (&mbs, '\0', sizeof (mbstate_t));
1119 #endif /* MBS_SUPPORT */
1127 if (mb_cur_max > 1 && !using_utf8)
1130 while (end < buf + size)
1132 size_t mlen = mbrlen (end, buf + size - end, &mbs);
1133 if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0)
1135 memset (&mbs, '\0', sizeof (mbstate_t));
1138 if (mlen == 1 && *end == eol)
1145 #endif /* MBS_SUPPORT */
1146 end = memchr (beg + len, eol, (buf + size) - (beg + len));
1149 while (buf < beg && beg[-1] != eol)
1154 success_in_beg_and_len:
1160 /* Compiled internal form of a Perl regular expression. */
1163 /* Additional information about the pattern. */
1164 static pcre_extra *extra;
1168 Pcompile (char const *pattern, size_t size)
1171 error (2, 0, _("The -P option is not supported"));
1175 char *re = xmalloc (4 * size + 7);
1176 int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
1177 char const *patlim = pattern + size;
1182 /* FIXME: Remove this restriction. */
1183 if (eolbyte != '\n')
1184 error (2, 0, _("The -P and -z options cannot be combined"));
1193 /* The PCRE interface doesn't allow NUL bytes in the pattern, so
1194 replace each NUL byte in the pattern with the four characters
1195 "\000", removing a preceding backslash if there are an odd
1196 number of backslashes before the NUL.
1198 FIXME: This method does not work with some multibyte character
1199 encodings, notably Shift-JIS, where a multibyte character can end
1200 in a backslash byte. */
1201 for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
1203 memcpy (n, p, pnul - p);
1205 for (p = pnul; pattern < p && p[-1] == '\\'; p--)
1207 n -= (pnul - p) & 1;
1208 strcpy (n, "\\000");
1212 memcpy (n, p, patlim - p);
1220 cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
1224 extra = pcre_study (cre, 0, &ep);
1233 Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
1239 /* This array must have at least two elements; everything after that
1240 is just for performance improvement in pcre_exec. */
1243 int e = pcre_exec (cre, extra, buf, size, 0, 0,
1244 sub, sizeof sub / sizeof *sub);
1250 case PCRE_ERROR_NOMATCH:
1253 case PCRE_ERROR_NOMEMORY:
1254 error (2, 0, _("Memory exhausted"));
1262 /* Narrow down to the line we've found. */
1263 char const *beg = buf + sub[0];
1264 char const *end = buf + sub[1];
1265 char const *buflim = buf + size;
1269 end = memchr (end, eol, buflim - end);
1271 while (buf < beg && beg[-1] != eol)
1275 *match_size = end - beg;
1281 struct matcher const matchers[] = {
1282 { "default", Gcompile, EGexecute },
1283 { "grep", Gcompile, EGexecute },
1284 { "egrep", Ecompile, EGexecute },
1285 { "awk", Ecompile, EGexecute },
1286 { "fgrep", Fcompile, Fexecute },
1287 { "perl", Pcompile, Pexecute },