2 * Copyright (C) 1984-2023 Mark Nudelman
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
7 * For more information, see the README file.
11 * Routines to do pattern matching.
/* Defined in another file; tested below when choosing case-insensitive compile flags. */
17 extern int is_caseless;
21 * Compile a search pattern, for future use by match_pattern.
/*
 * compile_pattern2: compile `pattern` with whichever regex library the
 * build selected and store the result through `comp_pattern`.
 *   pattern      - pattern text; strlen() is applied to it in two variants.
 *   search_type  - SRCH_* flag bits; only SRCH_NO_REGEX is tested here.
 *   comp_pattern - in/out slot for the compiled pattern.
 *   show_error   - copied to reg_show_error in the regcomp(pattern) variant.
 * NOTE(review): this listing omits lines (braces, preprocessor guards,
 * return statements), so return values and the guard around most variants
 * are not visible; the comments describe only the statements shown.
 */
23 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
/* Searches flagged SRCH_NO_REGEX are singled out first (action not shown). */
25 if (search_type & SRCH_NO_REGEX)
/* Variant using struct re_pattern_buffer: zeroed buffer, POSIX extended syntax. */
29 struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30 ecalloc(1, sizeof(struct re_pattern_buffer));
31 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
/* A non-NULL result from re_compile_pattern selects the failure path. */
32 if (re_compile_pattern(pattern, strlen(pattern), comp))
36 error("Invalid pattern", NULL_PARG);
/* Release the pattern previously held in the caller's slot. */
39 if (*comp_pattern != NULL)
41 regfree(*comp_pattern);
46 #if HAVE_POSIX_REGCOMP
/* POSIX variant: REG_ICASE is added to REGCOMP_FLAG when is_caseless is set. */
47 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
/* A nonzero regcomp result selects the failure path. */
48 if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
52 error("Invalid pattern", NULL_PARG);
/* Release the pattern previously held in the caller's slot. */
55 if (*comp_pattern != NULL)
57 regfree(*comp_pattern);
/* PCRE variant: the library hands back its message through errstring. */
63 constant char *errstring;
/* UTF-8 options are requested when utf_mode is set; caseless when is_caseless is set. */
66 pcre *comp = pcre_compile(pattern,
67 ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68 (is_caseless ? PCRE_CASELESS : 0),
69 &errstring, &erroffset, NULL);
/* The library's message becomes the PARG string (cast drops the qualifier). */
72 parg.p_string = (char *) errstring;
/* PCRE2 variant: explicit length; only the caseless option is visible here. */
83 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84 (is_caseless ? PCRE2_CASELESS : 0),
85 &errcode, &erroffset, NULL);
/* Translate errcode into text in the local msg buffer. */
91 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
/* re_comp variant: a non-NULL return is kept as the PARG string. */
101 if ((parg.p_string = re_comp(pattern)) != NULL)
/* regcmp variant: a NULL return is the failure case. */
111 if ((comp = regcmp(pattern, 0)) == NULL)
114 error("Invalid pattern", NULL_PARG);
/*
 * NOTE(review): this tests the parameter `comp_pattern` itself, whereas
 * every other variant tests `*comp_pattern` -- confirm which is intended.
 */
117 if (comp_pattern != NULL)
119 *comp_pattern = comp;
/* regcomp(pattern) variant: reg_show_error is set from show_error before compiling. */
123 reg_show_error = show_error;
124 comp = regcomp(pattern);
129 * regcomp has already printed an error message
/* Check for a previous pattern, then install the new one. */
134 if (*comp_pattern != NULL)
136 *comp_pattern = comp;
143 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
/*
 * compile_pattern: public entry point that hands the pattern to
 * compile_pattern2, either as given or as a lowercase copy.
 * Note the argument order differs from compile_pattern2: show_error comes
 * before comp_pattern here.
 * NOTE(review): lines are missing from this listing (declarations, else,
 * the body of the final if, the return); only visible statements are described.
 */
145 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
/*
 * Use the caller's pattern unchanged when caseless is not OPT_ONPLUS, or
 * when re_handles_caseless is set and this is a regex search.
 */
150 if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
151 cvt_pattern = pattern;
/* Otherwise (presumably the else branch) build a CVT_TO_LC copy sized by cvt_length(). */
154 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
157 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
/* True only when a converted copy was allocated above (cleanup line not shown). */
158 if (cvt_pattern != pattern)
164 * Forget that we have a compiled pattern.
/*
 * uncompile_pattern: acts on the compiled pattern held in *pattern, doing
 * nothing for a slot that is already NULL.
 * Each `if` below appears to belong to a different regex-library variant.
 * NOTE(review): only the PCRE2 release call and one preprocessor guard are
 * visible in this listing; the other release calls and any reset of
 * *pattern are not shown.
 */
166 public void uncompile_pattern(PATTERN_TYPE *pattern)
169 if (*pattern != NULL)
176 #if HAVE_POSIX_REGCOMP
177 if (*pattern != NULL)
185 if (*pattern != NULL)
/* PCRE2 variant: free the compiled code object. */
190 if (*pattern != NULL)
191 pcre2_code_free(*pattern);
198 if (*pattern != NULL)
203 if (*pattern != NULL)
211 * Can a pattern be successfully compiled?
/*
 * valid_pattern: trial-compile `pattern` into a local slot and then
 * uncompile it again.
 * NOTE(review): the declaration of `result` and the return statement are
 * not visible in this listing.
 */
213 public int valid_pattern(char *pattern)
215 PATTERN_TYPE comp_pattern;
/* Start from an empty slot so compile_pattern2 sees no previous pattern. */
218 SET_NULL_PATTERN(comp_pattern);
/* search_type 0 and show_error 0 are passed for this trial compile. */
219 result = compile_pattern2(pattern, 0, &comp_pattern, 0);
/* The compiled form is not kept. */
222 uncompile_pattern(&comp_pattern);
228 * Is a compiled pattern null?
/*
 * is_null_pattern: report whether `pattern` is the empty/unset value.
 * The repeated returns appear to be one per regex-library variant
 * (most guards are not visible in this listing); each yields nonzero
 * when the pattern compares equal to the null value.
 */
230 public int is_null_pattern(PATTERN_TYPE pattern)
233 return (pattern == NULL);
235 #if HAVE_POSIX_REGCOMP
236 return (pattern == NULL);
239 return (pattern == NULL);
242 return (pattern == NULL);
/* This variant compares against 0 rather than NULL. */
245 return (pattern == 0);
248 return (pattern == NULL);
251 return (pattern == NULL);
254 return (pattern == NULL);
258 * Simple pattern matching function.
259 * It supports no metacharacters like *, etc.
/*
 * match: literal substring search of `pattern` (pattern_len bytes) within
 * `buf` (buf_len bytes).
 *   sp, ep - pointers to the caller's start/end cursors; nsubs is also
 *            accepted. Their use is on lines not shown in this listing.
 * NOTE(review): declarations, the character comparison, the result
 * bookkeeping and the return are missing from this listing.
 */
261 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
/* One-past-the-end markers for both sequences. */
264 char *pattern_end = pattern + pattern_len;
265 char *buf_end = buf + buf_len;
/* Try each position in buf as a candidate start of the match. */
267 for ( ; buf < buf_end; buf++)
/* Walk pattern (pp) and text (lp) in step from the candidate start. */
269 for (pp = pattern, lp = buf; ; pp++, lp++)
/* With caseless == OPT_ONPLUS, an uppercase ASCII cp is lowered before comparing. */
273 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
274 cp = ASCII_TO_LOWER(cp);
/* The inner walk ends when either sequence is exhausted. */
277 if (pp == pattern_end || lp == buf_end)
/* The whole pattern was consumed at this start position. */
280 if (pp == pattern_end)
292 * Perform a pattern match with the previously compiled pattern.
293 * Set sp[0] and ep[0] to the start and end of the matched string.
294 * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
295 * Subpatterns are defined by parentheses in the regex language.
/*
 * match_pattern1: run one match of `line` (line_len bytes) against either
 * the literal text `tpattern` or the compiled `pattern`, writing match
 * start/end pointers through `sp`/`ep`.
 *   nsp         - used as nsp-1 to size the PCRE2 match data.
 *   notbol      - nonzero selects the library's "not beginning of line" flag.
 *   search_type - SRCH_* bits; SRCH_NO_REGEX and SRCH_NO_MATCH are tested.
 * NOTE(review): braces, most preprocessor guards, several declarations and
 * the return are missing from this listing; each group of statements below
 * appears to belong to a different regex-library variant.
 */
297 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
/* Literal matching is forced here (the controlling condition is not shown). */
302 search_type |= SRCH_NO_REGEX;
/* Literal search: match() receives the addresses of sp and ep. */
304 if (search_type & SRCH_NO_REGEX)
305 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
/* re_search variant: a non-negative result counts as a match. */
310 struct re_registers search_regs;
311 pattern->not_bol = notbol;
312 pattern->regs_allocated = REGS_UNALLOCATED;
313 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
/* Register 0 gives the bounds of the whole match. */
316 *sp++ = line + search_regs.start[0];
317 *ep++ = line + search_regs.end[0];
321 #if HAVE_POSIX_REGCOMP
/* POSIX variant: the match array has NUM_SEARCH_COLORS+2 entries. */
323 #define RM_COUNT (NUM_SEARCH_COLORS+2)
324 regmatch_t rm[RM_COUNT];
325 int flags = (notbol) ? REG_NOTBOL : 0;
/* REG_STARTEND is added and rm[0].rm_eo set to line_len (guard and rm_so line not shown). */
327 flags |= REG_STARTEND;
329 rm[0].rm_eo = line_len;
/* regexec returns 0 on a match, hence the negation. */
331 matched = !regexec(pattern, line, RM_COUNT, rm, flags);
/* Scan down from the last entry, testing for a set (rm_so >= 0) slot. */
336 for (ecount = RM_COUNT; ecount > 0; ecount--)
337 if (rm[ecount-1].rm_so >= 0)
/* Emit one sp/ep pair per entry below ecount. */
341 for (i = 0; i < ecount; i++)
/* Empty range at the start of the line (its condition is not shown). */
345 *sp++ = *ep++ = line;
349 *sp++ = line + rm[i].rm_so;
350 *ep++ = line + rm[i].rm_eo;
/* PCRE variant: the offset vector has 3*NUM_SEARCH_COLORS+3 ints. */
362 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
363 int ovector[OVECTOR_COUNT];
364 int flags = (notbol) ? PCRE_NOTBOL : 0;
367 int mcount = pcre_exec(pattern, NULL, line, line_len,
368 0, flags, ovector, OVECTOR_COUNT);
/* A positive return counts as a match. */
369 matched = (mcount > 0);
/* Never report more pairs than the library filled in. */
371 if (ecount > mcount) ecount = mcount;
/* ovector holds start/end offsets in consecutive pairs. */
372 for (i = 0; i < ecount*2; )
/* A negative offset yields an empty range at the start of the line. */
374 if (ovector[i] < 0 || ovector[i+1] < 0)
376 *sp++ = *ep++ = line;
380 *sp++ = line + ovector[i++];
381 *ep++ = line + ovector[i++];
/* PCRE2 variant: match data is created with nsp-1 pairs. */
388 int flags = (notbol) ? PCRE2_NOTBOL : 0;
389 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
390 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
392 matched = (mcount > 0);
395 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
398 if (ecount > mcount) ecount = mcount;
399 for (i = 0; i < ecount*2; )
/*
 * NOTE(review): ovector is PCRE2_SIZE * here; PCRE2 documents PCRE2_SIZE
 * as an unsigned type and reports unset groups as PCRE2_UNSET, so these
 * `< 0` tests look like they can never be true -- confirm and compare
 * against PCRE2_UNSET instead.
 */
401 if (ovector[i] < 0 || ovector[i+1] < 0)
403 *sp++ = *ep++ = line;
407 *sp++ = line + ovector[i++];
408 *ep++ = line + ovector[i++];
/* The match data created above is released here. */
412 pcre2_match_data_free(md);
/* re_exec variant: a return of exactly 1 counts as a match. */
416 matched = (re_exec(line) == 1);
418 * re_exec doesn't seem to provide a way to get the matched string.
/* regex() variant: the returned pointer is stored as the match end; NULL means no match. */
422 matched = ((*ep++ = regex(pattern, line)) != NULL);
/* regexec2 takes notbol; the plain regexec call below appears to be the alternative. */
428 matched = regexec2(pattern, line, notbol);
430 matched = regexec(pattern, line);
/* This variant reads the match bounds from the pattern's startp[0]/endp[0]. */
434 *sp++ = pattern->startp[0];
435 *ep++ = pattern->endp[0];
/* SRCH_NO_MATCH inverts the result: a non-matching line is reported as a hit. */
440 matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
441 ((search_type & SRCH_NO_MATCH) && !matched);
/*
 * match_pattern: public wrapper around match_pattern1 that then inspects
 * the sub-match slots 1..NUM_SEARCH_COLORS.
 * NOTE(review): the action taken inside the final `if` and the return are
 * not visible in this listing.
 */
445 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
447 int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
/* Slot 0 is skipped; the loop covers the sub-match slots only. */
449 for (i = 1; i <= NUM_SEARCH_COLORS; i++)
/* True when sub-search i is requested and its slot is empty (ep[i] == sp[i]). */
451 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
458 * Return the name of the pattern matching library.
/*
 * pattern_lib_name: returns a string literal naming the regex library.
 * NOTE(review): only one preprocessor guard and the "Spencer V8" return
 * are visible in this listing; the other returns are not shown.
 */
460 public char * pattern_lib_name(void)
465 #if HAVE_POSIX_REGCOMP
481 return ("Spencer V8");