]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/less/pattern.c
zfs: merge openzfs/zfs@d99134be8 (zfs-2.1-release) into stable/13
[FreeBSD/FreeBSD.git] / contrib / less / pattern.c
1 /*
2  * Copyright (C) 1984-2023  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9
10 /*
11  * Routines to do pattern matching.
12  */
13
14 #include "less.h"
15
16 extern int caseless;
17 extern int is_caseless;
18 extern int utf_mode;
19
20 /*
21  * Compile a search pattern, for future use by match_pattern.
22  */
23 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
24 {
25         if (search_type & SRCH_NO_REGEX)
26                 return (0);
27   {
28 #if HAVE_GNU_REGEX
29         struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
30                 ecalloc(1, sizeof(struct re_pattern_buffer));
31         re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
32         if (re_compile_pattern(pattern, strlen(pattern), comp))
33         {
34                 free(comp);
35                 if (show_error)
36                         error("Invalid pattern", NULL_PARG);
37                 return (-1);
38         }
39         if (*comp_pattern != NULL)
40         {
41                 regfree(*comp_pattern);
42                 free(*comp_pattern);
43         }
44         *comp_pattern = comp;
45 #endif
46 #if HAVE_POSIX_REGCOMP
47         regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
48         if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
49         {
50                 free(comp);
51                 if (show_error)
52                         error("Invalid pattern", NULL_PARG);
53                 return (-1);
54         }
55         if (*comp_pattern != NULL)
56         {
57                 regfree(*comp_pattern);
58                 free(*comp_pattern);
59         }
60         *comp_pattern = comp;
61 #endif
62 #if HAVE_PCRE
63         constant char *errstring;
64         int erroffset;
65         PARG parg;
66         pcre *comp = pcre_compile(pattern,
67                         ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
68                         (is_caseless ? PCRE_CASELESS : 0),
69                         &errstring, &erroffset, NULL);
70         if (comp == NULL)
71         {
72                 parg.p_string = (char *) errstring;
73                 if (show_error)
74                         error("%s", &parg);
75                 return (-1);
76         }
77         *comp_pattern = comp;
78 #endif
79 #if HAVE_PCRE2
80         int errcode;
81         PCRE2_SIZE erroffset;
82         PARG parg;
83         pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
84                         (is_caseless ? PCRE2_CASELESS : 0),
85                         &errcode, &erroffset, NULL);
86         if (comp == NULL)
87         {
88                 if (show_error)
89                 {
90                         char msg[160];
91                         pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
92                         parg.p_string = msg;
93                         error("%s", &parg);
94                 }
95                 return (-1);
96         }
97         *comp_pattern = comp;
98 #endif
99 #if HAVE_RE_COMP
100         PARG parg;
101         if ((parg.p_string = re_comp(pattern)) != NULL)
102         {
103                 if (show_error)
104                         error("%s", &parg);
105                 return (-1);
106         }
107         *comp_pattern = 1;
108 #endif
109 #if HAVE_REGCMP
110         char *comp;
111         if ((comp = regcmp(pattern, 0)) == NULL)
112         {
113                 if (show_error)
114                         error("Invalid pattern", NULL_PARG);
115                 return (-1);
116         }
117         if (comp_pattern != NULL)
118                 free(*comp_pattern);
119         *comp_pattern = comp;
120 #endif
121 #if HAVE_V8_REGCOMP
122         struct regexp *comp;
123         reg_show_error = show_error;
124         comp = regcomp(pattern);
125         reg_show_error = 1;
126         if (comp == NULL)
127         {
128                 /*
129                  * regcomp has already printed an error message 
130                  * via regerror().
131                  */
132                 return (-1);
133         }
134         if (*comp_pattern != NULL)
135                 free(*comp_pattern);
136         *comp_pattern = comp;
137 #endif
138   }
139         return (0);
140 }
141
142 /*
143  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
144  */
145 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
146 {
147         char *cvt_pattern;
148         int result;
149
150         if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
151                 cvt_pattern = pattern;
152         else
153         {
154                 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
155                 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
156         }
157         result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
158         if (cvt_pattern != pattern)
159                 free(cvt_pattern);
160         return (result);
161 }
162
163 /*
164  * Forget that we have a compiled pattern.
165  */
166 public void uncompile_pattern(PATTERN_TYPE *pattern)
167 {
168 #if HAVE_GNU_REGEX
169         if (*pattern != NULL)
170         {
171                 regfree(*pattern);
172                 free(*pattern);
173         }
174         *pattern = NULL;
175 #endif
176 #if HAVE_POSIX_REGCOMP
177         if (*pattern != NULL)
178         {
179                 regfree(*pattern);
180                 free(*pattern);
181         }
182         *pattern = NULL;
183 #endif
184 #if HAVE_PCRE
185         if (*pattern != NULL)
186                 pcre_free(*pattern);
187         *pattern = NULL;
188 #endif
189 #if HAVE_PCRE2
190         if (*pattern != NULL)
191                 pcre2_code_free(*pattern);
192         *pattern = NULL;
193 #endif
194 #if HAVE_RE_COMP
195         *pattern = 0;
196 #endif
197 #if HAVE_REGCMP
198         if (*pattern != NULL)
199                 free(*pattern);
200         *pattern = NULL;
201 #endif
202 #if HAVE_V8_REGCOMP
203         if (*pattern != NULL)
204                 free(*pattern);
205         *pattern = NULL;
206 #endif
207 }
208
209 #if 0
210 /*
211  * Can a pattern be successfully compiled?
212  */
213 public int valid_pattern(char *pattern)
214 {
215         PATTERN_TYPE comp_pattern;
216         int result;
217
218         SET_NULL_PATTERN(comp_pattern);
219         result = compile_pattern2(pattern, 0, &comp_pattern, 0);
220         if (result != 0)
221                 return (0);
222         uncompile_pattern(&comp_pattern);
223         return (1);
224 }
225 #endif
226
227 /*
228  * Is a compiled pattern null?
229  */
230 public int is_null_pattern(PATTERN_TYPE pattern)
231 {
232 #if HAVE_GNU_REGEX
233         return (pattern == NULL);
234 #endif
235 #if HAVE_POSIX_REGCOMP
236         return (pattern == NULL);
237 #endif
238 #if HAVE_PCRE
239         return (pattern == NULL);
240 #endif
241 #if HAVE_PCRE2
242         return (pattern == NULL);
243 #endif
244 #if HAVE_RE_COMP
245         return (pattern == 0);
246 #endif
247 #if HAVE_REGCMP
248         return (pattern == NULL);
249 #endif
250 #if HAVE_V8_REGCOMP
251         return (pattern == NULL);
252 #endif
253 #if NO_REGEX
254         return (pattern == NULL);
255 #endif
256 }
257 /*
258  * Simple pattern matching function.
259  * It supports no metacharacters like *, etc.
260  */
261 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
262 {
263         char *pp, *lp;
264         char *pattern_end = pattern + pattern_len;
265         char *buf_end = buf + buf_len;
266
267         for ( ;  buf < buf_end;  buf++)
268         {
269                 for (pp = pattern, lp = buf;  ;  pp++, lp++)
270                 {
271                         char cp = *pp;
272                         char cl = *lp;
273                         if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
274                                 cp = ASCII_TO_LOWER(cp);
275                         if (cp != cl)
276                                 break;
277                         if (pp == pattern_end || lp == buf_end)
278                                 break;
279                 }
280                 if (pp == pattern_end)
281                 {
282                         *(*sp)++ = buf;
283                         *(*ep)++ = lp;
284                         return (1);
285                 }
286         }
287         **sp = **ep = NULL;
288         return (0);
289 }
290
291 /*
292  * Perform a pattern match with the previously compiled pattern.
293  * Set sp[0] and ep[0] to the start and end of the matched string.
294  * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
295  * Subpatterns are defined by parentheses in the regex language.
296  */
297 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
298 {
299         int matched;
300
301 #if NO_REGEX
302         search_type |= SRCH_NO_REGEX;
303 #endif
304         if (search_type & SRCH_NO_REGEX)
305                 matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
306         else
307         {
308 #if HAVE_GNU_REGEX
309         {
310                 struct re_registers search_regs;
311                 pattern->not_bol = notbol;
312                 pattern->regs_allocated = REGS_UNALLOCATED;
313                 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
314                 if (matched)
315                 {
316                         *sp++ = line + search_regs.start[0];
317                         *ep++ = line + search_regs.end[0];
318                 }
319         }
320 #endif
321 #if HAVE_POSIX_REGCOMP
322         {
323                 #define RM_COUNT (NUM_SEARCH_COLORS+2)
324                 regmatch_t rm[RM_COUNT];
325                 int flags = (notbol) ? REG_NOTBOL : 0;
326 #ifdef REG_STARTEND
327                 flags |= REG_STARTEND;
328                 rm[0].rm_so = 0;
329                 rm[0].rm_eo = line_len;
330 #endif
331                 matched = !regexec(pattern, line, RM_COUNT, rm, flags);
332                 if (matched)
333                 {
334                         int i;
335                         int ecount;
336                         for (ecount = RM_COUNT;  ecount > 0;  ecount--)
337                                 if (rm[ecount-1].rm_so >= 0)
338                                         break;
339                         if (ecount >= nsp)
340                                 ecount = nsp-1;
341                         for (i = 0;  i < ecount;  i++)
342                         {
343                                 if (rm[i].rm_so < 0)
344                                 {
345                                         *sp++ = *ep++ = line;
346                                 } else
347                                 {
348 #ifndef __WATCOMC__
349                                         *sp++ = line + rm[i].rm_so;
350                                         *ep++ = line + rm[i].rm_eo;
351 #else
352                                         *sp++ = rm[i].rm_sp;
353                                         *ep++ = rm[i].rm_ep;
354 #endif
355                                 }
356                         }
357                 }
358         }
359 #endif
360 #if HAVE_PCRE
361         {
362                 #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
363                 int ovector[OVECTOR_COUNT];
364                 int flags = (notbol) ? PCRE_NOTBOL : 0;
365                 int i;
366                 int ecount;
367                 int mcount = pcre_exec(pattern, NULL, line, line_len,
368                         0, flags, ovector, OVECTOR_COUNT);
369                 matched = (mcount > 0);
370                 ecount = nsp-1;
371                 if (ecount > mcount) ecount = mcount;
372                 for (i = 0;  i < ecount*2; )
373                 {
374                         if (ovector[i] < 0 || ovector[i+1] < 0)
375                         {
376                                 *sp++ = *ep++ = line;
377                                 i += 2;
378                         } else
379                         {
380                                 *sp++ = line + ovector[i++];
381                                 *ep++ = line + ovector[i++];
382                         }
383                 }
384         }
385 #endif
386 #if HAVE_PCRE2
387         {
388                 int flags = (notbol) ? PCRE2_NOTBOL : 0;
389                 pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
390                 int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
391                         0, flags, md, NULL);
392                 matched = (mcount > 0);
393                 if (matched)
394                 {
395                         PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
396                         int i;
397                         int ecount = nsp-1;
398                         if (ecount > mcount) ecount = mcount;
399                         for (i = 0;  i < ecount*2; )
400                         {
401                                 if (ovector[i] < 0 || ovector[i+1] < 0)
402                                 {
403                                         *sp++ = *ep++ = line;
404                                         i += 2;
405                                 } else
406                                 {
407                                         *sp++ = line + ovector[i++];
408                                         *ep++ = line + ovector[i++];
409                                 }
410                         }
411                 }
412                 pcre2_match_data_free(md);
413         }
414 #endif
415 #if HAVE_RE_COMP
416         matched = (re_exec(line) == 1);
417         /*
418          * re_exec doesn't seem to provide a way to get the matched string.
419          */
420 #endif
421 #if HAVE_REGCMP
422         matched = ((*ep++ = regex(pattern, line)) != NULL);
423         if (matched)
424                 *sp++ = __loc1;
425 #endif
426 #if HAVE_V8_REGCOMP
427 #if HAVE_REGEXEC2
428         matched = regexec2(pattern, line, notbol);
429 #else
430         matched = regexec(pattern, line);
431 #endif
432         if (matched)
433         {
434                 *sp++ = pattern->startp[0];
435                 *ep++ = pattern->endp[0];
436         }
437 #endif
438         }
439         *sp = *ep = NULL;
440         matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
441                         ((search_type & SRCH_NO_MATCH) && !matched);
442         return (matched);
443 }
444
445 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
446 {
447         int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
448         int i;
449         for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
450         {
451                 if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
452                         matched = 0;
453         }
454         return matched;
455 }
456
457 /*
458  * Return the name of the pattern matching library.
459  */
460 public char * pattern_lib_name(void)
461 {
462 #if HAVE_GNU_REGEX
463         return ("GNU");
464 #else
465 #if HAVE_POSIX_REGCOMP
466         return ("POSIX");
467 #else
468 #if HAVE_PCRE2
469         return ("PCRE2");
470 #else
471 #if HAVE_PCRE
472         return ("PCRE");
473 #else
474 #if HAVE_RE_COMP
475         return ("BSD");
476 #else
477 #if HAVE_REGCMP
478         return ("V8");
479 #else
480 #if HAVE_V8_REGCOMP
481         return ("Spencer V8");
482 #else
483         return ("no");
484 #endif
485 #endif
486 #endif
487 #endif
488 #endif
489 #endif
490 #endif
491 }