1 /* $NetBSD: t_regex_att.c,v 1.1 2012/08/24 20:24:40 jmmv Exp $ */
4 * Copyright (c) 2011 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
39 #include <sys/cdefs.h>
40 __RCSID("$NetBSD: t_regex_att.c,v 1.1 2012/08/24 20:24:40 jmmv Exp $");
42 #include <sys/param.h>
55 static const char sep[] = "\r\n\t";
56 static const char delim[3] = "\\\\\0";
60 fail(const char *pattern, const char *input, size_t lineno) {
62 "skipping failed test at line %zu (pattern=%s, input=%s)\n",
63 lineno, pattern, input);
67 bug(const char *pattern, const char *input, size_t lineno) {
72 #if defined(REGEX_SPENCER)
74 * The default libc implementation by Henry Spencer
76 { "a[-]?c", "ac" }, // basic.dat
77 { "(a*)*", "a" }, // categorization.dat
78 { "(aba|a*b)*", "ababa" }, // categorization.dat
79 { "\\(a\\(b\\)*\\)*\\2", "abab" }, // categorization.dat
80 { "(a*)*", "aaaaaa" }, // nullsubexpression.dat
81 { "(a*)*", "aaaaaax" }, // nullsubexpression.dat
82 { "(a*)+", "a" }, // nullsubexpression.dat
83 { "(a*)+", "aaaaaa" }, // nullsubexpression.dat
84 { "(a*)+", "aaaaaax" }, // nullsubexpression.dat
85 { "([a]*)*", "a" }, // nullsubexpression.dat
86 { "([a]*)*", "aaaaaa" }, // nullsubexpression.dat
87 { "([a]*)*", "aaaaaax" }, // nullsubexpression.dat
88 { "([a]*)+", "a" }, // nullsubexpression.dat
89 { "([a]*)+", "aaaaaa" }, // nullsubexpression.dat
90 { "([a]*)+", "aaaaaax" }, // nullsubexpression.dat
91 { "([^b]*)*", "a" }, // nullsubexpression.dat
92 { "([^b]*)*", "aaaaaa" }, // nullsubexpression.dat
93 { "([^b]*)*", "aaaaaab" }, // nullsubexpression.dat
94 { "([ab]*)*", "a" }, // nullsubexpression.dat
95 { "([ab]*)*", "aaaaaa" }, // nullsubexpression.dat
96 { "([ab]*)*", "ababab" }, // nullsubexpression.dat
97 { "([ab]*)*", "bababa" }, // nullsubexpression.dat
98 { "([ab]*)*", "b" }, // nullsubexpression.dat
99 { "([ab]*)*", "bbbbbb" }, // nullsubexpression.dat
100 { "([ab]*)*", "aaaabcde" }, // nullsubexpression.dat
101 { "([^a]*)*", "b" }, // nullsubexpression.dat
102 { "([^a]*)*", "bbbbbb" }, // nullsubexpression.dat
103 { "([^ab]*)*", "ccccxx" }, // nullsubexpression.dat
104 { "\\(a*\\)*\\(x\\)", "ax" }, // nullsubexpression.dat
105 { "\\(a*\\)*\\(x\\)", "axa" }, // nullsubexpression.dat
106 { "\\(a*\\)*\\(x\\)\\(\\1\\)", "x" }, // nullsubexpression.dat
107 /* crash! */ { "\\(a*\\)*\\(x\\)\\(\\1\\)", "ax" }, // nullsubexpression.dat
108 /* crash! */ { "\\(a*\\)*\\(x\\)\\(\\1\\)\\(x\\)", "axxa" }, // ""
109 { "(a*)*(x)", "ax" }, // nullsubexpression.dat
110 { "(a*)*(x)", "axa" }, // nullsubexpression.dat
111 { "(a*)+(x)", "ax" }, // nullsubexpression.dat
112 { "(a*)+(x)", "axa" }, // nullsubexpression.dat
113 { "((a|ab)(c|bcd))(d*)", "abcd" }, // forcedassoc.dat
114 { "((a|ab)(bcd|c))(d*)", "abcd" }, // forcedassoc.dat
115 { "((ab|a)(c|bcd))(d*)", "abcd" }, // forcedassoc.dat
116 { "((ab|a)(bcd|c))(d*)", "abcd" }, // forcedassoc.dat
117 { "((a*)(b|abc))(c*)", "abc" }, // forcedassoc.dat
118 { "((a*)(abc|b))(c*)", "abc" }, // forcedassoc.dat
119 { "((..)|(.)){2}", "aaa" }, // repetition.dat
120 { "((..)|(.)){3}", "aaa" }, // repetition.dat
121 { "((..)|(.)){3}", "aaaa" }, // repetition.dat
122 { "((..)|(.)){3}", "aaaaa" }, // repetition.dat
123 { "X(.?){0,}Y", "X1234567Y" }, // repetition.dat
124 { "X(.?){1,}Y", "X1234567Y" }, // repetition.dat
125 { "X(.?){2,}Y", "X1234567Y" }, // repetition.dat
126 { "X(.?){3,}Y", "X1234567Y" }, // repetition.dat
127 { "X(.?){4,}Y", "X1234567Y" }, // repetition.dat
128 { "X(.?){5,}Y", "X1234567Y" }, // repetition.dat
129 { "X(.?){6,}Y", "X1234567Y" }, // repetition.dat
130 { "X(.?){7,}Y", "X1234567Y" }, // repetition.dat
131 { "X(.?){0,8}Y", "X1234567Y" }, // repetition.dat
132 { "X(.?){1,8}Y", "X1234567Y" }, // repetition.dat
133 { "X(.?){2,8}Y", "X1234567Y" }, // repetition.dat
134 { "X(.?){3,8}Y", "X1234567Y" }, // repetition.dat
135 { "X(.?){4,8}Y", "X1234567Y" }, // repetition.dat
136 { "X(.?){5,8}Y", "X1234567Y" }, // repetition.dat
137 { "X(.?){6,8}Y", "X1234567Y" }, // repetition.dat
138 { "X(.?){7,8}Y", "X1234567Y" }, // repetition.dat
139 { "(a|ab|c|bcd){0,}(d*)", "ababcd" }, // repetition.dat
140 { "(a|ab|c|bcd){1,}(d*)", "ababcd" }, // repetition.dat
141 { "(a|ab|c|bcd){2,}(d*)", "ababcd" }, // repetition.dat
142 { "(a|ab|c|bcd){3,}(d*)", "ababcd" }, // repetition.dat
143 { "(a|ab|c|bcd){1,10}(d*)", "ababcd" }, // repetition.dat
144 { "(a|ab|c|bcd){2,10}(d*)", "ababcd" }, // repetition.dat
145 { "(a|ab|c|bcd){3,10}(d*)", "ababcd" }, // repetition.dat
146 { "(a|ab|c|bcd)*(d*)", "ababcd" }, // repetition.dat
147 { "(a|ab|c|bcd)+(d*)", "ababcd" }, // repetition.dat
148 { "(ab|a|c|bcd){0,}(d*)", "ababcd" }, // repetition.dat
149 { "(ab|a|c|bcd){1,}(d*)", "ababcd" }, // repetition.dat
150 { "(ab|a|c|bcd){2,}(d*)", "ababcd" }, // repetition.dat
151 { "(ab|a|c|bcd){3,}(d*)", "ababcd" }, // repetition.dat
152 { "(ab|a|c|bcd){1,10}(d*)", "ababcd" }, // repetition.dat
153 { "(ab|a|c|bcd){2,10}(d*)", "ababcd" }, // repetition.dat
154 { "(ab|a|c|bcd){3,10}(d*)", "ababcd" }, // repetition.dat
155 { "(ab|a|c|bcd)*(d*)", "ababcd" }, // repetition.dat
156 { "(ab|a|c|bcd)+(d*)", "ababcd" }, // repetition.dat
157 #elif defined(REGEX_TRE)
158 { "a[-]?c", "ac" }, // basic.dat
159 { "a\\(b\\)*\\1", "a" }, // categorization.dat
160 { "a\\(b\\)*\\1", "abab" }, // categorization.dat
161 { "\\(a\\(b\\)*\\)*\\2", "abab" }, // categorization.dat
162 { "\\(a*\\)*\\(x\\)\\(\\1\\)", "ax" }, // categorization.dat
163 { "\\(a*\\)*\\(x\\)\\(\\1\\)\\(x\\)", "axxa" }, // ""
164 { "((..)|(.))*", "aa" }, // repetition.dat
165 { "((..)|(.))*", "aaa" }, // repetition.dat
166 { "((..)|(.))*", "aaaaa" }, // repetition.dat
167 { "X(.?){7,}Y", "X1234567Y" }, // repetition.dat
173 for (size_t i = 0; i < __arraycount(b); i++) {
174 if (strcmp(pattern, b[i].p) == 0 &&
175 strcmp(input, b[i].i) == 0) {
176 fail(pattern, input, lineno);
184 #define HAVE_BRACES 1
185 #define HAVE_MINIMAL 0
188 #define HAVE_BRACES 1
191 #define HAVE_MINIMAL 1
195 optional(const char *s)
201 { "[[<element>]] not supported", HAVE_BRACES },
202 { "no *? +? mimimal match ops", HAVE_MINIMAL },
205 for (size_t i = 0; i < __arraycount(nv); i++)
206 if (strcmp(nv[i].n, s) == 0) {
209 fprintf(stderr, "skipping unsupported [%s] tests\n", s);
213 ATF_REQUIRE_MSG(0, "Unknown feature: %s", s);
218 unsupported(const char *s)
220 static const char *we[] = {
221 #if defined(REGEX_SPENCER)
222 "ASSOCIATIVITY=left", // have right associativity
223 "SUBEXPRESSION=precedence", // have grouping subexpression
224 "REPEAT_LONGEST=last", // have first repeat longest
225 "BUG=alternation-order", // don't have it
226 "BUG=first-match", // don't have it
227 "BUG=nomatch-match", // don't have it
228 "BUG=repeat-any", // don't have it
229 "BUG=range-null", // don't have it
230 "BUG=repeat-null-unknown", // don't have it
231 "BUG=repeat-null", // don't have it
232 "BUG=repeat-artifact", // don't have it
233 "BUG=subexpression-first", // don't have it
234 #elif defined(REGEX_TRE)
235 "ASSOCIATIVITY=right", // have left associativity
236 "SUBEXPRESSION=grouping", // have precedence subexpression
237 "REPEAT_LONGEST=first", // have last repeat longest
238 "LENGTH=first", // have last length
239 "BUG=alternation-order", // don't have it
240 "BUG=first-match", // don't have it
241 "BUG=range-null", // don't have it
242 "BUG=repeat-null", // don't have it
243 "BUG=repeat-artifact", // don't have it
244 "BUG=subexpression-first", // don't have it
245 "BUG=repeat-short", // don't have it
252 while (*s == '#' || isspace((unsigned char)*s))
255 for (size_t i = 0; i < __arraycount(we); i++)
256 if (strcmp(we[i], s) == 0)
262 geterror(const char *s, int *comp, int *exec)
264 static const struct {
271 { "OK", 0, COMP|EXEC },
272 #define _DO(a, b) { # a, REG_ ## a, b },
294 for (size_t i = 0; i < __arraycount(nv); i++)
295 if (strcmp(s, nv[i].n) == 0) {
302 ATF_REQUIRE_MSG(0, "Unknown error %s", s);
313 case '0': case '1': case '2': case '3': case '4':
314 case '5': case '6': case '7': case '8': case '9':
335 ATF_REQUIRE_MSG(0, "Unknown char %c", *s);
341 getmatches(const char *s)
345 for (i = 0; (q = strchr(s, '(')) != NULL; i++, s = q + 1)
347 ATF_REQUIRE_MSG(i != 0, "No parentheses found");
352 checkcomment(const char *s, size_t lineno)
354 if (s && strstr(s, "BUG") != NULL)
355 fprintf(stderr, "Expected %s at line %zu\n", s, lineno);
359 checkmatches(const char *matches, size_t nm, const regmatch_t *pm,
366 size_t len = strlen(matches) + 1, off = 0;
368 ATF_REQUIRE((res = strdup(matches)) != NULL);
369 for (size_t i = 0; i < nm; i++) {
371 if (pm[i].rm_so == -1 && pm[i].rm_eo == -1)
372 l = snprintf(res + off, len - off, "(?,?)");
374 l = snprintf(res + off, len - off, "(%lld,%lld)",
375 (long long)pm[i].rm_so, (long long)pm[i].rm_eo);
376 ATF_REQUIRE_MSG((size_t) l < len - off, "String too long %s"
377 " cur=%d, max=%zu", res, l, len - off);
381 ATF_CHECK_STREQ_MSG(res, matches, " at line %zu", lineno);
383 ATF_REQUIRE_STREQ_MSG(res, matches, " at line %zu", lineno);
389 att_test(const struct atf_tc *tc, const char *data_name)
392 char *line, *lastpattern = NULL, data_path[MAXPATHLEN];
393 size_t len, lineno = 0;
397 snprintf(data_path, sizeof(data_path), "%s/data/%s.dat",
398 atf_tc_get_config_var(tc, "srcdir"), data_name);
400 input_file = fopen(data_path, "r");
401 if (input_file == NULL)
402 atf_tc_fail("Failed to open input file %s", data_path);
404 for (; (line = fparseln(input_file, &len, &lineno, delim, 0))
405 != NULL; free(line)) {
406 char *name, *pattern, *input, *matches, *comment;
410 fprintf(stderr, "[%s]\n", line);
412 if ((name = strtok(line, sep)) == NULL)
416 * We check these early so that we skip the lines quickly
417 * in order to do more strict testing on the other arguments.
418 * The same characters are also tested in the switch below.
426 if (*name == ';' || *name == '#' || strcmp(name, "NOTE") == 0)
429 /* Skip ":HA#???:" prefix */
430 while (*++name && *name != ':')
436 ATF_REQUIRE_MSG((pattern = strtok(NULL, sep)) != NULL,
437 "Missing pattern at line %zu", lineno);
438 ATF_REQUIRE_MSG((input = strtok(NULL, sep)) != NULL,
439 "Missing input at line %zu", lineno);
441 if (strchr(name, '$')) {
442 ATF_REQUIRE(strunvis(pattern, pattern) != -1);
443 ATF_REQUIRE(strunvis(input, input) != -1);
447 if (strcmp(input, "NULL") == 0)
450 if (strcmp(pattern, "SAME") == 0) {
451 ATF_REQUIRE(lastpattern != NULL);
452 pattern = lastpattern;
455 ATF_REQUIRE((lastpattern = strdup(pattern)) != NULL);
458 ATF_REQUIRE_MSG((matches = strtok(NULL, sep)) != NULL,
459 "Missing matches at line %zu", lineno);
461 comment = strtok(NULL, sep);
463 case '{': /* Begin optional implementation */
464 if (optional(comment)) {
468 name++; /* We have it, so ignore */
470 case '}': /* End optional implementation */
473 case '?': /* Optional */
474 case '|': /* Alternative */
475 if (unsupported(comment))
477 name++; /* We have it, so ignore */
479 case '#': /* Comment */
487 if (bug(pattern, input, lineno))
491 if (*matches != '(') {
492 geterror(matches, &comp, &exec);
497 nm = getmatches(matches);
498 ATF_REQUIRE((pm = calloc(nm, sizeof(*pm))) != NULL);
503 int iflags = getflags(name);
504 for (; *name; name++) {
511 flags = REG_EXTENDED;
517 ATF_REQUIRE_MSG(0, "Bad name %c", *name);
520 int c = regcomp(&re, pattern, flags | iflags);
521 ATF_REQUIRE_MSG(c == comp,
522 "regcomp returned %d for pattern %s at line %zu",
526 int e = regexec(&re, input, nm, pm, 0);
527 ATF_REQUIRE_MSG(e == exec, "Expected error %d,"
528 " got %d at line %zu", exec, e, lineno);
529 checkmatches(matches, nm, pm, lineno);
530 checkcomment(comment, lineno);
540 ATF_TC_HEAD(basic, tc)
542 atf_tc_set_md_var(tc, "descr", "Tests basic functionality");
544 ATF_TC_BODY(basic, tc)
546 att_test(tc, "basic");
549 ATF_TC(categorization);
550 ATF_TC_HEAD(categorization, tc)
552 atf_tc_set_md_var(tc, "descr", "Tests implementation categorization");
554 ATF_TC_BODY(categorization, tc)
556 att_test(tc, "categorization");
560 ATF_TC_HEAD(nullsubexpr, tc)
562 atf_tc_set_md_var(tc, "descr", "Tests (...)*");
564 ATF_TC_BODY(nullsubexpr, tc)
566 att_test(tc, "nullsubexpr");
570 ATF_TC_HEAD(leftassoc, tc)
572 atf_tc_set_md_var(tc, "descr", "Tests left-associative "
575 ATF_TC_BODY(leftassoc, tc)
578 /* jmmv: I converted the original shell-based tests to C and they
579 * disabled this test in a very unconventional way without giving
580 * any explanation. Mark as broken here, but I don't know why. */
581 atf_tc_expect_fail("Reason for breakage unknown");
584 atf_tc_expect_fail("The expected and matched groups are mismatched on FreeBSD");
586 att_test(tc, "leftassoc");
590 ATF_TC_HEAD(rightassoc, tc)
592 atf_tc_set_md_var(tc, "descr", "Tests right-associative "
595 ATF_TC_BODY(rightassoc, tc)
598 /* jmmv: I converted the original shell-based tests to C and they
599 * disabled this test in a very unconventional way without giving
600 * any explanation. Mark as broken here, but I don't know why. */
601 atf_tc_expect_fail("Reason for breakage unknown");
603 att_test(tc, "rightassoc");
607 ATF_TC_HEAD(forcedassoc, tc)
609 atf_tc_set_md_var(tc, "descr", "Tests subexpression grouping to "
610 "force association");
612 ATF_TC_BODY(forcedassoc, tc)
614 att_test(tc, "forcedassoc");
618 ATF_TC_HEAD(repetition, tc)
620 atf_tc_set_md_var(tc, "descr", "Tests implicit vs. explicit "
623 ATF_TC_BODY(repetition, tc)
625 att_test(tc, "repetition");
631 ATF_TP_ADD_TC(tp, basic);
632 ATF_TP_ADD_TC(tp, categorization);
633 ATF_TP_ADD_TC(tp, nullsubexpr);
634 ATF_TP_ADD_TC(tp, leftassoc);
635 ATF_TP_ADD_TC(tp, rightassoc);
636 ATF_TP_ADD_TC(tp, forcedassoc);
637 ATF_TP_ADD_TC(tp, repetition);
638 return atf_no_error();