]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - lib/libc/regex/grot/tests
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / lib / libc / regex / grot / tests
1 # regular expression test set
2 # $FreeBSD$
3 # Each line has at least three fields, separated by one or more tabs.  ""
4 # stands for an empty field.  The first field is an RE.  The second field is
5 # the flags.  If the C flag is given, regcomp() is expected to fail, and the
6 # third field is the error name (minus the leading REG_).
7 #
8 # Otherwise it is expected to succeed, and the third field is the string to
9 # try matching it against.  If there is no fourth field, the match is
10 # expected to fail.  If there is a fourth field, it is the substring that
11 # the RE is expected to match.  If there is a fifth field, it is a comma-
12 # separated list of what the subexpressions should match, with - indicating
13 # no match for that one.  In both the fourth and fifth fields, a (sub)field
14 # starting with @ indicates that the (sub)expression is expected to match
15 # a null string followed by the stuff after the @; this provides a way to
16 # test where null strings match.  The character `N' in REs and strings
17 # is newline, `S' is space, `T' is tab, `Z' is NUL.
18 #
19 # The full list of flags:
20 #       -       placeholder, does nothing
21 #       b       RE is a BRE, not an ERE
22 #       &       try it as both an ERE and a BRE
23 #       C       regcomp() error expected, third field is error name
24 #       i       REG_ICASE
25 #       m       ("mundane") REG_NOSPEC
26 #       s       REG_NOSUB (not really testable)
27 #       n       REG_NEWLINE
28 #       ^       REG_NOTBOL
29 #       $       REG_NOTEOL
30 #       #       REG_STARTEND (see below)
31 #       p       REG_PEND
32 #
33 # For REG_STARTEND, the start/end offsets are those of the substring
34 # enclosed in () in the string to be matched (the third field).
35
36 # basics
37 a               &       a       a
38 abc             &       abc     abc
39 abc|de          -       abc     abc
40 a|b|c           -       abc     a
41
42 # parentheses and perversions thereof
43 a(b)c           -       abc     abc
44 a\(b\)c         b       abc     abc
45 a(              C       EPAREN
46 a(              b       a(      a(
47 a\(             -       a(      a(
48 a\(             bC      EPAREN
49 a\(b            bC      EPAREN
50 a(b             C       EPAREN
51 a(b             b       a(b     a(b
52 # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
53 a)              -       a)      a)
54 )               -       )       )
55 # end gagging (in a just world, those *should* give EPAREN)
56 a)              b       a)      a)
57 a\)             bC      EPAREN
58 \)              bC      EPAREN
59 a()b            -       ab      ab
60 a\(\)b          b       ab      ab
61
62 # anchoring and REG_NEWLINE
63 ^abc$           &       abc     abc
64 a^b             -       a^b
65 a^b             b       a^b     a^b
66 a$b             -       a$b
67 a$b             b       a$b     a$b
68 ^               &       abc     @abc
69 $               &       abc     @
70 ^$              &       ""      @
71 $^              -       ""      @
72 \($\)\(^\)      b       ""      @
73 # stop retching, those are legitimate (although disgusting)
74 ^^              -       ""      @
75 $$              -       ""      @
76 b$              &       abNc
77 b$              &n      abNc    b
78 ^b$             &       aNbNc
79 ^b$             &n      aNbNc   b
80 ^$              &n      aNNb    @Nb
81 ^$              n       abc
82 ^$              n       abcN    @
83 $^              n       aNNb    @Nb
84 \($\)\(^\)      bn      aNNb    @Nb
85 ^^              n^      aNNb    @Nb
86 $$              n       aNNb    @NN
87 ^a              ^       a
88 a$              $       a
89 ^a              ^n      aNb
90 ^b              ^n      aNb     b
91 a$              $n      bNa
92 b$              $n      bNa     b
93 a*(^b$)c*       -       b       b
94 a*\(^b$\)c*     b       b       b
95
96 # certain syntax errors and non-errors
97 |               C       EMPTY
98 |               b       |       |
99 *               C       BADRPT
100 *               b       *       *
101 +               C       BADRPT
102 ?               C       BADRPT
103 ""              &C      EMPTY
104 ()              -       abc     @abc
105 \(\)            b       abc     @abc
106 a||b            C       EMPTY
107 |ab             C       EMPTY
108 ab|             C       EMPTY
109 (|a)b           C       EMPTY
110 (a|)b           C       EMPTY
111 (*a)            C       BADRPT
112 (+a)            C       BADRPT
113 (?a)            C       BADRPT
114 ({1}a)          C       BADRPT
115 \(\{1\}a\)      bC      BADRPT
116 (a|*b)          C       BADRPT
117 (a|+b)          C       BADRPT
118 (a|?b)          C       BADRPT
119 (a|{1}b)        C       BADRPT
120 ^*              C       BADRPT
121 ^*              b       *       *
122 ^+              C       BADRPT
123 ^?              C       BADRPT
124 ^{1}            C       BADRPT
125 ^\{1\}          bC      BADRPT
126
127 # metacharacters, backslashes
128 a.c             &       abc     abc
129 a[bc]d          &       abd     abd
130 a\*c            &       a*c     a*c
131 a\\b            &       a\b     a\b
132 a\\\*b          &       a\*b    a\*b
133 a\bc            &       abc     abc
134 a\              &C      EESCAPE
135 a\\bc           &       a\bc    a\bc
136 \{              bC      BADRPT
137 # trailing $ is a peculiar special case for the BRE code
138 a$              &       a       a
139 a$              &       a$
140 a\$             &       a
141 a\$             &       a$      a$
142 a\\$            &       a
143 a\\$            &       a$
144 a\\$            &       a\$
145 a\\$            &       a\      a\
146
147 # back references, ugh
148 a\(b\)\2c       bC      ESUBREG
149 a\(b\1\)c       bC      ESUBREG
150 a\(b*\)c\1d     b       abbcbbd abbcbbd bb
151 a\(b*\)c\1d     b       abbcbd
152 a\(b*\)c\1d     b       abbcbbbd
153 ^\(.\)\1        b       abc
154 a\([bc]\)\1d    b       abcdabbd        abbd    b
155 a\(\([bc]\)\2\)*d       b       abbccd  abbccd
156 a\(\([bc]\)\2\)*d       b       abbcbd
157 # actually, this next one probably ought to fail, but the spec is unclear
158 a\(\(b\)*\2\)*d         b       abbbd   abbbd
159 # here is a case that no NFA implementation does right
160 \(ab*\)[ab]*\1  b       ababaaa ababaaa a
161 # check out normal matching in the presence of back refs
162 \(a\)\1bcd      b       aabcd   aabcd
163 \(a\)\1bc*d     b       aabcd   aabcd
164 \(a\)\1bc*d     b       aabd    aabd
165 \(a\)\1bc*d     b       aabcccd aabcccd
166 \(a\)\1bc*[ce]d b       aabcccd aabcccd
167 ^\(a\)\1b\(c\)*cd$      b       aabcccd aabcccd
168 \(b*\)\(a*\1\)* b       ab      a
169 \([^_]*\)\(_*\1\)*      b       foo_foo_bar_bar_bar_baz foo_foo foo,_foo
170 \([^_]*\)\(_*\1\)*      b       bar_bar_bar_baz bar_bar_bar     bar,_bar
171 \([^_]*\)\(_*\1\)*      b       foo_bar_baz     foo     foo
172 \(.*\)\1        b       ""      ""
173 \(.*\)\1        b       a       ""
174 \(.*\)\1        b       aa      aa
175 \(.*\)\1        b       aaa     aa
176 \(.*\)\1        b       aaaa    aaaa
177 \([^_]*\)\1     b       ""      ""
178 \([^_]*\)\1     b       a       ""
179 \([^_]*\)\1     b       aa      aa
180 \([^_]*\)\1     b       aaa     aa
181 \([^_]*\)\1     b       aaaa    aaaa
182 foo\(.*\)bar\1  b       foolbarl        foolbarl        l
183 foo\(.*\)bar\1  b       foobar  foobar  ""
184 \(\(.\)b\)*\1   b       aba
185 \(\(.\)b\)*\1   b       abba
186 \(\(.\)b\)*\1   b       abbba
187 \(\(.\)b\)*\1   b       abbbba  bbbb    bb,b
188 \(\(.\)b\)*\1   b       abbbbba abbbbb  bb,b
189 \(\(.\)b\)*\1   b       abbbbbba        abbbbb  bb,b
190 \(\(.\)b\)*\1   b       abbbbbbbbbbbbbba        abbbbbbbbbbbbb  bb,b
191 \(\(.\)b\)*\1   b       abbbbbbbbbbbbbbba       abbbbbbbbbbbbbbb        bb,b
192
193 # ordinary repetitions
194 ab*c            &       abc     abc
195 ab+c            -       abc     abc
196 ab?c            -       abc     abc
197 a\(*\)b         b       a*b     a*b
198 a\(**\)b        b       ab      ab
199 a\(***\)b       bC      BADRPT
200 *a              b       *a      *a
201 **a             b       a       a
202 ***a            bC      BADRPT
203
204 # the dreaded bounded repetitions
205 {               &       {       {
206 {abc            &       {abc    {abc
207 {1              C       BADRPT
208 {1}             C       BADRPT
209 a{b             &       a{b     a{b
210 a{1}b           -       ab      ab
211 a\{1\}b         b       ab      ab
212 a{1,}b          -       ab      ab
213 a\{1,\}b        b       ab      ab
214 a{1,2}b         -       aab     aab
215 a\{1,2\}b       b       aab     aab
216 a{1             C       EBRACE
217 a\{1            bC      EBRACE
218 a{1a            C       EBRACE
219 a\{1a           bC      EBRACE
220 a{1a}           C       BADBR
221 a\{1a\}         bC      BADBR
222 a{,2}           -       a{,2}   a{,2}
223 a\{,2\}         bC      BADBR
224 a{,}            -       a{,}    a{,}
225 a\{,\}          bC      BADBR
226 a{1,x}          C       BADBR
227 a\{1,x\}        bC      BADBR
228 a{1,x           C       EBRACE
229 a\{1,x          bC      EBRACE
230 a{300}          C       BADBR
231 a\{300\}        bC      BADBR
232 a{1,0}          C       BADBR
233 a\{1,0\}        bC      BADBR
234 ab{0,0}c        -       abcac   ac
235 ab\{0,0\}c      b       abcac   ac
236 ab{0,1}c        -       abcac   abc
237 ab\{0,1\}c      b       abcac   abc
238 ab{0,3}c        -       abbcac  abbc
239 ab\{0,3\}c      b       abbcac  abbc
240 ab{1,1}c        -       acabc   abc
241 ab\{1,1\}c      b       acabc   abc
242 ab{1,3}c        -       acabc   abc
243 ab\{1,3\}c      b       acabc   abc
244 ab{2,2}c        -       abcabbc abbc
245 ab\{2,2\}c      b       abcabbc abbc
246 ab{2,4}c        -       abcabbc abbc
247 ab\{2,4\}c      b       abcabbc abbc
248 ((a{1,10}){1,10}){1,10} -       a       a       a,a
249 ((a{1,10}){1,10}){1,10}bb       -       aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb        aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb
250
251 # multiple repetitions
252 a**             &C      BADRPT
253 a++             C       BADRPT
254 a??             C       BADRPT
255 a*+             C       BADRPT
256 a*?             C       BADRPT
257 a+*             C       BADRPT
258 a+?             C       BADRPT
259 a?*             C       BADRPT
260 a?+             C       BADRPT
261 a{1}{1}         C       BADRPT
262 a*{1}           C       BADRPT
263 a+{1}           C       BADRPT
264 a?{1}           C       BADRPT
265 a{1}*           C       BADRPT
266 a{1}+           C       BADRPT
267 a{1}?           C       BADRPT
268 a*{b}           -       a{b}    a{b}
269 a\{1\}\{1\}     bC      BADRPT
270 a*\{1\}         bC      BADRPT
271 a\{1\}*         bC      BADRPT
272
273 # brackets, and numerous perversions thereof
274 a[b]c           &       abc     abc
275 a[ab]c          &       abc     abc
276 a[^ab]c         &       adc     adc
277 a[]b]c          &       a]c     a]c
278 a[[b]c          &       a[c     a[c
279 a[-b]c          &       a-c     a-c
280 a[^]b]c         &       adc     adc
281 a[^-b]c         &       adc     adc
282 a[b-]c          &       a-c     a-c
283 a[b             &C      EBRACK
284 a[]             &C      EBRACK
285 a[1-3]c         &       a2c     a2c
286 a[3-1]c         &C      ERANGE
287 a[1-3-5]c       &C      ERANGE
288 a[[.-.]--]c     &       a-c     a-c
289 a[1-            &C      ERANGE
290 a[[.            &C      EBRACK
291 a[[.x           &C      EBRACK
292 a[[.x.          &C      EBRACK
293 a[[.x.]         &C      EBRACK
294 a[[.x.]]        &       ax      ax
295 a[[.x,.]]       &C      ECOLLATE
296 a[[.one.]]b     &       a1b     a1b
297 a[[.notdef.]]b  &C      ECOLLATE
298 a[[.].]]b       &       a]b     a]b
299 a[[:alpha:]]c   &       abc     abc
300 a[[:notdef:]]c  &C      ECTYPE
301 a[[:            &C      EBRACK
302 a[[:alpha       &C      EBRACK
303 a[[:alpha:]     &C      EBRACK
304 a[[:alpha,:]    &C      ECTYPE
305 a[[:]:]]b       &C      ECTYPE
306 a[[:-:]]b       &C      ECTYPE
307 a[[:alph:]]     &C      ECTYPE
308 a[[:alphabet:]] &C      ECTYPE
309 [[:alnum:]]+    -       -%@a0X- a0X
310 [[:alpha:]]+    -       -%@aX0- aX
311 [[:blank:]]+    -       aSSTb   SST
312 [[:cntrl:]]+    -       aNTb    NT
313 [[:digit:]]+    -       a019b   019
314 [[:graph:]]+    -       Sa%bS   a%b
315 [[:lower:]]+    -       AabC    ab
316 [[:print:]]+    -       NaSbN   aSb
317 [[:punct:]]+    -       S%-&T   %-&
318 [[:space:]]+    -       aSNTb   SNT
319 [[:upper:]]+    -       aBCd    BC
320 [[:xdigit:]]+   -       p0f3Cq  0f3C
321 a[[=b=]]c       &       abc     abc
322 a[[=            &C      EBRACK
323 a[[=b           &C      EBRACK
324 a[[=b=          &C      EBRACK
325 a[[=b=]         &C      EBRACK
326 a[[=b,=]]       &C      ECOLLATE
327 a[[=one=]]b     &       a1b     a1b
328
329 # complexities
330 a(((b)))c       -       abc     abc
331 a(b|(c))d       -       abd     abd
332 a(b*|c)d        -       abbd    abbd
333 # just gotta have one DFA-buster, of course
334 a[ab]{20}       -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
335 # and an inline expansion in case somebody gets tricky
336 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]       -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
337 # and in case somebody just slips in an NFA...
338 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)      -       aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
339 # fish for anomalies as the number of states passes 32
340 12345678901234567890123456789   -       a12345678901234567890123456789b 12345678901234567890123456789
341 123456789012345678901234567890  -       a123456789012345678901234567890b        123456789012345678901234567890
342 1234567890123456789012345678901 -       a1234567890123456789012345678901b       1234567890123456789012345678901
343 12345678901234567890123456789012        -       a12345678901234567890123456789012b      12345678901234567890123456789012
344 123456789012345678901234567890123       -       a123456789012345678901234567890123b     123456789012345678901234567890123
345 # and one really big one, beyond any plausible word width
346 1234567890123456789012345678901234567890123456789012345678901234567890  -       a1234567890123456789012345678901234567890123456789012345678901234567890b        1234567890123456789012345678901234567890123456789012345678901234567890
347 # fish for problems as brackets go past 8
348 [ab][cd][ef][gh][ij][kl][mn]    -       xacegikmoq      acegikm
349 [ab][cd][ef][gh][ij][kl][mn][op]        -       xacegikmoq      acegikmo
350 [ab][cd][ef][gh][ij][kl][mn][op][qr]    -       xacegikmoqy     acegikmoq
351 [ab][cd][ef][gh][ij][kl][mn][op][q]     -       xacegikmoqy     acegikmoq
352
353 # subtleties of matching
354 abc             &       xabcy   abc
355 a\(b\)?c\1d     b       acd
356 aBc             i       Abc     Abc
357 a[Bc]*d         i       abBCcd  abBCcd
358 0[[:upper:]]1   &i      0a1     0a1
359 0[[:lower:]]1   &i      0A1     0A1
360 a[^b]c          &i      abc
361 a[^b]c          &i      aBc
362 a[^b]c          &i      adc     adc
363 [a]b[c]         -       abc     abc
364 [a]b[a]         -       aba     aba
365 [abc]b[abc]     -       abc     abc
366 [abc]b[abd]     -       abd     abd
367 a(b?c)+d        -       accd    accd
368 (wee|week)(knights|night)       -       weeknights      weeknights
369 (we|wee|week|frob)(knights|night|day)   -       weeknights      weeknights
370 a[bc]d          -       xyzaaabcaababdacd       abd
371 a[ab]c          -       aaabc   abc
372 abc             s       abc     abc
373
374 # subexpressions
375 a(b)(c)d        -       abcd    abcd    b,c
376 a(((b)))c       -       abc     abc     b,b,b
377 a(b|(c))d       -       abd     abd     b,-
378 a(b*|c|e)d      -       abbd    abbd    bb
379 a(b*|c|e)d      -       acd     acd     c
380 a(b*|c|e)d      -       ad      ad      @d
381 a(b?)c          -       abc     abc     b
382 a(b?)c          -       ac      ac      @c
383 a(b+)c          -       abc     abc     b
384 a(b+)c          -       abbbc   abbbc   bbb
385 a(b*)c          -       ac      ac      @c
386 (a|ab)(bc([de]+)f|cde)  -       abcdef  abcdef  a,bcdef,de
387 # the regression tester only asks for 9 subexpressions
388 a(b)(c)(d)(e)(f)(g)(h)(i)(j)k   -       abcdefghijk     abcdefghijk     b,c,d,e,f,g,h,i,j
389 a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l        -       abcdefghijkl    abcdefghijkl    b,c,d,e,f,g,h,i,j,k
390 a([bc]?)c       -       abc     abc     b
391 a([bc]?)c       -       ac      ac      @c
392 a([bc]+)c       -       abc     abc     b
393 a([bc]+)c       -       abcc    abcc    bc
394 a([bc]+)bc      -       abcbc   abcbc   bc
395 a(bb+|b)b       -       abb     abb     b
396 a(bbb+|bb+|b)b  -       abb     abb     b
397 a(bbb+|bb+|b)b  -       abbb    abbb    bb
398 a(bbb+|bb+|b)bb -       abbb    abbb    b
399 (.*).*          -       abcdef  abcdef  abcdef
400 (a*)*           -       bc      @b      @b
401
402 # do we get the right subexpression when it is used more than once?
403 a(b|c)*d        -       ad      ad      -
404 a(b|c)*d        -       abcd    abcd    c
405 a(b|c)+d        -       abd     abd     b
406 a(b|c)+d        -       abcd    abcd    c
407 a(b|c?)+d       -       ad      ad      @d
408 a(b|c?)+d       -       abcd    abcd    @d
409 a(b|c){0,0}d    -       ad      ad      -
410 a(b|c){0,1}d    -       ad      ad      -
411 a(b|c){0,1}d    -       abd     abd     b
412 a(b|c){0,2}d    -       ad      ad      -
413 a(b|c){0,2}d    -       abcd    abcd    c
414 a(b|c){0,}d     -       ad      ad      -
415 a(b|c){0,}d     -       abcd    abcd    c
416 a(b|c){1,1}d    -       abd     abd     b
417 a(b|c){1,1}d    -       acd     acd     c
418 a(b|c){1,2}d    -       abd     abd     b
419 a(b|c){1,2}d    -       abcd    abcd    c
420 a(b|c){1,}d     -       abd     abd     b
421 a(b|c){1,}d     -       abcd    abcd    c
422 a(b|c){2,2}d    -       acbd    acbd    b
423 a(b|c){2,2}d    -       abcd    abcd    c
424 a(b|c){2,4}d    -       abcd    abcd    c
425 a(b|c){2,4}d    -       abcbd   abcbd   b
426 a(b|c){2,4}d    -       abcbcd  abcbcd  c
427 a(b|c){2,}d     -       abcd    abcd    c
428 a(b|c){2,}d     -       abcbd   abcbd   b
429 a(b+|((c)*))+d  -       abd     abd     @d,@d,-
430 a(b+|((c)*))+d  -       abcd    abcd    @d,@d,-
431
432 # check out the STARTEND option
433 [abc]           &#      a(b)c   b
434 [abc]           &#      a(d)c
435 [abc]           &#      a(bc)d  b
436 [abc]           &#      a(dc)d  c
437 .               &#      a()c
438 b.*c            &#      b(bc)c  bc
439 b.*             &#      b(bc)c  bc
440 .*c             &#      b(bc)c  bc
441
442 # plain strings, with the NOSPEC flag
443 abc             m       abc     abc
444 abc             m       xabcy   abc
445 abc             m       xyz
446 a*b             m       aba*b   a*b
447 a*b             m       ab
448 ""              mC      EMPTY
449
450 # cases involving NULs
451 aZb             &       a       a
452 aZb             &p      a
453 aZb             &p#     (aZb)   aZb
454 aZ*b            &p#     (ab)    ab
455 a.b             &#      (aZb)   aZb
456 a.*             &#      (aZb)c  aZb
457
458 # word boundaries (ick)
459 [[:<:]]a        &       a       a
460 [[:<:]]a        &       ba
461 [[:<:]]a        &       -a      a
462 a[[:>:]]        &       a       a
463 a[[:>:]]        &       ab
464 a[[:>:]]        &       a-      a
465 [[:<:]]a.c[[:>:]]       &       axcd-dayc-dazce-abc     abc
466 [[:<:]]a.c[[:>:]]       &       axcd-dayc-dazce-abc-q   abc
467 [[:<:]]a.c[[:>:]]       &       axc-dayc-dazce-abc      axc
468
469 # past problems
470 (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])   -       A1      A1
471 abcdefghijklmnop        i       abcdefghijklmnop        abcdefghijklmnop
472 abcdefghijklmnopqrstuv  i       abcdefghijklmnopqrstuv  abcdefghijklmnopqrstuv
473 (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])     -       CC11    CC11
474 CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a    -       CC11    CC11
475 # PR 130504
476 (.|())(b)       -       ab      ab
477 (()|.)(b)       -       ab      ab