]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - gnu/usr.bin/grep/tests/tests
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / gnu / usr.bin / grep / tests / tests
1 # regular expression test set
2 # Each line has at least three fields, separated by one or more tabs.  ""
3 # stands for an empty field.  The first field is an RE.  The second field is
4 # flags.  If the C flag is given, regcomp() is expected to fail, and the
5 # third field is the error name (minus the leading REG_).
6 #
7 # Otherwise it is expected to succeed, and the third field is the string to
8 # try matching it against.  If there is no fourth field, the match is
9 # expected to fail.  If there is a fourth field, it is the substring that
10 # the RE is expected to match.  If there is a fifth field, it is a comma-
11 # separated list of what the subexpressions should match, with - indicating
12 # no match for that one.  In both the fourth and fifth fields, a (sub)field
13 # starting with @ indicates that the (sub)expression is expected to match
14 # a null string followed by the stuff after the @; this provides a way to
15 # test where null strings match.  The character `N' in REs and strings
16 # is newline, `S' is space, `T' is tab, `Z' is NUL.
17 #
18 # The full list of flags:
19 #       -       placeholder, does nothing
20 #       b       RE is a BRE, not an ERE
21 #       &       try it as both an ERE and a BRE
22 #       C       regcomp() error expected, third field is error name
23 #       i       REG_ICASE
24 #       m       ("mundane") REG_NOSPEC
25 #       s       REG_NOSUB (not really testable)
26 #       n       REG_NEWLINE
27 #       ^       REG_NOTBOL
28 #       $       REG_NOTEOL
29 #       #       REG_STARTEND (see below)
30 #       p       REG_PEND
31 #
32 # For REG_STARTEND, the start/end offsets are those of the substring
33 # enclosed in ().
34
35 # basics
36 a               &       a       a
37 abc             &       abc     abc
38 abc|de          -       abc     abc
39 a|b|c           -       abc     a
40
41 # parentheses and perversions thereof
42 a(b)c           -       abc     abc
43 a\(b\)c         b       abc     abc
44 a(              C       EPAREN
45 a(              b       a(      a(
46 a\(             -       a(      a(
47 a\(             bC      EPAREN
48 a\(b            bC      EPAREN
49 a(b             C       EPAREN
50 a(b             b       a(b     a(b
51 # gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
52 a)              -       a)      a)
53 )               -       )       )
54 # end gagging (in a just world, those *should* give EPAREN)
55 a)              b       a)      a)
56 a\)             bC      EPAREN
57 \)              bC      EPAREN
58 a()b            -       ab      ab
59 a\(\)b          b       ab      ab
60
61 # anchoring and REG_NEWLINE
62 ^abc$           &       abc     abc
63 a^b             -       a^b
64 a^b             b       a^b     a^b
65 a$b             -       a$b
66 a$b             b       a$b     a$b
67 ^               &       abc     @abc
68 $               &       abc     @
69 ^$              &       ""      @
70 $^              -       ""      @
71 \($\)\(^\)      b       ""      @
72 # stop retching, those are legitimate (although disgusting)
73 ^^              -       ""      @
74 $$              -       ""      @
75 ##b$            &       abNc
76 ##b$            &n      abNc    b
77 ##^b$           &       aNbNc
78 ##^b$           &n      aNbNc   b
79 ##^$            &n      aNNb    @Nb
80 ^$              n       abc
81 ##^$            n       abcN    @
82 ##$^            n       aNNb    @Nb
83 ##\($\)\(^\)    bn      aNNb    @Nb
84 ##^^            n^      aNNb    @Nb
85 ##$$            n       aNNb    @NN
86 ^a              ^       a
87 a$              $       a
88 ##^a            ^n      aNb
89 ##^b            ^n      aNb     b
90 ##a$            $n      bNa
91 ##b$            $n      bNa     b
92 a*(^b$)c*       -       b       b
93 a*\(^b$\)c*     b       b       b
94
95 # certain syntax errors and non-errors
96 |               C       EMPTY
97 |               b       |       |
98 *               C       BADRPT
99 *               b       *       *
100 +               C       BADRPT
101 ?               C       BADRPT
102 ""              &C      EMPTY
103 ()              -       abc     @abc
104 \(\)            b       abc     @abc
105 a||b            C       EMPTY
106 |ab             C       EMPTY
107 ab|             C       EMPTY
108 (|a)b           C       EMPTY
109 (a|)b           C       EMPTY
110 (*a)            C       BADRPT
111 (+a)            C       BADRPT
112 (?a)            C       BADRPT
113 ({1}a)          C       BADRPT
114 \(\{1\}a\)      bC      BADRPT
115 (a|*b)          C       BADRPT
116 (a|+b)          C       BADRPT
117 (a|?b)          C       BADRPT
118 (a|{1}b)        C       BADRPT
119 ^*              C       BADRPT
120 ^*              b       *       *
121 ^+              C       BADRPT
122 ^?              C       BADRPT
123 ^{1}            C       BADRPT
124 ^\{1\}          bC      BADRPT
125
126 # metacharacters, backslashes
127 a.c             &       abc     abc
128 a[bc]d          &       abd     abd
129 a\*c            &       a*c     a*c
130 a\\b            &       a\b     a\b
131 a\\\*b          &       a\*b    a\*b
132 a\bc            &       abc     abc
133 a\              &C      EESCAPE
134 a\\bc           &       a\bc    a\bc
135 \{              bC      BADRPT
136 a\[b            &       a[b     a[b
137 a[b             &C      EBRACK
138 # trailing $ is a peculiar special case for the BRE code
139 a$              &       a       a
140 a$              &       a$
141 a\$             &       a
142 a\$             &       a$      a$
143 a\\$            &       a
144 a\\$            &       a$
145 a\\$            &       a\$
146 a\\$            &       a\      a\
147
148 # back references, ugh
149 ##a\(b\)\2c     bC      ESUBREG
150 ##a\(b\1\)c     bC      ESUBREG
151 a\(b*\)c\1d     b       abbcbbd abbcbbd bb
152 a\(b*\)c\1d     b       abbcbd
153 a\(b*\)c\1d     b       abbcbbbd
154 ^\(.\)\1        b       abc
155 a\([bc]\)\1d    b       abcdabbd        abbd    b
156 a\(\([bc]\)\2\)*d       b       abbccd  abbccd
157 a\(\([bc]\)\2\)*d       b       abbcbd
158 # actually, this next one probably ought to fail, but the spec is unclear
159 a\(\(b\)*\2\)*d         b       abbbd   abbbd
160 # here is a case that no NFA implementation does right
161 \(ab*\)[ab]*\1  b       ababaaa ababaaa a
162 # check out normal matching in the presence of back refs
163 \(a\)\1bcd      b       aabcd   aabcd
164 \(a\)\1bc*d     b       aabcd   aabcd
165 \(a\)\1bc*d     b       aabd    aabd
166 \(a\)\1bc*d     b       aabcccd aabcccd
167 \(a\)\1bc*[ce]d b       aabcccd aabcccd
168 ^\(a\)\1b\(c\)*cd$      b       aabcccd aabcccd
169
170 # ordinary repetitions
171 ab*c            &       abc     abc
172 ab+c            -       abc     abc
173 ab?c            -       abc     abc
174 a\(*\)b         b       a*b     a*b
175 a\(**\)b        b       ab      ab
176 a\(***\)b       bC      BADRPT
177 *a              b       *a      *a
178 **a             b       a       a
179 ***a            bC      BADRPT
180
181 # the dreaded bounded repetitions
182 {               &       {       {
183 {abc            &       {abc    {abc
184 {1              C       BADRPT
185 {1}             C       BADRPT
186 a{b             &       a{b     a{b
187 a{1}b           -       ab      ab
188 a\{1\}b         b       ab      ab
189 a{1,}b          -       ab      ab
190 a\{1,\}b        b       ab      ab
191 a{1,2}b         -       aab     aab
192 a\{1,2\}b       b       aab     aab
193 a{1             C       EBRACE
194 a\{1            bC      EBRACE
195 a{1a            C       EBRACE
196 a\{1a           bC      EBRACE
197 a{1a}           C       BADBR
198 a\{1a\}         bC      BADBR
199 a{,2}           -       a{,2}   a{,2}
200 a\{,2\}         bC      BADBR
201 a{,}            -       a{,}    a{,}
202 a\{,\}          bC      BADBR
203 a{1,x}          C       BADBR
204 a\{1,x\}        bC      BADBR
205 a{1,x           C       EBRACE
206 a\{1,x          bC      EBRACE
207 a{300}          C       BADBR
208 a\{300\}        bC      BADBR
209 a{1,0}          C       BADBR
210 a\{1,0\}        bC      BADBR
211 ab{0,0}c        -       abcac   ac
212 ab\{0,0\}c      b       abcac   ac
213 ab{0,1}c        -       abcac   abc
214 ab\{0,1\}c      b       abcac   abc
215 ab{0,3}c        -       abbcac  abbc
216 ab\{0,3\}c      b       abbcac  abbc
217 ab{1,1}c        -       acabc   abc
218 ab\{1,1\}c      b       acabc   abc
219 ab{1,3}c        -       acabc   abc
220 ab\{1,3\}c      b       acabc   abc
221 ab{2,2}c        -       abcabbc abbc
222 ab\{2,2\}c      b       abcabbc abbc
223 ab{2,4}c        -       abcabbc abbc
224 ab\{2,4\}c      b       abcabbc abbc
225 ((a{1,10}){1,10}){1,10} -       a       a       a,a
226
227 # multiple repetitions
228 a**             &C      BADRPT
229 a++             C       BADRPT
230 a??             C       BADRPT
231 a*+             C       BADRPT
232 a*?             C       BADRPT
233 a+*             C       BADRPT
234 a+?             C       BADRPT
235 a?*             C       BADRPT
236 a?+             C       BADRPT
237 a{1}{1}         C       BADRPT
238 a*{1}           C       BADRPT
239 a+{1}           C       BADRPT
240 a?{1}           C       BADRPT
241 a{1}*           C       BADRPT
242 a{1}+           C       BADRPT
243 a{1}?           C       BADRPT
244 a*{b}           -       a{b}    a{b}
245 a\{1\}\{1\}     bC      BADRPT
246 a*\{1\}         bC      BADRPT
247 a\{1\}*         bC      BADRPT
248
249 # brackets, and numerous perversions thereof
250 a[b]c           &       abc     abc
251 a[ab]c          &       abc     abc
252 a[^ab]c         &       adc     adc
253 a[]b]c          &       a]c     a]c
254 a[[b]c          &       a[c     a[c
255 a[-b]c          &       a-c     a-c
256 a[^]b]c         &       adc     adc
257 a[^-b]c         &       adc     adc
258 a[b-]c          &       a-c     a-c
259 a[b             &C      EBRACK
260 a[]             &C      EBRACK
261 a[1-3]c         &       a2c     a2c
262 a[3-1]c         &C      ERANGE
263 a[1-3-5]c       &C      ERANGE
264 a[[.-.]--]c     &       a-c     a-c
265 a[1-            &C      ERANGE
266 a[[.            &C      EBRACK
267 a[[.x           &C      EBRACK
268 a[[.x.          &C      EBRACK
269 a[[.x.]         &C      EBRACK
270 a[[.x.]]        &       ax      ax
271 a[[.x,.]]       &C      ECOLLATE
272 a[[.one.]]b     &       a1b     a1b
273 a[[.notdef.]]b  &C      ECOLLATE
274 a[[.].]]b       &       a]b     a]b
275 a[[:alpha:]]c   &       abc     abc
276 a[[:notdef:]]c  &C      ECTYPE
277 a[[:            &C      EBRACK
278 a[[:alpha       &C      EBRACK
279 a[[:alpha:]     &C      EBRACK
280 a[[:alpha,:]    &C      ECTYPE
281 a[[:]:]]b       &C      ECTYPE
282 a[[:-:]]b       &C      ECTYPE
283 a[[:alph:]]     &C      ECTYPE
284 a[[:alphabet:]] &C      ECTYPE
285 ##[[:alnum:]]+  -       -%@a0X- a0X
286 ##[[:alpha:]]+  -       -%@aX0- aX
287 [[:blank:]]+    -       aSSTb   SST
288 ##[[:cntrl:]]+  -       aNTb    NT
289 [[:digit:]]+    -       a019b   019
290 ##[[:graph:]]+  -       Sa%bS   a%b
291 [[:lower:]]+    -       AabC    ab
292 ##[[:print:]]+  -       NaSbN   aSb
293 ##[[:punct:]]+  -       S%-&T   %-&
294 [[:space:]]+    -       aSNTb   SNT
295 [[:upper:]]+    -       aBCd    BC
296 [[:xdigit:]]+   -       p0f3Cq  0f3C
297 a[[=b=]]c       &       abc     abc
298 a[[=            &C      EBRACK
299 a[[=b           &C      EBRACK
300 a[[=b=          &C      EBRACK
301 a[[=b=]         &C      EBRACK
302 a[[=b,=]]       &C      ECOLLATE
303 a[[=one=]]b     &       a1b     a1b
304
305 # complexities
306 a(((b)))c       -       abc     abc
307 a(b|(c))d       -       abd     abd
308 a(b*|c)d        -       abbd    abbd
309 # just gotta have one DFA-buster, of course
310 a[ab]{20}       -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
311 # and an inline expansion in case somebody gets tricky
312 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]       -       aaaaabaaaabaaaabaaaab   aaaaabaaaabaaaabaaaab
313 # and in case somebody just slips in an NFA...
314 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)      -       aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
315 # fish for anomalies as the number of states passes 32
316 12345678901234567890123456789   -       a12345678901234567890123456789b 12345678901234567890123456789
317 123456789012345678901234567890  -       a123456789012345678901234567890b        123456789012345678901234567890
318 1234567890123456789012345678901 -       a1234567890123456789012345678901b       1234567890123456789012345678901
319 12345678901234567890123456789012        -       a12345678901234567890123456789012b      12345678901234567890123456789012
320 123456789012345678901234567890123       -       a123456789012345678901234567890123b     123456789012345678901234567890123
321 # and one really big one, beyond any plausible word width
322 1234567890123456789012345678901234567890123456789012345678901234567890  -       a1234567890123456789012345678901234567890123456789012345678901234567890b        1234567890123456789012345678901234567890123456789012345678901234567890
323 # fish for problems as brackets go past 8
324 [ab][cd][ef][gh][ij][kl][mn]    -       xacegikmoq      acegikm
325 [ab][cd][ef][gh][ij][kl][mn][op]        -       xacegikmoq      acegikmo
326 [ab][cd][ef][gh][ij][kl][mn][op][qr]    -       xacegikmoqy     acegikmoq
327 [ab][cd][ef][gh][ij][kl][mn][op][q]     -       xacegikmoqy     acegikmoq
328
329 # subtleties of matching
330 abc             &       xabcy   abc
331 a\(b\)?c\1d     b       acd
332 aBc             i       Abc     Abc
333 a[Bc]*d         i       abBCcd  abBCcd
334 0[[:upper:]]1   &i      0a1     0a1
335 0[[:lower:]]1   &i      0A1     0A1
336 a[^b]c          &i      abc
337 a[^b]c          &i      aBc
338 a[^b]c          &i      adc     adc
339 [a]b[c]         -       abc     abc
340 [a]b[a]         -       aba     aba
341 [abc]b[abc]     -       abc     abc
342 [abc]b[abd]     -       abd     abd
343 a(b?c)+d        -       accd    accd
344 (wee|week)(knights|night)       -       weeknights      weeknights
345 (we|wee|week|frob)(knights|night|day)   -       weeknights      weeknights
346 a[bc]d          -       xyzaaabcaababdacd       abd
347 a[ab]c          -       aaabc   abc
348 abc             s       abc     abc
349 a*              &       b       @b
350
351 # Let's have some fun -- try to match a C comment.
352 # first the obvious, which looks okay at first glance...
353 /\*.*\*/        -       /*x*/   /*x*/
354 # but...
355 /\*.*\*/        -       /*x*/y/*z*/     /*x*/y/*z*/
356 # okay, we must not match */ inside; try to do that...
357 /\*([^*]|\*[^/])*\*/    -       /*x*/   /*x*/
358 /\*([^*]|\*[^/])*\*/    -       /*x*/y/*z*/     /*x*/
359 # but...
360 /\*([^*]|\*[^/])*\*/    -       /*x**/y/*z*/    /*x**/y/*z*/
361 # and a still fancier version, which does it right (I think)...
362 /\*([^*]|\*+[^*/])*\*+/ -       /*x*/   /*x*/
363 /\*([^*]|\*+[^*/])*\*+/ -       /*x*/y/*z*/     /*x*/
364 /\*([^*]|\*+[^*/])*\*+/ -       /*x**/y/*z*/    /*x**/
365 /\*([^*]|\*+[^*/])*\*+/ -       /*x****/y/*z*/  /*x****/
366 /\*([^*]|\*+[^*/])*\*+/ -       /*x**x*/y/*z*/  /*x**x*/
367 /\*([^*]|\*+[^*/])*\*+/ -       /*x***x/y/*z*/  /*x***x/y/*z*/
368
369 # subexpressions
370 a(b)(c)d        -       abcd    abcd    b,c
371 a(((b)))c       -       abc     abc     b,b,b
372 a(b|(c))d       -       abd     abd     b,-
373 a(b*|c|e)d      -       abbd    abbd    bb
374 a(b*|c|e)d      -       acd     acd     c
375 a(b*|c|e)d      -       ad      ad      @d
376 a(b?)c          -       abc     abc     b
377 a(b?)c          -       ac      ac      @c
378 a(b+)c          -       abc     abc     b
379 a(b+)c          -       abbbc   abbbc   bbb
380 a(b*)c          -       ac      ac      @c
381 (a|ab)(bc([de]+)f|cde)  -       abcdef  abcdef  a,bcdef,de
382 # the regression tester only asks for 9 subexpressions
383 a(b)(c)(d)(e)(f)(g)(h)(i)(j)k   -       abcdefghijk     abcdefghijk     b,c,d,e,f,g,h,i,j
384 a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l        -       abcdefghijkl    abcdefghijkl    b,c,d,e,f,g,h,i,j,k
385 a([bc]?)c       -       abc     abc     b
386 a([bc]?)c       -       ac      ac      @c
387 a([bc]+)c       -       abc     abc     b
388 a([bc]+)c       -       abcc    abcc    bc
389 a([bc]+)bc      -       abcbc   abcbc   bc
390 a(bb+|b)b       -       abb     abb     b
391 a(bbb+|bb+|b)b  -       abb     abb     b
392 a(bbb+|bb+|b)b  -       abbb    abbb    bb
393 a(bbb+|bb+|b)bb -       abbb    abbb    b
394 (.*).*          -       abcdef  abcdef  abcdef
395 ##(a*)*         -       bc      @b      @b
396
397 # do we get the right subexpression when it is used more than once?
398 a(b|c)*d        -       ad      ad      -
399 a(b|c)*d        -       abcd    abcd    c
400 a(b|c)+d        -       abd     abd     b
401 a(b|c)+d        -       abcd    abcd    c
402 a(b|c?)+d       -       ad      ad      @d
403 a(b|c?)+d       -       abcd    abcd    @d
404 a(b|c){0,0}d    -       ad      ad      -
405 a(b|c){0,1}d    -       ad      ad      -
406 a(b|c){0,1}d    -       abd     abd     b
407 a(b|c){0,2}d    -       ad      ad      -
408 a(b|c){0,2}d    -       abcd    abcd    c
409 a(b|c){0,}d     -       ad      ad      -
410 a(b|c){0,}d     -       abcd    abcd    c
411 a(b|c){1,1}d    -       abd     abd     b
412 a(b|c){1,1}d    -       acd     acd     c
413 a(b|c){1,2}d    -       abd     abd     b
414 a(b|c){1,2}d    -       abcd    abcd    c
415 a(b|c){1,}d     -       abd     abd     b
416 a(b|c){1,}d     -       abcd    abcd    c
417 a(b|c){2,2}d    -       acbd    acbd    b
418 a(b|c){2,2}d    -       abcd    abcd    c
419 a(b|c){2,4}d    -       abcd    abcd    c
420 a(b|c){2,4}d    -       abcbd   abcbd   b
421 a(b|c){2,4}d    -       abcbcd  abcbcd  c
422 a(b|c){2,}d     -       abcd    abcd    c
423 a(b|c){2,}d     -       abcbd   abcbd   b
424 ##a(b+|((c)*))+d        -       abd     abd     @d,@d,-
425 ##a(b+|((c)*))+d        -       abcd    abcd    @d,@d,-
426
427 # check out the STARTEND option
428 [abc]           &#      a(b)c   b
429 [abc]           &#      a(d)c
430 [abc]           &#      a(bc)d  b
431 [abc]           &#      a(dc)d  c
432 .               &#      a()c
433 b.*c            &#      b(bc)c  bc
434 b.*             &#      b(bc)c  bc
435 .*c             &#      b(bc)c  bc
436
437 # plain strings, with the NOSPEC flag
438 abc             m       abc     abc
439 abc             m       xabcy   abc
440 abc             m       xyz
441 a*b             m       aba*b   a*b
442 a*b             m       ab
443 ""              mC      EMPTY
444
445 # cases involving NULs
446 aZb             &       a       a
447 aZb             &p      a
448 #aZb            &p#     (aZb)   aZb
449 aZ*b            &p#     (ab)    ab
450 #a.b            &#      (aZb)   aZb
451 #a.*            &#      (aZb)c  aZb
452
453 # word boundaries (ick)
454 [[:<:]]a        &       a       a
455 [[:<:]]a        &       ba
456 [[:<:]]a        &       -a      a
457 a[[:>:]]        &       a       a
458 a[[:>:]]        &       ab
459 a[[:>:]]        &       a-      a
460 [[:<:]]a.c[[:>:]]       &       axcd-dayc-dazce-abc     abc
461 [[:<:]]a.c[[:>:]]       &       axcd-dayc-dazce-abc-q   abc
462 [[:<:]]a.c[[:>:]]       &       axc-dayc-dazce-abc      axc
463 [[:<:]]b.c[[:>:]]       &       a_bxc-byc_d-bzc-q       bzc
464 [[:<:]].x..[[:>:]]      &       y_xa_-_xb_y-_xc_-axdc   _xc_
465 [[:<:]]a_b[[:>:]]       &       x_a_b
466
467 # past problems, and suspected problems
468 (A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])   -       A1      A1
469 abcdefghijklmnop        i       abcdefghijklmnop        abcdefghijklmnop
470 abcdefghijklmnopqrstuv  i       abcdefghijklmnopqrstuv  abcdefghijklmnopqrstuv
471 (ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN])     -       CC11    CC11
472 CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a    -       CC11    CC11
473 Char \([a-z0-9_]*\)\[.* b       Char xyz[k      Char xyz[k      xyz
474 a?b     -       ab      ab
475 -\{0,1\}[0-9]*$ b       -5      -5