contrib/bmake/unit-tests/varmod-subst-regex.mk

   1 # $NetBSD: varmod-subst-regex.mk,v 1.5 2020/10/31 12:20:36 rillig Exp $
   2 #
   3 # Tests for the :C,from,to, variable modifier.
   4
   5 all: mod-regex-compile-error  # :C with the regular expression "****"
   6 all: mod-regex-limits  # :C with references to capturing groups
   7 all: mod-regex-errors  # :C with the regular expression "["
   8
   9 # The value is split into 4 words, and :C is applied to each word on its
  10 # own.  None of these words matches the regular expression "a b" since
  11 # the words don't contain any whitespace, so the value stays unchanged.
  12 .if ${:Ua b b c:C,a b,,} != "a b b c"
  13 .  error
  14 .endif
  15
  16 # Adding the '1' modifier does not change anything since no word matches.
  17 # It merely limits :C to at most 1 replacement in the whole expression.
  18 .if ${:Ua b b c:C,a b,,1} != "a b b c"
  19 .  error
  20 .endif
  21
  22 # The 'W' modifier treats the whole variable value as a single big word,
  23 # containing whitespace.  The regular expression matches the "a b" at the
  24 # start of that word, which is removed.  The other whitespace is preserved.
  25 .if ${:Ua b b c:C,a b,,W} != " b c"
  26 .  error
  27 .endif
  28
  29 # The 'g' modifier does not have any effect here since none of the words
  30 # contains the character 'b' more than once.
  31 .if ${:Ua b b c:C,b,,g} != "a c"
  32 .  error
  33 .endif
  34
  35 # The first :C modifier has the 'W' modifier, which makes the whole
  36 # expression a single word.  The 'g' modifier then replaces all occurrences
  37 # of "1 2" with "___".  The 'W' modifier only applies to this single :C
  38 # modifier.  This is demonstrated by the :C modifier that follows.  If the
  39 # 'W' modifier were preserved, only a single underscore would have been
  40 # replaced with an 'x', instead of the first underscore of each word.
  41 .if ${:U1 2 3 1 2 3:C,1 2,___,Wg:C,_,x,} != "x__ 3 x__ 3"
  42 .  error
  43 .endif
  44
  45 # The regular expression does not match in the first word.
  46 # It matches once in the second word, and the \0\0 doubles that word.
  47 # In the third word, the regular expression matches as early as possible,
  48 # at the "45".  Without the 'g' modifier, only this first match is
  49 # replaced.  Even with 'g', there would be no further match since matches
  50 # must not overlap and the single remaining "6" cannot match "..".
  51 # Therefore only the "45" is doubled in the third word.
  52 .if ${:U1 23 456:C,..,\0\0,} != "1 2323 45456"
  53 .  error
  54 .endif
  55
  56 # The modifier '1' applies the replacement at most once, across the whole
  57 # expression value, no matter whether it is a single big word or many small
  58 # words.  Here, only the first character of the first word is doubled.
  59 #
  60 # Up to 2020-08-28, the manual page said that the modifiers '1' and 'g'
  61 # were orthogonal, which was wrong.  It doesn't make sense to specify both
  62 # 'g' and '1' at the same time.
  63 .if ${:U12345 12345:C,.,\0\0,1} != "112345 12345"
  64 .  error
  65 .endif
  66
  67 # A regular expression that matches the empty string applies before every
  68 # single character of the word, but not at the very end of the word.
  69 # XXX: Most other places where regular expressions are used match at the
  70 # end of the string as well.
  71 .if ${:U1a2b3c:C,a*,*,g} != "*1**2*b*3*c"
  72 .  error
  73 .endif
  74
  75 # A dot in the regular expression matches any character, even a newline,
  76 # so all 3 characters of the value (quote, newline, quote) are replaced.
  77 # In most other contexts, a dot does not match a newline.  In make, regcomp
  78 # is called without REG_NEWLINE, thus newline is an ordinary character.
  79 .if ${:U"${.newline}":C,.,.,g} != "..."
  80 .  error
  81 .endif
  82
  83 # Multiple asterisks form an invalid regular expression.  This produces an
  84 # error message and (as of 2020-08-28) stops parsing in the middle of the
  85 # variable expression.  The unparsed part of the expression is then copied
  86 # verbatim to the output, which is unexpected and can lead to strange shell
  87 # commands being run.  The $@ labels the output with the target name.
  88 mod-regex-compile-error:
  89         @echo $@: ${:Uword1 word2:C,****,____,g:C,word,____,:Q}.
  90
  91 # The '-missing' tests refer to undefined capturing groups; as of 2020-08-28
  92 # they generate error messages but otherwise continue as if nothing happened.
  93 mod-regex-limits:
  94         @echo $@:11-missing:${:U1 23 456:C,..,\1\1,:Q}
  95         @echo $@:11-ok:${:U1 23 456:C,(.).,\1\1,:Q}
  96         @echo $@:22-missing:${:U1 23 456:C,..,\2\2,:Q}
  97         @echo $@:22-missing:${:U1 23 456:C,(.).,\2\2,:Q}
  98         @echo $@:22-ok:${:U1 23 456:C,(.)(.),\2\2,:Q}
  99         # The :C modifier only handles single-digit capturing groups,
 100         # thus \10 is \1 followed by a '0'.  This is enough for daily use.
 101         @echo $@:capture:${:UabcdefghijABCDEFGHIJrest:C,(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.),\9\8\7\6\5\4\3\2\1\0\10\11\12,}
 102
 103 mod-regex-errors:  # "[" opens a bracket expression that is never closed
 104         @echo $@: ${UNDEF:Uvalue:C,[,,}