contrib/bmake/str.c

   1 /*      $NetBSD: str.c,v 1.74 2020/11/16 18:28:27 rillig Exp $  */
   2
   3 /*-
   4  * Copyright (c) 1988, 1989, 1990, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * This code is derived from software contributed to Berkeley by
   8  * Adam de Boor.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 /*-
  36  * Copyright (c) 1989 by Berkeley Softworks
  37  * All rights reserved.
  38  *
  39  * This code is derived from software contributed to Berkeley by
  40  * Adam de Boor.
  41  *
  42  * Redistribution and use in source and binary forms, with or without
  43  * modification, are permitted provided that the following conditions
  44  * are met:
  45  * 1. Redistributions of source code must retain the above copyright
  46  *    notice, this list of conditions and the following disclaimer.
  47  * 2. Redistributions in binary form must reproduce the above copyright
  48  *    notice, this list of conditions and the following disclaimer in the
  49  *    documentation and/or other materials provided with the distribution.
  50  * 3. All advertising materials mentioning features or use of this software
  51  *    must display the following acknowledgement:
  52  *      This product includes software developed by the University of
  53  *      California, Berkeley and its contributors.
  54  * 4. Neither the name of the University nor the names of its contributors
  55  *    may be used to endorse or promote products derived from this software
  56  *    without specific prior written permission.
  57  *
  58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  68  * SUCH DAMAGE.
  69  */
  70
  71 #include "make.h"
  72
  73 /*      "@(#)str.c      5.8 (Berkeley) 6/1/90"  */
  74 MAKE_RCSID("$NetBSD: str.c,v 1.74 2020/11/16 18:28:27 rillig Exp $");
  75
  76 /* Return the concatenation of s1 and s2, freshly allocated. */
  77 char *
  78 str_concat2(const char *s1, const char *s2)
  79 {
  80         size_t len1 = strlen(s1);
  81         size_t len2 = strlen(s2);
  82         char *result = bmake_malloc(len1 + len2 + 1);
  83         memcpy(result, s1, len1);
  84         memcpy(result + len1, s2, len2 + 1);
  85         return result;
  86 }
  87
  88 /* Return the concatenation of s1, s2 and s3, freshly allocated. */
  89 char *
  90 str_concat3(const char *s1, const char *s2, const char *s3)
  91 {
  92         size_t len1 = strlen(s1);
  93         size_t len2 = strlen(s2);
  94         size_t len3 = strlen(s3);
  95         char *result = bmake_malloc(len1 + len2 + len3 + 1);
  96         memcpy(result, s1, len1);
  97         memcpy(result + len1, s2, len2);
  98         memcpy(result + len1 + len2, s3, len3 + 1);
  99         return result;
 100 }
 101
 102 /* Return the concatenation of s1, s2, s3 and s4, freshly allocated. */
 103 char *
 104 str_concat4(const char *s1, const char *s2, const char *s3, const char *s4)
 105 {
 106         size_t len1 = strlen(s1);
 107         size_t len2 = strlen(s2);
 108         size_t len3 = strlen(s3);
 109         size_t len4 = strlen(s4);
 110         char *result = bmake_malloc(len1 + len2 + len3 + len4 + 1);
 111         memcpy(result, s1, len1);
 112         memcpy(result + len1, s2, len2);
 113         memcpy(result + len1 + len2, s3, len3);
 114         memcpy(result + len1 + len2 + len3, s4, len4 + 1);
 115         return result;
 116 }
 117
 118 /* Fracture a string into an array of words (as delineated by tabs or spaces)
 119  * taking quotation marks into account.
 120  *
 121  * If expand is TRUE, quotes are removed and escape sequences such as \r, \t,
 122  * etc... are expanded. In this case, return NULL on parse errors.
 123  *
 124  * Returns the fractured words, which must be freed later using Words_Free,
 125  * unless the returned Words.words was NULL.
 126  */
 127 Words
 128 Str_Words(const char *str, Boolean expand)
 129 {
 130         size_t str_len;
 131         char *words_buf;
 132         size_t words_cap;
 133         char **words;
 134         size_t words_len;
 135         char inquote;
 136         char *word_start;
 137         char *word_end;
 138         const char *str_p;
 139
 140         /* XXX: why only hspace, not whitespace? */
 141         cpp_skip_hspace(&str);  /* skip leading space chars. */
 142
 143         /* words_buf holds the words, separated by '\0'. */
 144         str_len = strlen(str);
 145         words_buf = bmake_malloc(strlen(str) + 1);
 146
 147         words_cap = str_len / 5 > 50 ? str_len / 5 : 50;
 148         words = bmake_malloc((words_cap + 1) * sizeof(char *));
 149
 150         /*
 151          * copy the string; at the same time, parse backslashes,
 152          * quotes and build the word list.
 153          */
 154         words_len = 0;
 155         inquote = '\0';
 156         word_start = words_buf;
 157         word_end = words_buf;
 158         for (str_p = str;; ++str_p) {
 159                 char ch = *str_p;
 160                 switch (ch) {
 161                 case '"':
 162                 case '\'':
 163                         if (inquote) {
 164                                 if (inquote == ch)
 165                                         inquote = '\0';
 166                                 else
 167                                         break;
 168                         } else {
 169                                 inquote = ch;
 170                                 /* Don't miss "" or '' */
 171                                 if (word_start == NULL && str_p[1] == inquote) {
 172                                         if (!expand) {
 173                                                 word_start = word_end;
 174                                                 *word_end++ = ch;
 175                                         } else
 176                                                 word_start = word_end + 1;
 177                                         str_p++;
 178                                         inquote = '\0';
 179                                         break;
 180                                 }
 181                         }
 182                         if (!expand) {
 183                                 if (word_start == NULL)
 184                                         word_start = word_end;
 185                                 *word_end++ = ch;
 186                         }
 187                         continue;
 188                 case ' ':
 189                 case '\t':
 190                 case '\n':
 191                         if (inquote)
 192                                 break;
 193                         if (word_start == NULL)
 194                                 continue;
 195                         /* FALLTHROUGH */
 196                 case '\0':
 197                         /*
 198                          * end of a token -- make sure there's enough words
 199                          * space and save off a pointer.
 200                          */
 201                         if (word_start == NULL)
 202                                 goto done;
 203
 204                         *word_end++ = '\0';
 205                         if (words_len == words_cap) {
 206                                 size_t new_size;
 207                                 words_cap *= 2;         /* ramp up fast */
 208                                 new_size = (words_cap + 1) * sizeof(char *);
 209                                 words = bmake_realloc(words, new_size);
 210                         }
 211                         words[words_len++] = word_start;
 212                         word_start = NULL;
 213                         if (ch == '\n' || ch == '\0') {
 214                                 if (expand && inquote) {
 215                                         free(words);
 216                                         free(words_buf);
 217                                         return (Words){ NULL, 0, NULL };
 218                                 }
 219                                 goto done;
 220                         }
 221                         continue;
 222                 case '\\':
 223                         if (!expand) {
 224                                 if (word_start == NULL)
 225                                         word_start = word_end;
 226                                 *word_end++ = '\\';
 227                                 /* catch '\' at end of line */
 228                                 if (str_p[1] == '\0')
 229                                         continue;
 230                                 ch = *++str_p;
 231                                 break;
 232                         }
 233
 234                         switch (ch = *++str_p) {
 235                         case '\0':
 236                         case '\n':
 237                                 /* hmmm; fix it up as best we can */
 238                                 ch = '\\';
 239                                 str_p--;
 240                                 break;
 241                         case 'b':
 242                                 ch = '\b';
 243                                 break;
 244                         case 'f':
 245                                 ch = '\f';
 246                                 break;
 247                         case 'n':
 248                                 ch = '\n';
 249                                 break;
 250                         case 'r':
 251                                 ch = '\r';
 252                                 break;
 253                         case 't':
 254                                 ch = '\t';
 255                                 break;
 256                         }
 257                         break;
 258                 }
 259                 if (word_start == NULL)
 260                         word_start = word_end;
 261                 *word_end++ = ch;
 262         }
 263 done:
 264         words[words_len] = NULL;        /* useful for argv */
 265         return (Words){ words, words_len, words_buf };
 266 }
 267
 268 /*
 269  * Str_Match -- Test if a string matches a pattern like "*.[ch]".
 270  * The following special characters are known *?\[] (as in fnmatch(3)).
 271  *
 272  * XXX: this function does not detect or report malformed patterns.
 273  */
 274 Boolean
 275 Str_Match(const char *str, const char *pat)
 276 {
 277         for (;;) {
 278                 /*
 279                  * See if we're at the end of both the pattern and the
 280                  * string. If so, we succeeded.  If we're at the end of the
 281                  * pattern but not at the end of the string, we failed.
 282                  */
 283                 if (*pat == '\0')
 284                         return *str == '\0';
 285                 if (*str == '\0' && *pat != '*')
 286                         return FALSE;
 287
 288                 /*
 289                  * A '*' in the pattern matches any substring.  We handle this
 290                  * by calling ourselves for each suffix of the string.
 291                  */
 292                 if (*pat == '*') {
 293                         pat++;
 294                         while (*pat == '*')
 295                                 pat++;
 296                         if (*pat == '\0')
 297                                 return TRUE;
 298                         while (*str != '\0') {
 299                                 if (Str_Match(str, pat))
 300                                         return TRUE;
 301                                 str++;
 302                         }
 303                         return FALSE;
 304                 }
 305
 306                 /* A '?' in the pattern matches any single character. */
 307                 if (*pat == '?')
 308                         goto thisCharOK;
 309
 310                 /*
 311                  * A '[' in the pattern matches a character from a list.
 312                  * The '[' is followed by the list of acceptable characters,
 313                  * or by ranges (two characters separated by '-'). In these
 314                  * character lists, the backslash is an ordinary character.
 315                  */
 316                 if (*pat == '[') {
 317                         Boolean neg = pat[1] == '^';
 318                         pat += neg ? 2 : 1;
 319
 320                         for (;;) {
 321                                 if (*pat == ']' || *pat == '\0') {
 322                                         if (neg)
 323                                                 break;
 324                                         return FALSE;
 325                                 }
 326                                 /* XXX: This naive comparison makes the parser
 327                                  * for the pattern dependent on the actual of
 328                                  * the string.  This is unpredictable. */
 329                                 if (*pat == *str)
 330                                         break;
 331                                 if (pat[1] == '-') {
 332                                         if (pat[2] == '\0')
 333                                                 return neg;
 334                                         if (*pat <= *str && pat[2] >= *str)
 335                                                 break;
 336                                         if (*pat >= *str && pat[2] <= *str)
 337                                                 break;
 338                                         pat += 2;
 339                                 }
 340                                 pat++;
 341                         }
 342                         if (neg && *pat != ']' && *pat != '\0')
 343                                 return FALSE;
 344                         while (*pat != ']' && *pat != '\0')
 345                                 pat++;
 346                         if (*pat == '\0')
 347                                 pat--;
 348                         goto thisCharOK;
 349                 }
 350
 351                 /*
 352                  * A backslash in the pattern matches the character following
 353                  * it exactly.
 354                  */
 355                 if (*pat == '\\') {
 356                         pat++;
 357                         if (*pat == '\0')
 358                                 return FALSE;
 359                 }
 360
 361                 if (*pat != *str)
 362                         return FALSE;
 363
 364         thisCharOK:
 365                 pat++;
 366                 str++;
 367         }
 368 }