contrib/bmake/str.c

   1 /*      $NetBSD: str.c,v 1.64 2020/08/30 19:56:02 rillig Exp $  */
   2
   3 /*-
   4  * Copyright (c) 1988, 1989, 1990, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * This code is derived from software contributed to Berkeley by
   8  * Adam de Boor.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 /*-
  36  * Copyright (c) 1989 by Berkeley Softworks
  37  * All rights reserved.
  38  *
  39  * This code is derived from software contributed to Berkeley by
  40  * Adam de Boor.
  41  *
  42  * Redistribution and use in source and binary forms, with or without
  43  * modification, are permitted provided that the following conditions
  44  * are met:
  45  * 1. Redistributions of source code must retain the above copyright
  46  *    notice, this list of conditions and the following disclaimer.
  47  * 2. Redistributions in binary form must reproduce the above copyright
  48  *    notice, this list of conditions and the following disclaimer in the
  49  *    documentation and/or other materials provided with the distribution.
  50  * 3. All advertising materials mentioning features or use of this software
  51  *    must display the following acknowledgement:
  52  *      This product includes software developed by the University of
  53  *      California, Berkeley and its contributors.
  54  * 4. Neither the name of the University nor the names of its contributors
  55  *    may be used to endorse or promote products derived from this software
  56  *    without specific prior written permission.
  57  *
  58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  68  * SUCH DAMAGE.
  69  */
  70
   71 #ifndef MAKE_NATIVE /* MAKE_NATIVE not defined: keep the RCS id in a plain static string */
   72 static char rcsid[] = "$NetBSD: str.c,v 1.64 2020/08/30 19:56:02 rillig Exp $";
   73 #else /* MAKE_NATIVE defined: include <sys/cdefs.h> and record the id with __RCSID */
   74 #include <sys/cdefs.h>
   75 #ifndef lint /* the id is only emitted when lint is not defined */
   76 #if 0 /* disabled SCCS id string; this branch is never compiled */
   77 static char     sccsid[] = "@(#)str.c   5.8 (Berkeley) 6/1/90";
   78 #else
   79 __RCSID("$NetBSD: str.c,v 1.64 2020/08/30 19:56:02 rillig Exp $");
   80 #endif
   81 #endif                          /* not lint */
   82 #endif /* MAKE_NATIVE */
  83
  84 #include "make.h"
  85
  86 /* Return the concatenation of s1 and s2, freshly allocated. */
  87 char *
  88 str_concat2(const char *s1, const char *s2)
  89 {
  90         size_t len1 = strlen(s1);
  91         size_t len2 = strlen(s2);
  92         char *result = bmake_malloc(len1 + len2 + 1);
  93         memcpy(result, s1, len1);
  94         memcpy(result + len1, s2, len2 + 1);
  95         return result;
  96 }
  97
  98 /* Return the concatenation of s1, s2 and s3, freshly allocated. */
  99 char *
 100 str_concat3(const char *s1, const char *s2, const char *s3)
 101 {
 102         size_t len1 = strlen(s1);
 103         size_t len2 = strlen(s2);
 104         size_t len3 = strlen(s3);
 105         char *result = bmake_malloc(len1 + len2 + len3 + 1);
 106         memcpy(result, s1, len1);
 107         memcpy(result + len1, s2, len2);
 108         memcpy(result + len1 + len2, s3, len3 + 1);
 109         return result;
 110 }
 111
 112 /* Return the concatenation of s1, s2, s3 and s4, freshly allocated. */
 113 char *
 114 str_concat4(const char *s1, const char *s2, const char *s3, const char *s4)
 115 {
 116         size_t len1 = strlen(s1);
 117         size_t len2 = strlen(s2);
 118         size_t len3 = strlen(s3);
 119         size_t len4 = strlen(s4);
 120         char *result = bmake_malloc(len1 + len2 + len3 + len4 + 1);
 121         memcpy(result, s1, len1);
 122         memcpy(result + len1, s2, len2);
 123         memcpy(result + len1 + len2, s3, len3);
 124         memcpy(result + len1 + len2 + len3, s4, len4 + 1);
 125         return result;
 126 }
 127
 128 /* Fracture a string into an array of words (as delineated by tabs or spaces)
 129  * taking quotation marks into account.  Leading tabs/spaces are ignored.
 130  *
 131  * If expand is TRUE, quotes are removed and escape sequences such as \r, \t,
 132  * etc... are expanded. In this case, the return value is NULL on parse
 133  * errors.
 134  *
 135  * Returns the fractured words, which must be freed later using Words_Free.
 136  * If expand was TRUE and there was a parse error, words is NULL, and in that
 137  * case, nothing needs to be freed.
 138  */
 139 Words
 140 Str_Words(const char *str, Boolean expand)
 141 {
 142         size_t str_len;
 143         char *words_buf;
 144         size_t words_cap;
 145         char **words;
 146         size_t words_len;
 147         char inquote;
 148         char *word_start;
 149         char *word_end;
 150         const char *str_p;
 151
 152         /* skip leading space chars. */
 153         for (; *str == ' ' || *str == '\t'; ++str)
 154                 continue;
 155
 156         /* words_buf holds the words, separated by '\0'. */
 157         str_len = strlen(str);
 158         words_buf = bmake_malloc(strlen(str) + 1);
 159
 160         words_cap = MAX((str_len / 5), 50);
 161         words = bmake_malloc((words_cap + 1) * sizeof(char *));
 162
 163         /*
 164          * copy the string; at the same time, parse backslashes,
 165          * quotes and build the word list.
 166          */
 167         words_len = 0;
 168         inquote = '\0';
 169         word_start = words_buf;
 170         word_end = words_buf;
 171         for (str_p = str;; ++str_p) {
 172                 char ch = *str_p;
 173                 switch (ch) {
 174                 case '"':
 175                 case '\'':
 176                         if (inquote) {
 177                                 if (inquote == ch)
 178                                         inquote = '\0';
 179                                 else
 180                                         break;
 181                         } else {
 182                                 inquote = (char)ch;
 183                                 /* Don't miss "" or '' */
 184                                 if (word_start == NULL && str_p[1] == inquote) {
 185                                         if (!expand) {
 186                                                 word_start = word_end;
 187                                                 *word_end++ = ch;
 188                                         } else
 189                                                 word_start = word_end + 1;
 190                                         str_p++;
 191                                         inquote = '\0';
 192                                         break;
 193                                 }
 194                         }
 195                         if (!expand) {
 196                                 if (word_start == NULL)
 197                                         word_start = word_end;
 198                                 *word_end++ = ch;
 199                         }
 200                         continue;
 201                 case ' ':
 202                 case '\t':
 203                 case '\n':
 204                         if (inquote)
 205                                 break;
 206                         if (word_start == NULL)
 207                                 continue;
 208                         /* FALLTHROUGH */
 209                 case '\0':
 210                         /*
 211                          * end of a token -- make sure there's enough words
 212                          * space and save off a pointer.
 213                          */
 214                         if (word_start == NULL)
 215                                 goto done;
 216
 217                         *word_end++ = '\0';
 218                         if (words_len == words_cap) {
 219                                 size_t new_size;
 220                                 words_cap *= 2;         /* ramp up fast */
 221                                 new_size = (words_cap + 1) * sizeof(char *);
 222                                 words = bmake_realloc(words, new_size);
 223                         }
 224                         words[words_len++] = word_start;
 225                         word_start = NULL;
 226                         if (ch == '\n' || ch == '\0') {
 227                                 if (expand && inquote) {
 228                                         free(words);
 229                                         free(words_buf);
 230                                         return (Words){ NULL, 0, NULL };
 231                                 }
 232                                 goto done;
 233                         }
 234                         continue;
 235                 case '\\':
 236                         if (!expand) {
 237                                 if (word_start == NULL)
 238                                         word_start = word_end;
 239                                 *word_end++ = '\\';
 240                                 /* catch '\' at end of line */
 241                                 if (str_p[1] == '\0')
 242                                         continue;
 243                                 ch = *++str_p;
 244                                 break;
 245                         }
 246
 247                         switch (ch = *++str_p) {
 248                         case '\0':
 249                         case '\n':
 250                                 /* hmmm; fix it up as best we can */
 251                                 ch = '\\';
 252                                 --str_p;
 253                                 break;
 254                         case 'b':
 255                                 ch = '\b';
 256                                 break;
 257                         case 'f':
 258                                 ch = '\f';
 259                                 break;
 260                         case 'n':
 261                                 ch = '\n';
 262                                 break;
 263                         case 'r':
 264                                 ch = '\r';
 265                                 break;
 266                         case 't':
 267                                 ch = '\t';
 268                                 break;
 269                         }
 270                         break;
 271                 }
 272                 if (word_start == NULL)
 273                         word_start = word_end;
 274                 *word_end++ = ch;
 275         }
 276 done:
 277         words[words_len] = NULL;
 278         return (Words){ words, words_len, words_buf };
 279 }
 280
 281 /*
 282  * Str_FindSubstring -- See if a string contains a particular substring.
 283  *
 284  * Input:
 285  *      string          String to search.
 286  *      substring       Substring to find in string.
 287  *
 288  * Results: If string contains substring, the return value is the location of
 289  * the first matching instance of substring in string.  If string doesn't
 290  * contain substring, the return value is NULL.  Matching is done on an exact
 291  * character-for-character basis with no wildcards or special characters.
 292  *
 293  * Side effects: None.
 294  */
 295 char *
 296 Str_FindSubstring(const char *string, const char *substring)
 297 {
 298         const char *a, *b;
 299
 300         /*
 301          * First scan quickly through the two strings looking for a single-
 302          * character match.  When it's found, then compare the rest of the
 303          * substring.
 304          */
 305
 306         for (b = substring; *string != 0; string++) {
 307                 if (*string != *b)
 308                         continue;
 309                 a = string;
 310                 for (;;) {
 311                         if (*b == 0)
 312                                 return UNCONST(string);
 313                         if (*a++ != *b++)
 314                                 break;
 315                 }
 316                 b = substring;
 317         }
 318         return NULL;
 319 }
 320
 321 /*
 322  * Str_Match -- Test if a string matches a pattern like "*.[ch]".
 323  *
 324  * XXX this function does not detect or report malformed patterns.
 325  *
 326  * Results:
 327  *      Non-zero is returned if string matches the pattern, 0 otherwise. The
 328  *      matching operation permits the following special characters in the
 329  *      pattern: *?\[] (as in fnmatch(3)).
 330  *
 331  * Side effects: None.
 332  */
 333 Boolean
 334 Str_Match(const char *str, const char *pat)
 335 {
 336         for (;;) {
 337                 /*
 338                  * See if we're at the end of both the pattern and the
 339                  * string. If, we succeeded.  If we're at the end of the
 340                  * pattern but not at the end of the string, we failed.
 341                  */
 342                 if (*pat == 0)
 343                         return *str == 0;
 344                 if (*str == 0 && *pat != '*')
 345                         return FALSE;
 346
 347                 /*
 348                  * A '*' in the pattern matches any substring.  We handle this
 349                  * by calling ourselves for each suffix of the string.
 350                  */
 351                 if (*pat == '*') {
 352                         pat++;
 353                         while (*pat == '*')
 354                                 pat++;
 355                         if (*pat == 0)
 356                                 return TRUE;
 357                         while (*str != 0) {
 358                                 if (Str_Match(str, pat))
 359                                         return TRUE;
 360                                 str++;
 361                         }
 362                         return FALSE;
 363                 }
 364
 365                 /* A '?' in the pattern matches any single character. */
 366                 if (*pat == '?')
 367                         goto thisCharOK;
 368
 369                 /*
 370                  * A '[' in the pattern matches a character from a list.
 371                  * The '[' is followed by the list of acceptable characters,
 372                  * or by ranges (two characters separated by '-'). In these
 373                  * character lists, the backslash is an ordinary character.
 374                  */
 375                 if (*pat == '[') {
 376                         Boolean neg = pat[1] == '^';
 377                         pat += neg ? 2 : 1;
 378
 379                         for (;;) {
 380                                 if (*pat == ']' || *pat == 0) {
 381                                         if (neg)
 382                                                 break;
 383                                         return FALSE;
 384                                 }
 385                                 if (*pat == *str)
 386                                         break;
 387                                 if (pat[1] == '-') {
 388                                         if (pat[2] == 0)
 389                                                 return neg;
 390                                         if (*pat <= *str && pat[2] >= *str)
 391                                                 break;
 392                                         if (*pat >= *str && pat[2] <= *str)
 393                                                 break;
 394                                         pat += 2;
 395                                 }
 396                                 pat++;
 397                         }
 398                         if (neg && *pat != ']' && *pat != 0)
 399                                 return FALSE;
 400                         while (*pat != ']' && *pat != 0)
 401                                 pat++;
 402                         if (*pat == 0)
 403                                 pat--;
 404                         goto thisCharOK;
 405                 }
 406
 407                 /*
 408                  * A backslash in the pattern matches the character following
 409                  * it exactly.
 410                  */
 411                 if (*pat == '\\') {
 412                         pat++;
 413                         if (*pat == 0)
 414                                 return FALSE;
 415                 }
 416
 417                 if (*pat != *str)
 418                         return FALSE;
 419
 420         thisCharOK:
 421                 pat++;
 422                 str++;
 423         }
 424 }