contrib/apr/strings/apr_cstr.c

   1 /*    Licensed to the Apache Software Foundation (ASF) under one
   2  *    or more contributor license agreements.  See the NOTICE file
   3  *    distributed with this work for additional information
   4  *    regarding copyright ownership.  The ASF licenses this file
   5  *    to you under the Apache License, Version 2.0 (the
   6  *    "License"); you may not use this file except in compliance
   7  *    with the License.  You may obtain a copy of the License at
   8  *
   9  *      http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  *    Unless required by applicable law or agreed to in writing,
  12  *    software distributed under the License is distributed on an
  13  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14  *    KIND, either express or implied.  See the License for the
  15  *    specific language governing permissions and limitations
  16  *    under the License.
  17  */
  18
  19 #include "apr.h"
  20 #include "apr_lib.h"
  21 #include "apr_strings.h"
  22 #include "apr_fnmatch.h"
  23 #if 0
  24 #define APR_WANT_STDIO
  25 #define APR_WANT_STRFUNC
  26 #endif
  27 #include "apr_want.h"
  28 #include "apr_cstr.h"
  29
  30 APR_DECLARE(void) apr_cstr_split_append(apr_array_header_t *array,
  31                                         const char *input,
  32                                         const char *sep_chars,
  33                                         int chop_whitespace,
  34                                         apr_pool_t *pool)
  35 {
  36   char *pats;
  37   char *p;
  38
  39   pats = apr_pstrdup(pool, input);  /* strtok wants non-const data */
  40   p = apr_cstr_tokenize(sep_chars, &pats);
  41
  42   while (p)
  43     {
  44       if (chop_whitespace)
  45         {
  46           while (apr_isspace(*p))
  47             p++;
  48
  49           {
  50             char *e = p + (strlen(p) - 1);
  51             while ((e >= p) && (apr_isspace(*e)))
  52               e--;
  53             *(++e) = '\0';
  54           }
  55         }
  56
  57       if (p[0] != '\0')
  58         APR_ARRAY_PUSH(array, const char *) = p;
  59
  60       p = apr_cstr_tokenize(sep_chars, &pats);
  61     }
  62
  63   return;
  64 }
  65
  66
  67 APR_DECLARE(apr_array_header_t *) apr_cstr_split(const char *input,
  68                                                  const char *sep_chars,
  69                                                  int chop_whitespace,
  70                                                  apr_pool_t *pool)
  71 {
  72   apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input));
  73   apr_cstr_split_append(a, input, sep_chars, chop_whitespace, pool);
  74   return a;
  75 }
  76
  77
  78 APR_DECLARE(int) apr_cstr_match_glob_list(const char *str,
  79                                           const apr_array_header_t *list)
  80 {
  81   int i;
  82
  83   for (i = 0; i < list->nelts; i++)
  84     {
  85       const char *this_pattern = APR_ARRAY_IDX(list, i, char *);
  86
  87       if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS)
  88         return TRUE;
  89     }
  90
  91   return FALSE;
  92 }
  93
  94 APR_DECLARE(int) apr_cstr_match_list(const char *str,
  95                                      const apr_array_header_t *list)
  96 {
  97   int i;
  98
  99   for (i = 0; i < list->nelts; i++)
 100     {
 101       const char *this_str = APR_ARRAY_IDX(list, i, char *);
 102
 103       if (strcmp(this_str, str) == 0)
 104         return TRUE;
 105     }
 106
 107   return FALSE;
 108 }
 109
 110 APR_DECLARE(char *) apr_cstr_tokenize(const char *sep, char **str)
 111 {
 112     char *token;
 113     char *next;
 114     char csep;
 115
 116     /* check parameters */
 117     if ((sep == NULL) || (str == NULL) || (*str == NULL))
 118         return NULL;
 119
 120     /* let APR handle edge cases and multiple separators */
 121     csep = *sep;
 122     if (csep == '\0' || sep[1] != '\0')
 123       return apr_strtok(NULL, sep, str);
 124
 125     /* skip characters in sep (will terminate at '\0') */
 126     token = *str;
 127     while (*token == csep)
 128         ++token;
 129
 130     if (!*token)          /* no more tokens */
 131         return NULL;
 132
 133     /* skip valid token characters to terminate token and
 134      * prepare for the next call (will terminate at '\0)
 135      */
 136     next = strchr(token, csep);
 137     if (next == NULL)
 138       {
 139         *str = token + strlen(token);
 140       }
 141     else
 142       {
 143         *next = '\0';
 144         *str = next + 1;
 145       }
 146
 147     return token;
 148 }
 149
 150 APR_DECLARE(int) apr_cstr_count_newlines(const char *msg)
 151 {
 152   int count = 0;
 153   const char *p;
 154
 155   for (p = msg; *p; p++)
 156     {
 157       if (*p == '\n')
 158         {
 159           count++;
 160           if (*(p + 1) == '\r')
 161             p++;
 162         }
 163       else if (*p == '\r')
 164         {
 165           count++;
 166           if (*(p + 1) == '\n')
 167             p++;
 168         }
 169     }
 170
 171   return count;
 172 }
 173
 174 #if 0 /* XXX: stringbuf logic is not present in APR */
 175 APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings,
 176                                   const char *separator,
 177                                   apr_pool_t *pool)
 178 {
 179   svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool);
 180   size_t sep_len = strlen(separator);
 181   int i;
 182
 183   for (i = 0; i < strings->nelts; i++)
 184     {
 185       const char *string = APR_ARRAY_IDX(strings, i, const char *);
 186       svn_stringbuf_appendbytes(new_str, string, strlen(string));
 187       svn_stringbuf_appendbytes(new_str, separator, sep_len);
 188     }
 189   return new_str->data;
 190 }
 191 #endif
 192
 193 #if !APR_CHARSET_EBCDIC
 194 /*
 195  * Our own known-fast translation table for casecmp by character.
 196  * Only ASCII alpha characters 41-5A are folded to 61-7A, other
 197  * octets (such as extended latin alphabetics) are never case-folded.
 198  * NOTE: Other than Alpha A-Z/a-z, each code point is unique!
 199  */
 200 static const short ucharmap[] = {
 201     0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,
 202     0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf,
 203     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 204     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
 205     0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
 206     0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
 207     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
 208     0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
 209     0x40,  'a',  'b',  'c',  'd',  'e',  'f',  'g',
 210      'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
 211      'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
 212      'x',  'y',  'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
 213     0x60,  'a',  'b',  'c',  'd',  'e',  'f',  'g',
 214      'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
 215      'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
 216      'x',  'y',  'z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
 217     0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
 218     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
 219     0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
 220     0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
 221     0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
 222     0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
 223     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
 224     0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
 225     0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
 226     0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
 227     0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
 228     0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
 229     0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
 230     0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
 231     0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
 232     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
 233 };
 234 #else /* APR_CHARSET_EBCDIC */
 235 /*
 236  * Derived from apr-iconv/ccs/cp037.c for EBCDIC case comparison,
 237  * provides unique identity of every char value (strict ISO-646
 238  * conformance, arbitrary election of an ISO-8859-1 ordering, and
 239  * very arbitrary control code assignments into C1 to achieve
 240  * identity and a reversible mapping of code points),
 241  * then folding the equivalences of ASCII 41-5A into 61-7A,
 242  * presenting comparison results in a somewhat ISO/IEC 10646
 243  * (ASCII-like) order, depending on the EBCDIC code page in use.
 244  *
 245  * NOTE: Other than Alpha A-Z/a-z, each code point is unique!
 246  */
 247 static const short ucharmap[] = {
 248     0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
 249     0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
 250     0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
 251     0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
 252     0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B,
 253     0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
 254     0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
 255     0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
 256     0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5,
 257     0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
 258     0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF,
 259     0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
 260     0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
 261     0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
 262     0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
 263     0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
 264     0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
 265     0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
 266     0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
 267     0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
 268     0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
 269     0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
 270     0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
 271     0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
 272     0x7B, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
 273     0x68, 0x69, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
 274     0x7D, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
 275     0x71, 0x72, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
 276     0x5C, 0xF7, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
 277     0x79, 0x7A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
 278     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
 279     0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
 280 };
 281 #endif
 282
 283 APR_DECLARE(int) apr_cstr_casecmp(const char *s1, const char *s2)
 284 {
 285     const unsigned char *str1 = (const unsigned char *)s1;
 286     const unsigned char *str2 = (const unsigned char *)s2;
 287     for (;;)
 288     {
 289         const int c1 = (int)(*str1);
 290         const int c2 = (int)(*str2);
 291         const int cmp = ucharmap[c1] - ucharmap[c2];
 292         /* Not necessary to test for !c2, this is caught by cmp */
 293         if (cmp || !c1)
 294             return cmp;
 295         str1++;
 296         str2++;
 297     }
 298 }
 299
 300 APR_DECLARE(int) apr_cstr_casecmpn(const char *s1, const char *s2, apr_size_t n)
 301 {
 302     const unsigned char *str1 = (const unsigned char *)s1;
 303     const unsigned char *str2 = (const unsigned char *)s2;
 304     while (n--)
 305     {
 306         const int c1 = (int)(*str1);
 307         const int c2 = (int)(*str2);
 308         const int cmp = ucharmap[c1] - ucharmap[c2];
 309         /* Not necessary to test for !c2, this is caught by cmp */
 310         if (cmp || !c1)
 311             return cmp;
 312         str1++;
 313         str2++;
 314     }
 315     return 0;
 316 }
 317
 318 APR_DECLARE(apr_status_t) apr_cstr_strtoui64(apr_uint64_t *n, const char *str,
 319                                 apr_uint64_t minval, apr_uint64_t maxval,
 320                                 int base)
 321 {
 322   apr_int64_t val;
 323   char *endptr;
 324
 325   /* We assume errno is thread-safe. */
 326   errno = 0; /* APR-0.9 doesn't always set errno */
 327
 328   /* ### We're throwing away half the number range here.
 329    * ### APR needs a apr_strtoui64() function. */
 330   val = apr_strtoi64(str, &endptr, base);
 331   if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
 332     return APR_EINVAL;
 333   if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
 334       val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval)
 335     return APR_ERANGE;
 336   *n = val;
 337   return APR_SUCCESS;
 338 }
 339
 340 APR_DECLARE(apr_status_t) apr_cstr_atoui64(apr_uint64_t *n, const char *str)
 341 {
 342   return apr_cstr_strtoui64(n, str, 0, APR_UINT64_MAX, 10);
 343 }
 344
 345 APR_DECLARE(apr_status_t) apr_cstr_atoui(unsigned int *n, const char *str)
 346 {
 347   apr_uint64_t val;
 348   apr_status_t rv = apr_cstr_strtoui64(&val, str, 0, APR_UINT32_MAX, 10);
 349   if (rv == APR_SUCCESS)
 350     *n = (unsigned int)val;
 351   return rv;
 352 }
 353
 354 APR_DECLARE(apr_status_t) apr_cstr_strtoi64(apr_int64_t *n, const char *str,
 355                                apr_int64_t minval, apr_int64_t maxval,
 356                                int base)
 357 {
 358   apr_int64_t val;
 359   char *endptr;
 360
 361   /* We assume errno is thread-safe. */
 362   errno = 0; /* APR-0.9 doesn't always set errno */
 363
 364   val = apr_strtoi64(str, &endptr, base);
 365   if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
 366     return APR_EINVAL;
 367   if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
 368       val < minval || val > maxval)
 369     return APR_ERANGE;
 370   *n = val;
 371   return APR_SUCCESS;
 372 }
 373
 374 APR_DECLARE(apr_status_t) apr_cstr_atoi64(apr_int64_t *n, const char *str)
 375 {
 376   return apr_cstr_strtoi64(n, str, APR_INT64_MIN, APR_INT64_MAX, 10);
 377 }
 378
 379 APR_DECLARE(apr_status_t) apr_cstr_atoi(int *n, const char *str)
 380 {
 381   apr_int64_t val;
 382   apr_status_t rv;
 383
 384   rv = apr_cstr_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10);
 385   if (rv == APR_SUCCESS)
 386     *n = (int)val;
 387   return rv;
 388 }
 389
 390 APR_DECLARE(const char *) apr_cstr_skip_prefix(const char *str,
 391                                                const char *prefix)
 392 {
 393   apr_size_t len = strlen(prefix);
 394
 395   if (strncmp(str, prefix, len) == 0)
 396     {
 397       return str + len;
 398     }
 399   else
 400     {
 401       return NULL;
 402     }
 403 }