contrib/subversion/subversion/libsvn_subr/string.c

   1 /*
   2  * string.c:  routines to manipulate counted-length strings
   3  *            (svn_stringbuf_t and svn_string_t) and C strings.
   4  *
   5  *
   6  * ====================================================================
   7  *    Licensed to the Apache Software Foundation (ASF) under one
   8  *    or more contributor license agreements.  See the NOTICE file
   9  *    distributed with this work for additional information
  10  *    regarding copyright ownership.  The ASF licenses this file
  11  *    to you under the Apache License, Version 2.0 (the
  12  *    "License"); you may not use this file except in compliance
  13  *    with the License.  You may obtain a copy of the License at
  14  *
  15  *      http://www.apache.org/licenses/LICENSE-2.0
  16  *
  17  *    Unless required by applicable law or agreed to in writing,
  18  *    software distributed under the License is distributed on an
  19  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  20  *    KIND, either express or implied.  See the License for the
  21  *    specific language governing permissions and limitations
  22  *    under the License.
  23  * ====================================================================
  24  */
  25
  26
  27 \f
  28 #include <apr.h>
  29 #include <assert.h>
  30
  31 #include <string.h>      /* for memcpy(), memcmp(), strlen() */
  32 #include <apr_fnmatch.h>
  33 #include "svn_string.h"  /* loads "svn_types.h" and <apr_pools.h> */
  34 #include "svn_ctype.h"
  35 #include "private/svn_dep_compat.h"
  36 #include "private/svn_string_private.h"
  37
  38 #include "svn_private_config.h"
  39
  40 \f
  41
  42 /* Allocate the space for a memory buffer from POOL.
  43  * Return a pointer to the new buffer in *DATA and its size in *SIZE.
  44  * The buffer size will be at least MINIMUM_SIZE.
  45  *
  46  * N.B.: The stringbuf creation functions use this, but since stringbufs
  47  *       always consume at least 1 byte for the NUL terminator, the
  48  *       resulting data pointers will never be NULL.
  49  */
  50 static APR_INLINE void
  51 membuf_create(void **data, apr_size_t *size,
  52               apr_size_t minimum_size, apr_pool_t *pool)
  53 {
  54   /* apr_palloc will allocate multiples of 8.
  55    * Thus, we would waste some of that memory if we stuck to the
  56    * smaller size. Note that this is safe even if apr_palloc would
  57    * use some other alignment or none at all. */
  58   minimum_size = APR_ALIGN_DEFAULT(minimum_size);
  59   *data = apr_palloc(pool, minimum_size);
  60   *size = minimum_size;
  61 }
  62
  63 /* Ensure that the size of a given memory buffer is at least MINIMUM_SIZE
  64  * bytes. If *SIZE is already greater than or equal to MINIMUM_SIZE,
  65  * this function does nothing.
  66  *
  67  * If *SIZE is 0, the allocated buffer size will be MINIMUM_SIZE
  68  * rounded up to the nearest APR alignment boundary. Otherwse, *SIZE
  69  * will be multiplied by a power of two such that the result is
  70  * greater or equal to MINIMUM_SIZE. The pointer to the new buffer
  71  * will be returned in *DATA, and its size in *SIZE.
  72  */
  73 static APR_INLINE void
  74 membuf_ensure(void **data, apr_size_t *size,
  75               apr_size_t minimum_size, apr_pool_t *pool)
  76 {
  77   if (minimum_size > *size)
  78     {
  79       apr_size_t new_size = *size;
  80
  81       if (new_size == 0)
  82         new_size = minimum_size;
  83       else
  84         while (new_size < minimum_size)
  85           {
  86             const apr_size_t prev_size = new_size;
  87             new_size *= 2;
  88
  89             /* check for apr_size_t overflow */
  90             if (prev_size > new_size)
  91               {
  92                 new_size = minimum_size;
  93                 break;
  94               }
  95           }
  96
  97       membuf_create(data, size, new_size, pool);
  98     }
  99 }
 100
 101 void
 102 svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool)
 103 {
 104   membuf_create(&membuf->data, &membuf->size, size, pool);
 105   membuf->pool = pool;
 106 }
 107
 108 void
 109 svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size)
 110 {
 111   membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
 112 }
 113
 114 void
 115 svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size)
 116 {
 117   const void *const old_data = membuf->data;
 118   const apr_size_t old_size = membuf->size;
 119
 120   membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
 121
 122   /* If we re-allocated MEMBUF->DATA, it cannot be NULL.
 123    * Statically initialized membuffers (OLD_DATA) may be NULL, though. */
 124   if (old_data && old_data != membuf->data)
 125     memcpy(membuf->data, old_data, old_size);
 126 }
 127
/* Always provide an out-of-line implementation of svn_membuf__zero.
 * The #undef drops any macro of the same name so that the definition
 * below is compiled as a real function. */
#undef svn_membuf__zero
void
svn_membuf__zero(svn_membuf_t *membuf)
{
  /* All work is delegated to the SVN_MEMBUF__ZERO macro, which is
   * defined outside this file (presumably it clears MEMBUF's entire
   * buffer -- confirm against its definition). */
  SVN_MEMBUF__ZERO(membuf);
}
 135
/* Always provide an out-of-line implementation of svn_membuf__nzero.
 * The #undef drops any macro of the same name so that the definition
 * below is compiled as a real function. */
#undef svn_membuf__nzero
void
svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size)
{
  /* All work is delegated to the SVN_MEMBUF__NZERO macro, which is
   * defined outside this file (presumably it clears the first SIZE
   * bytes of MEMBUF -- confirm against its definition). */
  SVN_MEMBUF__NZERO(membuf, size);
}
 143
 144 static APR_INLINE svn_boolean_t
 145 string_compare(const char *str1,
 146                const char *str2,
 147                apr_size_t len1,
 148                apr_size_t len2)
 149 {
 150   /* easy way out :)  */
 151   if (len1 != len2)
 152     return FALSE;
 153
 154   /* now the strings must have identical lengths */
 155
 156   if ((memcmp(str1, str2, len1)) == 0)
 157     return TRUE;
 158   else
 159     return FALSE;
 160 }
 161
 162 static APR_INLINE apr_size_t
 163 string_first_non_whitespace(const char *str, apr_size_t len)
 164 {
 165   apr_size_t i;
 166
 167   for (i = 0; i < len; i++)
 168     {
 169       if (! svn_ctype_isspace(str[i]))
 170         return i;
 171     }
 172
 173   /* if we get here, then the string must be entirely whitespace */
 174   return len;
 175 }
 176
 177 static APR_INLINE apr_size_t
 178 find_char_backward(const char *str, apr_size_t len, char ch)
 179 {
 180   apr_size_t i = len;
 181
 182   while (i != 0)
 183     {
 184       if (str[--i] == ch)
 185         return i;
 186     }
 187
 188   /* char was not found, return len */
 189   return len;
 190 }
 191
 192 \f
 193 /* svn_string functions */
 194
 195 /* Return a new svn_string_t object, allocated in POOL, initialized with
 196  * DATA and SIZE.  Do not copy the contents of DATA, just store the pointer.
 197  * SIZE is the length in bytes of DATA, excluding the required NUL
 198  * terminator. */
 199 static svn_string_t *
 200 create_string(const char *data, apr_size_t size,
 201               apr_pool_t *pool)
 202 {
 203   svn_string_t *new_string;
 204
 205   new_string = apr_palloc(pool, sizeof(*new_string));
 206
 207   new_string->data = data;
 208   new_string->len = size;
 209
 210   return new_string;
 211 }
 212
 213 /* A data buffer for a zero-length string (just a null terminator).  Many
 214  * svn_string_t instances may share this same buffer. */
 215 static const char empty_buffer[1] = {0};
 216
 217 svn_string_t *
 218 svn_string_create_empty(apr_pool_t *pool)
 219 {
 220   svn_string_t *new_string = apr_palloc(pool, sizeof(*new_string));
 221   new_string->data = empty_buffer;
 222   new_string->len = 0;
 223
 224   return new_string;
 225 }
 226
 227
 228 svn_string_t *
 229 svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
 230 {
 231   void *mem;
 232   char *data;
 233   svn_string_t *new_string;
 234
 235   /* Allocate memory for svn_string_t and data in one chunk. */
 236   mem = apr_palloc(pool, sizeof(*new_string) + size + 1);
 237   data = (char*)mem + sizeof(*new_string);
 238
 239   new_string = mem;
 240   new_string->data = data;
 241   new_string->len = size;
 242
 243   /* If SIZE is 0, NULL is valid for BYTES. */
 244   if (size)
 245     memcpy(data, bytes, size);
 246
 247   /* Null termination is the convention -- even if we suspect the data
 248      to be binary, it's not up to us to decide, it's the caller's
 249      call.  Heck, that's why they call it the caller! */
 250   data[size] = '\0';
 251
 252   return new_string;
 253 }
 254
 255
 256 svn_string_t *
 257 svn_string_create(const char *cstring, apr_pool_t *pool)
 258 {
 259   return svn_string_ncreate(cstring, strlen(cstring), pool);
 260 }
 261
 262
 263 svn_string_t *
 264 svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool)
 265 {
 266   return svn_string_ncreate(strbuf->data, strbuf->len, pool);
 267 }
 268
 269
 270 svn_string_t *
 271 svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap)
 272 {
 273   char *data = apr_pvsprintf(pool, fmt, ap);
 274
 275   /* wrap an svn_string_t around the new data */
 276   return create_string(data, strlen(data), pool);
 277 }
 278
 279
 280 svn_string_t *
 281 svn_string_createf(apr_pool_t *pool, const char *fmt, ...)
 282 {
 283   svn_string_t *str;
 284
 285   va_list ap;
 286   va_start(ap, fmt);
 287   str = svn_string_createv(pool, fmt, ap);
 288   va_end(ap);
 289
 290   return str;
 291 }
 292
 293
 294 svn_boolean_t
 295 svn_string_isempty(const svn_string_t *str)
 296 {
 297   return (str->len == 0);
 298 }
 299
 300
 301 svn_string_t *
 302 svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool)
 303 {
 304   return (original_string ? svn_string_ncreate(original_string->data,
 305                                                original_string->len, pool)
 306                           : NULL);
 307 }
 308
 309
 310
 311 svn_boolean_t
 312 svn_string_compare(const svn_string_t *str1, const svn_string_t *str2)
 313 {
 314   return
 315     string_compare(str1->data, str2->data, str1->len, str2->len);
 316 }
 317
 318
 319
 320 apr_size_t
 321 svn_string_first_non_whitespace(const svn_string_t *str)
 322 {
 323   return
 324     string_first_non_whitespace(str->data, str->len);
 325 }
 326
 327
 328 apr_size_t
 329 svn_string_find_char_backward(const svn_string_t *str, char ch)
 330 {
 331   return find_char_backward(str->data, str->len, ch);
 332 }
 333
/* Reinterpret STRBUF as an svn_string_t without copying anything.
 *
 * The returned pointer refers to memory inside *STRBUF (starting at its
 * DATA member), so it is valid only as long as STRBUF's storage is.
 * STRBUF itself must be considered "consumed" afterwards.
 */
svn_string_t *
svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf)
{
  /* In debug mode, detect attempts to modify the original STRBUF object:
   * without a pool and with a block size that exactly fits the current
   * contents, any later attempt to grow STRBUF has nothing to allocate
   * from.
   */
#ifdef SVN_DEBUG
  strbuf->pool = NULL;
  strbuf->blocksize = strbuf->len + 1;
#endif

  /* Both svn_string_t and svn_stringbuf_t are public API structures
   * since the svn epoch. Thus, we can rely on their precise layout not
   * to change.
   *
   * It just so happens that svn_string_t is structurally equivalent
   * to the (data, len) sub-set of svn_stringbuf_t. There is also no
   * difference in alignment and padding. So, we can just re-interpret
   * that part of STRBUF as a svn_string_t.
   *
   * However, since svn_string_t does not know about the blocksize
   * member in svn_stringbuf_t, any attempt to re-size the returned
   * svn_string_t might invalidate the STRBUF struct. Hence, we consider
   * the source STRBUF "consumed".
   *
   * Modifying the string character content is fine, though.
   */
  return (svn_string_t *)&strbuf->data;
}
 362
 363
 364 \f
 365 /* svn_stringbuf functions */
 366
 367 svn_stringbuf_t *
 368 svn_stringbuf_create_empty(apr_pool_t *pool)
 369 {
 370   return svn_stringbuf_create_ensure(0, pool);
 371 }
 372
 373 svn_stringbuf_t *
 374 svn_stringbuf_create_ensure(apr_size_t blocksize, apr_pool_t *pool)
 375 {
 376   void *mem;
 377   svn_stringbuf_t *new_string;
 378
 379   ++blocksize; /* + space for '\0' */
 380
 381   /* Allocate memory for svn_string_t and data in one chunk. */
 382   membuf_create(&mem, &blocksize, blocksize + sizeof(*new_string), pool);
 383
 384   /* Initialize header and string */
 385   new_string = mem;
 386   new_string->data = (char*)mem + sizeof(*new_string);
 387   new_string->data[0] = '\0';
 388   new_string->len = 0;
 389   new_string->blocksize = blocksize - sizeof(*new_string);
 390   new_string->pool = pool;
 391
 392   return new_string;
 393 }
 394
 395 svn_stringbuf_t *
 396 svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
 397 {
 398   svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool);
 399
 400   /* If SIZE is 0, NULL is valid for BYTES. */
 401   if (size)
 402     memcpy(strbuf->data, bytes, size);
 403
 404   /* Null termination is the convention -- even if we suspect the data
 405      to be binary, it's not up to us to decide, it's the caller's
 406      call.  Heck, that's why they call it the caller! */
 407   strbuf->data[size] = '\0';
 408   strbuf->len = size;
 409
 410   return strbuf;
 411 }
 412
 413
 414 svn_stringbuf_t *
 415 svn_stringbuf_create(const char *cstring, apr_pool_t *pool)
 416 {
 417   return svn_stringbuf_ncreate(cstring, strlen(cstring), pool);
 418 }
 419
 420
 421 svn_stringbuf_t *
 422 svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool)
 423 {
 424   return svn_stringbuf_ncreate(str->data, str->len, pool);
 425 }
 426
 427 svn_stringbuf_t *
 428 svn_stringbuf_create_wrap(char *str, apr_pool_t *pool)
 429 {
 430   svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result));
 431   result->pool = pool;
 432   result->data = str;
 433   result->len = strlen(str);
 434   result->blocksize = result->len + 1;
 435
 436   return result;
 437 }
 438
 439 svn_stringbuf_t *
 440 svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap)
 441 {
 442   char *data = apr_pvsprintf(pool, fmt, ap);
 443   apr_size_t size = strlen(data);
 444   svn_stringbuf_t *new_string;
 445
 446   new_string = apr_palloc(pool, sizeof(*new_string));
 447   new_string->data = data;
 448   new_string->len = size;
 449   new_string->blocksize = size + 1;
 450   new_string->pool = pool;
 451
 452   return new_string;
 453 }
 454
 455
 456 svn_stringbuf_t *
 457 svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...)
 458 {
 459   svn_stringbuf_t *str;
 460
 461   va_list ap;
 462   va_start(ap, fmt);
 463   str = svn_stringbuf_createv(pool, fmt, ap);
 464   va_end(ap);
 465
 466   return str;
 467 }
 468
 469
 470 void
 471 svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c)
 472 {
 473   memset(str->data, c, str->len);
 474 }
 475
 476
 477 void
 478 svn_stringbuf_set(svn_stringbuf_t *str, const char *value)
 479 {
 480   apr_size_t amt = strlen(value);
 481
 482   svn_stringbuf_ensure(str, amt);
 483   memcpy(str->data, value, amt + 1);
 484   str->len = amt;
 485 }
 486
 487 void
 488 svn_stringbuf_setempty(svn_stringbuf_t *str)
 489 {
 490   if (str->len > 0)
 491     str->data[0] = '\0';
 492
 493   str->len = 0;
 494 }
 495
 496
 497 void
 498 svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes)
 499 {
 500   if (nbytes > str->len)
 501     str->len = 0;
 502   else
 503     str->len -= nbytes;
 504
 505   str->data[str->len] = '\0';
 506 }
 507
 508
 509 svn_boolean_t
 510 svn_stringbuf_isempty(const svn_stringbuf_t *str)
 511 {
 512   return (str->len == 0);
 513 }
 514
 515
 516 void
 517 svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size)
 518 {
 519   void *mem = NULL;
 520   ++minimum_size;  /* + space for '\0' */
 521
 522   membuf_ensure(&mem, &str->blocksize, minimum_size, str->pool);
 523   if (mem && mem != str->data)
 524     {
 525       if (str->data)
 526         memcpy(mem, str->data, str->len + 1);
 527       str->data = mem;
 528     }
 529 }
 530
 531
/* Append the single byte BYTE to STR and keep STR NUL-terminated.
 *
 * WARNING - Optimized code ahead!
 * This function has been hand-tuned for performance. Please read
 * the comments below before modifying the code.
 */
void
svn_stringbuf_appendbyte(svn_stringbuf_t *str, char byte)
{
  char *dest;
  apr_size_t old_len = str->len;

  /* In most cases, there will be pre-allocated memory left
   * to just write the new byte at the end of the used section
   * and terminate the string properly.
   */
  if (str->blocksize > old_len + 1)
    {
      /* The following read does not depend on this write, so we
       * can issue the write first to minimize register pressure:
       * The value of old_len+1 is no longer needed; on most processors,
       * dest[old_len+1] will be calculated implicitly as part of
       * the addressing scheme.
       */
      str->len = old_len+1;

      /* Since the compiler cannot be sure that *str->data and *str
       * don't overlap, we read str->data *once* before writing
       * to *str->data. Replacing dest with str->data would force
       * the compiler to read it again after the first byte.
       */
      dest = str->data;

      /* If not already available in a register as per ABI, load
       * "byte" into the register (e.g. the one freed from old_len+1),
       * then write it to the string buffer and terminate it properly.
       *
       * Including the "byte" fetch, all operations so far could be
       * issued at once and be scheduled at the CPU's discretion.
       * Most likely, no-one will soon depend on the data that will be
       * written in this function. So, no stalls there, either.
       */
      dest[old_len] = byte;
      dest[old_len+1] = '\0';
    }
  else
    {
      /* We need to re-allocate the string buffer
       * -> let the more generic implementation take care of that part.
       */

      /* Depending on the ABI, "byte" is a register value. If we were
       * to take its address directly, the compiler might decide to
       * put it on the stack *unconditionally*, even if that would
       * only be necessary for this block.
       */
      char b = byte;
      svn_stringbuf_appendbytes(str, &b, 1);
    }
}
 590
 591
 592 void
 593 svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
 594                           apr_size_t count)
 595 {
 596   apr_size_t total_len;
 597   void *start_address;
 598
 599   if (!count)
 600     /* Allow BYTES to be NULL by avoiding passing it to memcpy. */
 601     return;
 602
 603   total_len = str->len + count;  /* total size needed */
 604
 605   /* svn_stringbuf_ensure adds 1 for null terminator. */
 606   svn_stringbuf_ensure(str, total_len);
 607
 608   /* get address 1 byte beyond end of original bytestring */
 609   start_address = (str->data + str->len);
 610
 611   memcpy(start_address, bytes, count);
 612   str->len = total_len;
 613
 614   str->data[str->len] = '\0';  /* We don't know if this is binary
 615                                   data or not, but convention is
 616                                   to null-terminate. */
 617 }
 618
 619 void
 620 svn_stringbuf_appendfill(svn_stringbuf_t *str,
 621                          char byte,
 622                          apr_size_t count)
 623 {
 624   apr_size_t new_len = str->len + count;
 625   svn_stringbuf_ensure(str, new_len);
 626
 627   memset(str->data + str->len, byte, count);
 628
 629   /* update buffer length and always NUL-terminate it */
 630   str->len = new_len;
 631   str->data[new_len] = '\0';
 632 }
 633
 634
 635 void
 636 svn_stringbuf_appendstr(svn_stringbuf_t *targetstr,
 637                         const svn_stringbuf_t *appendstr)
 638 {
 639   svn_stringbuf_appendbytes(targetstr, appendstr->data, appendstr->len);
 640 }
 641
 642
 643 void
 644 svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr, const char *cstr)
 645 {
 646   svn_stringbuf_appendbytes(targetstr, cstr, strlen(cstr));
 647 }
 648
 649 void
 650 svn_stringbuf_insert(svn_stringbuf_t *str,
 651                      apr_size_t pos,
 652                      const char *bytes,
 653                      apr_size_t count)
 654 {
 655   /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. */
 656   if (count == 0)
 657     return;
 658
 659   /* special case: BYTES overlaps with this string -> copy the source */
 660   if (bytes + count > str->data && bytes < str->data + str->blocksize)
 661     bytes = apr_pmemdup(str->pool, bytes, count);
 662
 663   if (pos > str->len)
 664     pos = str->len;
 665
 666   svn_stringbuf_ensure(str, str->len + count);
 667   memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
 668   memcpy(str->data + pos, bytes, count);
 669
 670   str->len += count;
 671 }
 672
 673 void
 674 svn_stringbuf_remove(svn_stringbuf_t *str,
 675                      apr_size_t pos,
 676                      apr_size_t count)
 677 {
 678   if (pos > str->len)
 679     pos = str->len;
 680   if (count > str->len - pos)
 681     count = str->len - pos;
 682
 683   memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1);
 684   str->len -= count;
 685 }
 686
 687 void
 688 svn_stringbuf_replace(svn_stringbuf_t *str,
 689                       apr_size_t pos,
 690                       apr_size_t old_count,
 691                       const char *bytes,
 692                       apr_size_t new_count)
 693 {
 694   /* For COUNT==0, we allow BYTES to be NULL.
 695    * In that case, this is just a substring removal. */
 696   if (new_count == 0)
 697     {
 698       svn_stringbuf_remove(str, pos, old_count);
 699       return;
 700     }
 701
 702   /* special case: BYTES overlaps with this string -> copy the source */
 703   if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
 704     bytes = apr_pmemdup(str->pool, bytes, new_count);
 705
 706   if (pos > str->len)
 707     pos = str->len;
 708   if (old_count > str->len - pos)
 709     old_count = str->len - pos;
 710
 711   if (old_count < new_count)
 712     {
 713       apr_size_t delta = new_count - old_count;
 714       svn_stringbuf_ensure(str, str->len + delta);
 715     }
 716
 717   if (old_count != new_count)
 718     memmove(str->data + pos + new_count, str->data + pos + old_count,
 719             str->len - pos - old_count + 1);
 720
 721   memcpy(str->data + pos, bytes, new_count);
 722   str->len += new_count - old_count;
 723 }
 724
 725
 726 svn_stringbuf_t *
 727 svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool)
 728 {
 729   return (svn_stringbuf_ncreate(original_string->data,
 730                                 original_string->len, pool));
 731 }
 732
 733
 734
 735 svn_boolean_t
 736 svn_stringbuf_compare(const svn_stringbuf_t *str1,
 737                       const svn_stringbuf_t *str2)
 738 {
 739   return string_compare(str1->data, str2->data, str1->len, str2->len);
 740 }
 741
 742
 743
 744 apr_size_t
 745 svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str)
 746 {
 747   return string_first_non_whitespace(str->data, str->len);
 748 }
 749
 750
 751 void
 752 svn_stringbuf_strip_whitespace(svn_stringbuf_t *str)
 753 {
 754   /* Find first non-whitespace character */
 755   apr_size_t offset = svn_stringbuf_first_non_whitespace(str);
 756
 757   /* Go ahead!  Waste some RAM, we've got pools! :)  */
 758   str->data += offset;
 759   str->len -= offset;
 760   str->blocksize -= offset;
 761
 762   /* Now that we've trimmed the front, trim the end, wasting more RAM. */
 763   while ((str->len > 0) && svn_ctype_isspace(str->data[str->len - 1]))
 764     str->len--;
 765   str->data[str->len] = '\0';
 766 }
 767
 768
 769 apr_size_t
 770 svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch)
 771 {
 772   return find_char_backward(str->data, str->len, ch);
 773 }
 774
 775
 776 svn_boolean_t
 777 svn_string_compare_stringbuf(const svn_string_t *str1,
 778                              const svn_stringbuf_t *str2)
 779 {
 780   return string_compare(str1->data, str2->data, str1->len, str2->len);
 781 }
 782
 783
 784 \f
 785 /*** C string stuff. ***/
 786
 787 void
 788 svn_cstring_split_append(apr_array_header_t *array,
 789                          const char *input,
 790                          const char *sep_chars,
 791                          svn_boolean_t chop_whitespace,
 792                          apr_pool_t *pool)
 793 {
 794   char *pats;
 795   char *p;
 796
 797   pats = apr_pstrdup(pool, input);  /* strtok wants non-const data */
 798   p = svn_cstring_tokenize(sep_chars, &pats);
 799
 800   while (p)
 801     {
 802       if (chop_whitespace)
 803         {
 804           while (svn_ctype_isspace(*p))
 805             p++;
 806
 807           {
 808             char *e = p + (strlen(p) - 1);
 809             while ((e >= p) && (svn_ctype_isspace(*e)))
 810               e--;
 811             *(++e) = '\0';
 812           }
 813         }
 814
 815       if (p[0] != '\0')
 816         APR_ARRAY_PUSH(array, const char *) = p;
 817
 818       p = svn_cstring_tokenize(sep_chars, &pats);
 819     }
 820
 821   return;
 822 }
 823
 824
 825 apr_array_header_t *
 826 svn_cstring_split(const char *input,
 827                   const char *sep_chars,
 828                   svn_boolean_t chop_whitespace,
 829                   apr_pool_t *pool)
 830 {
 831   apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input));
 832   svn_cstring_split_append(a, input, sep_chars, chop_whitespace, pool);
 833   return a;
 834 }
 835
 836
 837 svn_boolean_t svn_cstring_match_glob_list(const char *str,
 838                                           const apr_array_header_t *list)
 839 {
 840   int i;
 841
 842   for (i = 0; i < list->nelts; i++)
 843     {
 844       const char *this_pattern = APR_ARRAY_IDX(list, i, char *);
 845
 846       if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS)
 847         return TRUE;
 848     }
 849
 850   return FALSE;
 851 }
 852
 853 svn_boolean_t
 854 svn_cstring_match_list(const char *str, const apr_array_header_t *list)
 855 {
 856   int i;
 857
 858   for (i = 0; i < list->nelts; i++)
 859     {
 860       const char *this_str = APR_ARRAY_IDX(list, i, char *);
 861
 862       if (strcmp(this_str, str) == 0)
 863         return TRUE;
 864     }
 865
 866   return FALSE;
 867 }
 868
 869 char *
 870 svn_cstring_tokenize(const char *sep, char **str)
 871 {
 872     char *token;
 873     char *next;
 874     char csep;
 875
 876     /* check parameters */
 877     if ((sep == NULL) || (str == NULL) || (*str == NULL))
 878         return NULL;
 879
 880     /* let APR handle edge cases and multiple separators */
 881     csep = *sep;
 882     if (csep == '\0' || sep[1] != '\0')
 883       return apr_strtok(NULL, sep, str);
 884
 885     /* skip characters in sep (will terminate at '\0') */
 886     token = *str;
 887     while (*token == csep)
 888         ++token;
 889
 890     if (!*token)          /* no more tokens */
 891         return NULL;
 892
 893     /* skip valid token characters to terminate token and
 894      * prepare for the next call (will terminate at '\0)
 895      */
 896     next = strchr(token, csep);
 897     if (next == NULL)
 898       {
 899         *str = token + strlen(token);
 900       }
 901     else
 902       {
 903         *next = '\0';
 904         *str = next + 1;
 905       }
 906
 907     return token;
 908 }
 909
 910 int svn_cstring_count_newlines(const char *msg)
 911 {
 912   int count = 0;
 913   const char *p;
 914
 915   for (p = msg; *p; p++)
 916     {
 917       if (*p == '\n')
 918         {
 919           count++;
 920           if (*(p + 1) == '\r')
 921             p++;
 922         }
 923       else if (*p == '\r')
 924         {
 925           count++;
 926           if (*(p + 1) == '\n')
 927             p++;
 928         }
 929     }
 930
 931   return count;
 932 }
 933
 934 char *
 935 svn_cstring_join(const apr_array_header_t *strings,
 936                  const char *separator,
 937                  apr_pool_t *pool)
 938 {
 939   svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool);
 940   size_t sep_len = strlen(separator);
 941   int i;
 942
 943   for (i = 0; i < strings->nelts; i++)
 944     {
 945       const char *string = APR_ARRAY_IDX(strings, i, const char *);
 946       svn_stringbuf_appendbytes(new_str, string, strlen(string));
 947       svn_stringbuf_appendbytes(new_str, separator, sep_len);
 948     }
 949   return new_str->data;
 950 }
 951
 952 int
 953 svn_cstring_casecmp(const char *str1, const char *str2)
 954 {
 955   for (;;)
 956     {
 957       const int a = *str1++;
 958       const int b = *str2++;
 959       const int cmp = svn_ctype_casecmp(a, b);
 960       if (cmp || !a || !b)
 961         return cmp;
 962     }
 963 }
 964
 965 svn_error_t *
 966 svn_cstring_strtoui64(apr_uint64_t *n, const char *str,
 967                       apr_uint64_t minval, apr_uint64_t maxval,
 968                       int base)
 969 {
 970   apr_int64_t val;
 971   char *endptr;
 972
 973   /* We assume errno is thread-safe. */
 974   errno = 0; /* APR-0.9 doesn't always set errno */
 975
 976   /* ### We're throwing away half the number range here.
 977    * ### APR needs a apr_strtoui64() function. */
 978   val = apr_strtoi64(str, &endptr, base);
 979   if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
 980     return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
 981                              _("Could not convert '%s' into a number"),
 982                              str);
 983   if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
 984       val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval)
 985     /* ### Mark this for translation when gettext doesn't choke on macros. */
 986     return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
 987                              "Number '%s' is out of range "
 988                              "'[%" APR_UINT64_T_FMT ", %" APR_UINT64_T_FMT "]'",
 989                              str, minval, maxval);
 990   *n = val;
 991   return SVN_NO_ERROR;
 992 }
 993
 994 svn_error_t *
 995 svn_cstring_atoui64(apr_uint64_t *n, const char *str)
 996 {
 997   return svn_error_trace(svn_cstring_strtoui64(n, str, 0,
 998                                                APR_UINT64_MAX, 10));
 999 }
1000
1001 svn_error_t *
1002 svn_cstring_atoui(unsigned int *n, const char *str)
1003 {
1004   apr_uint64_t val;
1005
1006   SVN_ERR(svn_cstring_strtoui64(&val, str, 0, APR_UINT32_MAX, 10));
1007   *n = (unsigned int)val;
1008   return SVN_NO_ERROR;
1009 }
1010
1011 svn_error_t *
1012 svn_cstring_strtoi64(apr_int64_t *n, const char *str,
1013                      apr_int64_t minval, apr_int64_t maxval,
1014                      int base)
1015 {
1016   apr_int64_t val;
1017   char *endptr;
1018
1019   /* We assume errno is thread-safe. */
1020   errno = 0; /* APR-0.9 doesn't always set errno */
1021
1022   val = apr_strtoi64(str, &endptr, base);
1023   if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
1024     return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1025                              _("Could not convert '%s' into a number"),
1026                              str);
1027   if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
1028       val < minval || val > maxval)
1029     /* ### Mark this for translation when gettext doesn't choke on macros. */
1030     return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1031                              "Number '%s' is out of range "
1032                              "'[%" APR_INT64_T_FMT ", %" APR_INT64_T_FMT "]'",
1033                              str, minval, maxval);
1034   *n = val;
1035   return SVN_NO_ERROR;
1036 }
1037
1038 svn_error_t *
1039 svn_cstring_atoi64(apr_int64_t *n, const char *str)
1040 {
1041   return svn_error_trace(svn_cstring_strtoi64(n, str, APR_INT64_MIN,
1042                                               APR_INT64_MAX, 10));
1043 }
1044
1045 svn_error_t *
1046 svn_cstring_atoi(int *n, const char *str)
1047 {
1048   apr_int64_t val;
1049
1050   SVN_ERR(svn_cstring_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10));
1051   *n = (int)val;
1052   return SVN_NO_ERROR;
1053 }
1054
/* Convert the leading run of decimal digits in BUFFER to a number and
 * return it.  *END is set to the first character that is not a digit.
 * If BUFFER does not start with a digit, the result is 0 and *END is
 * BUFFER itself.  There is no overflow detection; the value simply
 * wraps around as unsigned arithmetic does.
 */
unsigned long
svn__strtoul(const char* buffer, const char** end)
{
  unsigned long value = 0;
  const char *cursor;

  /* The loop body is deliberately tiny: one load, one compare, one
     multiply-and-add and one pointer increment per digit. */
  for (cursor = buffer; ; ++cursor)
    {
      /* Anything that is not a digit ends up > 9 after the conversion
         to an unsigned type, including the NUL terminator. */
      const unsigned long digit
        = (unsigned char)*cursor - (unsigned char)'0';

      if (digit > 9)
        break;

      value = value * 10 + digit;
    }

  *end = cursor;
  return value;
}
1083
/* Lookup table with the two-digit decimal text (leading zero included)
 * of every number from 0 to 99.  Each entry occupies 4 bytes instead of
 * the 3 bytes strictly needed so that entries stay word-aligned, which
 * is what gives the best performance.
 */
static const char decimal_table[100][4] =
  {
    "00", "01", "02", "03", "04", "05", "06", "07", "08", "09",
    "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
    "20", "21", "22", "23", "24", "25", "26", "27", "28", "29",
    "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
    "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
    "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
    "60", "61", "62", "63", "64", "65", "66", "67", "68", "69",
    "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
    "80", "81", "82", "83", "84", "85", "86", "87", "88", "89",
    "90", "91", "92", "93", "94", "95", "96", "97", "98", "99"
  };

/* Copy exactly the two bytes SOURCE[0] and SOURCE[1] to DEST[0] and
 * DEST[1].  No NUL terminator is written. */
#define COPY_TWO_BYTES(dest,source) memcpy((dest), (source), 2)
1102
/* Write the decimal representation of NUMBER, followed by a NUL
 * terminator, to DEST and return the number of digits written (the
 * terminator is not counted).
 *
 * Numbers of 100 and above are first assembled back-to-front in a
 * local buffer of SVN_INT64_BUFFER_SIZE bytes and then copied to DEST
 * together with the terminator, so at most that many bytes get written
 * to DEST.
 */
apr_size_t
svn__ui64toa(char * dest, apr_uint64_t number)
{
  char buffer[SVN_INT64_BUFFER_SIZE];
  apr_uint32_t reduced;   /* used for 32 bit DIV */
  char* target;           /* start of the digit pair to write next */

  /* Small numbers are by far the most common case.
   * Therefore, we use special code.
   */
  if (number < 100)
    {
      if (number < 10)
        {
          /* Single digit: no table lookup required. */
          dest[0] = (char)('0' + number);
          dest[1] = 0;
          return 1;
        }
      else
        {
          /* Exactly two digits: take them straight from the table. */
          COPY_TWO_BYTES(dest, decimal_table[(apr_size_t)number]);
          dest[2] = 0;
          return 2;
        }
    }

  /* Standard code. Write string in pairs of chars back-to-front.
   * The last buffer element holds the terminator; the two elements
   * in front of it receive the least significant pair of digits. */
  buffer[SVN_INT64_BUFFER_SIZE - 1] = 0;
  target = &buffer[SVN_INT64_BUFFER_SIZE - 3];

  /* Loop may be executed 0 .. 2 times. */
  while (number >= 100000000)
    {
      /* Number is larger than 100^4, i.e. we can write 4x2 chars.
       * Also, use 32 bit DIVs as these are about twice as fast.
       */
      reduced = (apr_uint32_t)(number % 100000000);
      number /= 100000000;

      /* Emit the 8 digits of REDUCED, least significant pair first.
       * All four pairs are written, i.e. leading zeros are kept,
       * because more significant digits are still to come. */
      COPY_TWO_BYTES(target - 0, decimal_table[reduced % 100]);
      reduced /= 100;
      COPY_TWO_BYTES(target - 2, decimal_table[reduced % 100]);
      reduced /= 100;
      COPY_TWO_BYTES(target - 4, decimal_table[reduced % 100]);
      reduced /= 100;
      COPY_TWO_BYTES(target - 6, decimal_table[reduced % 100]);
      target -= 8;
    }

  /* Now, the number fits into 32 bits, but may still be larger than 99 */
  reduced = (apr_uint32_t)(number);
  while (reduced >= 100)
    {
      COPY_TWO_BYTES(target, decimal_table[reduced % 100]);
      reduced /= 100;
      target -= 2;
    }

  /* The number is now smaller than 100 and at least 1.
   * Write it as the most significant pair of digits. */
  COPY_TWO_BYTES(target, decimal_table[reduced]);

  /* Correction for uneven count of places: skip the leading '0' that
   * the table lookup produced for a single-digit value. */
  if (reduced < 10)
    ++target;

  /* Copy to target.  The byte count includes the NUL terminator ... */
  memcpy(dest, target, &buffer[SVN_INT64_BUFFER_SIZE] - target);
  /* ... but the returned length does not. */
  return &buffer[SVN_INT64_BUFFER_SIZE] - target - 1;
}
1172
1173 apr_size_t
1174 svn__i64toa(char * dest, apr_int64_t number)
1175 {
1176   if (number >= 0)
1177     return svn__ui64toa(dest, (apr_uint64_t)number);
1178
1179   *dest = '-';
1180   return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1;
1181 }
1182
1183 static void
1184 ui64toa_sep(apr_uint64_t number, char separator, char *buffer)
1185 {
1186   apr_size_t length = svn__ui64toa(buffer, number);
1187   apr_size_t i;
1188
1189   for (i = length; i > 3; i -= 3)
1190     {
1191       memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3);
1192       buffer[i-3] = separator;
1193       length++;
1194     }
1195
1196   buffer[length] = 0;
1197 }
1198
1199 char *
1200 svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool)
1201 {
1202   char buffer[2 * SVN_INT64_BUFFER_SIZE];
1203   ui64toa_sep(number, separator, buffer);
1204
1205   return apr_pstrdup(pool, buffer);
1206 }
1207
1208 char *
1209 svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool)
1210 {
1211   char buffer[2 * SVN_INT64_BUFFER_SIZE];
1212   if (number < 0)
1213     {
1214       buffer[0] = '-';
1215       ui64toa_sep((apr_uint64_t)(-number), separator, &buffer[1]);
1216     }
1217   else
1218     ui64toa_sep((apr_uint64_t)(number), separator, buffer);
1219
1220   return apr_pstrdup(pool, buffer);
1221 }
1222
1223 apr_size_t
1224 svn__ui64tobase36(char *dest, apr_uint64_t value)
1225 {
1226   char *dest_start = dest;
1227   if (value < 10)
1228     {
1229       /* pretty frequent and trivial case. Make it fast. */
1230       *(dest++) = (char)(value) + '0';
1231     }
1232   else
1233     {
1234       char buffer[SVN_INT64_BUFFER_SIZE];
1235       char *p = buffer;
1236
1237       /* write result as little-endian to buffer */
1238       while (value > 0)
1239         {
1240           char c = (char)(value % 36);
1241           value /= 36;
1242
1243           *p = (c <= 9) ? (c + '0') : (c - 10 + 'a');
1244           ++p;
1245         }
1246
1247       /* copy as big-endian to DEST */
1248       while (p > buffer)
1249         *(dest++) = *(--p);
1250     }
1251
1252   *dest = '\0';
1253   return dest - dest_start;
1254 }
1255
1256 apr_uint64_t
1257 svn__base36toui64(const char **next, const char *source)
1258 {
1259   apr_uint64_t result = 0;
1260   apr_uint64_t factor = 1;
1261   int i  = 0;
1262   char digits[SVN_INT64_BUFFER_SIZE];
1263
1264   /* convert digits to numerical values and count the number of places.
1265    * Also, prevent buffer overflow. */
1266   while (i < sizeof(digits))
1267     {
1268       char c = *source;
1269       if (c < 'a')
1270         {
1271           /* includes detection of NUL terminator */
1272           if (c < '0' || c > '9')
1273             break;
1274
1275           c -= '0';
1276         }
1277       else
1278         {
1279           if (c < 'a' || c > 'z')
1280             break;
1281
1282           c -= 'a' - 10;
1283         }
1284
1285       digits[i++] = c;
1286       source++;
1287     }
1288
1289   /* fold digits into the result */
1290   while (i > 0)
1291     {
1292       result += factor * (apr_uint64_t)digits[--i];
1293       factor *= 36;
1294     }
1295
1296   if (next)
1297     *next = source;
1298
1299   return result;
1300 }
1301
1302
1303 apr_size_t
1304 svn_cstring__similarity(const char *stra, const char *strb,
1305                         svn_membuf_t *buffer, apr_size_t *rlcs)
1306 {
1307   svn_string_t stringa, stringb;
1308   stringa.data = stra;
1309   stringa.len = strlen(stra);
1310   stringb.data = strb;
1311   stringb.len = strlen(strb);
1312   return svn_string__similarity(&stringa, &stringb, buffer, rlcs);
1313 }
1314
/* Calculate how similar STRINGA and STRINGB are, based on the length
 * of their longest common subsequence (LCS).
 *
 * The result is 2 * SVN_STRING__SIM_RANGE_MAX * LCS-length divided by
 * the sum of both string lengths, rounded to the nearest integer.  If
 * both strings are empty, SVN_STRING__SIM_RANGE_MAX is returned.
 *
 * If RLCS is not NULL, the LCS length is returned in *RLCS.  BUFFER
 * provides the scratch memory for the LCS calculation; it is passed to
 * svn_membuf__ensure() to make room for two columns of the LCS matrix.
 */
apr_size_t
svn_string__similarity(const svn_string_t *stringa,
                       const svn_string_t *stringb,
                       svn_membuf_t *buffer, apr_size_t *rlcs)
{
  const char *stra = stringa->data;
  const char *strb = stringb->data;
  const apr_size_t lena = stringa->len;
  const apr_size_t lenb = stringb->len;
  const apr_size_t total = lena + lenb;
  const char *enda = stra + lena;
  const char *endb = strb + lenb;
  apr_size_t lcs = 0;

  /* Skip the common prefix ... */
  while (stra < enda && strb < endb && *stra == *strb)
    {
      ++stra; ++strb;
      ++lcs;
    }

  /* ... and the common suffix */
  while (stra < enda && strb < endb)
    {
      --enda; --endb;
      if (*enda != *endb)
        {
          /* Mismatch: undo the step so that ENDA / ENDB again point
             just behind the differing part. */
          ++enda; ++endb;
          break;
        }

      ++lcs;
    }

  /* Run the LCS algorithm only if both strings have something left
     after stripping the common prefix and suffix. */
  if (stra < enda && strb < endb)
    {
      const apr_size_t resta = enda - stra;
      const apr_size_t restb = endb - strb;
      /* The columns are as long as the shorter remainder (plus 1). */
      const apr_size_t slots = (resta > restb ? restb : resta);
      apr_size_t *curr, *prev;
      const char *pstr;

      /* The outer loop must iterate on the longer string. */
      if (resta < restb)
        {
          pstr = stra;
          stra = strb;
          strb = pstr;

          pstr = enda;
          enda = endb;
          endb = pstr;
        }

      /* Allocate two columns in the LCS matrix
         ### Optimize this to (slots + 2) instead of 2 * (slots + 1) */
      svn_membuf__ensure(buffer, 2 * (slots + 1) * sizeof(apr_size_t));
      /* Clear all of PREV plus CURR[0].  The remaining elements of CURR
         get written by the inner loop before they are read. */
      svn_membuf__nzero(buffer, (slots + 2) * sizeof(apr_size_t));
      prev = buffer->data;
      curr = prev + slots + 1;

      /* Calculate LCS length of the remainder */
      for (pstr = stra; pstr < enda; ++pstr)
        {
          apr_size_t i;
          for (i = 1; i <= slots; ++i)
            {
              /* On a match, extend the LCS of both shorter prefixes;
                 otherwise, carry over the better of the two neighbors. */
              if (*pstr == strb[i-1])
                curr[i] = prev[i-1] + 1;
              else
                curr[i] = (curr[i-1] > prev[i] ? curr[i-1] : prev[i]);
            }

          /* Swap the buffers, making the previous one current */
          {
            apr_size_t *const temp = prev;
            prev = curr;
            curr = temp;
          }
        }

      /* Due to the final swap, PREV is the column calculated last. */
      lcs += prev[slots];
    }

  if (rlcs)
    *rlcs = lcs;

  /* Return similarity ratio rounded to 4 significant digits */
  if (total)
    return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total);
  else
    return SVN_STRING__SIM_RANGE_MAX;
}
1408
1409 apr_size_t
1410 svn_cstring__match_length(const char *a,
1411                           const char *b,
1412                           apr_size_t max_len)
1413 {
1414   apr_size_t pos = 0;
1415
1416 #if SVN_UNALIGNED_ACCESS_IS_OK
1417
1418   /* Chunky processing is so much faster ...
1419    *
1420    * We can't make this work on architectures that require aligned access
1421    * because A and B will probably have different alignment. So, skipping
1422    * the first few chars until alignment is reached is not an option.
1423    */
1424   for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
1425     if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
1426       break;
1427
1428 #endif
1429
1430   for (; pos < max_len; ++pos)
1431     if (a[pos] != b[pos])
1432       break;
1433
1434   return pos;
1435 }
1436
1437 apr_size_t
1438 svn_cstring__reverse_match_length(const char *a,
1439                                   const char *b,
1440                                   apr_size_t max_len)
1441 {
1442   apr_size_t pos = 0;
1443
1444 #if SVN_UNALIGNED_ACCESS_IS_OK
1445
1446   /* Chunky processing is so much faster ...
1447    *
1448    * We can't make this work on architectures that require aligned access
1449    * because A and B will probably have different alignment. So, skipping
1450    * the first few chars until alignment is reached is not an option.
1451    */
1452   for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
1453     if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
1454       break;
1455
1456   pos -= sizeof(apr_size_t);
1457
1458 #endif
1459
1460   /* If we find a mismatch at -pos, pos-1 characters matched.
1461    */
1462   while (++pos <= max_len)
1463     if (a[0-pos] != b[0-pos])
1464       return pos - 1;
1465
1466   /* No mismatch found -> at least MAX_LEN matching chars.
1467    */
1468   return max_len;
1469 }
1470
1471 const char *
1472 svn_cstring_skip_prefix(const char *str, const char *prefix)
1473 {
1474   apr_size_t len = strlen(prefix);
1475
1476   if (strncmp(str, prefix, len) == 0)
1477     {
1478       return str + len;
1479     }
1480   else
1481     {
1482       return NULL;
1483     }
1484 }