/*
 * cache-membuffer.c: in-memory caching for Subversion
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
#include <assert.h>
#include <apr_md5.h>
#include <apr_thread_rwlock.h>

#include "svn_pools.h"
#include "svn_checksum.h"
#include "svn_private_config.h"
#include "svn_string.h"
#include "svn_sorts.h"  /* get the MIN macro */

#include "private/svn_atomic.h"
#include "private/svn_dep_compat.h"
#include "private/svn_mutex.h"
#include "private/svn_string_private.h"
/*
 * This svn_cache__t implementation actually consists of two parts:
 * a shared (per-process) singleton membuffer cache instance and shallow
 * svn_cache__t front-end instances that each use different key spaces.
 * For data management, they all forward to the singleton membuffer cache.
 *
 * A membuffer cache consists of two parts:
 *
 * 1. A linear data buffer containing cached items in a serialized
 *    representation, prefixed by their full cache keys.  There may be
 *    arbitrary gaps between entries.  This buffer is sub-divided into
 *    (currently two) cache levels.
 *
 * 2. A directory of cache entries.  This is organized similar to CPU
 *    data caches: for every possible key, there is exactly one group
 *    of entries that may contain the header info for an item with
 *    that given key.  The result is a GROUP_SIZE-way associative cache
 *    whose associativity can be dynamically increased.
 *
 * Only the start address of these two data parts is given as a native
 * pointer.  All other references are expressed as offsets to these
 * pointers.  With that design, it is relatively easy to share the same
 * data structure between different processes and / or to persist them
 * on disk.  These out-of-process features have not been implemented, yet.
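 *
 * For illustration, this is how the offset-based addressing resolves to
 * pointers (expressions assembled from the structures defined below, not
 * actual helpers in this file): the full key of an entry starts at
 *
 *   cache->data + entry->offset
 *
 * and, because the full key is stored immediately in front of the item,
 * the serialized item data starts at
 *
 *   cache->data + entry->offset + entry->key.key_len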
 *
 * Superficially, cache levels are being used as usual: insertion happens
 * into L1 and evictions will promote items to L2.  But their whole point
 * is a different one.  L1 uses a circular buffer, i.e. we have perfect
 * caching for the last N bytes where N is the size of L1.  L2 uses a more
 * elaborate scheme based on priorities and hit counts as described below.
 *
 * The data buffer usage information is implicitly given by the directory
 * entries.  Every USED entry has a reference to the previous and the next
 * used dictionary entry, and this doubly-linked list is ordered by the
 * offsets of their item data within the data buffer.  So removing data,
 * for instance, is done simply by unlinking it from the chain, implicitly
 * marking the entry as well as the data buffer section previously
 * associated to it as unused.  The first and last element of that chain
 * are being referenced from the respective cache level.
 *
 * Insertion can occur at only one, sliding position per cache level.  It
 * is marked by its offset in the data buffer and the index of the first
 * used entry at or behind that position.  If this gap is too small to
 * accommodate the new item (plus its full key), the insertion window is
 * extended as described below.  The new entry will always be inserted at
 * the bottom end of the window and since the next used entry is known,
 * properly sorted insertion is possible.
 *
 * To make the cache perform robustly in a wide range of usage scenarios,
 * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
 * details).  Every item holds a read hit counter and there is a global
 * read hit counter.  The more hits an entry has in relation to the
 * average, the more likely it is to be kept using a rand()-based
 * condition.  The test is applied only to the entry following the
 * insertion window.  If it doesn't get evicted, it is moved to the
 * beginning of that window and the window is moved past it.
 *
 * Moreover, the entry's hits get halved to make that entry more likely
 * to be removed the next time the sliding insertion / removal window
 * comes by.  As a result, frequently used entries are unlikely to be
 * dropped until they have gone unused for a while.  Also, even in a
 * cache thrashing situation, about 50% of the content survives every
 * time 50% of the cache gets re-written with new entries.  For details
 * on the fine-tuning involved, see the comments in
 * ensure_data_insertable_l2().
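 *
 * A worked example of that aging scheme (numbers illustrative only): an
 * entry that accumulated 8 hits and then stops being read survives the
 * first sweep of the window with its count halved to 4, then 2, then 1,
 * until a sweep finally finds no hit advantage left and it becomes a
 * likely eviction candidate.  Reads in the meantime increment the
 * counter and delay that decay; the exact keep / evict decision is made
 * in ensure_data_insertable_l2().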
 *
 * Due to the randomized mapping of keys to entry groups, some groups may
 * overflow.  In that case, there are spare groups that can be chained to
 * an already used group to extend it.
 *
 * To limit the entry size and management overhead, not the actual item
 * keys but only their hashed "fingerprint" will be stored.  These are
 * reasonably unique to prevent collisions, so we only need to support
 * up to one entry per entry key.  To guarantee that there are no
 * conflicts, however, we store the actual full key immediately in front
 * of the serialized item data.  That is, the entry offset actually
 * points to the full key and the key length stored in the entry acts as
 * an additional offset to find the actual item data.
 *
 * All access to the cached data needs to be serialized.  Because we want
 * to scale well despite that bottleneck, we simply segment the cache into
 * a number of independent caches (segments).  Items will be multiplexed
 * based on their hash key.
 */
/* APR's read-write lock implementation on Windows is horribly inefficient.
 * Even with very low contention a runtime overhead of 35% has been
 * measured for 'svn-bench null-export' over ra_serf.
 *
 * Use a simple mutex on Windows.  Because there is one mutex per segment,
 * large machines should (and usually can) be configured with large caches
 * such that read contention is kept low.  This is basically the situation
 * we had before 1.8.
 */
#ifdef WIN32
#  define USE_SIMPLE_MUTEX 1
#else
#  define USE_SIMPLE_MUTEX 0
#endif
/* For more efficient copy operations, let's align all data items properly.
 * Must be a power of 2.
 */
#define ITEM_ALIGNMENT 16

/* By default, don't create cache segments smaller than this value unless
 * the total cache size itself is smaller.
 */
#define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)

/* The minimum segment size we will allow for multi-segmented caches.
 */
#define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)

/* The maximum number of segments allowed.  Larger numbers reduce the size
 * of each segment, in turn reducing the max size of a cacheable item.
 * Also, each segment gets its own lock object.  The actual number
 * supported by the OS may therefore be lower and
 * svn_cache__membuffer_cache_create may return an error.
 */
#define MAX_SEGMENT_COUNT 0x10000

/* As of today, APR won't allocate chunks of 4GB or more.  So, limit the
 * segment size to slightly below that.
 */
#define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)

/* We don't mark the initialization status for every group but initialize
 * a number of groups at once.  That will allow for a very small init
 * flags vector that is likely to fit into the CPU caches even for fairly
 * large membuffer caches.  For instance, the default of 32 means 8x32
 * groups per byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group
 * x 40 index bytes/entry x 8 cache bytes/index byte = 1kB init vector /
 * 640MB cache.
 */
#define GROUP_INIT_GRANULARITY 32
/* Invalid index reference value.  Equivalent to (apr_uint32_t)(-1).
 */
#define NO_INDEX APR_UINT32_MAX

/* To save space in our group structure, we only use 32 bit size values
 * and, therefore, limit the size of each entry to just below 4GB.
 * Supporting larger items is not a good idea as the data transfer
 * to and from the cache would block other threads for a very long time.
 */
#define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
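
/* A worked example of the macro above: with ITEM_ALIGNMENT == 16,
 * MAX_ITEM_SIZE evaluates to (apr_uint32_t)(-16) == 0xFFFFFFF0, i.e.
 * 16 bytes short of 4GB and itself a multiple of ITEM_ALIGNMENT.
 */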
/* We use this structure to identify cache entries.  There cannot be two
 * entries with the same entry key.  However unlikely, though, two
 * different full keys (see full_key_t) may have the same entry key.  That
 * is a collision and at most one of them can be stored in the cache at
 * any time.
 */
typedef struct entry_key_t
{
  /* 16 byte finger print of the full key. */
  apr_uint64_t fingerprint[2];

  /* Length of the full key.  This value is aligned to ITEM_ALIGNMENT to
   * make sure the subsequent item content is properly aligned. */
  apr_size_t key_len;
} entry_key_t;

/* A full key, i.e. the combination of the cache's key prefix with some
 * dynamic part appended to it.  It also contains its ENTRY_KEY.
 */
typedef struct full_key_t
{
  /* Reduced form identifying the cache entry (if such an entry exists). */
  entry_key_t entry_key;

  /* This contains the full combination.  Note that the SIZE element may
   * be larger than ENTRY_KEY.KEY_LEN, but only the latter determines the
   * cache entry identity. */
  svn_membuf_t full_key;
} full_key_t;
/* Debugging / corruption detection support.
 * If you define this macro, the getter functions will perform expensive
 * checks on the item data, requested keys and entry types.  If there is
 * a mismatch found in any of them when being compared with the values
 * remembered in the setter function, an error will be returned.
 */
#ifdef SVN_DEBUG_CACHE_MEMBUFFER

/* The prefix passed to svn_cache__create_membuffer_cache() effectively
 * defines the type of all items stored by that cache instance.  We'll
 * take the last 15 bytes + \0 as plaintext for easy identification by
 * the dev.
 */
#define PREFIX_TAIL_LEN 16
/* This record will be attached to any cache entry.  It tracks item data
 * (content), key and type as hash values and is the baseline against
 * which the getters will compare their results to detect inconsistencies.
 */
typedef struct entry_tag_t
{
  /* MD5 checksum over the serialized item data.
   */
  unsigned char content_hash[APR_MD5_DIGESTSIZE];

  /* Hash value of the svn_cache_t instance that wrote the item
   * (i.e. a combination of type and repository).
   */
  unsigned char prefix_hash[APR_MD5_DIGESTSIZE];

  /* Note that this only covers the variable part of the key,
   * i.e. it will be different from the full key hash used for
   * the entry key.
   */
  unsigned char key_hash[APR_MD5_DIGESTSIZE];

  /* Last letters of the key in human readable format
   * (ends with the type identifier, e.g. "DAG").
   */
  char prefix_tail[PREFIX_TAIL_LEN];

  /* Length of the variable key part.
   */
  apr_size_t key_len;
} entry_tag_t;
/* Initialize all members of TAG except for the content hash.
 */
static svn_error_t *store_key_part(entry_tag_t *tag,
                                   const full_key_t *prefix_key,
                                   const void *key,
                                   apr_size_t key_len,
                                   apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  const char *prefix = prefix_key->full_key.data;
  apr_size_t prefix_len = strlen(prefix);

  if (prefix_len > sizeof(tag->prefix_tail))
    {
      prefix += prefix_len - (sizeof(tag->prefix_tail) - 1);
      prefix_len = sizeof(tag->prefix_tail) - 1;
    }

  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       key,
                       key_len,
                       pool));

  memcpy(tag->prefix_hash, prefix_key->entry_key.fingerprint,
         sizeof(tag->prefix_hash));
  memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));

  memset(tag->prefix_tail, 0, sizeof(tag->prefix_tail));
  memcpy(tag->prefix_tail, prefix, prefix_len + 1);

  tag->key_len = key_len;

  return SVN_NO_ERROR;
}
/* Initialize the content hash member of TAG.
 */
static svn_error_t *store_content_part(entry_tag_t *tag,
                                       const void *data,
                                       apr_size_t size,
                                       apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       data,
                       size,
                       pool));

  memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));

  return SVN_NO_ERROR;
}
/* Compare two tags and fail with an assertion upon differences.
 */
static svn_error_t *assert_equal_tags(const entry_tag_t *lhs,
                                      const entry_tag_t *rhs)
{
  SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
                        sizeof(lhs->content_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
                        sizeof(lhs->prefix_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
                        sizeof(lhs->key_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
                        sizeof(lhs->prefix_tail)) == 0);

  SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);

  return SVN_NO_ERROR;
}
/* Recurring code snippets.
 */
#define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,

#define DEBUG_CACHE_MEMBUFFER_TAG tag,

#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)                     \
  entry_tag_t _tag;                                              \
  entry_tag_t *tag = &_tag;                                      \
  if (key)                                                       \
    SVN_ERR(store_key_part(tag,                                  \
                           &cache->prefix,                       \
                           key,                                  \
                           cache->key_len == APR_HASH_KEY_STRING \
                               ? strlen((const char *) key)      \
                               : cache->key_len,                 \
                           pool));

#else

/* Don't generate any checks if consistency checks have not been enabled.
 */
#define DEBUG_CACHE_MEMBUFFER_TAG_ARG
#define DEBUG_CACHE_MEMBUFFER_TAG
#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)

#endif /* SVN_DEBUG_CACHE_MEMBUFFER */
/* A single dictionary entry.  Since all entries will be allocated once
 * during cache creation, those entries might be either used or unused.
 * An entry is used if and only if it is contained in the doubly-linked
 * list of used entries per cache level.
 */
typedef struct entry_t
{
  /* Identifying the data item.  Only valid for used entries.
   */
  entry_key_t key;

  /* The offset of the cached item's serialized data within the cache's
   * data buffer.  Only valid for used entries.
   */
  apr_uint64_t offset;

  /* Size of the serialized item data.  May be 0.  The MAX_ITEM_SIZE
   * macro above ensures that there will be no overflows.
   * Only valid for used entries.
   */
  apr_uint32_t size;

  /* Number of (read) hits for this entry.  Will be reset upon write.
   * Only valid for used entries.
   */
  svn_atomic_t hit_count;

  /* Reference to the next used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.last member.  NO_INDEX also implies that
   * the data buffer is not used beyond offset+size.
   * Only valid for used entries.
   */
  apr_uint32_t next;

  /* Reference to the previous used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.first member.
   * Only valid for used entries.
   */
  apr_uint32_t previous;

  /* Priority of this entry.  This entry will not be replaced by
   * lower-priority items.
   */
  apr_uint32_t priority;

#ifdef SVN_DEBUG_CACHE_MEMBUFFER
  /* Remember type, content and key hashes.
   */
  entry_tag_t tag;
#endif
} entry_t;
/* Group header struct.
 */
typedef struct group_header_t
{
  /* number of entries used [0 .. USED-1] */
  apr_uint32_t used;

  /* next group in the chain or NO_INDEX for the last.
   * For recyclable unused spare groups, this points to the next
   * unused spare group. */
  apr_uint32_t next;

  /* previous group in the chain or NO_INDEX for the first */
  apr_uint32_t previous;

  /* number of elements in the chain from start to here.
   * >= 1 for used groups, 0 for unused spare groups */
  apr_uint32_t chain_length;
} group_header_t;

/* The size of the group struct should be a power of two to make sure it
 * does not cross memory page boundaries.  Since we already access the
 * cache randomly, having two page table lookups instead of one is bad.
 */
#define GROUP_BLOCK_SIZE 512
/* A ~10-way associative cache seems to be a good compromise between
 * performance (worst-case lookups) and efficiency-loss due to collisions.
 *
 * This value may be changed to any positive integer.
 */
#define GROUP_SIZE \
          ((GROUP_BLOCK_SIZE - sizeof(group_header_t)) / sizeof(entry_t))
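
/* Worked example for the macro above (entry sizes are platform-dependent
 * and purely illustrative here): with sizeof(group_header_t) == 16, an
 * entry size of 48 bytes would give (512 - 16) / 48 == 10 entries per
 * group, while 56 bytes would give 8.  Either way, the associativity
 * stays near the "~10-way" target mentioned above.
 */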
/* Maximum number of groups in a chain, i.e. a cache index group can hold
 * up to GROUP_SIZE * MAX_GROUP_CHAIN_LENGTH entries.
 */
#define MAX_GROUP_CHAIN_LENGTH 8

/* We group dictionary entries to make this GROUP_SIZE-way associative.
 */
typedef struct entry_group_t
{
  group_header_t header;

  /* padding and also room for future extensions */
  char padding[GROUP_BLOCK_SIZE - sizeof(group_header_t)
               - sizeof(entry_t) * GROUP_SIZE];

  /* the actual entries */
  entry_t entries[GROUP_SIZE];
} entry_group_t;
/* Per-cache level header structure.  Instances of this are members of
 * svn_membuffer_t and will use non-overlapping sections of its DATA
 * buffer.  All offset values are global / absolute to that whole buffer.
 */
typedef struct cache_level_t
{
  /* Reference to the first (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t first;

  /* Reference to the last (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t last;

  /* Reference to the first (defined by the order of content in the data
   * buffer) used dictionary entry behind the insertion position
   * (current_data).  If NO_INDEX, the data buffer is free starting at the
   * current_data offset.
   */
  apr_uint32_t next;

  /* First offset in the cache's DATA buffer that belongs to this level.
   */
  apr_uint64_t start_offset;

  /* Size of data buffer allocated to this level in bytes.  Must be > 0.
   */
  apr_uint64_t size;

  /* Offset in the data buffer where the next insertion shall occur.
   */
  apr_uint64_t current_data;
} cache_level_t;
/* The cache header structure.
 */
struct svn_membuffer_t
{
  /* Number of cache segments.  Must be a power of 2.
     Please note that this structure represents only one such segment
     and that all segments must / will report the same values here. */
  apr_uint32_t segment_count;

  /* The dictionary, GROUP_SIZE * (group_count + spare_group_count)
   * entries long.  Never NULL.
   */
  entry_group_t *directory;

  /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
   * Allows for efficiently marking groups as "not initialized".
   */
  unsigned char *group_initialized;

  /* Size of dictionary in groups.  Must be > 0.
   */
  apr_uint32_t group_count;

  /* Total number of spare groups.
   */
  apr_uint32_t spare_group_count;

  /* First recyclable spare group.
   */
  apr_uint32_t first_spare_group;

  /* Maximum number of spare groups ever used, i.e. group index
   * group_count + max_spare_used is the first unused spare group
   * if first_spare_group is NO_INDEX.
   */
  apr_uint32_t max_spare_used;

  /* Pointer to the data buffer, data_size bytes long.  Never NULL.
   */
  unsigned char *data;

  /* Total number of data buffer bytes in use.
   */
  apr_uint64_t data_used;

  /* Largest entry size that we would accept.  For total cache sizes
   * less than 4TB (sic!), this is determined by the total cache size.
   */
  apr_uint64_t max_entry_size;

  /* The cache levels, organized as sub-buffers.  Since entries in the
   * DIRECTORY use offsets in DATA for addressing, a cache lookup does
   * not need to know the cache level of a specific item.  Cache levels
   * are only used to implement a hybrid insertion / eviction strategy.
   */

  /* First cache level, i.e. most insertions happen here.  Very large
   * items might get inserted directly into L2.  L1 is a strict FIFO
   * ring buffer that does not care about item priorities.  All evicted
   * items get a chance to be promoted to L2.
   */
  cache_level_t l1;

  /* Second cache level, i.e. data evicted from L1 will be added here
   * if the item is "important" enough or the L2 insertion window is
   * large enough.
   */
  cache_level_t l2;

  /* Number of used dictionary entries, i.e. number of cached items.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint32_t used_entries;

  /* Total number of calls to membuffer_cache_get.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_reads;

  /* Total number of calls to membuffer_cache_set.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_writes;

  /* Total number of hits since the cache's creation.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_hits;

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  /* A lock for intra-process synchronization to the cache, or NULL if
   * the cache's creator doesn't feel the cache needs to be thread-safe.
   */
  svn_mutex__t *lock;
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  /* Same for read-write lock. */
  apr_thread_rwlock_t *lock;

  /* If set, write access will wait until they get exclusive access.
   * Otherwise, they will become no-ops if the segment is currently
   * read-locked.  Only used when LOCK is an r/w lock.
   */
  svn_boolean_t allow_blocking_writes;
#endif
};
/* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
 */
#define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)

/* Align POINTER value to the next ITEM_ALIGNMENT boundary.
 */
#define ALIGN_POINTER(pointer) ((void*)ALIGN_VALUE((apr_size_t)(char*)(pointer)))
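
/* A few worked examples, assuming ITEM_ALIGNMENT == 16.  Note that
 * -ITEM_ALIGNMENT equals ~(ITEM_ALIGNMENT - 1) in two's complement,
 * i.e. the mask 0x...FFF0:
 *
 *   ALIGN_VALUE(0)  == 0
 *   ALIGN_VALUE(1)  == 16
 *   ALIGN_VALUE(16) == 16
 *   ALIGN_VALUE(17) == 32
 */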
/* If locking is supported for CACHE, acquire a read lock for it.
 */
static svn_error_t *
read_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
      if (status)
        return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire a write lock for it.
 * Set *SUCCESS to FALSE, if we couldn't acquire the write lock;
 * leave it untouched otherwise.
 */
static svn_error_t *
write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status;
      if (cache->allow_blocking_writes)
        {
          status = apr_thread_rwlock_wrlock(cache->lock);
        }
      else
        {
          status = apr_thread_rwlock_trywrlock(cache->lock);
          if (SVN_LOCK_IS_BUSY(status))
            {
              *success = FALSE;
              status = APR_SUCCESS;
            }
        }

      if (status)
        return svn_error_wrap_apr(status,
                                  _("Can't write-lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire an unconditional write lock
 * for it.
 */
static svn_error_t *
force_write_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
  if (status)
    return svn_error_wrap_apr(status,
                              _("Can't write-lock cache mutex"));

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, release the current lock
 * (read or write) and return ERR.
 */
static svn_error_t *
unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__unlock(cache->lock, err);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
      if (err)
        return err;

      if (status)
        return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
    }

  return err;
#else
  return err;
#endif
}
/* If supported, guard the execution of EXPR with a read lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 */
#define WITH_READ_LOCK(cache, expr)         \
do {                                        \
  SVN_ERR(read_lock_cache(cache));          \
  SVN_ERR(unlock_cache(cache, (expr)));     \
} while (0)
/* If supported, guard the execution of EXPR with a write lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 *
 * The write lock process is complicated if we are not allowed to wait
 * for the lock: If we didn't get the lock, we may still need to remove
 * an existing entry for the given key because that content is now stale.
 * Once we discover such an entry, we unconditionally do a blocking wait
 * for the write lock.  In case no old content could be found, a failing
 * lock attempt is simply a no-op and we exit the macro.
 */
#define WITH_WRITE_LOCK(cache, expr)                            \
do {                                                            \
  svn_boolean_t got_lock = TRUE;                                \
  SVN_ERR(write_lock_cache(cache, &got_lock));                  \
  if (!got_lock)                                                \
    {                                                           \
      svn_boolean_t exists;                                     \
      SVN_ERR(entry_exists(cache, group_index, key, &exists));  \
      if (exists)                                               \
        SVN_ERR(force_write_lock_cache(cache));                 \
      else                                                      \
        break;                                                  \
    }                                                           \
                                                                \
  SVN_ERR(unlock_cache(cache, (expr)));                         \
} while (0)
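
/* An illustrative walk-through of the macro above (not an exact trace):
 * with ALLOW_BLOCKING_WRITES disabled, WITH_WRITE_LOCK first tries a
 * non-blocking write lock.  If that fails while readers hold the
 * segment, the new data is simply not cached (the "break") - unless a
 * stale entry for KEY already exists, in which case we must block until
 * we can remove or replace it to keep the cache consistent.
 */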
/* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
 * been initialized, yet.  In that case, this group cannot hold any data.
 * Otherwise, a non-zero value is returned.
 */
static APR_INLINE unsigned char
is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char flags
    = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
  unsigned char bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));

  return flags & bit_mask;
}
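
/* Worked example for the bit arithmetic above, assuming the default
 * GROUP_INIT_GRANULARITY of 32: group_index 100 lives in init-flags byte
 * 100 / 256 == 0 and is covered by bit (100 / 32) % 8 == 3 of that byte,
 * i.e. bit_mask == 0x08.  That one flag bit covers groups 96..127.
 */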
/* Initializes the section of the directory in CACHE that contains
 * the entry group identified by GROUP_INDEX. */
static void
initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char bit_mask;
  apr_uint32_t i;

  /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
  apr_uint32_t first_index =
      (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
  apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
  if (last_index > cache->group_count + cache->spare_group_count)
    last_index = cache->group_count + cache->spare_group_count;

  for (i = first_index; i < last_index; ++i)
    {
      group_header_t *header = &cache->directory[i].header;
      header->used = 0;
      header->chain_length = 1;
      header->next = NO_INDEX;
      header->previous = NO_INDEX;
    }

  /* set the "initialized" bit for these groups */
  bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
  cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
    |= bit_mask;
}
/* Return the next available spare group from CACHE and mark it as used.
 * May return NULL.
 */
static entry_group_t *
allocate_spare_group(svn_membuffer_t *cache)
{
  entry_group_t *group = NULL;

  /* is there some ready-to-use group? */
  if (cache->first_spare_group != NO_INDEX)
    {
      group = &cache->directory[cache->first_spare_group];
      cache->first_spare_group = group->header.next;
    }

  /* any so far untouched spares available? */
  else if (cache->max_spare_used < cache->spare_group_count)
    {
      apr_uint32_t group_index = cache->group_count + cache->max_spare_used;
      ++cache->max_spare_used;

      if (!is_group_initialized(cache, group_index))
        initialize_group(cache, group_index);

      group = &cache->directory[group_index];
    }

  /* spare groups must be empty */
  assert(!group || !group->header.used);
  return group;
}

/* Mark previously allocated spare group GROUP in CACHE as "unused".
 */
static void
free_spare_group(svn_membuffer_t *cache,
                 entry_group_t *group)
{
  assert(group->header.used == 0);
  assert(group->header.previous != NO_INDEX);
  assert(group - cache->directory >= (apr_ssize_t)cache->group_count);

  /* unchain */
  cache->directory[group->header.previous].header.next = NO_INDEX;
  group->header.chain_length = 0;
  group->header.previous = NO_INDEX;

  /* add to chain of spares */
  group->header.next = cache->first_spare_group;
  cache->first_spare_group = (apr_uint32_t) (group - cache->directory);
}

/* Follow the group chain from GROUP in CACHE to its end and return the
 * last group.  May return GROUP.
 */
static entry_group_t *
last_group_in_chain(svn_membuffer_t *cache,
                    entry_group_t *group)
{
  while (group->header.next != NO_INDEX)
    group = &cache->directory[group->header.next];

  return group;
}

/* Return the CHAIN_INDEX-th element in the group chain starting from group
 * START_GROUP_INDEX in CACHE.
 */
static entry_group_t *
get_group(svn_membuffer_t *cache,
          apr_uint32_t start_group_index,
          apr_uint32_t chain_index)
{
  entry_group_t *group = &cache->directory[start_group_index];
  for (; chain_index; --chain_index)
    group = &cache->directory[group->header.next];

  return group;
}
/* Resolve a dictionary entry reference, i.e. return the entry
 * for the given index IDX in CACHE.
 */
static APR_INLINE entry_t *
get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
{
  return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
}
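
/* Index arithmetic example (GROUP_SIZE is platform-dependent; assume 8
 * here for illustration): idx 21 resolves to group 21 / 8 == 2, entry
 * slot 21 % 8 == 5.  get_index() below inverts this mapping.
 */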
/* Get the entry reference, i.e. index, for the given ENTRY.
 */
static APR_INLINE apr_uint32_t
get_index(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t group_index
    = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);

  return (apr_uint32_t)group_index * GROUP_SIZE
       + (apr_uint32_t)(entry - cache->directory[group_index].entries);
}

/* Return the cache level of ENTRY in CACHE.
 */
static cache_level_t *
get_cache_level(svn_membuffer_t *cache, entry_t *entry)
{
  return entry->offset < cache->l1.size ? &cache->l1
                                        : &cache->l2;
}
/* Insert ENTRY to the chain of items that belong to LEVEL in CACHE.  IDX
 * is ENTRY's item index and is only given for efficiency.  The insertion
 * takes place just before LEVEL->NEXT.  *CACHE will not be modified.
 */
static void
chain_entry(svn_membuffer_t *cache,
            cache_level_t *level,
            entry_t *entry,
            apr_uint32_t idx)
{
  /* insert ENTRY before this item */
  entry_t *next = level->next == NO_INDEX
                ? NULL
                : get_entry(cache, level->next);
  assert(idx == get_index(cache, entry));

  /* update entry chain
   */
  entry->next = level->next;
  if (level->first == NO_INDEX)
    {
      /* insert as the first entry and only in the chain
       */
      entry->previous = NO_INDEX;
      level->first = idx;
      level->last = idx;
    }
  else if (next == NULL)
    {
      /* insert as the last entry in the chain.
       * Note that it cannot also be at the beginning of the chain.
       */
      entry->previous = level->last;
      get_entry(cache, level->last)->next = idx;
      level->last = idx;
    }
  else
    {
      /* insert either at the start of a non-empty list or
       * somewhere in the middle
       */
      entry->previous = next->previous;
      next->previous = idx;

      if (entry->previous != NO_INDEX)
        get_entry(cache, entry->previous)->next = idx;
      else
        level->first = idx;
    }
}

/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE.  IDX
 * is ENTRY's item index and is only given for efficiency.  Please note
 * that neither *CACHE nor *ENTRY will be modified.
 */
static void
unchain_entry(svn_membuffer_t *cache,
              cache_level_t *level,
              entry_t *entry,
              apr_uint32_t idx)
{
  assert(idx == get_index(cache, entry));

  /* update the insertion window reference, if necessary
   */
  if (level->next == idx)
    level->next = entry->next;

  /* unlink it from the chain of used entries
   */
  if (entry->previous == NO_INDEX)
    level->first = entry->next;
  else
    get_entry(cache, entry->previous)->next = entry->next;

  if (entry->next == NO_INDEX)
    level->last = entry->previous;
  else
    get_entry(cache, entry->next)->previous = entry->previous;
}
/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
 * In contrast to insertion, removal is possible for any entry.
 */
static void
drop_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *last_group
    = last_group_in_chain(cache, &cache->directory[group_index]);
  apr_uint32_t last_in_group
    = (apr_uint32_t) ((last_group - cache->directory) * GROUP_SIZE
                      + last_group->header.used - 1);

  cache_level_t *level = get_cache_level(cache, entry);

  /* update global cache usage counters
   */
  cache->used_entries--;
  cache->data_used -= entry->size;

  /* extend the insertion window, if the entry happens to border it
   */
  if (idx == level->next)
    level->next = entry->next;
  else if (entry->next == level->next)
    {
      /* insertion window starts right behind the entry to remove
       */
      if (entry->previous == NO_INDEX)
        {
          /* remove the first entry -> insertion may start at pos 0, now */
          level->current_data = level->start_offset;
        }
      else
        {
          /* insertion may start right behind the previous entry */
          entry_t *previous = get_entry(cache, entry->previous);
          level->current_data = ALIGN_VALUE(  previous->offset
                                            + previous->size);
        }
    }

  /* unlink it from the chain of used entries
   */
  unchain_entry(cache, level, entry, idx);

  /* Move last entry into hole (if the removed one is not the last used).
   * We need to do this since all used entries are at the beginning of
   * the group's entries array.
   */
  if (idx != last_in_group)
    {
      /* copy the last used entry to the removed entry's index
       */
      *entry = last_group->entries[last_group->header.used-1];

      /* this ENTRY may belong to a different cache level than the entry
       * we have just removed */
      level = get_cache_level(cache, entry);

      /* update foreign links to new index
       */
      if (last_in_group == level->next)
        level->next = idx;

      if (entry->previous == NO_INDEX)
        level->first = idx;
      else
        get_entry(cache, entry->previous)->next = idx;

      if (entry->next == NO_INDEX)
        level->last = idx;
      else
        get_entry(cache, entry->next)->previous = idx;
    }

  /* Update the number of used entries.
   */
  last_group->header.used--;

  /* Release the last group in the chain if it is a spare group
   */
  if (!last_group->header.used && last_group->header.previous != NO_INDEX)
    free_spare_group(cache, last_group);
}
/* Insert ENTRY into the chain of used dictionary entries.  The entry's
 * offset and size members must already have been initialized.  Also,
 * the offset must match the beginning of the insertion window.
 */
static void
insert_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *group = &cache->directory[group_index];
  cache_level_t *level = get_cache_level(cache, entry);

  /* The entry must start at the beginning of the insertion window.
   * It must also be the first unused entry in the group.
   */
  assert(entry->offset == level->current_data);
  assert(idx == group_index * GROUP_SIZE + group->header.used);
  level->current_data = ALIGN_VALUE(entry->offset + entry->size);

  /* update usage counters
   */
  cache->used_entries++;
  cache->data_used += entry->size;
  entry->hit_count = 0;
  group->header.used++;

  /* update entry chain
   */
  chain_entry(cache, level, entry, idx);

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
 * respective item.
 */
static apr_uint32_t
get_group_index(svn_membuffer_t **cache,
                const entry_key_t *key)
{
  svn_membuffer_t *segment0 = *cache;
  apr_uint64_t key0 = key->fingerprint[0];
  apr_uint64_t key1 = key->fingerprint[1];

  /* select the cache segment to use. they have all the same group_count.
   * Since key may not be well-distributed, pre-fold it to a smaller but
   * "denser" range.  The modulus is a prime larger than the largest
   * possible segment count. */
  *cache = &segment0[(key1 % APR_UINT64_C(2809637) + (key0 / 37))
                     & (segment0->segment_count - 1)];
  return (key0 % APR_UINT64_C(5030895599)) % segment0->group_count;
}
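
/* Example of the masking above (numbers purely illustrative): with
 * segment_count == 16, the mask (segment_count - 1) == 15 == 0xf keeps
 * just the low 4 bits of the pre-folded key - which is why the segment
 * count must be a power of two.
 */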
/* Reduce the hit count of ENTRY and update the accumulated hit info
 * in CACHE accordingly.
 */
static APR_INLINE void
let_entry_age(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;

  if (hits_removed)
    {
      entry->hit_count -= hits_removed;
    }
  else
    {
      entry->priority /= 2;
    }
}

/* Return whether the keys in LHS and RHS match.
 */
static svn_boolean_t
entry_keys_match(const entry_key_t *lhs,
                 const entry_key_t *rhs)
{
  return (lhs->fingerprint[0] == rhs->fingerprint[0])
      && (lhs->fingerprint[1] == rhs->fingerprint[1])
      && (lhs->key_len == rhs->key_len);
}
/* Given the GROUP_INDEX that shall contain an entry with the hash key
 * TO_FIND, find that entry in the specified group.
 *
 * If FIND_EMPTY is not set, this function will return the one used entry
 * that actually matches the hash or NULL, if no such entry exists.
 *
 * If FIND_EMPTY has been set, this function will return an entry that
 * may hold new content for the given hash: either the one used entry
 * whose hash actually matches (dropped first, i.e. made fit to be
 * replaced with new content), an unused entry, or a forcibly removed
 * entry (if all group entries are currently in use).  The entry's hash
 * value will be initialized with TO_FIND.
 *
 * Note: This function requires the caller to appropriately lock the
 * CACHE.  For FIND_EMPTY==FALSE, a read lock is required, for
 * FIND_EMPTY==TRUE, the write lock must have been acquired.
 */
static entry_t *
find_entry(svn_membuffer_t *cache,
           apr_uint32_t group_index,
           const full_key_t *to_find,
           svn_boolean_t find_empty)
{
  entry_group_t *group;
  entry_t *entry = NULL;
  apr_size_t i;

  /* get the group that *must* contain the entry
   */
  group = &cache->directory[group_index];

  /* If the entry group has not been initialized, yet, there is no data.
   */
  if (! is_group_initialized(cache, group_index))
    {
      if (find_empty)
        {
          initialize_group(cache, group_index);
          entry = &group->entries[0];

          /* initialize entry for the new key */
          entry->key = to_find->entry_key;
        }

      return entry;
    }

  /* try to find the matching entry
   */
  while (1)
    {
      for (i = 0; i < group->header.used; ++i)
        if (entry_keys_match(&group->entries[i].key, &to_find->entry_key))
          {
            /* This is the only entry that _may_ contain the correct data. */
            entry = &group->entries[i];

            /* If we want to preserve it, check that it is an actual match. */
            if (!find_empty)
              {
                /* If there is no full key to compare, we are done. */
                if (!entry->key.key_len)
                  return entry;

                /* Compare the full key. */
                if (memcmp(to_find->full_key.data,
                           cache->data + entry->offset,
                           entry->key.key_len) == 0)
                  return entry;

                /* Key conflict. The entry to find cannot be anywhere else.
                 * Therefore, it is not cached. */
                return NULL;
              }

            /* need to empty that entry */
            drop_entry(cache, entry);
            if (group->header.used == GROUP_SIZE)
              group = last_group_in_chain(cache, group);
            else if (group->header.chain_length == 0)
              group = last_group_in_chain(cache,
                                          &cache->directory[group_index]);

            break;
          }

      /* No entry found (actually, none left to find).
       */
      if (group->header.next == NO_INDEX)
        break;

      /* only full groups may chain */
      assert(group->header.used == GROUP_SIZE);
      group = &cache->directory[group->header.next];
    }

  /* None found. Are we looking for a free entry?
   */
  if (find_empty)
    {
      /* There is no empty entry in the chain, try chaining a spare group.
       */
      if (   group->header.used == GROUP_SIZE
          && group->header.chain_length < MAX_GROUP_CHAIN_LENGTH)
        {
          entry_group_t *new_group = allocate_spare_group(cache);
          if (new_group)
            {
              /* chain groups
               */
              new_group->header.chain_length = group->header.chain_length + 1;
              new_group->header.previous = (apr_uint32_t) (group -
                                                           cache->directory);
              new_group->header.next = NO_INDEX;
              group->header.next = (apr_uint32_t) (new_group -
                                                   cache->directory);
              group = new_group;
            }
        }

      /* if GROUP is still filled, we need to remove a random entry */
      if (group->header.used == GROUP_SIZE)
        {
          /* every entry gets the same chance of being removed.
           * Otherwise, we free the first entry, fill it and
           * remove it again on the next occasion without considering
           * the other entries in this group.
           *
           * We hit only one random group instead of processing all
           * groups in the chain.
           */
          cache_level_t *entry_level;
          int to_remove = rand() % (GROUP_SIZE * group->header.chain_length);
          entry_group_t *to_shrink
            = get_group(cache, group_index, to_remove / GROUP_SIZE);

          entry = &to_shrink->entries[to_remove % GROUP_SIZE];
          entry_level = get_cache_level(cache, entry);
          for (i = 0; i < GROUP_SIZE; ++i)
            {
              /* keep L1 entries whenever possible */
              cache_level_t *level
                = get_cache_level(cache, &to_shrink->entries[i]);
              if (   (level != entry_level && entry_level == &cache->l1)
                  || (entry->hit_count > to_shrink->entries[i].hit_count))
                {
                  entry_level = level;
                  entry = &to_shrink->entries[i];
                }
            }

          /* for the entries that have not been removed,
           * reduce their hit counts to put them at a relative
           * disadvantage the next time.
           */
          for (i = 0; i < GROUP_SIZE; ++i)
            if (entry != &to_shrink->entries[i])
              let_entry_age(cache, &to_shrink->entries[i]);

          drop_entry(cache, entry);
        }

      /* initialize entry for the new key
       */
      entry = &group->entries[group->header.used];
      entry->key = to_find->entry_key;
    }

  return entry;
}
/* Move a surviving ENTRY from just behind the insertion window to
 * its beginning and move the insertion window up accordingly.
 */
static void
move_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t size = ALIGN_VALUE(entry->size);
  cache_level_t *level = get_cache_level(cache, entry);

  /* This entry survived this cleansing run.  Reset half of its
   * hit count so that its removal gets more likely in the next
   * run unless someone read / hit this entry in the meantime.
   */
  let_entry_age(cache, entry);

  /* Move the entry to the start of the empty / insertion section
   * (if it isn't there already).  Size-aligned moves are legal
   * since all offsets and block sizes share this same alignment.
   * Size-aligned moves tend to be faster than non-aligned ones
   * because no "odd" bytes at the end need special treatment.
   */
  if (entry->offset != level->current_data)
    {
      memmove(cache->data + level->current_data,
              cache->data + entry->offset,
              size);
      entry->offset = level->current_data;
    }

  /* The insertion position is now directly behind this entry.
   */
  level->current_data = entry->offset + size;
  level->next = entry->next;

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Move ENTRY in CACHE from L1 to L2.
 */
static void
promote_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t idx = get_index(cache, entry);
  apr_size_t size = ALIGN_VALUE(entry->size);
  assert(get_cache_level(cache, entry) == &cache->l1);
  assert(idx == cache->l1.next);

  /* copy item from the current location in L1 to the start of L2's
   * insertion window */
  memmove(cache->data + cache->l2.current_data,
          cache->data + entry->offset,
          size);
  entry->offset = cache->l2.current_data;

  /* The insertion position is now directly behind this entry.
   */
  cache->l2.current_data += size;

  /* remove ENTRY from chain of L1 entries and put it into L2
   */
  unchain_entry(cache, &cache->l1, entry, idx);
  chain_entry(cache, &cache->l2, entry, idx);
}
/* This function implements the cache insertion / eviction strategy for L2.
 *
 * If necessary, enlarge the insertion window of CACHE->L2 until it is at
 * least TO_FIT_IN->SIZE bytes long.  TO_FIT_IN->SIZE must not exceed the
 * data buffer size allocated to CACHE->L2.
 *
 * Return TRUE if enough room could be found or made.  A FALSE result
 * indicates that the respective item shall not be added.
 */
static svn_boolean_t
ensure_data_insertable_l2(svn_membuffer_t *cache,
                          entry_t *to_fit_in)
{
  entry_t *entry;

  /* accumulated size of the entries that have been removed to make
   * room for the new one.
   */
  apr_size_t moved_size = 0;

  /* count the number of entries that got moved.  A single large entry
   * being moved is not enough to reject an insertion.
   */
  apr_size_t moved_count = 0;

  /* accumulated "worth" of items dropped so far */
  apr_uint64_t drop_hits = 0;

  /* estimated "worth" of the new entry */
  apr_uint64_t drop_hits_limit = (to_fit_in->hit_count + 1)
                               * (apr_uint64_t)to_fit_in->priority;

  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually:
   *
   * - the incoming entry is small enough to fit into L2
   * - every iteration either frees parts of L2 or counts the moved size
   * - eventually, we either moved too many items with too much total size
   *   to accept the new entry, or made enough room in L2 for the new entry
   *
   * Low-prio items get rejected even sooner.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint64_t end = cache->l2.next == NO_INDEX
                       ? cache->l2.start_offset + cache->l2.size
                       : get_entry(cache, cache->l2.next)->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= to_fit_in->size + cache->l2.current_data)
        return TRUE;

      /* Don't be too eager to cache data.  If a lot of data has been moved
       * around, the current item has probably a relatively low priority.
       * We must also limit the effort spent here, even in case of faulty
       * heuristics.  Therefore, give up after some time.
       */
      if (moved_size > 4 * to_fit_in->size && moved_count > 7)
        return FALSE;

      /* if the net worth (in weighted hits) of items removed is already
       * larger than what we want to insert, reject TO_FIT_IN because it
       * still does not fit in. */
      if (drop_hits > drop_hits_limit)
        return FALSE;

      /* try to enlarge the insertion window
       */
      if (cache->l2.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l2.current_data = cache->l2.start_offset;
          cache->l2.next = cache->l2.first;
        }
      else
        {
          svn_boolean_t keep;
          entry = get_entry(cache, cache->l2.next);

          if (to_fit_in->priority < SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
            {
              /* Low prio items can be accepted only if the current
               * entry is of even lower prio and has fewer hits.
               */
              if (   entry->priority > to_fit_in->priority
                  || entry->hit_count > to_fit_in->hit_count)
                return FALSE;
            }

          if (entry->priority <= SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
            {
              /* Be quick to remove low-prio entries - even if the incoming
               * one is low-prio as well.  This makes room for more important
               * data and replaces existing data with newly read information.
               */
              keep = FALSE;
            }
          else
            {
              /* If the existing data is the same prio as the incoming data,
               * drop the existing entry if it had seen fewer (probably 0)
               * hits than the entry coming in from L1.  In case of different
               * priorities, keep the current entry if it has higher prio.
               * The new entry may still find room by ousting other entries.
               */
              keep = to_fit_in->priority == entry->priority
                   ? entry->hit_count >= to_fit_in->hit_count
                   : entry->priority > to_fit_in->priority;
            }

          /* keepers or destroyers? */
          if (keep)
            {
              /* Moving entries around is not for free -> track costs. */
              moved_size += entry->size;
              moved_count++;
              move_entry(cache, entry);
            }
          else
            {
              /* Drop the entry from the end of the insertion window.
               * Count the "hit importance" such that we are not sacrificing
               * too much of the high-hit contents.  However, don't count
               * low-priority hits because higher prio entries will often
               * provide the same data but in a further stage of processing.
               */
              if (entry->priority > SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
                drop_hits += entry->hit_count * (apr_uint64_t)entry->priority;

              drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached.  But if it was, "can't insert" was the
   * right answer. */
}
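
/* A worked example of the DROP_HITS budget above (numbers illustrative):
 * an incoming entry with hit_count == 3 and priority P gets a budget of
 * (3 + 1) * P.  Evicting two same-priority entries with 1 and 2 hits
 * accumulates 3 * P, which stays within budget; dropping a third entry
 * with 2 hits pushes the total to 5 * P, so the next check exceeds the
 * budget and the insertion gets rejected instead.
 */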
/* This function implements the cache insertion / eviction strategy for L1.
 *
 * If necessary, enlarge the insertion window of CACHE->L1 by promoting
 * entries to L2 until it is at least SIZE bytes long.
 *
 * Return TRUE if enough room could be found or made.  A FALSE result
 * indicates that the respective item shall not be added because it is
 * too large.
 */
static svn_boolean_t
ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
{
  /* Guarantees that the while loop will terminate. */
  if (size > cache->l1.size)
    return FALSE;

  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint32_t entry_index = cache->l1.next;
      entry_t *entry = get_entry(cache, entry_index);
      apr_uint64_t end = cache->l1.next == NO_INDEX
                       ? cache->l1.start_offset + cache->l1.size
                       : entry->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= size + cache->l1.current_data)
        return TRUE;

      /* Enlarge the insertion window
       */
      if (cache->l1.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l1.current_data = cache->l1.start_offset;
          cache->l1.next = cache->l1.first;
        }
      else
        {
          /* Remove the entry from the end of insertion window and promote
           * it to L2, if it is important enough.
           */
          svn_boolean_t keep = ensure_data_insertable_l2(cache, entry);

          /* We might have touched the group that contains ENTRY. Recheck. */
          if (entry_index == cache->l1.next)
            {
              if (keep)
                promote_entry(cache, entry);
              else
                drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached.  But if it was, "can't insert" was the
   * right answer. */
}
/* Mimic apr_pcalloc in APR_POOL_DEBUG mode, i.e. handle failed allocations
 * (e.g. OOM) properly: Allocate at least SIZE bytes from POOL and zero
 * the content of the allocated memory if ZERO has been set.  Return NULL
 * upon failed allocations.
 *
 * Also, satisfy our buffer alignment needs for performance reasons.
 */
static void *secure_aligned_alloc(apr_pool_t *pool,
                                  apr_size_t size,
                                  svn_boolean_t zero)
{
  void *memory = apr_palloc(pool, size + ITEM_ALIGNMENT);
  if (memory != NULL)
    {
      memory = ALIGN_POINTER(memory);
      if (zero)
        memset(memory, 0, size);
    }

  return memory;
}
svn_error_t *
svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
                                  apr_size_t total_size,
                                  apr_size_t directory_size,
                                  apr_size_t segment_count,
                                  svn_boolean_t thread_safe,
                                  svn_boolean_t allow_blocking_writes,
                                  apr_pool_t *pool)
{
  svn_membuffer_t *c;

  apr_size_t seg;
  apr_uint32_t group_count;
  apr_uint32_t main_group_count;
  apr_uint32_t spare_group_count;
  apr_uint32_t group_init_size;
  apr_uint64_t data_size;
  apr_uint64_t max_entry_size;

  /* Limit the total size (only relevant if we can address > 4GB)
   */
#if APR_SIZEOF_VOIDP > 4
  if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
    total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
#endif

  /* Limit the segment count
   */
  if (segment_count > MAX_SEGMENT_COUNT)
    segment_count = MAX_SEGMENT_COUNT;
  if (segment_count * MIN_SEGMENT_SIZE > total_size)
    segment_count = total_size / MIN_SEGMENT_SIZE;

  /* The segment count must be a power of two.  Round it down as
   * necessary.
   */
  while ((segment_count & (segment_count-1)) != 0)
    segment_count &= segment_count-1;
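
  /* The loop above clears the lowest set bit until a single bit remains,
   * e.g. a requested count of 12 (binary 1100) becomes 8 (1000) and 5
   * (101) becomes 4 (100); 0 and 1 pass through unchanged.
   */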
  /* if the caller hasn't provided a reasonable segment count or the above
   * limitations set it to 0, derive one from the absolute cache size
   */
  if (segment_count < 1)
    {
      /* Determine a reasonable number of cache segments.  Segmentation is
       * only useful for multi-threaded / multi-core servers as it reduces
       * lock contention on these systems.
       *
       * But on these systems, we can assume that ample memory has been
       * allocated to this cache.  Smaller caches should not be segmented
       * as this severely limits the maximum size of cacheable items.
       *
       * Segments should not be smaller than 32MB and max. cacheable item
       * size should grow as fast as segmentation.
       */
      apr_uint32_t segment_count_shift = 0;
      while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
             < total_size)
        ++segment_count_shift;

      segment_count = (apr_size_t)1 << segment_count_shift;
    }

  /* If we have an extremely large cache (>512 GB), the default segment
   * size may exceed the amount allocatable as one chunk.  In that case,
   * increase segmentation until we are under the threshold.
   */
  while (   total_size / segment_count > MAX_SEGMENT_SIZE
         && segment_count < MAX_SEGMENT_COUNT)
    segment_count *= 2;
  /* allocate cache as an array of segments / cache objects */
  c = apr_palloc(pool, segment_count * sizeof(*c));

  /* Split total cache size into segments of equal size
   */
  total_size /= segment_count;
  directory_size /= segment_count;

  /* prevent pathological conditions: ensure a certain minimum cache size
   */
  if (total_size < 2 * sizeof(entry_group_t))
    total_size = 2 * sizeof(entry_group_t);

  /* adapt the dictionary size accordingly, if necessary:
   * It must hold at least one group and must not exceed the cache size.
   */
  if (directory_size > total_size - sizeof(entry_group_t))
    directory_size = total_size - sizeof(entry_group_t);
  if (directory_size < 2 * sizeof(entry_group_t))
    directory_size = 2 * sizeof(entry_group_t);

  /* limit the data size to what we can address.
   * Note that this cannot overflow since all values are of size_t.
   * Also, make it a multiple of the item placement granularity to
   * prevent subtle overflows.
   */
  data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;

  /* For cache sizes > 16TB, individual cache segments will be larger
   * than 32GB allowing for >4GB entries.  But caching chunks larger
   * than 4GB is simply not supported.
   */
  max_entry_size = data_size / 8 > MAX_ITEM_SIZE
                 ? MAX_ITEM_SIZE
                 : data_size / 8;

  /* to keep the entries small, we use 32 bit indexes only
   * -> we need to ensure that no more than 4G entries exist.
   *
   * Note, that this limit could only be exceeded in a very
   * theoretical setup with about 1EB of cache.
   */
  group_count = directory_size / sizeof(entry_group_t)
                    >= (APR_UINT32_MAX / GROUP_SIZE)
              ? (APR_UINT32_MAX / GROUP_SIZE) - 1
              : (apr_uint32_t)(directory_size / sizeof(entry_group_t));

  /* set some of the index directory aside as over-flow (spare) buffers */
  spare_group_count = MAX(group_count / 4, 1);
  main_group_count = group_count - spare_group_count;
  assert(spare_group_count > 0 && main_group_count > 0);

  group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
  for (seg = 0; seg < segment_count; ++seg)
    {
      /* allocate buffers and initialize cache members
       */
      c[seg].segment_count = (apr_uint32_t)segment_count;

      c[seg].group_count = main_group_count;
      c[seg].spare_group_count = spare_group_count;
      c[seg].first_spare_group = NO_INDEX;
      c[seg].max_spare_used = 0;

      c[seg].directory = apr_pcalloc(pool,
                                     group_count * sizeof(entry_group_t));

      /* Allocate and initialize directory entries as "not initialized",
         hence "unused" */
      c[seg].group_initialized = apr_pcalloc(pool, group_init_size);

      /* Allocate 1/4th of the data buffer to L1
       */
      c[seg].l1.first = NO_INDEX;
      c[seg].l1.last = NO_INDEX;
      c[seg].l1.next = NO_INDEX;
      c[seg].l1.start_offset = 0;
      c[seg].l1.size = ALIGN_VALUE(data_size / 4);
      c[seg].l1.current_data = 0;

      /* The remaining 3/4th will be used as L2
       */
      c[seg].l2.first = NO_INDEX;
      c[seg].l2.last = NO_INDEX;
      c[seg].l2.next = NO_INDEX;
      c[seg].l2.start_offset = c[seg].l1.size;
      c[seg].l2.size = data_size - c[seg].l1.size;
      c[seg].l2.current_data = c[seg].l2.start_offset;

      c[seg].data = secure_aligned_alloc(pool, (apr_size_t)data_size, FALSE);
      c[seg].data_used = 0;
      c[seg].max_entry_size = max_entry_size;

      c[seg].used_entries = 0;
      c[seg].total_reads = 0;
      c[seg].total_writes = 0;
      c[seg].total_hits = 0;

      /* were allocations successful?
       * If not, initialize a minimal cache structure.
       */
      if (c[seg].data == NULL || c[seg].directory == NULL)
        {
          /* We are OOM.  There is no need to proceed with "half a cache".
           */
          return svn_error_wrap_apr(APR_ENOMEM, "OOM");
        }

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
      /* A lock for intra-process synchronization to the cache, or NULL
       * if the cache's creator doesn't feel the cache needs to be
       * thread-safe.
       */
      SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool));
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
      /* Same for read-write lock. */
      c[seg].lock = NULL;
      if (thread_safe)
        {
          apr_status_t status =
              apr_thread_rwlock_create(&(c[seg].lock), pool);
          if (status)
            return svn_error_wrap_apr(status, _("Can't create cache mutex"));
        }

      /* Select the behavior of write operations.
       */
      c[seg].allow_blocking_writes = allow_blocking_writes;
#endif
    }

  /* done here
   */
  *cache = c;
  return SVN_NO_ERROR;
}
1877 svn_cache__membuffer_clear(svn_membuffer_t *cache)
1880 apr_size_t segment_count = cache->segment_count;
1882 /* Length of the group_initialized array in bytes.
1883 See also svn_cache__membuffer_cache_create(). */
1884 apr_size_t group_init_size
1885 = 1 + (cache->group_count + cache->spare_group_count)
1886 / (8 * GROUP_INIT_GRANULARITY);
1888 /* Clear segment by segment. This implies that other thread may read
1889 and write to other segments after we cleared them and before the
1890 last segment is done.
1892 However, that is no different from a write request coming through
1893 right after we cleared all segments because dependencies between
1894 cache entries (recursive lookup / access locks) are not allowed.
1896 for (seg = 0; seg < segment_count; ++seg)
1898 /* Unconditionally acquire the write lock. */
1899 SVN_ERR(force_write_lock_cache(&cache[seg]));
1901 /* Mark all groups as "not initialized", which implies "empty". */
1902 cache[seg].first_spare_group = NO_INDEX;
1903 cache[seg].max_spare_used = 0;
1905 memset(cache[seg].group_initialized, 0, group_init_size);
1907 /* Unlink L1 contents. */
1908 cache[seg].l1.first = NO_INDEX;
1909 cache[seg].l1.last = NO_INDEX;
1910 cache[seg].l1.next = NO_INDEX;
1911 cache[seg].l1.current_data = cache[seg].l1.start_offset;
1913 /* Unlink L2 contents. */
1914 cache[seg].l2.first = NO_INDEX;
1915 cache[seg].l2.last = NO_INDEX;
1916 cache[seg].l2.next = NO_INDEX;
1917 cache[seg].l2.current_data = cache[seg].l2.start_offset;
1919 /* Reset content counters. */
1920 cache[seg].data_used = 0;
1921 cache[seg].used_entries = 0;
1923 /* Segment may be used again. */
1924 SVN_ERR(unlock_cache(&cache[seg], SVN_NO_ERROR));
1928 return SVN_NO_ERROR;
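/* Editorial note (not in the original source): clearing resets only the
 * index bookkeeping. The stale bytes in the data buffer are left in
 * place; they simply become unreachable and will be overwritten by
 * future insertions.
 */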
1931 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1932 * by the hash value TO_FIND and set *FOUND accordingly.
1934 * Note: This function requires the caller to serialize access.
1935 * Don't call it directly, call entry_exists instead.
1937 static svn_error_t *
1938 entry_exists_internal(svn_membuffer_t *cache,
1939 apr_uint32_t group_index,
1940 const full_key_t *to_find,
1941 svn_boolean_t *found)
1943 *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1944 return SVN_NO_ERROR;
1947 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1948 * by the hash value TO_FIND and set *FOUND accordingly.
1950 static svn_error_t *
1951 entry_exists(svn_membuffer_t *cache,
1952 apr_uint32_t group_index,
1953 const full_key_t *to_find,
1954 svn_boolean_t *found)
1956 WITH_READ_LOCK(cache,
1957 entry_exists_internal(cache,
1962 return SVN_NO_ERROR;
1965 /* Given the SIZE and PRIORITY of a new item, return the cache level
1966 (L1 or L2) in fragment CACHE that this item shall be inserted into.
1967 If we can neither find nor make enough room for the item, return NULL.
1969 static cache_level_t *
1970 select_level(svn_membuffer_t *cache,
1972 apr_uint32_t priority)
1974 if (cache->max_entry_size >= size)
1976 /* Small items go into L1. */
1977 return ensure_data_insertable_l1(cache, size)
1981 else if ( cache->l2.size >= size
1982 && MAX_ITEM_SIZE >= size
1983 && priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1985 /* Large but important items go into L2. */
1986 entry_t dummy_entry = { { { 0 } } };
1987 dummy_entry.priority = priority;
1988 dummy_entry.size = size;
1990 return ensure_data_insertable_l2(cache, &dummy_entry)
1995 /* Don't cache large, unimportant items. */
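/* Editorial summary (not in the original source) of the rules above:
 *
 *   fits max_entry_size                        -> L1, if room can be made
 *   fits l2.size and MAX_ITEM_SIZE,
 *   priority above default                     -> L2, if room can be made
 *   anything else                              -> NULL, i.e. don't cache
 */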
1999 /* Try to insert the serialized item given in BUFFER with ITEM_SIZE
2000 * into the group GROUP_INDEX of CACHE and uniquely identify it by
2001 * hash value TO_FIND.
2003 * However, there is no guarantee that it will actually be put into
2004 * the cache. If there is already some data associated with TO_FIND,
2005 * it will be removed from the cache even if the new data cannot
2008 * Note: This function requires the caller to serialize access.
2009 * Don't call it directly, call membuffer_cache_set instead.
2011 static svn_error_t *
2012 membuffer_cache_set_internal(svn_membuffer_t *cache,
2013 const full_key_t *to_find,
2014 apr_uint32_t group_index,
2016 apr_size_t item_size,
2017 apr_uint32_t priority,
2018 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2019 apr_pool_t *scratch_pool)
2021 cache_level_t *level;
2022 apr_size_t size = item_size + to_find->entry_key.key_len;
2024 /* first, look for a previous entry for the given key */
2025 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2027 /* if there is an old version of that entry and the new data fits into
2028 * the old spot, just re-use that space. */
2029 if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
2031 /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
2032 * lest we run into trouble with 32 bit underflow *not* treated as a
2035 cache->data_used += (apr_uint64_t)size - entry->size;
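/* Editorial note (not in the original source): with a 32-bit apr_size_t,
 * SIZE - ENTRY->SIZE would wrap within 32 bits (e.g. 100 - 200 yields
 * 0xffffff9c) and then get zero-extended, adding a huge bogus value to
 * the wider DATA_USED. Widening SIZE first makes the wrap-around happen
 * modulo 2^64, which on the unsigned 64-bit counter is exactly the
 * intended subtraction. */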
2037 entry->priority = priority;
2039 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2041 /* Remember original content, type and key (hashes)
2043 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2044 memcpy(&entry->tag, tag, sizeof(*tag));
2048 if (entry->key.key_len)
2049 memcpy(cache->data + entry->offset, to_find->full_key.data,
2050 entry->key.key_len);
2052 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2055 cache->total_writes++;
2056 return SVN_NO_ERROR;
2059 /* if necessary, enlarge the insertion window.
2061 level = buffer ? select_level(cache, size, priority) : NULL;
2064 /* Remove old data for this key, if it exists.
2065 * Get an unused entry for the key and initialize it with
2066 * the serialized item's (future) position within the data buffer.
2068 entry = find_entry(cache, group_index, to_find, TRUE);
2070 entry->offset = level->current_data;
2071 entry->priority = priority;
2073 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2075 /* Remember original content, type and key (hashes)
2077 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2078 memcpy(&entry->tag, tag, sizeof(*tag));
2082 /* Link the entry properly.
2084 insert_entry(cache, entry);
2086 /* Copy the serialized item data into the cache.
2088 if (entry->key.key_len)
2089 memcpy(cache->data + entry->offset, to_find->full_key.data,
2090 entry->key.key_len);
2092 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2095 cache->total_writes++;
2099 /* if there is already an entry for this key, drop it.
2100 * Since ensure_data_insertable may have removed entries from
2101 * ENTRY's group, re-do the lookup.
2103 entry = find_entry(cache, group_index, to_find, FALSE);
2105 drop_entry(cache, entry);
2108 return SVN_NO_ERROR;
2111 /* Try to insert the ITEM and use the KEY to uniquely identify it.
2112 * However, there is no guarantee that it will actually be put into
2113 * the cache. If there is already some data associated with the KEY,
2114 * it will be removed from the cache even if the new data cannot
2117 * The SERIALIZER is called to transform the ITEM into a single,
2118 * flat data buffer. Temporary allocations may be done in POOL.
2120 static svn_error_t *
2121 membuffer_cache_set(svn_membuffer_t *cache,
2122 const full_key_t *key,
2124 svn_cache__serialize_func_t serializer,
2125 apr_uint32_t priority,
2126 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2127 apr_pool_t *scratch_pool)
2129 apr_uint32_t group_index;
2130 void *buffer = NULL;
2131 apr_size_t size = 0;
2133 /* find the entry group that will hold the key.
2135 group_index = get_group_index(&cache, &key->entry_key);
2137 /* Serialize the data.
2140 SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
2142 /* The actual cache data access needs to be synchronized.
2144 WITH_WRITE_LOCK(cache,
2145 membuffer_cache_set_internal(cache,
2151 DEBUG_CACHE_MEMBUFFER_TAG
2153 return SVN_NO_ERROR;
2156 /* Count a hit in ENTRY within CACHE.
2159 increment_hit_counters(svn_membuffer_t *cache, entry_t *entry)
2161 /* To minimize the memory footprint of the cache index, we limit local
2162 * hit counters to 32 bits. These may overflow but we don't really
2163 * care because at worst, ENTRY will be dropped from cache once every
2164 * few billion hits. */
2165 svn_atomic_inc(&entry->hit_count);
2167 /* That one is for stats only. */
2168 cache->total_hits++;
2171 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2172 * by the hash value TO_FIND. If no item has been stored for KEY,
2173 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
2174 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
2177 * Note: This function requires the caller to serialize access.
2178 * Don't call it directly, call membuffer_cache_get instead.
2180 static svn_error_t *
2181 membuffer_cache_get_internal(svn_membuffer_t *cache,
2182 apr_uint32_t group_index,
2183 const full_key_t *to_find,
2185 apr_size_t *item_size,
2186 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2187 apr_pool_t *result_pool)
2192 /* The actual cache data access needs to be synchronized.
2194 entry = find_entry(cache, group_index, to_find, FALSE);
2195 cache->total_reads++;
2198 /* no such entry found.
2203 return SVN_NO_ERROR;
2206 size = ALIGN_VALUE(entry->size) - entry->key.key_len;
2207 *buffer = ALIGN_POINTER(apr_palloc(result_pool, size + ITEM_ALIGNMENT-1));
2208 memcpy(*buffer, cache->data + entry->offset + entry->key.key_len, size);
2210 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2212 /* Check for overlapping entries.
2214 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2215 entry->offset + size
2216 <= get_entry(cache, entry->next)->offset);
2218 /* Compare original content, type and key (hashes)
2220 SVN_ERR(store_content_part(tag, *buffer, entry->size - entry->key.key_len,
2222 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2226 /* update hit statistics
2228 increment_hit_counters(cache, entry);
2229 *item_size = entry->size - entry->key.key_len;
2231 return SVN_NO_ERROR;
2234 /* Look for the *ITEM identified by KEY. If no item has been stored
2235 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
2236 * to re-construct the proper object from the serialized data.
2237 * Allocations will be done in POOL.
2239 static svn_error_t *
2240 membuffer_cache_get(svn_membuffer_t *cache,
2241 const full_key_t *key,
2243 svn_cache__deserialize_func_t deserializer,
2244 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2245 apr_pool_t *result_pool)
2247 apr_uint32_t group_index;
2251 /* find the entry group that will hold the key.
2253 group_index = get_group_index(&cache, &key->entry_key);
2254 WITH_READ_LOCK(cache,
2255 membuffer_cache_get_internal(cache,
2260 DEBUG_CACHE_MEMBUFFER_TAG
2263 /* re-construct the original data object from its serialized form.
2268 return SVN_NO_ERROR;
2271 return deserializer(item, buffer, size, result_pool);
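/* Editorial note (not in the original source): the serialized item is
 * copied out under the read lock in membuffer_cache_get_internal(), but
 * the potentially expensive DESERIALIZER call above runs after the lock
 * has been released, keeping lock hold times short. */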
2274 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2275 * by the hash value TO_FIND. If no item has been stored for KEY, *FOUND
2276 * will be set to FALSE, and to TRUE otherwise.
2278 static svn_error_t *
2279 membuffer_cache_has_key_internal(svn_membuffer_t *cache,
2280 apr_uint32_t group_index,
2281 const full_key_t *to_find,
2282 svn_boolean_t *found)
2284 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2287 /* This will often be called by a "block read" when most data is already
2288 in L2 and only a few previously evicted items are added to L1
2289 again. While items in L1 are well protected for a while, L2
2290 items may get evicted soon. Thus, mark them all as "hit" to give
2291 them a higher chance of survival. */
2292 increment_hit_counters(cache, entry);
2300 return SVN_NO_ERROR;
2303 /* Look for an entry identified by KEY. If no item has been stored
2304 * for KEY, *FOUND will be set to FALSE, and to TRUE otherwise.
2306 /* Implements svn_cache__has_key for membuffer caches.
2308 static svn_error_t *
2309 membuffer_cache_has_key(svn_membuffer_t *cache,
2310 const full_key_t *key,
2311 svn_boolean_t *found)
2313 /* find the entry group that will hold the key.
2315 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2316 cache->total_reads++;
2318 WITH_READ_LOCK(cache,
2319 membuffer_cache_has_key_internal(cache,
2324 return SVN_NO_ERROR;
2327 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2328 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
2329 * If not found, *ITEM will be NULL.
2331 * Otherwise, the DESERIALIZER is called with that entry and the BATON
2332 * provided and will extract the desired information. The result is set
2333 * in *ITEM. Allocations will be done in POOL.
2335 * Note: This function requires the caller to serialize access.
2336 * Don't call it directly, call membuffer_cache_get_partial instead.
2338 static svn_error_t *
2339 membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
2340 apr_uint32_t group_index,
2341 const full_key_t *to_find,
2343 svn_boolean_t *found,
2344 svn_cache__partial_getter_func_t deserializer,
2346 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2347 apr_pool_t *result_pool)
2349 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2350 cache->total_reads++;
2356 return SVN_NO_ERROR;
2360 const void *item_data = cache->data + entry->offset + entry->key.key_len;
2361 apr_size_t item_size = entry->size - entry->key.key_len;
2363 increment_hit_counters(cache, entry);
2365 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2367 /* Check for overlapping entries.
2369 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2370 entry->offset + entry->size
2371 <= get_entry(cache, entry->next)->offset);
2373 /* Compare original content, type and key (hashes)
2375 SVN_ERR(store_content_part(tag, item_data, item_size, result_pool));
2376 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2380 return deserializer(item, item_data, item_size, baton, result_pool);
2384 /* Look for the cache entry identified by KEY. FOUND indicates
2385 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
2386 * the DESERIALIZER is called with that entry and the BATON provided
2387 * and will extract the desired information. The result is set in *ITEM.
2388 * Allocations will be done in POOL.
2390 static svn_error_t *
2391 membuffer_cache_get_partial(svn_membuffer_t *cache,
2392 const full_key_t *key,
2394 svn_boolean_t *found,
2395 svn_cache__partial_getter_func_t deserializer,
2397 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2398 apr_pool_t *result_pool)
2400 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2402 WITH_READ_LOCK(cache,
2403 membuffer_cache_get_partial_internal
2404 (cache, group_index, key, item, found,
2405 deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
2408 return SVN_NO_ERROR;
2411 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2412 * by the hash value TO_FIND. If no entry has been found, the function
2413 * returns without modifying the cache.
2415 * Otherwise, FUNC is called with that entry and the BATON provided
2416 * and may modify the cache entry. Allocations will be done in POOL.
2418 * Note: This function requires the caller to serialize access.
2419 * Don't call it directly, call membuffer_cache_set_partial instead.
2421 static svn_error_t *
2422 membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
2423 apr_uint32_t group_index,
2424 const full_key_t *to_find,
2425 svn_cache__partial_setter_func_t func,
2427 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2428 apr_pool_t *scratch_pool)
2430 /* cache item lookup
2432 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2433 cache->total_reads++;
2435 /* this function is a no-op if the item is not in the cache
2441 /* access the serialized cache item */
2442 apr_size_t key_len = entry->key.key_len;
2443 void *item_data = cache->data + entry->offset + key_len;
2444 void *orig_data = item_data;
2445 apr_size_t item_size = entry->size - key_len;
2447 increment_hit_counters(cache, entry);
2448 cache->total_writes++;
2450 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2452 /* Check for overlapping entries.
2454 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2455 entry->offset + entry->size
2456 <= get_entry(cache, entry->next)->offset);
2458 /* Compare original content, type and key (hashes)
2460 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2461 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2465 /* modify it, preferably in-situ.
2467 err = func(&item_data, &item_size, baton, scratch_pool);
2471 /* Something went wrong while FUNC was modifying the cached
2472 * item. Thus, it might have become invalid / corrupted.
2473 * We had better drop it.
2475 drop_entry(cache, entry);
2481 /* if the modification caused a re-allocation, we need to remove
2482 * the old entry and copy the new data back into the cache.
2484 if (item_data != orig_data)
2486 /* Remove the old entry and try to make space for the new one.
2488 drop_entry(cache, entry);
2489 if ( (cache->max_entry_size >= item_size + key_len)
2490 && ensure_data_insertable_l1(cache, item_size + key_len))
2492 /* Write the new entry.
2494 entry = find_entry(cache, group_index, to_find, TRUE);
2495 entry->size = item_size + key_len;
2496 entry->offset = cache->l1.current_data;
2499 memcpy(cache->data + entry->offset,
2500 to_find->full_key.data, key_len);
2502 memcpy(cache->data + entry->offset + key_len, item_data,
2505 /* Link the entry properly.
2507 insert_entry(cache, entry);
2511 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2513 /* Remember original content, type and key (hashes)
2515 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2516 memcpy(&entry->tag, tag, sizeof(*tag));
2522 return SVN_NO_ERROR;
2525 /* Look for the cache entry identified by KEY. If no entry
2526 * has been found, the function returns without modifying the cache.
2527 * Otherwise, FUNC is called with that entry and the BATON provided
2528 * and may modify the cache entry. Allocations will be done in POOL.
2530 static svn_error_t *
2531 membuffer_cache_set_partial(svn_membuffer_t *cache,
2532 const full_key_t *key,
2533 svn_cache__partial_setter_func_t func,
2535 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2536 apr_pool_t *scratch_pool)
2538 /* cache item lookup
2540 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2541 WITH_WRITE_LOCK(cache,
2542 membuffer_cache_set_partial_internal
2543 (cache, group_index, key, func, baton,
2544 DEBUG_CACHE_MEMBUFFER_TAG
2547 /* done here -> unlock the cache
2549 return SVN_NO_ERROR;
2552 /* Implement the svn_cache__t interface on top of a shared membuffer cache.
2554 * Because membuffer caches tend to be very large, there will be rather few
2555 * of them (usually only one). Thus, the same instance shall be used as the
2556 * backend to many application-visible svn_cache__t instances. This should
2557 * also achieve global resource usage fairness.
2559 * To accommodate items from multiple sources, the individual keys must be
2560 * unique over all of them. This is achieved by simply adding a prefix key
2561 * that unambiguously identifies the item's context (e.g. path to the
2562 * respective repository). The prefix will be set upon construction of the
2563 * svn_cache__t instance.
2566 /* Internal cache structure (used in svn_cache__t.cache_internal) basically
2567 * holding the additional parameters needed to call the respective membuffer
2570 typedef struct svn_membuffer_cache_t
2572 /* this is where all our data will end up.
2574 svn_membuffer_t *membuffer;
2576 /* use this conversion function when inserting an item into the membuffer
2578 svn_cache__serialize_func_t serializer;
2580 /* use this conversion function when reading an item from the membuffer
2582 svn_cache__deserialize_func_t deserializer;
2584 /* Prepend this byte sequence to any key passed to us.
2585 * This makes our keys different from all keys used by svn_membuffer_cache_t
2586 * instances that we don't want to share cached data with.
2590 /* length of the keys that will be passed to us through the
2591 * svn_cache__t interface. May be APR_HASH_KEY_STRING.
2593 apr_ssize_t key_len;
2595 /* priority class for all items written through this interface */
2596 apr_uint32_t priority;
2598 /* Temporary buffer containing the hash key for the current access
2600 full_key_t combined_key;
2602 /* if enabled, this will serialize the access to this instance.
2604 svn_mutex__t *mutex;
2605 } svn_membuffer_cache_t;
2607 /* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should
2608 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check.
2610 #define ALLOCATIONS_PER_POOL_CLEAR 10
2612 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2613 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2614 * This could replace combine_key() entirely but we actually use it only
2615 * when the quick path fails.
2618 combine_long_key(svn_membuffer_cache_t *cache,
2620 apr_ssize_t key_len)
2622 apr_uint32_t *digest_buffer;
2624 apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2625 apr_size_t aligned_key_len;
2627 /* handle variable-length keys */
2628 if (key_len == APR_HASH_KEY_STRING)
2629 key_len = strlen((const char *) key);
2631 aligned_key_len = ALIGN_VALUE(key_len);
2634 svn_membuf__ensure(&cache->combined_key.full_key,
2635 aligned_key_len + prefix_len);
2637 key_copy = (char *)cache->combined_key.full_key.data + prefix_len;
2638 cache->combined_key.entry_key.key_len = aligned_key_len + prefix_len;
2639 memcpy(key_copy, key, key_len);
2640 memset(key_copy + key_len, 0, aligned_key_len - key_len);
2642 /* Hash key into 16 bytes. */
2643 digest_buffer = (apr_uint32_t *)cache->combined_key.entry_key.fingerprint;
2644 svn__fnv1a_32x4_raw(digest_buffer, key, key_len);
2646 /* Combine with prefix. */
2647 cache->combined_key.entry_key.fingerprint[0]
2648 ^= cache->prefix.entry_key.fingerprint[0];
2649 cache->combined_key.entry_key.fingerprint[1]
2650 ^= cache->prefix.entry_key.fingerprint[1];
2653 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2654 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2657 combine_key(svn_membuffer_cache_t *cache,
2659 apr_ssize_t key_len)
2661 /* short, fixed-size keys are the most common case */
2662 if (key_len != APR_HASH_KEY_STRING && key_len <= 16)
2664 const apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2666 /* Copy of *key, padded with 0.
2667 * We put it just behind the prefix already copied into the COMBINED_KEY.
2668 * The buffer space was allocated when the cache was created. */
2669 apr_uint64_t *data = (void *)((char *)cache->combined_key.full_key.data +
2671 assert(prefix_len <= cache->combined_key.full_key.size - 16);
2672 cache->combined_key.entry_key.key_len = prefix_len + 16;
2676 memcpy(data, key, key_len);
2678 /* scramble key DATA. All of this must be reversible to prevent key
2679 * collisions. So, we limit ourselves to xor and permutations. */
2680 data[1] = (data[1] << 27) | (data[1] >> 37);
2681 data[1] ^= data[0] & 0xffff;
2682 data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
2684 /* combine with this cache's namespace */
2685 cache->combined_key.entry_key.fingerprint[0]
2686 = data[0] ^ cache->prefix.entry_key.fingerprint[0];
2687 cache->combined_key.entry_key.fingerprint[1]
2688 = data[1] ^ cache->prefix.entry_key.fingerprint[1];
2692 /* longer or variably sized keys */
2693 combine_long_key(cache, key, key_len);
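/* Editorial sketch (not in the original source): the scramble above is a
 * bijection on the 16 key bytes, so two distinct short keys can never
 * collide after mixing. For illustration, its inverse (never needed by
 * the code itself) would be:
 *
 *   data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
 *   data[1] ^= data[0] & 0xffff;
 *   data[1] = (data[1] >> 27) | (data[1] << 37);
 *
 * Each step is a rotation or an xor with bits that the step itself does
 * not modify, hence each step, and the whole sequence, is reversible. */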
2697 /* Implement svn_cache__vtable_t.get (not thread-safe)
2699 static svn_error_t *
2700 svn_membuffer_cache_get(void **value_p,
2701 svn_boolean_t *found,
2704 apr_pool_t *result_pool)
2706 svn_membuffer_cache_t *cache = cache_void;
2708 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2716 return SVN_NO_ERROR;
2719 /* construct the full, i.e. globally unique, key by adding
2720 * this cache instance's prefix
2722 combine_key(cache, key, cache->key_len);
2724 /* Look the item up. */
2725 SVN_ERR(membuffer_cache_get(cache->membuffer,
2726 &cache->combined_key,
2728 cache->deserializer,
2729 DEBUG_CACHE_MEMBUFFER_TAG
2733 *found = *value_p != NULL;
2735 return SVN_NO_ERROR;
2738 /* Implement svn_cache__vtable_t.has_key (not thread-safe)
2740 static svn_error_t *
2741 svn_membuffer_cache_has_key(svn_boolean_t *found,
2744 apr_pool_t *scratch_pool)
2746 svn_membuffer_cache_t *cache = cache_void;
2753 return SVN_NO_ERROR;
2756 /* construct the full, i.e. globally unique, key by adding
2757 * this cache instance's prefix
2759 combine_key(cache, key, cache->key_len);
2761 /* Look the item up. */
2762 SVN_ERR(membuffer_cache_has_key(cache->membuffer,
2763 &cache->combined_key,
2767 return SVN_NO_ERROR;
2770 /* Implement svn_cache__vtable_t.set (not thread-safe)
2772 static svn_error_t *
2773 svn_membuffer_cache_set(void *cache_void,
2776 apr_pool_t *scratch_pool)
2778 svn_membuffer_cache_t *cache = cache_void;
2780 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2784 return SVN_NO_ERROR;
2786 /* construct the full, i.e. globally unique, key by adding
2787 * this cache instance's prefix
2789 combine_key(cache, key, cache->key_len);
2791 /* (probably) add the item to the cache. But there is no real guarantee
2792 * that the item will actually be cached afterwards.
2794 return membuffer_cache_set(cache->membuffer,
2795 &cache->combined_key,
2799 DEBUG_CACHE_MEMBUFFER_TAG
2803 /* Implement svn_cache__vtable_t.iter as "not implemented"
2805 static svn_error_t *
2806 svn_membuffer_cache_iter(svn_boolean_t *completed,
2808 svn_iter_apr_hash_cb_t user_cb,
2810 apr_pool_t *scratch_pool)
2812 return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
2813 _("Can't iterate a membuffer-based cache"));
2816 /* Implement svn_cache__vtable_t.get_partial (not thread-safe)
2818 static svn_error_t *
2819 svn_membuffer_cache_get_partial(void **value_p,
2820 svn_boolean_t *found,
2823 svn_cache__partial_getter_func_t func,
2825 apr_pool_t *result_pool)
2827 svn_membuffer_cache_t *cache = cache_void;
2829 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2836 return SVN_NO_ERROR;
2839 combine_key(cache, key, cache->key_len);
2840 SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
2841 &cache->combined_key,
2846 DEBUG_CACHE_MEMBUFFER_TAG
2849 return SVN_NO_ERROR;
2852 /* Implement svn_cache__vtable_t.set_partial (not thread-safe)
2854 static svn_error_t *
2855 svn_membuffer_cache_set_partial(void *cache_void,
2857 svn_cache__partial_setter_func_t func,
2859 apr_pool_t *scratch_pool)
2861 svn_membuffer_cache_t *cache = cache_void;
2863 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2867 combine_key(cache, key, cache->key_len);
2868 SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
2869 &cache->combined_key,
2872 DEBUG_CACHE_MEMBUFFER_TAG
2875 return SVN_NO_ERROR;
2878 /* Implement svn_cache__vtable_t.is_cachable
2879 * (thread-safe even without mutex)
2881 static svn_boolean_t
2882 svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
2884 /* Don't allow extremely large element sizes. Otherwise, the cache
2885 * might be thrashed by a few extremely large entries. And the size
2886 * must be small enough to be stored in a 32 bit value.
2888 svn_membuffer_cache_t *cache = cache_void;
2889 return cache->priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY
2890 ? cache->membuffer->l2.size >= size && MAX_ITEM_SIZE >= size
2891 : size <= cache->membuffer->max_entry_size;
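/* Editorial note (not in the original source): this mirrors
 * select_level(). High-priority front-ends may cache anything that fits
 * into L2 (and stays below MAX_ITEM_SIZE); all others are limited by
 * the segment's MAX_ENTRY_SIZE, matching the L1 admission rule. */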
2894 /* Add statistics of SEGMENT to INFO. If INCLUDE_HISTOGRAM is TRUE,
2895 * accumulate index bucket fill levels in INFO->HISTOGRAM.
2897 static svn_error_t *
2898 svn_membuffer_get_segment_info(svn_membuffer_t *segment,
2899 svn_cache__info_t *info,
2900 svn_boolean_t include_histogram)
2904 info->data_size += segment->l1.size + segment->l2.size;
2905 info->used_size += segment->data_used;
2906 info->total_size += segment->l1.size + segment->l2.size +
2907 segment->group_count * GROUP_SIZE * sizeof(entry_t);
2909 info->used_entries += segment->used_entries;
2910 info->total_entries += segment->group_count * GROUP_SIZE;
2912 if (include_histogram)
2913 for (i = 0; i < segment->group_count; ++i)
2914 if (is_group_initialized(segment, i))
2916 entry_group_t *chain_end
2917 = last_group_in_chain(segment, &segment->directory[i]);
2919 = MIN(chain_end->header.used,
2920 sizeof(info->histogram) / sizeof(info->histogram[0]) - 1);
2921 info->histogram[use]++;
2924 return SVN_NO_ERROR;
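/* Editorial note (not in the original source): HISTOGRAM[N] ends up
 * counting the initialized groups whose last group in the chain holds N
 * entries, with the final bucket absorbing all higher fill levels. */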
2927 /* Implement svn_cache__vtable_t.get_info
2928 * (thread-safe even without mutex)
2930 static svn_error_t *
2931 svn_membuffer_cache_get_info(void *cache_void,
2932 svn_cache__info_t *info,
2933 svn_boolean_t reset,
2934 apr_pool_t *result_pool)
2936 svn_membuffer_cache_t *cache = cache_void;
2939 /* cache front-end specific data */
2941 info->id = apr_pstrdup(result_pool, cache->prefix.full_key.data);
2943 /* collect info from shared cache back-end */
2945 for (i = 0; i < cache->membuffer->segment_count; ++i)
2947 svn_membuffer_t *segment = cache->membuffer + i;
2948 WITH_READ_LOCK(segment,
2949 svn_membuffer_get_segment_info(segment, info, FALSE));
2952 return SVN_NO_ERROR;
2956 /* the v-table for membuffer-based caches (single-threaded access)
2958 static svn_cache__vtable_t membuffer_cache_vtable = {
2959 svn_membuffer_cache_get,
2960 svn_membuffer_cache_has_key,
2961 svn_membuffer_cache_set,
2962 svn_membuffer_cache_iter,
2963 svn_membuffer_cache_is_cachable,
2964 svn_membuffer_cache_get_partial,
2965 svn_membuffer_cache_set_partial,
2966 svn_membuffer_cache_get_info
2969 /* Implement svn_cache__vtable_t.get and serialize all cache access.
2971 static svn_error_t *
2972 svn_membuffer_cache_get_synced(void **value_p,
2973 svn_boolean_t *found,
2976 apr_pool_t *result_pool)
2978 svn_membuffer_cache_t *cache = cache_void;
2979 SVN_MUTEX__WITH_LOCK(cache->mutex,
2980 svn_membuffer_cache_get(value_p,
2986 return SVN_NO_ERROR;
2989 /* Implement svn_cache__vtable_t.has_key and serialize all cache access.
2991 static svn_error_t *
2992 svn_membuffer_cache_has_key_synced(svn_boolean_t *found,
2995 apr_pool_t *result_pool)
2997 svn_membuffer_cache_t *cache = cache_void;
2998 SVN_MUTEX__WITH_LOCK(cache->mutex,
2999 svn_membuffer_cache_has_key(found,
3004 return SVN_NO_ERROR;
3007 /* Implement svn_cache__vtable_t.set and serialize all cache access.
3009 static svn_error_t *
3010 svn_membuffer_cache_set_synced(void *cache_void,
3013 apr_pool_t *scratch_pool)
3015 svn_membuffer_cache_t *cache = cache_void;
3016 SVN_MUTEX__WITH_LOCK(cache->mutex,
3017 svn_membuffer_cache_set(cache_void,
3022 return SVN_NO_ERROR;
3025 /* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
3027 static svn_error_t *
3028 svn_membuffer_cache_get_partial_synced(void **value_p,
3029 svn_boolean_t *found,
3032 svn_cache__partial_getter_func_t func,
3034 apr_pool_t *result_pool)
3036 svn_membuffer_cache_t *cache = cache_void;
3037 SVN_MUTEX__WITH_LOCK(cache->mutex,
3038 svn_membuffer_cache_get_partial(value_p,
3046 return SVN_NO_ERROR;
3049 /* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
3051 static svn_error_t *
3052 svn_membuffer_cache_set_partial_synced(void *cache_void,
3054 svn_cache__partial_setter_func_t func,
3056 apr_pool_t *scratch_pool)
3058 svn_membuffer_cache_t *cache = cache_void;
3059 SVN_MUTEX__WITH_LOCK(cache->mutex,
3060 svn_membuffer_cache_set_partial(cache_void,
3066 return SVN_NO_ERROR;
3069 /* the v-table for membuffer-based caches (with multi-threading support)
3071 static svn_cache__vtable_t membuffer_cache_synced_vtable = {
3072 svn_membuffer_cache_get_synced,
3073 svn_membuffer_cache_has_key_synced,
3074 svn_membuffer_cache_set_synced,
3075 svn_membuffer_cache_iter, /* no sync required */
3076 svn_membuffer_cache_is_cachable, /* no sync required */
3077 svn_membuffer_cache_get_partial_synced,
3078 svn_membuffer_cache_set_partial_synced,
3079 svn_membuffer_cache_get_info /* no sync required */
3082 /* standard serialization function for svn_stringbuf_t items.
3083 * Implements svn_cache__serialize_func_t.
3085 static svn_error_t *
3086 serialize_svn_stringbuf(void **buffer,
3087 apr_size_t *buffer_size,
3089 apr_pool_t *result_pool)
3091 svn_stringbuf_t *value_str = item;
3093 *buffer = value_str->data;
3094 *buffer_size = value_str->len + 1;
3096 return SVN_NO_ERROR;
3099 /* standard de-serialization function for svn_stringbuf_t items.
3100 * Implements svn_cache__deserialize_func_t.
3102 static svn_error_t *
3103 deserialize_svn_stringbuf(void **item,
3105 apr_size_t buffer_size,
3106 apr_pool_t *result_pool)
3108 svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));
3110 value_str->pool = result_pool;
3111 value_str->blocksize = buffer_size;
3112 value_str->data = buffer;
3113 value_str->len = buffer_size-1;
3116 return SVN_NO_ERROR;
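/* Editorial round-trip sketch (not in the original source): serializing
 * the svn_stringbuf_t "abc" stores the 4 bytes "abc\0", i.e. LEN + 1.
 * Deserializing wraps the buffer returned by the cache lookup in place:
 * DATA points at that buffer, LEN becomes BUFFER_SIZE - 1 == 3 and
 * BLOCKSIZE == 4; no further copy of the string data is made. */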
3119 /* Construct a svn_cache__t object on top of a shared membuffer cache.
3122 svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
3123 svn_membuffer_t *membuffer,
3124 svn_cache__serialize_func_t serializer,
3125 svn_cache__deserialize_func_t deserializer,
3128 apr_uint32_t priority,
3129 svn_boolean_t thread_safe,
3130 apr_pool_t *result_pool,
3131 apr_pool_t *scratch_pool)
3133 svn_checksum_t *checksum;
3134 apr_size_t prefix_len, prefix_orig_len;
3136 /* allocate the cache header structures
3138 svn_cache__t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
3139 svn_membuffer_cache_t *cache = apr_pcalloc(result_pool, sizeof(*cache));
3141 /* initialize our internal cache header
3143 cache->membuffer = membuffer;
3144 cache->serializer = serializer
3146 : serialize_svn_stringbuf;
3147 cache->deserializer = deserializer
3149 : deserialize_svn_stringbuf;
3150 cache->priority = priority;
3151 cache->key_len = klen;
3153 SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, result_pool));
3155 /* Copy the prefix into the prefix full key. Align it to ITEM_ALIGNMENT.
3156 * Don't forget to include the terminating NUL. */
3157 prefix_orig_len = strlen(prefix) + 1;
3158 prefix_len = ALIGN_VALUE(prefix_orig_len);
3160 svn_membuf__create(&cache->prefix.full_key, prefix_len, result_pool);
3161 memcpy((char *)cache->prefix.full_key.data, prefix, prefix_orig_len);
3162 memset((char *)cache->prefix.full_key.data + prefix_orig_len, 0,
3163 prefix_len - prefix_orig_len);
3165 /* Construct the folded prefix key. */
3166 SVN_ERR(svn_checksum(&checksum,
3171 memcpy(cache->prefix.entry_key.fingerprint, checksum->digest,
3172 sizeof(cache->prefix.entry_key.fingerprint));
3173 cache->prefix.entry_key.key_len = prefix_len;
3175 /* Initialize the combined key. Pre-allocate some extra room in the full
3176 * key such that we probably don't need to re-alloc. */
3177 cache->combined_key.entry_key = cache->prefix.entry_key;
3178 svn_membuf__create(&cache->combined_key.full_key, prefix_len + 200,
3180 memcpy(cache->combined_key.full_key.data, cache->prefix.full_key.data,
3183 /* initialize the generic cache wrapper
3185 wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
3186 : &membuffer_cache_vtable;
3187 wrapper->cache_internal = cache;
3188 wrapper->error_handler = 0;
3189 wrapper->error_baton = 0;
3190 wrapper->pretend_empty = !!getenv("SVN_X_DOES_NOT_MARK_THE_SPOT");
3193 return SVN_NO_ERROR;
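/* Editorial usage sketch (not in the original source; the prefix, pools
 * and priority shown are hypothetical). Passing NULL for both conversion
 * functions selects the svn_stringbuf_t handlers above:
 *
 *   svn_cache__t *cache;
 *   SVN_ERR(svn_cache__create_membuffer_cache(
 *             &cache,
 *             svn_cache__get_global_membuffer_cache(),
 *             NULL, NULL,                  [stringbuf (de-)serialization]
 *             APR_HASH_KEY_STRING,         [variable-length string keys]
 *             "my-repo:rev-data",          [hypothetical namespace prefix]
 *             SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY,
 *             TRUE,                        [thread-safe front-end]
 *             result_pool, scratch_pool));
 */

/* Add the global statistics of SEGMENT to INFO, including the index
 * fill histogram.
 */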
3196 static svn_error_t *
3197 svn_membuffer_get_global_segment_info(svn_membuffer_t *segment,
3198 svn_cache__info_t *info)
3200 info->gets += segment->total_reads;
3201 info->sets += segment->total_writes;
3202 info->hits += segment->total_hits;
3204 WITH_READ_LOCK(segment,
3205 svn_membuffer_get_segment_info(segment, info, TRUE));
3207 return SVN_NO_ERROR;
3211 svn_cache__membuffer_get_global_info(apr_pool_t *pool)
3215 svn_membuffer_t *membuffer = svn_cache__get_global_membuffer_cache();
3216 svn_cache__info_t *info = apr_pcalloc(pool, sizeof(*info));
3218 /* cache front-end specific data */
3220 info->id = "membuffer globals";
3222 /* collect info from shared cache back-end */
3224 for (i = 0; i < membuffer->segment_count; ++i)
3225 svn_error_clear(svn_membuffer_get_global_segment_info(membuffer + i,