2 * cache-membuffer.c: in-memory caching for Subversion
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
26 #include <apr_thread_rwlock.h>
28 #include "svn_pools.h"
29 #include "svn_checksum.h"
30 #include "svn_private_config.h"
31 #include "svn_string.h"
32 #include "svn_sorts.h" /* get the MIN macro */
34 #include "private/svn_atomic.h"
35 #include "private/svn_dep_compat.h"
36 #include "private/svn_mutex.h"
37 #include "private/svn_string_private.h"
43 * This svn_cache__t implementation actually consists of two parts:
44 * a shared (per-process) singleton membuffer cache instance and shallow
45 * svn_cache__t front-end instances that each use different key spaces.
46 * For data management, they all forward to the singleton membuffer cache.
 * A membuffer cache consists of two parts:
 *
 * 1. A linear data buffer containing cached items in a serialized
 *    representation, prefixed by their full cache keys. There may be
 *    arbitrary gaps between entries. This buffer is subdivided into
 *    (currently two) cache levels.
 *
 * 2. A directory of cache entries. This is organized similarly to CPU
 *    data caches: for every possible key, there is exactly one group
 *    of entries that may contain the header info for an item with
 *    that given key. The result is a GROUP_SIZE+-way associative cache
 *    whose associativity can be dynamically increased.
 *
 * Only the start addresses of these two data parts are given as native
 * pointers. All other references are expressed as offsets to these
 * pointers. With that design, it is relatively easy to share the same
 * data structure between different processes and / or to persist them
 * on disk. These out-of-process features have not been implemented yet.
67 * Superficially, cache levels are being used as usual: insertion happens
68 * into L1 and evictions will promote items to L2. But their whole point
69 * is a different one. L1 uses a circular buffer, i.e. we have perfect
70 * caching for the last N bytes where N is the size of L1. L2 uses a more
71 * elaborate scheme based on priorities and hit counts as described below.
 * The data buffer usage information is implicitly given by the directory
 * entries. Every USED entry has a reference to the previous and the next
 * used dictionary entry, and this doubly-linked list is ordered by the
 * offsets of their item data within the data buffer. So removing data,
 * for instance, is done simply by unlinking it from the chain, implicitly
 * marking the entry as well as the data buffer section previously
 * associated with it as unused. The first and last elements of that
 * chain are referenced from the respective cache level.
 *
 * Insertion can occur at only one sliding position per cache level. It is
 * marked by its offset in the data buffer and the index of the first used
 * entry at or behind that position. If this gap is too small to accommodate
 * the new item (plus its full key), the insertion window is extended as
 * described below. The new entry will always be inserted at the bottom end
 * of the window and since the next used entry is known, properly sorted
 * insertion is possible.
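 *
 * Illustration (a sketch, not part of the original description): with used
 * entries A..D stored in offset order,
 *
 *     ... [A][B]   <gap>   [C][D] ...
 *               ^               ^
 *         current_data     next -> C
 *
 * a new item is appended at current_data if the gap in front of C is
 * large enough; otherwise, the window is extended as described below.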
 * To make the cache perform robustly in a wide range of usage scenarios,
 * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
 * details). Every item holds a read hit counter and there is a global read
 * hit counter. The more hits an entry has in relation to the average, the
 * more likely it is to be kept, using a rand()-based condition. The test is
 * applied only to the entry following the insertion window. If it doesn't
 * get evicted, it is moved to the beginning of that window and the window
 * is moved.
 *
 * Moreover, the entry's hits get halved to make that entry more likely to
 * be removed the next time the sliding insertion / removal window comes by.
 * As a result, frequently used entries are likely not to be dropped until
 * they have not been used for a while. Also, even in a cache thrashing
 * situation, about 50% of the content survives every time 50% of the cache
 * is re-written with new entries. For details on the fine-tuning involved,
 * see the comments in ensure_data_insertable_l2().
107 * Due to the randomized mapping of keys to entry groups, some groups may
108 * overflow. In that case, there are spare groups that can be chained to
109 * an already used group to extend it.
 * To limit the entry size and management overhead, not the actual item keys
 * but only their hashed "fingerprint" will be stored. These are reasonably
 * unique to prevent collisions, so we only need to support up to one entry
 * per entry key. To guarantee that there are no conflicts, however, we
 * store the actual full key immediately in front of the serialized item
 * data. That is, the entry offset actually points to the full key and the
 * key length stored in the entry acts as an additional offset to find the
 * actual item data.
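 *
 * Illustration (derived from the layout described above): for a used
 * entry E, the buffer bytes [E.offset, E.offset + E.size) hold the full
 * key in the first E.key.key_len bytes, immediately followed by the
 * serialized item data, i.e. the item data starts at
 * DATA + E.offset + E.key.key_len.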
 * All access to the cached data needs to be serialized. Because we want
 * to scale well despite that bottleneck, we simply segment the cache into
 * a number of independent caches (segments). Items will be multiplexed
 * based on their hash key.
 */
/* APR's read-write lock implementation on Windows is horribly inefficient.
 * Even with very low contention a runtime overhead of 35 percent has been
 * measured for 'svn-bench null-export' over ra_serf.
 *
 * Use a simple mutex on Windows. Because there is one mutex per segment,
 * large machines should (and usually can) be configured with large caches
 * such that read contention is kept low.
 */
#ifdef WIN32
#  define USE_SIMPLE_MUTEX 1
#else
#  define USE_SIMPLE_MUTEX 0
#endif
/* For more efficient copy operations, let's align all data items properly.
 * Since we can't portably align pointers, this is rather the item size
 * granularity which ensures *relative* alignment within the cache - still
 * giving us decent copy speeds on most machines.
 *
 * Must be a power of 2.
 */
#define ITEM_ALIGNMENT 16
150 /* By default, don't create cache segments smaller than this value unless
151 * the total cache size itself is smaller.
153 #define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)
155 /* The minimum segment size we will allow for multi-segmented caches
157 #define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)
159 /* The maximum number of segments allowed. Larger numbers reduce the size
160 * of each segment, in turn reducing the max size of a cachable item.
161 * Also, each segment gets its own lock object. The actual number supported
162 * by the OS may therefore be lower and svn_cache__membuffer_cache_create
163 * may return an error.
165 #define MAX_SEGMENT_COUNT 0x10000
167 /* As of today, APR won't allocate chunks of 4GB or more. So, limit the
168 * segment size to slightly below that.
170 #define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)
172 /* We don't mark the initialization status for every group but initialize
173 * a number of groups at once. That will allow for a very small init flags
174 * vector that is likely to fit into the CPU caches even for fairly large
175 * membuffer caches. For instance, the default of 32 means 8x32 groups per
176 * byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group x 40 index
177 * bytes/entry x 8 cache bytes/index byte = 1kB init vector / 640MB cache.
179 #define GROUP_INIT_GRANULARITY 32
/* Invalid index reference value. Equivalent to (apr_uint32_t)(-1).
 */
#define NO_INDEX APR_UINT32_MAX
/* To save space in our group structure, we only use 32 bit size values
 * and, therefore, limit the size of each entry to just below 4GB.
 * Supporting larger items is not a good idea as the data transfer
 * to and from the cache would block other threads for a very long time.
 */
#define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
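/* Illustration: with the ITEM_ALIGNMENT of 16 defined above, MAX_ITEM_SIZE
 * evaluates to (apr_uint32_t)-16 == 0xFFFFFFF0, i.e. just below 4GB.
 */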
/* We use this structure to identify cache entries. There cannot be two
 * entries with the same entry key. However unlikely, though, two different
 * full keys (see full_key_t) may have the same entry key. That is a
 * collision and at most one of them can be stored in the cache at any time.
 */
typedef struct entry_key_t
{
  /* 16 byte finger print of the full key. */
  apr_uint64_t fingerprint[2];

  /* Length of the full key. This value is aligned to ITEM_ALIGNMENT to
   * make sure the subsequent item content is properly aligned. */
  apr_size_t key_len;
} entry_key_t;
/* A full key, i.e. the combination of the cache's key prefix with some
 * dynamic part appended to it. It also contains its ENTRY_KEY.
 */
typedef struct full_key_t
{
  /* Reduced form identifying the cache entry (if such an entry exists). */
  entry_key_t entry_key;

  /* This contains the full combination. Note that the SIZE element may
   * be larger than ENTRY_KEY.KEY_LEN, but only the latter determines the
   * valid key length. */
  svn_membuf_t full_key;
} full_key_t;
/* Debugging / corruption detection support.
 * If you define this macro, the getter functions will perform expensive
 * checks on the item data, requested keys and entry types. If there is
 * a mismatch found in any of them when being compared with the values
 * remembered in the setter function, an error will be returned.
 */
#ifdef SVN_DEBUG_CACHE_MEMBUFFER
/* The prefix passed to svn_cache__create_membuffer_cache() effectively
 * defines the type of all items stored by that cache instance. We'll take
 * the last 15 bytes + \0 as plaintext for easy identification by the dev.
 */
#define PREFIX_TAIL_LEN 16
/* This record will be attached to any cache entry. It tracks item data
 * (content), key and type as hash values and is the baseline against which
 * the getters will compare their results to detect inconsistencies.
 */
typedef struct entry_tag_t
{
  /* MD5 checksum over the serialized item data.
   */
  unsigned char content_hash[APR_MD5_DIGESTSIZE];

  /* Hash value of the svn_cache_t instance that wrote the item
   * (i.e. a combination of type and repository)
   */
  unsigned char prefix_hash[APR_MD5_DIGESTSIZE];

  /* Note that this only covers the variable part of the key,
   * i.e. it will be different from the full key hash used for
   * cache addressing.
   */
  unsigned char key_hash[APR_MD5_DIGESTSIZE];

  /* Last letters of the key in human readable format
   * (ends with the type identifier, e.g. "DAG")
   */
  char prefix_tail[PREFIX_TAIL_LEN];

  /* Length of the variable key part.
   */
  apr_size_t key_len;
} entry_tag_t;
/* Initialize all members of TAG except for the content hash.
 */
static svn_error_t *store_key_part(entry_tag_t *tag,
                                   const full_key_t *prefix_key,
                                   const void *key,
                                   apr_size_t key_len,
                                   apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  const char *prefix = prefix_key->full_key.data;
  apr_size_t prefix_len = strlen(prefix);

  if (prefix_len >= sizeof(tag->prefix_tail))
    {
      prefix += prefix_len - (sizeof(tag->prefix_tail) - 1);
      prefix_len = sizeof(tag->prefix_tail) - 1;
    }

  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       key,
                       key_len,
                       pool));

  memcpy(tag->prefix_hash, prefix_key->entry_key.fingerprint,
         sizeof(tag->prefix_hash));
  memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));

  memset(tag->prefix_tail, 0, sizeof(tag->prefix_tail));
  memcpy(tag->prefix_tail, prefix, prefix_len + 1);

  tag->key_len = key_len;

  return SVN_NO_ERROR;
}
/* Initialize the content hash member of TAG.
 */
static svn_error_t* store_content_part(entry_tag_t *tag,
                                       const void *data,
                                       apr_size_t size,
                                       apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       data,
                       size,
                       pool));

  memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));

  return SVN_NO_ERROR;
}
/* Compare two tags and fail with an assertion upon differences.
 */
static svn_error_t* assert_equal_tags(const entry_tag_t *lhs,
                                      const entry_tag_t *rhs)
{
  SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
                        sizeof(lhs->content_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
                        sizeof(lhs->prefix_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
                        sizeof(lhs->key_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
                        sizeof(lhs->prefix_tail)) == 0);

  SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);

  return SVN_NO_ERROR;
}
/* Recurring code snippets.
 */

#define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,

#define DEBUG_CACHE_MEMBUFFER_TAG tag,

#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)                     \
  entry_tag_t _tag;                                              \
  entry_tag_t *tag = &_tag;                                      \
  if (key)                                                       \
    SVN_ERR(store_key_part(tag,                                  \
                           &cache->prefix,                       \
                           key,                                  \
                           cache->key_len == APR_HASH_KEY_STRING \
                             ? strlen((const char *) key)        \
                             : cache->key_len,                   \
                           pool));
#else

/* Don't generate any checks if consistency checks have not been enabled.
 */
#define DEBUG_CACHE_MEMBUFFER_TAG_ARG
#define DEBUG_CACHE_MEMBUFFER_TAG
#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)
368 #endif /* SVN_DEBUG_CACHE_MEMBUFFER */
/* A single dictionary entry. Since all entries will be allocated once
 * during cache creation, those entries might be either used or unused.
 * An entry is used if and only if it is contained in the doubly-linked
 * list of used entries per cache level.
 */
typedef struct entry_t
{
  /* Identifying the data item. Only valid for used entries.
   */
  entry_key_t key;

  /* The offset of the cached item's serialized data within the cache's
   * DATA buffer.
   * Only valid for used entries.
   */
  apr_uint64_t offset;

  /* Size of the serialized item data. May be 0. The MAX_ITEM_SIZE macro
   * above ensures that there will be no overflows.
   * Only valid for used entries.
   */
  apr_uint32_t size;

  /* Number of (read) hits for this entry. Will be reset upon write.
   * Only valid for used entries.
   */
  svn_atomic_t hit_count;

  /* Reference to the next used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.last member. NO_INDEX also implies that
   * the data buffer is not used beyond offset+size.
   * Only valid for used entries.
   */
  apr_uint32_t next;

  /* Reference to the previous used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.first member.
   * Only valid for used entries.
   */
  apr_uint32_t previous;

  /* Priority of this entry. This entry will not be replaced by lower-
   * priority items.
   */
  apr_uint32_t priority;

#ifdef SVN_DEBUG_CACHE_MEMBUFFER
  /* Remember type, content and key hashes.
   */
  entry_tag_t tag;
#endif
} entry_t;
/* Group header struct.
 */
typedef struct group_header_t
{
  /* number of entries used [0 .. USED-1] */
  apr_uint32_t used;

  /* next group in the chain or NO_INDEX for the last.
   * For recyclable unused spare groups, this points to the next
   * unused spare group */
  apr_uint32_t next;

  /* previous group in the chain or NO_INDEX for the first */
  apr_uint32_t previous;

  /* number of elements in the chain from start to here.
   * >= 1 for used groups, 0 for unused spare groups */
  apr_uint32_t chain_length;
} group_header_t;
/* The size of the group struct should be a power of two to make sure it
 * does not cross memory page boundaries. Since we already access the cache
 * randomly, having two page table lookups instead of one is bad.
 */
#define GROUP_BLOCK_SIZE 512
/* A ~10-way associative cache seems to be a good compromise between
 * performance (worst-case lookups) and efficiency-loss due to collisions.
 *
 * This value may be changed to any positive integer.
 */
#define GROUP_SIZE \
  ((GROUP_BLOCK_SIZE - sizeof(group_header_t)) / sizeof(entry_t))
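/* Illustration (sizes are platform-dependent; the values below are merely
 * an example): assuming sizeof(group_header_t) == 16 and
 * sizeof(entry_t) == 48 with the debug tag disabled, GROUP_SIZE evaluates
 * to (512 - 16) / 48 = 10, i.e. the ~10-way associativity mentioned above.
 */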
/* Maximum number of groups in a chain, i.e. a cache index group can hold
 * up to GROUP_SIZE * MAX_GROUP_CHAIN_LENGTH entries.
 */
#define MAX_GROUP_CHAIN_LENGTH 8

/* We group dictionary entries to make this GROUP_SIZE-way associative.
 */
typedef struct entry_group_t
{
  /* group globals */
  group_header_t header;

  /* padding and also room for future extensions */
  char padding[GROUP_BLOCK_SIZE - sizeof(group_header_t)
               - sizeof(entry_t) * GROUP_SIZE];

  /* the actual entries */
  entry_t entries[GROUP_SIZE];
} entry_group_t;
/* Per-cache level header structure. Instances of this are members of
 * svn_membuffer_t and will use non-overlapping sections of its DATA buffer.
 * All offset values are global / absolute to that whole buffer.
 */
typedef struct cache_level_t
{
  /* Reference to the first (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t first;

  /* Reference to the last (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t last;

  /* Reference to the first (defined by the order of content in the data
   * buffer) used dictionary entry behind the insertion position
   * (current_data). If NO_INDEX, the data buffer is free starting at the
   * current_data offset.
   */
  apr_uint32_t next;

  /* First offset in the cache's DATA buffer that belongs to this level.
   */
  apr_uint64_t start_offset;

  /* Size of data buffer allocated to this level in bytes. Must be > 0.
   */
  apr_uint64_t size;

  /* Offset in the data buffer where the next insertion shall occur.
   */
  apr_uint64_t current_data;
} cache_level_t;
/* The cache header structure.
 */
struct svn_membuffer_t
{
  /* Number of cache segments. Must be a power of 2.
     Please note that this structure represents only one such segment
     and that all segments must / will report the same values here. */
  apr_uint32_t segment_count;

  /* The dictionary, GROUP_SIZE * (group_count + spare_group_count)
   * entries long. Never NULL.
   */
  entry_group_t *directory;

  /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
   * Allows for efficiently marking groups as "not initialized".
   */
  unsigned char *group_initialized;

  /* Size of dictionary in groups. Must be > 0.
   */
  apr_uint32_t group_count;

  /* Total number of spare groups.
   */
  apr_uint32_t spare_group_count;

  /* First recyclable spare group.
   */
  apr_uint32_t first_spare_group;

  /* Maximum number of spare groups ever used. I.e. group index
   * group_count + max_spare_used is the first unused spare group
   * if first_spare_group is NO_INDEX.
   */
  apr_uint32_t max_spare_used;

  /* Pointer to the data buffer, data_size bytes long. Never NULL.
   */
  unsigned char *data;

  /* Total number of data buffer bytes in use.
   */
  apr_uint64_t data_used;

  /* Largest entry size that we would accept. For total cache sizes
   * less than 4TB (sic!), this is determined by the total cache size.
   */
  apr_uint64_t max_entry_size;

  /* The cache levels, organized as sub-buffers. Since entries in the
   * DIRECTORY use offsets in DATA for addressing, a cache lookup does
   * not need to know the cache level of a specific item. Cache levels
   * are only used to implement a hybrid insertion / eviction strategy.
   */

  /* First cache level, i.e. most insertions happen here. Very large
   * items might get inserted directly into L2. L1 is a strict FIFO
   * ring buffer that does not care about item priorities. All evicted
   * items get a chance to be promoted to L2.
   */
  cache_level_t l1;

  /* Second cache level, i.e. data evicted from L1 will be added here
   * if the item is "important" enough or the L2 insertion window is large
   * enough.
   */
  cache_level_t l2;

  /* Number of used dictionary entries, i.e. number of cached items.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint32_t used_entries;

  /* Total number of calls to membuffer_cache_get.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_reads;

  /* Total number of calls to membuffer_cache_set.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_writes;

  /* Total number of hits since the cache's creation.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_hits;

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  /* A lock for intra-process synchronization to the cache, or NULL if
   * the cache's creator doesn't feel the cache needs to be
   * synchronized.
   */
  svn_mutex__t *lock;
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  /* Same for read-write lock. */
  apr_thread_rwlock_t *lock;

  /* If set, write accesses will wait until they get exclusive access.
   * Otherwise, they will become no-ops if the segment is currently
   * read-locked. Only used when LOCK is an r/w lock.
   */
  svn_boolean_t allow_blocking_writes;
#endif
};
/* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
 */
#define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)
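/* Illustration: with ITEM_ALIGNMENT == 16, ALIGN_VALUE(0) == 0,
 * ALIGN_VALUE(1) == 16 and ALIGN_VALUE(16) == 16, i.e. values get rounded
 * up to the next multiple of 16. The bit trick requires ITEM_ALIGNMENT
 * to be a power of 2 (see above).
 */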
/* If locking is supported for CACHE, acquire a read lock for it.
 */
static svn_error_t *
read_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
      if (status)
        return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire a write lock for it.
 * Set *SUCCESS to FALSE, if we couldn't acquire the write lock;
 * leave it untouched otherwise.
 */
static svn_error_t *
write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status;
      if (cache->allow_blocking_writes)
        {
          status = apr_thread_rwlock_wrlock(cache->lock);
        }
      else
        {
          status = apr_thread_rwlock_trywrlock(cache->lock);
          if (SVN_LOCK_IS_BUSY(status))
            {
              *success = FALSE;
              status = APR_SUCCESS;
            }
        }

      if (status)
        return svn_error_wrap_apr(status,
                                  _("Can't write-lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire an unconditional write lock
 * for it.
 */
static svn_error_t *
force_write_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
  if (status)
    return svn_error_wrap_apr(status,
                              _("Can't write-lock cache mutex"));

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, release the current lock
 * (read or write). Return ERR upon success; if unlocking fails and ERR
 * is not already set, return the unlock error instead.
 */
static svn_error_t *
unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__unlock(cache->lock, err);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
      if (err)
        return err;

      if (status)
        return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
    }

  return err;
#else
  return err;
#endif
}
/* If supported, guard the execution of EXPR with a read lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 */
#define WITH_READ_LOCK(cache, expr)         \
do {                                        \
  SVN_ERR(read_lock_cache(cache));          \
  SVN_ERR(unlock_cache(cache, (expr)));     \
} while (0)
/* If supported, guard the execution of EXPR with a write lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 *
 * The write lock process is complicated if we don't allow waiting for
 * the lock: If we didn't get the lock, we may still need to remove an
 * existing entry for the given key because that content is now stale.
 * Once we discovered such an entry, we unconditionally do a blocking
 * wait for the write lock. In case no old content could be found, a
 * failing lock attempt is simply a no-op and we exit the macro.
 */
#define WITH_WRITE_LOCK(cache, expr)                            \
do {                                                            \
  svn_boolean_t got_lock = TRUE;                                \
  SVN_ERR(write_lock_cache(cache, &got_lock));                  \
  if (!got_lock)                                                \
    {                                                           \
      svn_boolean_t exists;                                     \
      SVN_ERR(entry_exists(cache, group_index, key, &exists));  \
      if (exists)                                               \
        SVN_ERR(force_write_lock_cache(cache));                 \
      else                                                      \
        break;                                                  \
    }                                                           \
  SVN_ERR(unlock_cache(cache, (expr)));                         \
} while (0)
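/* Usage sketch (mirrors entry_exists() further down in this file): EXPR is
 * evaluated while the lock is held and its error, if any, is chained
 * through unlock_cache():
 *
 *   WITH_READ_LOCK(cache,
 *                  entry_exists_internal(cache, group_index, key, &found));
 */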
/* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
 * been initialized, yet. In that case, this group cannot contain any data.
 * Otherwise, a non-zero value is returned.
 */
static APR_INLINE unsigned char
is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char flags
    = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
  unsigned char bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));

  return flags & bit_mask;
}
/* Initializes the section of the directory in CACHE that contains
 * the entry group identified by GROUP_INDEX. */
static void
initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char bit_mask;
  apr_uint32_t i;

  /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
  apr_uint32_t first_index =
      (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
  apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
  if (last_index > cache->group_count + cache->spare_group_count)
    last_index = cache->group_count + cache->spare_group_count;

  for (i = first_index; i < last_index; ++i)
    {
      group_header_t *header = &cache->directory[i].header;
      header->used = 0;
      header->chain_length = 1;
      header->next = NO_INDEX;
      header->previous = NO_INDEX;
    }

  /* set the "initialized" bit for these groups */
  bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
  cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
    |= bit_mask;
}
/* Return the next available spare group from CACHE and mark it as used.
 * Return NULL, if no spare group is available.
 */
static entry_group_t *
allocate_spare_group(svn_membuffer_t *cache)
{
  entry_group_t *group = NULL;

  /* is there some ready-to-use group? */
  if (cache->first_spare_group != NO_INDEX)
    {
      group = &cache->directory[cache->first_spare_group];
      cache->first_spare_group = group->header.next;
    }

  /* any so far untouched spares available? */
  else if (cache->max_spare_used < cache->spare_group_count)
    {
      apr_uint32_t group_index = cache->group_count + cache->max_spare_used;
      ++cache->max_spare_used;

      if (!is_group_initialized(cache, group_index))
        initialize_group(cache, group_index);

      group = &cache->directory[group_index];
    }

  /* spare groups must be empty */
  assert(!group || !group->header.used);

  return group;
}
/* Mark previously allocated spare group GROUP in CACHE as "unused".
 */
static void
free_spare_group(svn_membuffer_t *cache,
                 entry_group_t *group)
{
  assert(group->header.used == 0);
  assert(group->header.previous != NO_INDEX);
  assert(group - cache->directory >= (apr_ssize_t)cache->group_count);

  /* unchain */
  cache->directory[group->header.previous].header.next = NO_INDEX;
  group->header.chain_length = 0;
  group->header.previous = NO_INDEX;

  /* add to chain of spares */
  group->header.next = cache->first_spare_group;
  cache->first_spare_group = (apr_uint32_t) (group - cache->directory);
}
/* Follow the group chain from GROUP in CACHE to its end and return the last
 * group. May return GROUP.
 */
static entry_group_t *
last_group_in_chain(svn_membuffer_t *cache,
                    entry_group_t *group)
{
  while (group->header.next != NO_INDEX)
    group = &cache->directory[group->header.next];

  return group;
}
/* Return the CHAIN_INDEX-th element in the group chain starting from group
 * START_GROUP_INDEX in CACHE.
 */
static entry_group_t *
get_group(svn_membuffer_t *cache,
          apr_uint32_t start_group_index,
          apr_uint32_t chain_index)
{
  entry_group_t *group = &cache->directory[start_group_index];
  for (; chain_index; --chain_index)
    group = &cache->directory[group->header.next];

  return group;
}
/* Resolve a dictionary entry reference, i.e. return the entry
 * for the given index IDX.
 */
static APR_INLINE entry_t *
get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
{
  return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
}
/* Get the entry index for the given ENTRY.
 */
static APR_INLINE apr_uint32_t
get_index(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t group_index
    = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);

  return (apr_uint32_t)group_index * GROUP_SIZE
       + (apr_uint32_t)(entry - cache->directory[group_index].entries);
}
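/* Illustration: get_index() is the inverse of get_entry(), i.e. for any
 * valid entry index IDX, get_index(cache, get_entry(cache, IDX)) == IDX.
 */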
/* Return the cache level of ENTRY in CACHE.
 */
static cache_level_t *
get_cache_level(svn_membuffer_t *cache, entry_t *entry)
{
  return entry->offset < cache->l1.size ? &cache->l1
                                        : &cache->l2;
}
/* Insert ENTRY to the chain of items that belong to LEVEL in CACHE. IDX
 * is ENTRY's item index and is only given for efficiency. The insertion
 * takes place just before LEVEL->NEXT. *CACHE will not be modified.
 */
static void
chain_entry(svn_membuffer_t *cache,
            cache_level_t *level,
            entry_t *entry,
            apr_uint32_t idx)
{
  /* insert ENTRY before this item */
  entry_t *next = level->next == NO_INDEX
                ? NULL
                : get_entry(cache, level->next);
  assert(idx == get_index(cache, entry));

  /* update entry chain
   */
  entry->next = level->next;
  if (level->first == NO_INDEX)
    {
      /* insert as the first entry and only in the chain
       */
      entry->previous = NO_INDEX;
      level->first = idx;
      level->last = idx;
    }
  else if (next == NULL)
    {
      /* insert as the last entry in the chain.
       * Note that it cannot also be at the beginning of the chain.
       */
      entry->previous = level->last;
      get_entry(cache, level->last)->next = idx;
      level->last = idx;
    }
  else
    {
      /* insert either at the start of a non-empty list or
       * somewhere in the middle
       */
      entry->previous = next->previous;
      next->previous = idx;

      if (entry->previous != NO_INDEX)
        get_entry(cache, entry->previous)->next = idx;
      else
        level->first = idx;
    }
}
/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE. IDX
 * is ENTRY's item index and is only given for efficiency. Please note
 * that neither *CACHE nor *ENTRY will be modified.
 */
static void
unchain_entry(svn_membuffer_t *cache,
              cache_level_t *level,
              entry_t *entry,
              apr_uint32_t idx)
{
  assert(idx == get_index(cache, entry));

  /* update the insertion window, if necessary
   */
  if (level->next == idx)
    level->next = entry->next;

  /* unlink it from the chain of used entries
   */
  if (entry->previous == NO_INDEX)
    level->first = entry->next;
  else
    get_entry(cache, entry->previous)->next = entry->next;

  if (entry->next == NO_INDEX)
    level->last = entry->previous;
  else
    get_entry(cache, entry->next)->previous = entry->previous;
}
/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
 * In contrast to insertion, removal is possible for any entry.
 */
static void
drop_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *last_group
    = last_group_in_chain(cache, &cache->directory[group_index]);
  apr_uint32_t last_in_group
    = (apr_uint32_t) ((last_group - cache->directory) * GROUP_SIZE
                      + last_group->header.used - 1);

  cache_level_t *level = get_cache_level(cache, entry);

  /* update global cache usage counters
   */
  cache->used_entries--;
  cache->data_used -= entry->size;

  /* extend the insertion window, if the entry happens to border it
   */
  if (idx == level->next)
    level->next = entry->next;

  if (entry->next == level->next)
    {
      /* insertion window starts right behind the entry to remove
       */
      if (entry->previous == NO_INDEX)
        {
          /* remove the first entry -> insertion may start at pos 0, now */
          level->current_data = level->start_offset;
        }
      else
        {
          /* insertion may start right behind the previous entry */
          entry_t *previous = get_entry(cache, entry->previous);
          level->current_data = ALIGN_VALUE(  previous->offset
                                            + previous->size);
        }
    }

  /* unlink it from the chain of used entries
   */
  unchain_entry(cache, level, entry, idx);

  /* Move last entry into hole (if the removed one is not the last used).
   * We need to do this since all used entries are at the beginning of
   * the group's entries array.
   */
  if (idx != last_in_group)
    {
      /* copy the last used entry to the removed entry's index
       */
      *entry = last_group->entries[last_group->header.used-1];

      /* this ENTRY may belong to a different cache level than the entry
       * we have just removed */
      level = get_cache_level(cache, entry);

      /* update foreign links to new index
       */
      if (last_in_group == level->next)
        level->next = idx;

      if (entry->previous == NO_INDEX)
        level->first = idx;
      else
        get_entry(cache, entry->previous)->next = idx;

      if (entry->next == NO_INDEX)
        level->last = idx;
      else
        get_entry(cache, entry->next)->previous = idx;
    }

  /* Update the number of used entries.
   */
  last_group->header.used--;

  /* Release the last group in the chain if it is a spare group
   */
  if (!last_group->header.used && last_group->header.previous != NO_INDEX)
    free_spare_group(cache, last_group);
}
/* Insert ENTRY into the chain of used dictionary entries. The entry's
 * offset and size members must already have been initialized. Also,
 * the offset must match the beginning of the insertion window.
 */
static void
insert_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *group = &cache->directory[group_index];
  cache_level_t *level = get_cache_level(cache, entry);

  /* The entry must start at the beginning of the insertion window.
   * It must also be the first unused entry in the group.
   */
  assert(entry->offset == level->current_data);
  assert(idx == group_index * GROUP_SIZE + group->header.used);
  level->current_data = ALIGN_VALUE(entry->offset + entry->size);

  /* update usage counters
   */
  cache->used_entries++;
  cache->data_used += entry->size;
  entry->hit_count = 0;
  group->header.used++;

  /* update entry chain
   */
  chain_entry(cache, level, entry, idx);

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
 * respective item.
 */
static apr_uint32_t
get_group_index(svn_membuffer_t **cache,
                const entry_key_t *key)
{
  svn_membuffer_t *segment0 = *cache;
  apr_uint64_t key0 = key->fingerprint[0];
  apr_uint64_t key1 = key->fingerprint[1];

  /* select the cache segment to use. They all have the same group_count.
   * Since key may not be well-distributed, pre-fold it to a smaller but
   * "denser" range. The modulus is a prime larger than the largest
   * group count. */
  *cache = &segment0[(key1 % APR_UINT64_C(2809637) + (key0 / 37))
                     & (segment0->segment_count - 1)];
  return (key0 % APR_UINT64_C(5030895599)) % segment0->group_count;
}
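/* Illustration of the mapping above (hypothetical values): with 4 segments
 * and 1000 groups per segment, a key is first folded into a segment via
 * (key1 % 2809637 + key0 / 37) & 3 and then into a group of that segment
 * via (key0 % 5030895599) % 1000. Both steps use only the two 64-bit
 * fingerprint words, never the full key.
 */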
/* Reduce the hit count of ENTRY and update the accumulated hit info
 * in CACHE accordingly.
 */
static APR_INLINE void
let_entry_age(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;

  if (hits_removed)
    {
      entry->hit_count -= hits_removed;
    }
  else
    {
      entry->priority /= 2;
    }
}
/* Return whether the keys in LHS and RHS match.
 */
static svn_boolean_t
entry_keys_match(const entry_key_t *lhs,
                 const entry_key_t *rhs)
{
  return (lhs->fingerprint[0] == rhs->fingerprint[0])
      && (lhs->fingerprint[1] == rhs->fingerprint[1])
      && (lhs->key_len == rhs->key_len);
}
/* Given the GROUP_INDEX that shall contain an entry with the hash key
 * TO_FIND, find that entry in the specified group.
 *
 * If FIND_EMPTY is not set, this function will return the one used entry
 * that actually matches the hash or NULL, if no such entry exists.
 *
 * If FIND_EMPTY has been set, this function will return the one used
 * entry that actually matches the hash (dropping its old content so it
 * can be replaced), an unused entry, or a forcibly removed entry (if all
 * group entries are currently in use). The entry's hash value will be
 * initialized with TO_FIND.
 *
 * Note: This function requires the caller to appropriately lock the CACHE.
 * For FIND_EMPTY==FALSE, a read lock is required, for FIND_EMPTY==TRUE,
 * the write lock must have been acquired.
 */
static entry_t *
find_entry(svn_membuffer_t *cache,
           apr_uint32_t group_index,
           const full_key_t *to_find,
           svn_boolean_t find_empty)
{
  entry_group_t *group;
  entry_t *entry = NULL;
  apr_uint32_t i;

  /* get the group that *must* contain the entry
   */
  group = &cache->directory[group_index];

  /* If the entry group has not been initialized, yet, there is no data.
   */
  if (! is_group_initialized(cache, group_index))
    {
      if (find_empty)
        {
          initialize_group(cache, group_index);
          entry = &group->entries[0];

          /* initialize entry for the new key */
          entry->key = to_find->entry_key;
        }

      return entry;
    }
  /* try to find the matching entry
   */
  while (1)
    {
      for (i = 0; i < group->header.used; ++i)
        if (entry_keys_match(&group->entries[i].key, &to_find->entry_key))
          {
            /* This is the only entry that _may_ contain the correct data. */
            entry = &group->entries[i];

            /* If we want to preserve it, check that it is actually a match. */
            if (!find_empty)
              {
                /* If there is no full key to compare, we are done. */
                if (!entry->key.key_len)
                  return entry;

                /* Compare the full key. */
                if (memcmp(to_find->full_key.data,
                           cache->data + entry->offset,
                           entry->key.key_len) == 0)
                  return entry;

                /* Key conflict. The entry to find cannot be anywhere else.
                 * Therefore, it is not cached. */
                return NULL;
              }

            /* need to empty that entry */
            drop_entry(cache, entry);
            if (group->header.used == GROUP_SIZE)
              group = last_group_in_chain(cache, group);
            else if (group->header.chain_length == 0)
              group = last_group_in_chain(cache,
                                          &cache->directory[group_index]);

            /* No entry found (actually, none left to find). */
            break;
          }

      /* leave the lookup loop at the end of the group chain */
      if (group->header.next == NO_INDEX)
        break;

      /* only full groups may chain */
      assert(group->header.used == GROUP_SIZE);
      group = &cache->directory[group->header.next];
    }
  /* None found. Are we looking for a free entry?
   */
  if (find_empty)
    {
      /* There is no empty entry in the chain, try chaining a spare group.
       */
      if (   group->header.used == GROUP_SIZE
          && group->header.chain_length < MAX_GROUP_CHAIN_LENGTH)
        {
          entry_group_t *new_group = allocate_spare_group(cache);
          if (new_group)
            {
              /* chain groups
               */
              new_group->header.chain_length = group->header.chain_length + 1;
              new_group->header.previous = (apr_uint32_t) (group -
                                                           cache->directory);
              new_group->header.next = NO_INDEX;
              group->header.next = (apr_uint32_t) (new_group -
                                                   cache->directory);
              group = new_group;
            }
        }

      /* if GROUP is still filled, we need to remove a random entry */
      if (group->header.used == GROUP_SIZE)
        {
          /* every entry gets the same chance of being removed.
           * Otherwise, we free the first entry, fill it and
           * remove it again on the next occasion without considering
           * the other entries in this group.
           *
           * We hit only one random group instead of processing all
           * groups in the chain.
           */
          cache_level_t *entry_level;
          int to_remove = rand() % (GROUP_SIZE * group->header.chain_length);
          entry_group_t *to_shrink
            = get_group(cache, group_index, to_remove / GROUP_SIZE);

          entry = &to_shrink->entries[to_remove % GROUP_SIZE];
          entry_level = get_cache_level(cache, entry);
          for (i = 0; i < GROUP_SIZE; ++i)
            {
              /* keep L1 entries whenever possible */
              cache_level_t *level
                = get_cache_level(cache, &to_shrink->entries[i]);
              if (   (level != entry_level && entry_level == &cache->l1)
                  || (entry->hit_count > to_shrink->entries[i].hit_count))
                {
                  entry_level = level;
                  entry = &to_shrink->entries[i];
                }
            }

          /* for the entries that have not been removed,
           * reduce their hit counts to put them at a relative
           * disadvantage the next time.
           */
          for (i = 0; i < GROUP_SIZE; ++i)
            if (entry != &to_shrink->entries[i])
              let_entry_age(cache, &to_shrink->entries[i]);

          drop_entry(cache, entry);
        }

      /* initialize entry for the new key
       */
      entry = &group->entries[group->header.used];
      entry->key = to_find->entry_key;
    }

  return entry;
}
/* Move a surviving ENTRY from just behind the insertion window to
 * its beginning and move the insertion window up accordingly.
 */
static void
move_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t size = ALIGN_VALUE(entry->size);
  cache_level_t *level = get_cache_level(cache, entry);

  /* This entry survived this cleansing run. Reset half of its
   * hit count so that its removal gets more likely in the next
   * run unless someone reads / hits this entry in the meantime.
   */
  let_entry_age(cache, entry);

  /* Move the entry to the start of the empty / insertion section
   * (if it isn't there already). Size-aligned moves are legal
   * since all offsets and block sizes share this same alignment.
   * Size-aligned moves tend to be faster than non-aligned ones
   * because no "odd" bytes at the end need special treatment.
   */
  if (entry->offset != level->current_data)
    {
      memmove(cache->data + level->current_data,
              cache->data + entry->offset,
              size);
      entry->offset = level->current_data;
    }

  /* The insertion position is now directly behind this entry.
   */
  level->current_data = entry->offset + size;
  level->next = entry->next;

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Move ENTRY in CACHE from L1 to L2.
 */
static void
promote_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t idx = get_index(cache, entry);
  apr_size_t size = ALIGN_VALUE(entry->size);
  assert(get_cache_level(cache, entry) == &cache->l1);
  assert(idx == cache->l1.next);

  /* copy item from the current location in L1 to the start of L2's
   * insertion window */
  memmove(cache->data + cache->l2.current_data,
          cache->data + entry->offset,
          size);
  entry->offset = cache->l2.current_data;

  /* The insertion position is now directly behind this entry.
   */
  cache->l2.current_data += size;

  /* remove ENTRY from chain of L1 entries and put it into L2
   */
  unchain_entry(cache, &cache->l1, entry, idx);
  chain_entry(cache, &cache->l2, entry, idx);
}
/* This function implements the cache insertion / eviction strategy for L2.
 *
 * If necessary, enlarge the insertion window of CACHE->L2 until it is at
 * least TO_FIT_IN->SIZE bytes long. TO_FIT_IN->SIZE must not exceed the
 * data buffer size allocated to CACHE->L2.
 *
 * Return TRUE if enough room could be found or made. A FALSE result
 * indicates that the respective item shall not be added.
 */
static svn_boolean_t
ensure_data_insertable_l2(svn_membuffer_t *cache,
                          entry_t *to_fit_in)
{
  entry_t *entry;

  /* accumulated size of the entries that have been removed to make
   * room for the new one.
   */
  apr_size_t moved_size = 0;
  /* count the number of entries that got moved. A single large entry
   * being moved is not enough to reject an insertion.
   */
  apr_size_t moved_count = 0;

  /* accumulated "worth" of items dropped so far */
  apr_uint64_t drop_hits = 0;

  /* estimated "worth" of the new entry */
  apr_uint64_t drop_hits_limit = (to_fit_in->hit_count + 1)
                               * (apr_uint64_t)to_fit_in->priority;
  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually:
   *
   * - the incoming entry is small enough to fit into L2
   * - every iteration either frees parts of L2 or counts the moved size
   * - eventually, we either moved too many items with too much total size
   *   to accept the new entry, or made enough room in L2 for the new entry
   *
   * Low-prio items get rejected even sooner.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint64_t end = cache->l2.next == NO_INDEX
                       ? cache->l2.start_offset + cache->l2.size
                       : get_entry(cache, cache->l2.next)->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= to_fit_in->size + cache->l2.current_data)
        return TRUE;
      /* Don't be too eager to cache data. If a lot of data has been moved
       * around, the current item has probably a relatively low priority.
       * We must also limit the effort spent here (even in case of faulty
       * heuristics). Therefore, give up after some time.
       */
      if (moved_size > 4 * to_fit_in->size && moved_count > 7)
        return FALSE;

      /* if the net worth (in weighted hits) of items removed is already
       * larger than what we want to insert, reject TO_FIT_IN because it
       * still does not fit in. */
      if (drop_hits > drop_hits_limit)
        return FALSE;

      /* try to enlarge the insertion window
       */
      if (cache->l2.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l2.current_data = cache->l2.start_offset;
          cache->l2.next = cache->l2.first;
        }
      else
        {
          svn_boolean_t keep;
          entry = get_entry(cache, cache->l2.next);
          if (to_fit_in->priority < SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
            {
              /* Low prio items can be accepted only if the current
               * entry is of even lower prio and has fewer hits.
               */
              if (   entry->priority > to_fit_in->priority
                  || entry->hit_count > to_fit_in->hit_count)
                return FALSE;
            }
          if (entry->priority <= SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
            {
              /* Be quick to remove low-prio entries - even if the incoming
               * one is low-prio as well. This makes room for more important
               * data and replaces existing data with newly read information.
               */
              keep = FALSE;
            }
          else
            {
              /* If the existing data is the same prio as the incoming data,
               * drop the existing entry if it had seen fewer (probably 0)
               * hits than the entry coming in from L1. In case of different
               * priorities, keep the current entry if it has higher prio.
               * The new entry may still find room by ousting other entries.
               */
              keep = to_fit_in->priority == entry->priority
                   ? entry->hit_count >= to_fit_in->hit_count
                   : entry->priority > to_fit_in->priority;
            }
          /* keepers or destroyers? */
          if (keep)
            {
              /* Moving entries around is not for free -> track costs. */
              moved_size += entry->size;
              ++moved_count;
              move_entry(cache, entry);
            }
          else
            {
              /* Drop the entry from the end of the insertion window.
               * Count the "hit importance" such that we are not sacrificing
               * too much of the high-hit contents. However, don't count
               * low-priority hits because higher prio entries will often
               * provide the same data but in a further stage of processing.
               */
              if (entry->priority > SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
                drop_hits += entry->hit_count * (apr_uint64_t)entry->priority;

              drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached. But if it was, "can't insert" was the
   * right answer. */
}
/* This function implements the cache insertion / eviction strategy for L1.
 *
 * If necessary, enlarge the insertion window of CACHE->L1 by promoting
 * entries to L2 until it is at least SIZE bytes long.
 *
 * Return TRUE if enough room could be found or made. A FALSE result
 * indicates that the respective item shall not be added because it is
 * too large.
 */
static svn_boolean_t
ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
{
  /* Guarantees that the while loop will terminate. */
  if (size > cache->l1.size)
    return FALSE;
  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint32_t entry_index = cache->l1.next;
      entry_t *entry = get_entry(cache, entry_index);
      apr_uint64_t end = cache->l1.next == NO_INDEX
                       ? cache->l1.start_offset + cache->l1.size
                       : entry->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= size + cache->l1.current_data)
        return TRUE;

      /* Enlarge the insertion window
       */
      if (cache->l1.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l1.current_data = cache->l1.start_offset;
          cache->l1.next = cache->l1.first;
        }
      else
        {
          /* Remove the entry from the end of insertion window and promote
           * it to L2, if it is important enough.
           */
          svn_boolean_t keep = ensure_data_insertable_l2(cache, entry);

          /* We might have touched the group that contains ENTRY. Recheck. */
          if (entry_index == cache->l1.next)
            {
              if (keep)
                promote_entry(cache, entry);
              else
                drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached. But if it was, "can't insert" was the
   * right answer. */
}
svn_error_t *
svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
                                  apr_size_t total_size,
                                  apr_size_t directory_size,
                                  apr_size_t segment_count,
                                  svn_boolean_t thread_safe,
                                  svn_boolean_t allow_blocking_writes,
                                  apr_pool_t *pool)
{
  svn_membuffer_t *c;
  apr_uint32_t seg;
1658 apr_uint32_t group_count;
1659 apr_uint32_t main_group_count;
1660 apr_uint32_t spare_group_count;
1661 apr_uint32_t group_init_size;
1662 apr_uint64_t data_size;
1663 apr_uint64_t max_entry_size;
  /* Limit the total size (only relevant if we can address > 4GB)
   */
#if APR_SIZEOF_VOIDP > 4
  if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
    total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
#endif

  /* Limit the segment count
   */
  if (segment_count > MAX_SEGMENT_COUNT)
    segment_count = MAX_SEGMENT_COUNT;
  if (segment_count * MIN_SEGMENT_SIZE > total_size)
    segment_count = total_size / MIN_SEGMENT_SIZE;

  /* The segment count must be a power of two. Round it down as necessary.
   */
  while ((segment_count & (segment_count-1)) != 0)
    segment_count &= segment_count-1;
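  /* Illustration: the loop above clears the lowest set bit until a single
   * bit remains, e.g. 13 (1101b) -> 12 (1100b) -> 8 (1000b), i.e. it
   * rounds segment_count down to a power of two.
   */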
  /* if the caller hasn't provided a reasonable segment count or the above
   * limitations set it to 0, derive one from the absolute cache size
   */
  if (segment_count < 1)
    {
      /* Determine a reasonable number of cache segments. Segmentation is
       * only useful for multi-threaded / multi-core servers as it reduces
       * lock contention on these systems.
       *
       * But on these systems, we can assume that ample memory has been
       * allocated to this cache. Smaller caches should not be segmented
       * as this severely limits the maximum size of cachable items.
       *
       * Segments should not be smaller than 32MB and max. cachable item
       * size should grow as fast as segmentation.
       */
      apr_uint32_t segment_count_shift = 0;
      while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
             < total_size)
        ++segment_count_shift;

      segment_count = (apr_size_t)1 << segment_count_shift;
    }

  /* If we have an extremely large cache (>512 GB), the default segment
   * size may exceed the amount allocatable as one chunk. In that case,
   * increase segmentation until we are under the threshold.
   */
  while (   total_size / segment_count > MAX_SEGMENT_SIZE
         && segment_count < MAX_SEGMENT_COUNT)
    segment_count *= 2;

  /* allocate cache as an array of segments / cache objects */
  c = apr_palloc(pool, segment_count * sizeof(*c));
  /* Split total cache size into segments of equal size
   */
  total_size /= segment_count;
  directory_size /= segment_count;

  /* prevent pathological conditions: ensure a certain minimum cache size
   */
  if (total_size < 2 * sizeof(entry_group_t))
    total_size = 2 * sizeof(entry_group_t);

  /* adapt the dictionary size accordingly, if necessary:
   * It must hold at least one group and must not exceed the cache size.
   */
  if (directory_size > total_size - sizeof(entry_group_t))
    directory_size = total_size - sizeof(entry_group_t);
  if (directory_size < 2 * sizeof(entry_group_t))
    directory_size = 2 * sizeof(entry_group_t);
  /* limit the data size to what we can address.
   * Note that this cannot overflow since all values are of size_t.
   * Also, make it a multiple of the item placement granularity to
   * prevent subtle overflows.
   */
  data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;

  /* For cache sizes > 16TB, individual cache segments will be larger
   * than 32GB allowing for >4GB entries. But caching chunks larger
   * than 4GB is simply not supported.
   */
  max_entry_size = data_size / 8 > MAX_ITEM_SIZE
                 ? MAX_ITEM_SIZE
                 : data_size / 8;

  /* to keep the entries small, we use 32 bit indexes only
   * -> we need to ensure that no more than 4G entries exist.
   *
   * Note that this limit could only be exceeded in a very
   * theoretical setup with about 1EB of cache.
   */
  group_count = directory_size / sizeof(entry_group_t)
                    >= (APR_UINT32_MAX / GROUP_SIZE)
              ? (APR_UINT32_MAX / GROUP_SIZE) - 1
              : (apr_uint32_t)(directory_size / sizeof(entry_group_t));

  /* set some of the index directory aside as over-flow (spare) buffers */
  spare_group_count = MAX(group_count / 4, 1);
  main_group_count = group_count - spare_group_count;
  assert(spare_group_count > 0 && main_group_count > 0);
  group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
  for (seg = 0; seg < segment_count; ++seg)
    {
      /* allocate buffers and initialize cache members
       */
      c[seg].segment_count = (apr_uint32_t)segment_count;

      c[seg].group_count = main_group_count;
      c[seg].spare_group_count = spare_group_count;
      c[seg].first_spare_group = NO_INDEX;
      c[seg].max_spare_used = 0;

      c[seg].directory = apr_pcalloc(pool,
                                     group_count * sizeof(entry_group_t));

      /* Allocate and initialize directory entries as "not initialized",
       * hence "unused" */
      c[seg].group_initialized = apr_pcalloc(pool, group_init_size);

      /* Allocate 1/4th of the data buffer to L1
       */
      c[seg].l1.first = NO_INDEX;
      c[seg].l1.last = NO_INDEX;
      c[seg].l1.next = NO_INDEX;
      c[seg].l1.start_offset = 0;
      c[seg].l1.size = ALIGN_VALUE(data_size / 4);
      c[seg].l1.current_data = 0;

      /* The remaining 3/4th will be used as L2
       */
      c[seg].l2.first = NO_INDEX;
      c[seg].l2.last = NO_INDEX;
      c[seg].l2.next = NO_INDEX;
      c[seg].l2.start_offset = c[seg].l1.size;
      c[seg].l2.size = ALIGN_VALUE(data_size) - c[seg].l1.size;
      c[seg].l2.current_data = c[seg].l2.start_offset;

      /* This cast is safe because DATA_SIZE <= MAX_SEGMENT_SIZE. */
      c[seg].data = apr_palloc(pool, (apr_size_t)ALIGN_VALUE(data_size));
      c[seg].data_used = 0;
      c[seg].max_entry_size = max_entry_size;

      c[seg].used_entries = 0;
      c[seg].total_reads = 0;
      c[seg].total_writes = 0;
      c[seg].total_hits = 0;
      /* were allocations successful?
       */
      if (c[seg].data == NULL || c[seg].directory == NULL)
        {
          /* We are OOM. There is no need to proceed with "half a cache".
           */
          return svn_error_wrap_apr(APR_ENOMEM, "OOM");
        }

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
      /* A lock for intra-process synchronization to the cache, or NULL if
       * the cache's creator doesn't feel the cache needs to be
       * synchronized.
       */
      SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool));
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
      /* Same for read-write lock. */
      c[seg].lock = NULL;
      if (thread_safe)
        {
          apr_status_t status =
              apr_thread_rwlock_create(&(c[seg].lock), pool);
          if (status)
            return svn_error_wrap_apr(status, _("Can't create cache mutex"));
        }

      /* Select the behavior of write operations.
       */
      c[seg].allow_blocking_writes = allow_blocking_writes;
#endif
    }

  *cache = c;

  return SVN_NO_ERROR;
}
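/* Usage sketch (parameter values are illustrative only): create a 64MB,
 * thread-safe cache with blocking writes, a directory sized at 1/16th of
 * the total and an automatically derived segment count (0):
 *
 *   svn_membuffer_t *membuffer;
 *   SVN_ERR(svn_cache__membuffer_cache_create(&membuffer,
 *                                             0x4000000,
 *                                             0x400000,
 *                                             0,
 *                                             TRUE,
 *                                             TRUE,
 *                                             pool));
 */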
svn_error_t *
svn_cache__membuffer_clear(svn_membuffer_t *cache)
{
  apr_size_t seg;
  apr_size_t segment_count = cache->segment_count;

  /* Length of the group_initialized array in bytes.
     See also svn_cache__membuffer_cache_create(). */
  apr_size_t group_init_size
    = 1 + (cache->group_count + cache->spare_group_count)
            / (8 * GROUP_INIT_GRANULARITY);

  /* Clear segment by segment. This implies that other threads may read
     and write to other segments after we cleared them and before the
     last segment is done.

     However, that is no different from a write request coming through
     right after we cleared all segments because dependencies between
     cache entries (recursive lookup / access locks) are not allowed.
   */
  for (seg = 0; seg < segment_count; ++seg)
    {
1877 /* Unconditionally acquire the write lock. */
1878 SVN_ERR(force_write_lock_cache(&cache[seg]));
1880 /* Mark all groups as "not initialized", which implies "empty". */
1881 cache[seg].first_spare_group = NO_INDEX;
1882 cache[seg].max_spare_used = 0;
1884 memset(cache[seg].group_initialized, 0, group_init_size);
1886 /* Unlink L1 contents. */
1887 cache[seg].l1.first = NO_INDEX;
1888 cache[seg].l1.last = NO_INDEX;
1889 cache[seg].l1.next = NO_INDEX;
1890 cache[seg].l1.current_data = cache[seg].l1.start_offset;
1892 /* Unlink L2 contents. */
1893 cache[seg].l2.first = NO_INDEX;
1894 cache[seg].l2.last = NO_INDEX;
1895 cache[seg].l2.next = NO_INDEX;
1896 cache[seg].l2.current_data = cache[seg].l2.start_offset;
1898 /* Reset content counters. */
1899 cache[seg].data_used = 0;
1900 cache[seg].used_entries = 0;
1902 /* Segment may be used again. */
1903 SVN_ERR(unlock_cache(&cache[seg], SVN_NO_ERROR));
1907 return SVN_NO_ERROR;
1910 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1911 * by the hash value TO_FIND and set *FOUND accordingly.
1913 * Note: This function requires the caller to serialize access.
1914 * Don't call it directly, call entry_exists instead.
1916 static svn_error_t *
1917 entry_exists_internal(svn_membuffer_t *cache,
1918 apr_uint32_t group_index,
1919 const full_key_t *to_find,
1920 svn_boolean_t *found)
1922 *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1923 return SVN_NO_ERROR;
1926 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1927 * by the hash value TO_FIND and set *FOUND accordingly.
1929 static svn_error_t *
1930 entry_exists(svn_membuffer_t *cache,
1931 apr_uint32_t group_index,
1932 const full_key_t *to_find,
1933 svn_boolean_t *found)
1935 WITH_READ_LOCK(cache,
1936 entry_exists_internal(cache,
1937 group_index,
1938 to_find,
1939 found));
1941 return SVN_NO_ERROR;
1944 /* Given the SIZE and PRIORITY of a new item, return the cache level
1945 (L1 or L2) in segment CACHE that this item shall be inserted into.
1946 If we can neither find nor make enough room for the item, return NULL.
1947 */
1948 static cache_level_t *
1949 select_level(svn_membuffer_t *cache,
1950 apr_size_t size,
1951 apr_uint32_t priority)
1953 if (cache->max_entry_size >= size)
1955 /* Small items go into L1. */
1956 return ensure_data_insertable_l1(cache, size) ? &cache->l1 : NULL;
1960 else if ( cache->l2.size >= size
1961 && MAX_ITEM_SIZE >= size
1962 && priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1964 /* Large but important items go into L2. */
1965 entry_t dummy_entry = { { { 0 } } };
1966 dummy_entry.priority = priority;
1967 dummy_entry.size = size;
1969 return ensure_data_insertable_l2(cache, &dummy_entry) ? &cache->l2 : NULL;
1974 /* Don't cache large, unimportant items. */
1975 return NULL;
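/* To summarize the admission policy above (a reading aid, not extra logic):
 *
 *   size <= max_entry_size                              -> L1 (if room can be made)
 *   size fits L2 and MAX_ITEM_SIZE, priority > default  -> L2 (if room can be made)
 *   otherwise                                           -> NULL, i.e. don't cache
 */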
1978 /* Try to insert the serialized item given in BUFFER with ITEM_SIZE
1979 * into the group GROUP_INDEX of CACHE and uniquely identify it by
1980 * hash value TO_FIND.
1982 * However, there is no guarantee that it will actually be put into
1983 * the cache. If there is already some data associated with TO_FIND,
1984 * it will be removed from the cache even if the new data cannot
1985 * be inserted.
1987 * Note: This function requires the caller to serialize access.
1988 * Don't call it directly, call membuffer_cache_set instead.
1990 static svn_error_t *
1991 membuffer_cache_set_internal(svn_membuffer_t *cache,
1992 const full_key_t *to_find,
1993 apr_uint32_t group_index,
1994 char *buffer,
1995 apr_size_t item_size,
1996 apr_uint32_t priority,
1997 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1998 apr_pool_t *scratch_pool)
2000 cache_level_t *level;
2001 apr_size_t size = item_size + to_find->entry_key.key_len;
2003 /* first, look for a previous entry for the given key */
2004 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2006 /* if there is an old version of that entry and the new data fits into
2007 * the old spot, just re-use that space. */
2008 if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
2010 /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
2011 * lest we run into trouble with 32 bit underflow *not* treated as a
2012 * negative value.
2013 */
2014 cache->data_used += (apr_uint64_t)size - entry->size;
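/* A worked example (hypothetical sizes): with a 32 bit apr_size_t,
 * SIZE == 4096 and ENTRY->SIZE == 8192, the difference computed in 32
 * bits would wrap to 0xFFFFF000 and zero-extend into the 64 bit
 * addition, inflating DATA_USED by almost 4GB. Computed in 64 bits,
 * the wrap-around happens modulo 2^64 and cancels in the addition,
 * correctly shrinking DATA_USED by 4096. */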
2016 entry->priority = priority;
2018 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2020 /* Remember original content, type and key (hashes)
2022 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2023 memcpy(&entry->tag, tag, sizeof(*tag));
2027 if (entry->key.key_len)
2028 memcpy(cache->data + entry->offset, to_find->full_key.data,
2029 entry->key.key_len);
2031 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2032 item_size);
2034 cache->total_writes++;
2035 return SVN_NO_ERROR;
2038 /* if necessary, enlarge the insertion window.
2040 level = buffer ? select_level(cache, size, priority) : NULL;
2043 /* Remove old data for this key, if that exists.
2044 * Get an unused entry for the key and initialize it with
2045 * the serialized item's (future) position within data buffer.
2047 entry = find_entry(cache, group_index, to_find, TRUE);
2048 entry->size = size;
2049 entry->offset = level->current_data;
2050 entry->priority = priority;
2052 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2054 /* Remember original content, type and key (hashes)
2056 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2057 memcpy(&entry->tag, tag, sizeof(*tag));
2061 /* Link the entry properly.
2063 insert_entry(cache, entry);
2065 /* Copy the serialized item data into the cache.
2067 if (entry->key.key_len)
2068 memcpy(cache->data + entry->offset, to_find->full_key.data,
2069 entry->key.key_len);
2071 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2072 item_size);
2074 cache->total_writes++;
2078 /* if there is already an entry for this key, drop it.
2079 * Since ensure_data_insertable may have removed entries from
2080 * ENTRY's group, re-do the lookup.
2082 entry = find_entry(cache, group_index, to_find, FALSE);
2084 drop_entry(cache, entry);
2087 return SVN_NO_ERROR;
2090 /* Try to insert the ITEM and use the KEY to uniquely identify it.
2091 * However, there is no guarantee that it will actually be put into
2092 * the cache. If there is already some data associated with the KEY,
2093 * it will be removed from the cache even if the new data cannot
2094 * be inserted.
2096 * The SERIALIZER is called to transform the ITEM into a single,
2097 * flat data buffer. Temporary allocations may be done in POOL.
2099 static svn_error_t *
2100 membuffer_cache_set(svn_membuffer_t *cache,
2101 const full_key_t *key,
2102 void *item,
2103 svn_cache__serialize_func_t serializer,
2104 apr_uint32_t priority,
2105 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2106 apr_pool_t *scratch_pool)
2108 apr_uint32_t group_index;
2109 void *buffer = NULL;
2110 apr_size_t size = 0;
2112 /* find the entry group that will hold the key.
2114 group_index = get_group_index(&cache, &key->entry_key);
2116 /* Serialize the data, if there is any.
2117 */
2118 if (item)
2119 SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
2121 /* The actual cache data access needs to be sync'ed. */
2123 WITH_WRITE_LOCK(cache,
2124 membuffer_cache_set_internal(cache,
2130 DEBUG_CACHE_MEMBUFFER_TAG
2132 return SVN_NO_ERROR;
2135 /* Count a hit in ENTRY within CACHE.
2138 increment_hit_counters(svn_membuffer_t *cache, entry_t *entry)
2140 /* To minimize the memory footprint of the cache index, we limit local
2141 * hit counters to 32 bits. These may overflow but we don't really
2142 * care because at worst, ENTRY will be dropped from cache once every
2143 * few billion hits. */
2144 svn_atomic_inc(&entry->hit_count);
2146 /* That one is for stats only. */
2147 cache->total_hits++;
2150 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2151 * by the hash value TO_FIND. If no item has been stored for KEY,
2152 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
2153 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
2154 * be done in RESULT_POOL.
2156 * Note: This function requires the caller to serialize access.
2157 * Don't call it directly, call membuffer_cache_get instead.
2159 static svn_error_t *
2160 membuffer_cache_get_internal(svn_membuffer_t *cache,
2161 apr_uint32_t group_index,
2162 const full_key_t *to_find,
2163 char **buffer,
2164 apr_size_t *item_size,
2165 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2166 apr_pool_t *result_pool)
2171 /* The actual cache data access needs to be sync'ed. */
2173 entry = find_entry(cache, group_index, to_find, FALSE);
2174 cache->total_reads++;
2177 /* no such entry found.
2182 return SVN_NO_ERROR;
2185 size = ALIGN_VALUE(entry->size) - entry->key.key_len;
2186 *buffer = apr_palloc(result_pool, size);
2187 memcpy(*buffer, cache->data + entry->offset + entry->key.key_len, size);
2189 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2191 /* Check for overlapping entries.
2193 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2194 entry->offset + size
2195 <= get_entry(cache, entry->next)->offset);
2197 /* Compare original content, type and key (hashes)
2199 SVN_ERR(store_content_part(tag, *buffer, entry->size - entry->key.key_len,
2201 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2205 /* update hit statistics
2207 increment_hit_counters(cache, entry);
2208 *item_size = entry->size - entry->key.key_len;
2210 return SVN_NO_ERROR;
2213 /* Look for the *ITEM identified by KEY. If no item has been stored
2214 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
2215 * to re-construct the proper object from the serialized data.
2216 * Allocations will be done in POOL.
2218 static svn_error_t *
2219 membuffer_cache_get(svn_membuffer_t *cache,
2220 const full_key_t *key,
2221 void **item,
2222 svn_cache__deserialize_func_t deserializer,
2223 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2224 apr_pool_t *result_pool)
2226 apr_uint32_t group_index;
2227 char *buffer;
2228 apr_size_t size;
2230 /* find the entry group that will hold the key.
2232 group_index = get_group_index(&cache, &key->entry_key);
2233 WITH_READ_LOCK(cache,
2234 membuffer_cache_get_internal(cache,
2239 DEBUG_CACHE_MEMBUFFER_TAG
2242 /* re-construct the original data object from its serialized form.
2247 return SVN_NO_ERROR;
2250 return deserializer(item, buffer, size, result_pool);
2253 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2254 * by the hash value TO_FIND. If no item has been stored for KEY, *FOUND
2255 * will be set to FALSE, and to TRUE otherwise.
2257 static svn_error_t *
2258 membuffer_cache_has_key_internal(svn_membuffer_t *cache,
2259 apr_uint32_t group_index,
2260 const full_key_t *to_find,
2261 svn_boolean_t *found)
2263 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2266 /* This will often be called by "block read" when most data is already
2267 in L2 and only a few previously evicted items are added to L1
2268 again. While items in L1 are well protected for a while, L2
2269 items may get evicted soon. Thus, mark them all as "hit" to give
2270 them a higher chance of survival. */
2271 increment_hit_counters(cache, entry);
2279 return SVN_NO_ERROR;
2282 /* Look for an entry identified by KEY. If no item has been stored
2283 * for KEY, *FOUND will be set to FALSE, and to TRUE otherwise.
2285 /* Implements svn_cache__has_key for membuffer caches.
2287 static svn_error_t *
2288 membuffer_cache_has_key(svn_membuffer_t *cache,
2289 const full_key_t *key,
2290 svn_boolean_t *found)
2292 /* find the entry group that will hold the key.
2294 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2295 cache->total_reads++;
2297 WITH_READ_LOCK(cache,
2298 membuffer_cache_has_key_internal(cache,
2303 return SVN_NO_ERROR;
2306 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2307 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
2308 * If not found, *ITEM will be NULL.
2310 * Otherwise, the DESERIALIZER is called with that entry and the BATON
2311 * provided and will extract the desired information. The result is set
2312 * in *ITEM. Allocations will be done in POOL.
2314 * Note: This function requires the caller to serialize access.
2315 * Don't call it directly, call membuffer_cache_get_partial instead.
2317 static svn_error_t *
2318 membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
2319 apr_uint32_t group_index,
2320 const full_key_t *to_find,
2322 svn_boolean_t *found,
2323 svn_cache__partial_getter_func_t deserializer,
2325 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2326 apr_pool_t *result_pool)
2328 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2329 cache->total_reads++;
2335 return SVN_NO_ERROR;
2339 const void *item_data = cache->data + entry->offset + entry->key.key_len;
2340 apr_size_t item_size = entry->size - entry->key.key_len;
2342 increment_hit_counters(cache, entry);
2344 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2346 /* Check for overlapping entries.
2348 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2349 entry->offset + entry->size
2350 <= get_entry(cache, entry->next)->offset);
2352 /* Compare original content, type and key (hashes)
2354 SVN_ERR(store_content_part(tag, item_data, item_size, result_pool));
2355 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2359 return deserializer(item, item_data, item_size, baton, result_pool);
2363 /* Look for the cache entry identified by KEY. FOUND indicates
2364 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
2365 * the DESERIALIZER is called with that entry and the BATON provided
2366 * and will extract the desired information. The result is set in *ITEM.
2367 * Allocations will be done in POOL.
2369 static svn_error_t *
2370 membuffer_cache_get_partial(svn_membuffer_t *cache,
2371 const full_key_t *key,
2373 svn_boolean_t *found,
2374 svn_cache__partial_getter_func_t deserializer,
2376 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2377 apr_pool_t *result_pool)
2379 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2381 WITH_READ_LOCK(cache,
2382 membuffer_cache_get_partial_internal
2383 (cache, group_index, key, item, found,
2384 deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
2387 return SVN_NO_ERROR;
2390 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2391 * by the hash value TO_FIND. If no entry has been found, the function
2392 * returns without modifying the cache.
2394 * Otherwise, FUNC is called with that entry and the BATON provided
2395 * and may modify the cache entry. Allocations will be done in POOL.
2397 * Note: This function requires the caller to serialize access.
2398 * Don't call it directly, call membuffer_cache_set_partial instead.
2400 static svn_error_t *
2401 membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
2402 apr_uint32_t group_index,
2403 const full_key_t *to_find,
2404 svn_cache__partial_setter_func_t func,
2406 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2407 apr_pool_t *scratch_pool)
2409 /* cache item lookup
2411 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2412 cache->total_reads++;
2414 /* this function is a no-op if the item is not in cache
2420 /* access the serialized cache item */
2421 apr_size_t key_len = entry->key.key_len;
2422 void *item_data = cache->data + entry->offset + key_len;
2423 void *orig_data = item_data;
2424 apr_size_t item_size = entry->size - key_len;
2426 increment_hit_counters(cache, entry);
2427 cache->total_writes++;
2429 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2431 /* Check for overlapping entries.
2433 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2434 entry->offset + entry->size
2435 <= get_entry(cache, entry->next)->offset);
2437 /* Compare original content, type and key (hashes)
2439 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2440 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2444 /* modify it, preferably in-situ.
2446 err = func(&item_data, &item_size, baton, scratch_pool);
2450 /* Something somewhere went wrong while FUNC was modifying the
2451 * cache item. Thus, it might have become invalid / corrupted.
2452 * We had better drop it.
2454 drop_entry(cache, entry);
2460 /* if the modification caused a re-allocation, we need to remove
2461 * the old entry and copy the new data back into the cache.
2463 if (item_data != orig_data)
2465 /* Remove the old entry and try to make space for the new one.
2467 drop_entry(cache, entry);
2468 if ( (cache->max_entry_size >= item_size + key_len)
2469 && ensure_data_insertable_l1(cache, item_size + key_len))
2471 /* Write the new entry.
2473 entry = find_entry(cache, group_index, to_find, TRUE);
2474 entry->size = item_size + key_len;
2475 entry->offset = cache->l1.current_data;
2478 memcpy(cache->data + entry->offset,
2479 to_find->full_key.data, key_len);
2481 memcpy(cache->data + entry->offset + key_len, item_data,
2482 item_size);
2484 /* Link the entry properly.
2486 insert_entry(cache, entry);
2490 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2492 /* Remember original content, type and key (hashes)
2494 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2495 memcpy(&entry->tag, tag, sizeof(*tag));
2501 return SVN_NO_ERROR;
2504 /* Look for the cache entry identified by KEY. If no entry
2505 * has been found, the function returns without modifying the cache.
2506 * Otherwise, FUNC is called with that entry and the BATON provided
2507 * and may modify the cache entry. Allocations will be done in POOL.
2509 static svn_error_t *
2510 membuffer_cache_set_partial(svn_membuffer_t *cache,
2511 const full_key_t *key,
2512 svn_cache__partial_setter_func_t func,
2514 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2515 apr_pool_t *scratch_pool)
2517 /* cache item lookup
2519 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2520 WITH_WRITE_LOCK(cache,
2521 membuffer_cache_set_partial_internal
2522 (cache, group_index, key, func, baton,
2523 DEBUG_CACHE_MEMBUFFER_TAG
2526 /* done here -> unlock the cache
2528 return SVN_NO_ERROR;
2531 /* Implement the svn_cache__t interface on top of a shared membuffer cache.
2533 * Because membuffer caches tend to be very large, there will be rather few
2534 * of them (usually only one). Thus, the same instance shall be used as the
2535 * backend to many application-visible svn_cache__t instances. This should
2536 * also achieve global resource usage fairness.
2538 * To accommodate items from multiple resources, the individual keys must be
2539 * unique over all sources. This is achieved by simply adding a prefix key
2540 * that unambiguously identifies the item's context (e.g. path to the
2541 * respective repository). The prefix will be set upon construction of the
2542 * svn_cache__t instance.
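 */

/* For illustration (hypothetical prefixes): two front-ends created with
 * the prefixes "fsfs:/repos/alpha:revprop" and "fsfs:/repos/beta:revprop"
 * can both store an item under the same user-visible key without ever
 * colliding, because each prefix is folded into every combined key
 * before the shared membuffer is consulted.
 */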
2545 /* Internal cache structure (used in svn_cache__t.cache_internal) basically
2546 * holding the additional parameters needed to call the respective membuffer
2549 typedef struct svn_membuffer_cache_t
2551 /* this is where all our data will end up */
2553 svn_membuffer_t *membuffer;
2555 /* use this conversion function when inserting an item into the membuffer. */
2557 svn_cache__serialize_func_t serializer;
2559 /* use this conversion function when reading an item from the membuffer. */
2561 svn_cache__deserialize_func_t deserializer;
2563 /* Prepend this byte sequence to any key passed to us.
2564 * This makes our keys different from all keys used by svn_membuffer_cache_t
2565 * instances that we don't want to share cached data with.
2569 /* length of the keys that will be passed to us through the
2570 * svn_cache__t interface. May be APR_HASH_KEY_STRING.
2572 apr_ssize_t key_len;
2574 /* priority class for all items written through this interface */
2575 apr_uint32_t priority;
2577 /* Temporary buffer containing the hash key for the current access
2579 full_key_t combined_key;
2581 /* if enabled, this will serialize the access to this instance.
2583 svn_mutex__t *mutex;
2584 } svn_membuffer_cache_t;
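/* A rough call trace through such a front-end (a sketch; the public
 * entry points live in cache.c):
 *
 *   svn_cache__get(&value, &found, cache, key, pool)
 *     -> svn_membuffer_cache_get()      (vtable function, below)
 *        -> combine_key()               fold PREFIX + KEY into COMBINED_KEY
 *        -> membuffer_cache_get()       find the entry, copy serialized data
 *           -> deserializer()           re-construct the cached object
 */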
2586 /* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should
2587 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check.
2589 #define ALLOCATIONS_PER_POOL_CLEAR 10
2591 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2592 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2593 * This could replace combine_key() entirely but we actually use it only
2594 * when the quick path fails.
2597 combine_long_key(svn_membuffer_cache_t *cache,
2598 const void *key,
2599 apr_ssize_t key_len)
2601 apr_uint32_t *digest_buffer;
2603 apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2604 apr_size_t aligned_key_len;
2606 /* handle variable-length keys */
2607 if (key_len == APR_HASH_KEY_STRING)
2608 key_len = strlen((const char *) key);
2610 aligned_key_len = ALIGN_VALUE(key_len);
2613 svn_membuf__ensure(&cache->combined_key.full_key,
2614 aligned_key_len + prefix_len);
2616 key_copy = (char *)cache->combined_key.full_key.data + prefix_len;
2617 cache->combined_key.entry_key.key_len = aligned_key_len + prefix_len;
2618 memcpy(key_copy, key, key_len);
2619 memset(key_copy + key_len, 0, aligned_key_len - key_len);
2621 /* Hash key into 16 bytes. */
2622 digest_buffer = (apr_uint32_t *)cache->combined_key.entry_key.fingerprint;
2623 svn__fnv1a_32x4_raw(digest_buffer, key, key_len);
2625 /* Combine with prefix. */
2626 cache->combined_key.entry_key.fingerprint[0]
2627 ^= cache->prefix.entry_key.fingerprint[0];
2628 cache->combined_key.entry_key.fingerprint[1]
2629 ^= cache->prefix.entry_key.fingerprint[1];
2632 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2633 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2636 combine_key(svn_membuffer_cache_t *cache,
2637 const void *key,
2638 apr_ssize_t key_len)
2640 /* short, fixed-size keys are the most common case */
2641 if (key_len != APR_HASH_KEY_STRING && key_len <= 16)
2643 const apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2645 /* Copy of *key, padded with 0.
2646 * We put it just behind the prefix already copied into the COMBINED_KEY.
2647 * The buffer space has been allocated when the cache was created. */
2648 apr_uint64_t *data = (void *)((char *)cache->combined_key.full_key.data +
2650 assert(prefix_len <= cache->combined_key.full_key.size - 16);
2651 cache->combined_key.entry_key.key_len = prefix_len + 16;
2655 memcpy(data, key, key_len);
2657 /* scramble key DATA. All of this must be reversible to prevent key
2658 * collisions. So, we limit ourselves to xor and permutations. */
2659 data[1] = (data[1] << 27) | (data[1] >> 37);
2660 data[1] ^= data[0] & 0xffff;
2661 data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
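/* A sketch of the inverse transformation (never needed at run-time; it
 * merely demonstrates that the scrambling above is a bijection on the
 * 16 padded key bytes and hence cannot introduce collisions):
 *
 *   data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
 *   data[1] ^= data[0] & 0xffff;
 *   data[1] = (data[1] >> 27) | (data[1] << 37);
 */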
2663 /* combine with this cache's namespace */
2664 cache->combined_key.entry_key.fingerprint[0]
2665 = data[0] ^ cache->prefix.entry_key.fingerprint[0];
2666 cache->combined_key.entry_key.fingerprint[1]
2667 = data[1] ^ cache->prefix.entry_key.fingerprint[1];
2671 /* longer or variably sized keys */
2672 combine_long_key(cache, key, key_len);
2676 /* Implement svn_cache__vtable_t.get (not thread-safe)
2678 static svn_error_t *
2679 svn_membuffer_cache_get(void **value_p,
2680 svn_boolean_t *found,
2683 apr_pool_t *result_pool)
2685 svn_membuffer_cache_t *cache = cache_void;
2687 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2695 return SVN_NO_ERROR;
2698 /* construct the full, i.e. globally unique, key by adding
2699 * this cache instance's prefix. */
2701 combine_key(cache, key, cache->key_len);
2703 /* Look the item up. */
2704 SVN_ERR(membuffer_cache_get(cache->membuffer,
2705 &cache->combined_key,
2707 cache->deserializer,
2708 DEBUG_CACHE_MEMBUFFER_TAG
2712 *found = *value_p != NULL;
2714 return SVN_NO_ERROR;
2717 /* Implement svn_cache__vtable_t.has_key (not thread-safe)
2719 static svn_error_t *
2720 svn_membuffer_cache_has_key(svn_boolean_t *found,
2723 apr_pool_t *scratch_pool)
2725 svn_membuffer_cache_t *cache = cache_void;
2732 return SVN_NO_ERROR;
2735 /* construct the full, i.e. globally unique, key by adding
2736 * this cache instance's prefix. */
2738 combine_key(cache, key, cache->key_len);
2740 /* Look the item up. */
2741 SVN_ERR(membuffer_cache_has_key(cache->membuffer,
2742 &cache->combined_key,
2746 return SVN_NO_ERROR;
2749 /* Implement svn_cache__vtable_t.set (not thread-safe)
2751 static svn_error_t *
2752 svn_membuffer_cache_set(void *cache_void,
2755 apr_pool_t *scratch_pool)
2757 svn_membuffer_cache_t *cache = cache_void;
2759 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2763 return SVN_NO_ERROR;
2765 /* construct the full, i.e. globally unique, key by adding
2766 * this cache instance's prefix. */
2768 combine_key(cache, key, cache->key_len);
2770 /* (probably) add the item to the cache. But there is no real guarantee
2771 * that the item will actually be cached afterwards.
2773 return membuffer_cache_set(cache->membuffer,
2774 &cache->combined_key,
2778 DEBUG_CACHE_MEMBUFFER_TAG
2782 /* Implement svn_cache__vtable_t.iter as "not implemented"
2784 static svn_error_t *
2785 svn_membuffer_cache_iter(svn_boolean_t *completed,
2787 svn_iter_apr_hash_cb_t user_cb,
2789 apr_pool_t *scratch_pool)
2791 return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
2792 _("Can't iterate a membuffer-based cache"));
2795 /* Implement svn_cache__vtable_t.get_partial (not thread-safe)
2797 static svn_error_t *
2798 svn_membuffer_cache_get_partial(void **value_p,
2799 svn_boolean_t *found,
2802 svn_cache__partial_getter_func_t func,
2804 apr_pool_t *result_pool)
2806 svn_membuffer_cache_t *cache = cache_void;
2808 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2815 return SVN_NO_ERROR;
2818 combine_key(cache, key, cache->key_len);
2819 SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
2820 &cache->combined_key,
2825 DEBUG_CACHE_MEMBUFFER_TAG
2828 return SVN_NO_ERROR;
2831 /* Implement svn_cache__vtable_t.set_partial (not thread-safe)
2833 static svn_error_t *
2834 svn_membuffer_cache_set_partial(void *cache_void,
2836 svn_cache__partial_setter_func_t func,
2838 apr_pool_t *scratch_pool)
2840 svn_membuffer_cache_t *cache = cache_void;
2842 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2846 combine_key(cache, key, cache->key_len);
2847 SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
2848 &cache->combined_key,
2851 DEBUG_CACHE_MEMBUFFER_TAG
2854 return SVN_NO_ERROR;
2857 /* Implement svn_cache__vtable_t.is_cachable
2858 * (thread-safe even without mutex)
2860 static svn_boolean_t
2861 svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
2863 /* Don't allow extremely large element sizes. Otherwise, the cache
2864 * might be thrashed by a few extremely large entries. And the size
2865 * must be small enough to be stored in a 32 bit value.
2867 svn_membuffer_cache_t *cache = cache_void;
2868 return cache->priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY
2869 ? cache->membuffer->l2.size >= size && MAX_ITEM_SIZE >= size
2870 : size <= cache->membuffer->max_entry_size;
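/* Note that these limits mirror the admission rules in select_level()
 * above: high-priority items may use all of L2, everything else is
 * capped at the segment's MAX_ENTRY_SIZE. */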
2873 /* Add statistics of SEGMENT to INFO. If INCLUDE_HISTOGRAM is TRUE,
2874 * accumulate index bucket fill levels in INFO->HISTOGRAM.
2876 static svn_error_t *
2877 svn_membuffer_get_segment_info(svn_membuffer_t *segment,
2878 svn_cache__info_t *info,
2879 svn_boolean_t include_histogram)
2883 info->data_size += segment->l1.size + segment->l2.size;
2884 info->used_size += segment->data_used;
2885 info->total_size += segment->l1.size + segment->l2.size +
2886 segment->group_count * GROUP_SIZE * sizeof(entry_t);
2888 info->used_entries += segment->used_entries;
2889 info->total_entries += segment->group_count * GROUP_SIZE;
2891 if (include_histogram)
2892 for (i = 0; i < segment->group_count; ++i)
2893 if (is_group_initialized(segment, i))
2895 entry_group_t *chain_end
2896 = last_group_in_chain(segment, &segment->directory[i]);
2897 apr_size_t use
2898 = MIN(chain_end->header.used,
2899 sizeof(info->histogram) / sizeof(info->histogram[0]) - 1);
2900 info->histogram[use]++;
2903 return SVN_NO_ERROR;
2906 /* Implement svn_cache__vtable_t.get_info
2907 * (thread-safe even without mutex)
2909 static svn_error_t *
2910 svn_membuffer_cache_get_info(void *cache_void,
2911 svn_cache__info_t *info,
2912 svn_boolean_t reset,
2913 apr_pool_t *result_pool)
2915 svn_membuffer_cache_t *cache = cache_void;
2918 /* cache front-end specific data */
2920 info->id = apr_pstrdup(result_pool, cache->prefix.full_key.data);
2922 /* collect info from shared cache back-end */
2924 for (i = 0; i < cache->membuffer->segment_count; ++i)
2926 svn_membuffer_t *segment = cache->membuffer + i;
2927 WITH_READ_LOCK(segment,
2928 svn_membuffer_get_segment_info(segment, info, FALSE));
2931 return SVN_NO_ERROR;
2935 /* the v-table for membuffer-based caches (single-threaded access)
2937 static svn_cache__vtable_t membuffer_cache_vtable = {
2938 svn_membuffer_cache_get,
2939 svn_membuffer_cache_has_key,
2940 svn_membuffer_cache_set,
2941 svn_membuffer_cache_iter,
2942 svn_membuffer_cache_is_cachable,
2943 svn_membuffer_cache_get_partial,
2944 svn_membuffer_cache_set_partial,
2945 svn_membuffer_cache_get_info
2948 /* Implement svn_cache__vtable_t.get and serialize all cache access.
2950 static svn_error_t *
2951 svn_membuffer_cache_get_synced(void **value_p,
2952 svn_boolean_t *found,
2955 apr_pool_t *result_pool)
2957 svn_membuffer_cache_t *cache = cache_void;
2958 SVN_MUTEX__WITH_LOCK(cache->mutex,
2959 svn_membuffer_cache_get(value_p,
2965 return SVN_NO_ERROR;
2968 /* Implement svn_cache__vtable_t.has_key and serialize all cache access.
2970 static svn_error_t *
2971 svn_membuffer_cache_has_key_synced(svn_boolean_t *found,
2974 apr_pool_t *result_pool)
2976 svn_membuffer_cache_t *cache = cache_void;
2977 SVN_MUTEX__WITH_LOCK(cache->mutex,
2978 svn_membuffer_cache_has_key(found,
2983 return SVN_NO_ERROR;
2986 /* Implement svn_cache__vtable_t.set and serialize all cache access.
2988 static svn_error_t *
2989 svn_membuffer_cache_set_synced(void *cache_void,
2992 apr_pool_t *scratch_pool)
2994 svn_membuffer_cache_t *cache = cache_void;
2995 SVN_MUTEX__WITH_LOCK(cache->mutex,
2996 svn_membuffer_cache_set(cache_void,
3001 return SVN_NO_ERROR;
3004 /* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
3006 static svn_error_t *
3007 svn_membuffer_cache_get_partial_synced(void **value_p,
3008 svn_boolean_t *found,
3011 svn_cache__partial_getter_func_t func,
3013 apr_pool_t *result_pool)
3015 svn_membuffer_cache_t *cache = cache_void;
3016 SVN_MUTEX__WITH_LOCK(cache->mutex,
3017 svn_membuffer_cache_get_partial(value_p,
3025 return SVN_NO_ERROR;
3028 /* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
3030 static svn_error_t *
3031 svn_membuffer_cache_set_partial_synced(void *cache_void,
3033 svn_cache__partial_setter_func_t func,
3035 apr_pool_t *scratch_pool)
3037 svn_membuffer_cache_t *cache = cache_void;
3038 SVN_MUTEX__WITH_LOCK(cache->mutex,
3039 svn_membuffer_cache_set_partial(cache_void,
3045 return SVN_NO_ERROR;
3048 /* the v-table for membuffer-based caches (multi-threading support)
3050 static svn_cache__vtable_t membuffer_cache_synced_vtable = {
3051 svn_membuffer_cache_get_synced,
3052 svn_membuffer_cache_has_key_synced,
3053 svn_membuffer_cache_set_synced,
3054 svn_membuffer_cache_iter, /* no sync required */
3055 svn_membuffer_cache_is_cachable, /* no sync required */
3056 svn_membuffer_cache_get_partial_synced,
3057 svn_membuffer_cache_set_partial_synced,
3058 svn_membuffer_cache_get_info /* no sync required */
3061 /* standard serialization function for svn_stringbuf_t items.
3062 * Implements svn_cache__serialize_func_t.
3064 static svn_error_t *
3065 serialize_svn_stringbuf(void **buffer,
3066 apr_size_t *buffer_size,
3068 apr_pool_t *result_pool)
3070 svn_stringbuf_t *value_str = item;
3072 *buffer = value_str->data;
3073 *buffer_size = value_str->len + 1;
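/* The "+ 1" deliberately includes the terminating NUL so that
 * deserialize_svn_stringbuf() below can reconstruct the string as
 * LEN = BUFFER_SIZE - 1 while DATA remains a valid C string. */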
3075 return SVN_NO_ERROR;
3078 /* standard de-serialization function for svn_stringbuf_t items.
3079 * Implements svn_cache__deserialize_func_t.
3081 static svn_error_t *
3082 deserialize_svn_stringbuf(void **item,
3084 apr_size_t buffer_size,
3085 apr_pool_t *result_pool)
3087 svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));
3089 value_str->pool = result_pool;
3090 value_str->blocksize = buffer_size;
3091 value_str->data = buffer;
3092 value_str->len = buffer_size - 1;
3093 *item = value_str;
3095 return SVN_NO_ERROR;
3098 /* Construct a svn_cache__t object on top of a shared membuffer cache.
3101 svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
3102 svn_membuffer_t *membuffer,
3103 svn_cache__serialize_func_t serializer,
3104 svn_cache__deserialize_func_t deserializer,
3105 apr_ssize_t klen,
3106 const char *prefix,
3107 apr_uint32_t priority,
3108 svn_boolean_t thread_safe,
3109 apr_pool_t *result_pool,
3110 apr_pool_t *scratch_pool)
3112 svn_checksum_t *checksum;
3113 apr_size_t prefix_len, prefix_orig_len;
3115 /* allocate the cache header structures
3117 svn_cache__t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
3118 svn_membuffer_cache_t *cache = apr_pcalloc(result_pool, sizeof(*cache));
3120 /* initialize our internal cache header
3122 cache->membuffer = membuffer;
3123 cache->serializer = serializer
3124 ? serializer
3125 : serialize_svn_stringbuf;
3126 cache->deserializer = deserializer
3127 ? deserializer
3128 : deserialize_svn_stringbuf;
3129 cache->priority = priority;
3130 cache->key_len = klen;
3132 SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, result_pool));
3134 /* Copy the prefix into the prefix full key. Align it to ITEM_ALIGNMENT.
3135 * Don't forget to include the terminating NUL. */
3136 prefix_orig_len = strlen(prefix) + 1;
3137 prefix_len = ALIGN_VALUE(prefix_orig_len);
3139 svn_membuf__create(&cache->prefix.full_key, prefix_len, result_pool);
3140 memcpy((char *)cache->prefix.full_key.data, prefix, prefix_orig_len);
3141 memset((char *)cache->prefix.full_key.data + prefix_orig_len, 0,
3142 prefix_len - prefix_orig_len);
3144 /* Construct the folded prefix key. */
3145 SVN_ERR(svn_checksum(&checksum,
3150 memcpy(cache->prefix.entry_key.fingerprint, checksum->digest,
3151 sizeof(cache->prefix.entry_key.fingerprint));
3152 cache->prefix.entry_key.key_len = prefix_len;
3154 /* Initialize the combined key. Pre-allocate some extra room in the full
3155 * key such that we probably don't need to re-alloc. */
3156 cache->combined_key.entry_key = cache->prefix.entry_key;
3157 svn_membuf__create(&cache->combined_key.full_key, prefix_len + 200,
3159 memcpy(cache->combined_key.full_key.data, cache->prefix.full_key.data,
3162 /* initialize the generic cache wrapper
3164 wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
3165 : &membuffer_cache_vtable;
3166 wrapper->cache_internal = cache;
3167 wrapper->error_handler = 0;
3168 wrapper->error_baton = 0;
3169 wrapper->pretend_empty = !!getenv("SVN_X_DOES_NOT_MARK_THE_SPOT");
3172 return SVN_NO_ERROR;
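/* A usage sketch (hypothetical namespace string and pools; passing NULL
 * (de-)serializers selects the svn_stringbuf_t defaults above):
 *
 *   svn_cache__t *cache;
 *   SVN_ERR(svn_cache__create_membuffer_cache(
 *             &cache, svn_cache__get_global_membuffer_cache(),
 *             NULL, NULL, APR_HASH_KEY_STRING, "my-app:some-namespace",
 *             SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY, TRUE,
 *             result_pool, scratch_pool));
 */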
3175 static svn_error_t *
3176 svn_membuffer_get_global_segment_info(svn_membuffer_t *segment,
3177 svn_cache__info_t *info)
3179 info->gets += segment->total_reads;
3180 info->sets += segment->total_writes;
3181 info->hits += segment->total_hits;
3183 WITH_READ_LOCK(segment,
3184 svn_membuffer_get_segment_info(segment, info, TRUE));
3186 return SVN_NO_ERROR;
3189 svn_cache__info_t *
3190 svn_cache__membuffer_get_global_info(apr_pool_t *pool)
3194 svn_membuffer_t *membuffer = svn_cache__get_global_membuffer_cache();
3195 svn_cache__info_t *info = apr_pcalloc(pool, sizeof(*info));
3197 /* cache front-end specific data */
3199 info->id = "membuffer globals";
3201 /* collect info from shared cache back-end */
3203 for (i = 0; i < membuffer->segment_count; ++i)
3204 svn_error_clear(svn_membuffer_get_global_segment_info(membuffer + i,