2 * cache-membuffer.c: in-memory caching for Subversion
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
26 #include <apr_thread_rwlock.h>
28 #include "svn_pools.h"
29 #include "svn_checksum.h"
30 #include "svn_private_config.h"
31 #include "svn_string.h"
32 #include "svn_sorts.h" /* get the MIN macro */
34 #include "private/svn_atomic.h"
35 #include "private/svn_dep_compat.h"
36 #include "private/svn_mutex.h"
37 #include "private/svn_string_private.h"
43 * This svn_cache__t implementation actually consists of two parts:
44 * a shared (per-process) singleton membuffer cache instance and shallow
45 * svn_cache__t front-end instances that each use different key spaces.
46 * For data management, they all forward to the singleton membuffer cache.
 * A membuffer cache consists of two parts:
 *
 * 1. A linear data buffer containing cached items in a serialized
 *    representation, prefixed by their full cache keys. There may be
 *    arbitrary gaps between entries. This buffer is subdivided into
 *    (currently two) cache levels.
 *
 * 2. A directory of cache entries. This is organized similarly to CPU
 *    data caches: for every possible key, there is exactly one group
 *    of entries that may contain the header info for an item with
 *    that given key. The result is a GROUP_SIZE+-way associative cache
 *    whose associativity can be dynamically increased.
 *
 * Only the start addresses of these two data parts are given as native
 * pointers. All other references are expressed as offsets to these
 * pointers. With that design, it is relatively easy to share the same
 * data structure between different processes and / or to persist them
 * on disk. These out-of-process features have not been implemented yet.
67 * Superficially, cache levels are being used as usual: insertion happens
68 * into L1 and evictions will promote items to L2. But their whole point
69 * is a different one. L1 uses a circular buffer, i.e. we have perfect
70 * caching for the last N bytes where N is the size of L1. L2 uses a more
71 * elaborate scheme based on priorities and hit counts as described below.
 * The data buffer usage information is implicitly given by the directory
 * entries. Every USED entry has a reference to the previous and the next
 * used dictionary entry, and this doubly-linked list is ordered by the
 * offsets of their item data within the data buffer. So removing data,
 * for instance, is done simply by unlinking it from the chain, implicitly
 * marking the entry as well as the data buffer section previously
 * associated with it as unused. The first and last elements of that
 * chain are referenced from the respective cache level.
 *
 * Insertion can occur at only one sliding position per cache level. It is
 * marked by its offset in the data buffer and the index of the first used
 * entry at or behind that position. If this gap is too small to accommodate
 * the new item (plus its full key), the insertion window is extended as
 * described below. The new entry will always be inserted at the bottom end
 * of the window and since the next used entry is known, properly sorted
 * insertion is possible.
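 *
 * Illustration (a sketch, not part of the original description): with used
 * entries A..D stored in offset order,
 *
 *     ... [A][B]   <gap>   [C][D] ...
 *               ^               ^
 *         current_data     next -> C
 *
 * a new item is appended at current_data if the gap in front of C is
 * large enough; otherwise, the window is extended as described below.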
 * To make the cache perform robustly in a wide range of usage scenarios,
 * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
 * details). Every item holds a read hit counter and there is a global read
 * hit counter. The more hits an entry has in relation to the average, the
 * more likely it is to be kept, using a rand()-based condition. The test is
 * applied only to the entry following the insertion window. If it doesn't
 * get evicted, it is moved to the beginning of that window and the window
 * is moved.
 *
 * Moreover, the entry's hits get halved to make that entry more likely to
 * be removed the next time the sliding insertion / removal window comes by.
 * As a result, frequently used entries are likely not to be dropped until
 * they have not been used for a while. Also, even in a cache thrashing
 * situation, about 50% of the content survives every time 50% of the cache
 * is re-written with new entries. For details on the fine-tuning involved,
 * see the comments in ensure_data_insertable_l2().
107 * Due to the randomized mapping of keys to entry groups, some groups may
108 * overflow. In that case, there are spare groups that can be chained to
109 * an already used group to extend it.
 * To limit the entry size and management overhead, not the actual item keys
 * but only their hashed "fingerprint" will be stored. These are reasonably
 * unique to prevent collisions, so we only need to support up to one entry
 * per entry key. To guarantee that there are no conflicts, however, we
 * store the actual full key immediately in front of the serialized item
 * data. That is, the entry offset actually points to the full key and the
 * key length stored in the entry acts as an additional offset to find the
 * actual item data.
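 *
 * Illustration (derived from the layout described above): for a used
 * entry E, the buffer bytes [E.offset, E.offset + E.size) hold the full
 * key in the first E.key.key_len bytes, immediately followed by the
 * serialized item data, i.e. the item data starts at
 * DATA + E.offset + E.key.key_len.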
 * All access to the cached data needs to be serialized. Because we want
 * to scale well despite that bottleneck, we simply segment the cache into
 * a number of independent caches (segments). Items will be multiplexed
 * based on their hash key.
 */
/* APR's read-write lock implementation on Windows is horribly inefficient.
 * Even with very low contention a runtime overhead of 35 percent has been
 * measured for 'svn-bench null-export' over ra_serf.
 *
 * Use a simple mutex on Windows. Because there is one mutex per segment,
 * large machines should (and usually can) be configured with large caches
 * such that read contention is kept low.
 */
#ifdef WIN32
#  define USE_SIMPLE_MUTEX 1
#else
#  define USE_SIMPLE_MUTEX 0
#endif
/* For more efficient copy operations, let's align all data items properly.
 * Since we can't portably align pointers, this is rather the item size
 * granularity which ensures *relative* alignment within the cache - still
 * giving us decent copy speeds on most machines.
 *
 * Must be a power of 2.
 */
#define ITEM_ALIGNMENT 16
150 /* By default, don't create cache segments smaller than this value unless
151 * the total cache size itself is smaller.
153 #define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)
155 /* The minimum segment size we will allow for multi-segmented caches
157 #define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)
159 /* The maximum number of segments allowed. Larger numbers reduce the size
160 * of each segment, in turn reducing the max size of a cachable item.
161 * Also, each segment gets its own lock object. The actual number supported
162 * by the OS may therefore be lower and svn_cache__membuffer_cache_create
163 * may return an error.
165 #define MAX_SEGMENT_COUNT 0x10000
167 /* As of today, APR won't allocate chunks of 4GB or more. So, limit the
168 * segment size to slightly below that.
170 #define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)
172 /* We don't mark the initialization status for every group but initialize
173 * a number of groups at once. That will allow for a very small init flags
174 * vector that is likely to fit into the CPU caches even for fairly large
175 * membuffer caches. For instance, the default of 32 means 8x32 groups per
176 * byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group x 40 index
177 * bytes/entry x 8 cache bytes/index byte = 1kB init vector / 640MB cache.
179 #define GROUP_INIT_GRANULARITY 32
/* Invalid index reference value. Equivalent to (apr_uint32_t)(-1).
 */
#define NO_INDEX APR_UINT32_MAX
/* To save space in our group structure, we only use 32 bit size values
 * and, therefore, limit the size of each entry to just below 4GB.
 * Supporting larger items is not a good idea as the data transfer
 * to and from the cache would block other threads for a very long time.
 */
#define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
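/* Illustration: with the ITEM_ALIGNMENT of 16 defined above, MAX_ITEM_SIZE
 * evaluates to (apr_uint32_t)-16 == 0xFFFFFFF0, i.e. just below 4GB.
 */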
/* We use this structure to identify cache entries. There cannot be two
 * entries with the same entry key. However unlikely, though, two different
 * full keys (see full_key_t) may have the same entry key. That is a
 * collision and at most one of them can be stored in the cache at any time.
 */
typedef struct entry_key_t
{
  /* 16 byte finger print of the full key. */
  apr_uint64_t fingerprint[2];

  /* Length of the full key. This value is aligned to ITEM_ALIGNMENT to
   * make sure the subsequent item content is properly aligned. */
  apr_size_t key_len;
} entry_key_t;
/* A full key, i.e. the combination of the cache's key prefix with some
 * dynamic part appended to it. It also contains its ENTRY_KEY.
 */
typedef struct full_key_t
{
  /* Reduced form identifying the cache entry (if such an entry exists). */
  entry_key_t entry_key;

  /* This contains the full combination. Note that the SIZE element may
   * be larger than ENTRY_KEY.KEY_LEN, but only the latter determines the
   * valid key length. */
  svn_membuf_t full_key;
} full_key_t;
/* Debugging / corruption detection support.
 * If you define this macro, the getter functions will perform expensive
 * checks on the item data, requested keys and entry types. If there is
 * a mismatch found in any of them when being compared with the values
 * remembered in the setter function, an error will be returned.
 */
#ifdef SVN_DEBUG_CACHE_MEMBUFFER
/* The prefix passed to svn_cache__create_membuffer_cache() effectively
 * defines the type of all items stored by that cache instance. We'll take
 * the last 15 bytes + \0 as plaintext for easy identification by the dev.
 */
#define PREFIX_TAIL_LEN 16
/* This record will be attached to any cache entry. It tracks item data
 * (content), key and type as hash values and is the baseline against which
 * the getters will compare their results to detect inconsistencies.
 */
typedef struct entry_tag_t
{
  /* MD5 checksum over the serialized item data.
   */
  unsigned char content_hash[APR_MD5_DIGESTSIZE];

  /* Hash value of the svn_cache_t instance that wrote the item
   * (i.e. a combination of type and repository)
   */
  unsigned char prefix_hash[APR_MD5_DIGESTSIZE];

  /* Note that this only covers the variable part of the key,
   * i.e. it will be different from the full key hash used for
   * cache addressing.
   */
  unsigned char key_hash[APR_MD5_DIGESTSIZE];

  /* Last letters of the key in human readable format
   * (ends with the type identifier, e.g. "DAG")
   */
  char prefix_tail[PREFIX_TAIL_LEN];

  /* Length of the variable key part.
   */
  apr_size_t key_len;
} entry_tag_t;
/* Initialize all members of TAG except for the content hash.
 */
static svn_error_t *store_key_part(entry_tag_t *tag,
                                   const full_key_t *prefix_key,
                                   const void *key,
                                   apr_size_t key_len,
                                   apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  const char *prefix = prefix_key->full_key.data;
  apr_size_t prefix_len = strlen(prefix);

  if (prefix_len >= sizeof(tag->prefix_tail))
    {
      prefix += prefix_len - (sizeof(tag->prefix_tail) - 1);
      prefix_len = sizeof(tag->prefix_tail) - 1;
    }

  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       key,
                       key_len,
                       pool));

  memcpy(tag->prefix_hash, prefix_key->entry_key.fingerprint,
         sizeof(tag->prefix_hash));
  memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));

  memset(tag->prefix_tail, 0, sizeof(tag->prefix_tail));
  memcpy(tag->prefix_tail, prefix, prefix_len + 1);

  tag->key_len = key_len;

  return SVN_NO_ERROR;
}
/* Initialize the content hash member of TAG.
 */
static svn_error_t* store_content_part(entry_tag_t *tag,
                                       const void *data,
                                       apr_size_t size,
                                       apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       data,
                       size,
                       pool));

  memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));

  return SVN_NO_ERROR;
}
/* Compare two tags and fail with an assertion upon differences.
 */
static svn_error_t* assert_equal_tags(const entry_tag_t *lhs,
                                      const entry_tag_t *rhs)
{
  SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
                        sizeof(lhs->content_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
                        sizeof(lhs->prefix_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
                        sizeof(lhs->key_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
                        sizeof(lhs->prefix_tail)) == 0);

  SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);

  return SVN_NO_ERROR;
}
/* Recurring code snippets.
 */

#define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,

#define DEBUG_CACHE_MEMBUFFER_TAG tag,

#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)                     \
  entry_tag_t _tag;                                              \
  entry_tag_t *tag = &_tag;                                      \
  if (key)                                                       \
    SVN_ERR(store_key_part(tag,                                  \
                           &cache->prefix,                       \
                           key,                                  \
                           cache->key_len == APR_HASH_KEY_STRING \
                             ? strlen((const char *) key)        \
                             : cache->key_len,                   \
                           pool));
#else

/* Don't generate any checks if consistency checks have not been enabled.
 */
#define DEBUG_CACHE_MEMBUFFER_TAG_ARG
#define DEBUG_CACHE_MEMBUFFER_TAG
#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)
368 #endif /* SVN_DEBUG_CACHE_MEMBUFFER */
/* A single dictionary entry. Since all entries will be allocated once
 * during cache creation, those entries might be either used or unused.
 * An entry is used if and only if it is contained in the doubly-linked
 * list of used entries per cache level.
 */
typedef struct entry_t
{
  /* Identifying the data item. Only valid for used entries.
   */
  entry_key_t key;

  /* The offset of the cached item's serialized data within the cache's
   * DATA buffer.
   * Only valid for used entries.
   */
  apr_uint64_t offset;

  /* Size of the serialized item data. May be 0. The MAX_ITEM_SIZE macro
   * above ensures that there will be no overflows.
   * Only valid for used entries.
   */
  apr_uint32_t size;

  /* Number of (read) hits for this entry. Will be reset upon write.
   * Only valid for used entries.
   */
  svn_atomic_t hit_count;

  /* Reference to the next used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.last member. NO_INDEX also implies that
   * the data buffer is not used beyond offset+size.
   * Only valid for used entries.
   */
  apr_uint32_t next;

  /* Reference to the previous used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.first member.
   * Only valid for used entries.
   */
  apr_uint32_t previous;

  /* Priority of this entry. This entry will not be replaced by lower-
   * priority items.
   */
  apr_uint32_t priority;

#ifdef SVN_DEBUG_CACHE_MEMBUFFER
  /* Remember type, content and key hashes.
   */
  entry_tag_t tag;
#endif
} entry_t;
/* Group header struct.
 */
typedef struct group_header_t
{
  /* number of entries used [0 .. USED-1] */
  apr_uint32_t used;

  /* next group in the chain or NO_INDEX for the last.
   * For recyclable unused spare groups, this points to the next
   * unused spare group */
  apr_uint32_t next;

  /* previous group in the chain or NO_INDEX for the first */
  apr_uint32_t previous;

  /* number of elements in the chain from start to here.
   * >= 1 for used groups, 0 for unused spare groups */
  apr_uint32_t chain_length;
} group_header_t;
/* The size of the group struct should be a power of two to make sure it
 * does not cross memory page boundaries. Since we already access the cache
 * randomly, having two page table lookups instead of one is bad.
 */
#define GROUP_BLOCK_SIZE 512
/* A ~10-way associative cache seems to be a good compromise between
 * performance (worst-case lookups) and efficiency-loss due to collisions.
 *
 * This value may be changed to any positive integer.
 */
#define GROUP_SIZE \
  ((GROUP_BLOCK_SIZE - sizeof(group_header_t)) / sizeof(entry_t))
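/* Illustration (sizes are platform-dependent; the values below are merely
 * an example): assuming sizeof(group_header_t) == 16 and
 * sizeof(entry_t) == 48 with the debug tag disabled, GROUP_SIZE evaluates
 * to (512 - 16) / 48 = 10, i.e. the ~10-way associativity mentioned above.
 */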
/* Maximum number of groups in a chain, i.e. a cache index group can hold
 * up to GROUP_SIZE * MAX_GROUP_CHAIN_LENGTH entries.
 */
#define MAX_GROUP_CHAIN_LENGTH 8

/* We group dictionary entries to make this GROUP_SIZE-way associative.
 */
typedef struct entry_group_t
{
  /* group globals */
  group_header_t header;

  /* padding and also room for future extensions */
  char padding[GROUP_BLOCK_SIZE - sizeof(group_header_t)
               - sizeof(entry_t) * GROUP_SIZE];

  /* the actual entries */
  entry_t entries[GROUP_SIZE];
} entry_group_t;
/* Per-cache level header structure. Instances of this are members of
 * svn_membuffer_t and will use non-overlapping sections of its DATA buffer.
 * All offset values are global / absolute to that whole buffer.
 */
typedef struct cache_level_t
{
  /* Reference to the first (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t first;

  /* Reference to the last (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t last;

  /* Reference to the first (defined by the order of content in the data
   * buffer) used dictionary entry behind the insertion position
   * (current_data). If NO_INDEX, the data buffer is free starting at the
   * current_data offset.
   */
  apr_uint32_t next;

  /* First offset in the cache's DATA buffer that belongs to this level.
   */
  apr_uint64_t start_offset;

  /* Size of data buffer allocated to this level in bytes. Must be > 0.
   */
  apr_uint64_t size;

  /* Offset in the data buffer where the next insertion shall occur.
   */
  apr_uint64_t current_data;
} cache_level_t;
/* The cache header structure.
 */
struct svn_membuffer_t
{
  /* Number of cache segments. Must be a power of 2.
     Please note that this structure represents only one such segment
     and that all segments must / will report the same values here. */
  apr_uint32_t segment_count;

  /* The dictionary, GROUP_SIZE * (group_count + spare_group_count)
   * entries long. Never NULL.
   */
  entry_group_t *directory;

  /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
   * Allows for efficiently marking groups as "not initialized".
   */
  unsigned char *group_initialized;

  /* Size of dictionary in groups. Must be > 0.
   */
  apr_uint32_t group_count;

  /* Total number of spare groups.
   */
  apr_uint32_t spare_group_count;

  /* First recyclable spare group.
   */
  apr_uint32_t first_spare_group;

  /* Maximum number of spare groups ever used. I.e. group index
   * group_count + max_spare_used is the first unused spare group
   * if first_spare_group is NO_INDEX.
   */
  apr_uint32_t max_spare_used;

  /* Pointer to the data buffer, data_size bytes long. Never NULL.
   */
  unsigned char *data;

  /* Total number of data buffer bytes in use.
   */
  apr_uint64_t data_used;

  /* Largest entry size that we would accept. For total cache sizes
   * less than 4TB (sic!), this is determined by the total cache size.
   */
  apr_uint64_t max_entry_size;

  /* The cache levels, organized as sub-buffers. Since entries in the
   * DIRECTORY use offsets in DATA for addressing, a cache lookup does
   * not need to know the cache level of a specific item. Cache levels
   * are only used to implement a hybrid insertion / eviction strategy.
   */

  /* First cache level, i.e. most insertions happen here. Very large
   * items might get inserted directly into L2. L1 is a strict FIFO
   * ring buffer that does not care about item priorities. All evicted
   * items get a chance to be promoted to L2.
   */
  cache_level_t l1;

  /* Second cache level, i.e. data evicted from L1 will be added here
   * if the item is "important" enough or the L2 insertion window is large
   * enough.
   */
  cache_level_t l2;

  /* Number of used dictionary entries, i.e. number of cached items.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint32_t used_entries;

  /* Total number of calls to membuffer_cache_get.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_reads;

  /* Total number of calls to membuffer_cache_set.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_writes;

  /* Total number of hits since the cache's creation.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_hits;

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  /* A lock for intra-process synchronization to the cache, or NULL if
   * the cache's creator doesn't feel the cache needs to be
   * synchronized.
   */
  svn_mutex__t *lock;
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  /* Same for read-write lock. */
  apr_thread_rwlock_t *lock;

  /* If set, write accesses will wait until they get exclusive access.
   * Otherwise, they will become no-ops if the segment is currently
   * read-locked. Only used when LOCK is an r/w lock.
   */
  svn_boolean_t allow_blocking_writes;
#endif
};
/* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
 */
#define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)
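/* Illustration: with ITEM_ALIGNMENT == 16, ALIGN_VALUE(0) == 0,
 * ALIGN_VALUE(1) == 16 and ALIGN_VALUE(16) == 16, i.e. values get rounded
 * up to the next multiple of 16. The bit trick requires ITEM_ALIGNMENT
 * to be a power of 2 (see above).
 */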
/* If locking is supported for CACHE, acquire a read lock for it.
 */
static svn_error_t *
read_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
      if (status)
        return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire a write lock for it.
 * Set *SUCCESS to FALSE, if we couldn't acquire the write lock;
 * leave it untouched otherwise.
 */
static svn_error_t *
write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status;
      if (cache->allow_blocking_writes)
        {
          status = apr_thread_rwlock_wrlock(cache->lock);
        }
      else
        {
          status = apr_thread_rwlock_trywrlock(cache->lock);
          if (SVN_LOCK_IS_BUSY(status))
            {
              *success = FALSE;
              status = APR_SUCCESS;
            }
        }

      if (status)
        return svn_error_wrap_apr(status,
                                  _("Can't write-lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire an unconditional write lock
 * for it.
 */
static svn_error_t *
force_write_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
  if (status)
    return svn_error_wrap_apr(status,
                              _("Can't write-lock cache mutex"));

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, release the current lock
 * (read or write). Return ERR upon success; if unlocking fails and ERR
 * is not already set, return the unlock error instead.
 */
static svn_error_t *
unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__unlock(cache->lock, err);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
      if (err)
        return err;

      if (status)
        return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
    }

  return err;
#else
  return err;
#endif
}
/* If supported, guard the execution of EXPR with a read lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 */
#define WITH_READ_LOCK(cache, expr)         \
do {                                        \
  SVN_ERR(read_lock_cache(cache));          \
  SVN_ERR(unlock_cache(cache, (expr)));     \
} while (0)
/* If supported, guard the execution of EXPR with a write lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 *
 * The write lock process is complicated if we don't allow waiting for
 * the lock: If we didn't get the lock, we may still need to remove an
 * existing entry for the given key because that content is now stale.
 * Once we discovered such an entry, we unconditionally do a blocking
 * wait for the write lock. In case no old content could be found, a
 * failing lock attempt is simply a no-op and we exit the macro.
 */
#define WITH_WRITE_LOCK(cache, expr)                            \
do {                                                            \
  svn_boolean_t got_lock = TRUE;                                \
  SVN_ERR(write_lock_cache(cache, &got_lock));                  \
  if (!got_lock)                                                \
    {                                                           \
      svn_boolean_t exists;                                     \
      SVN_ERR(entry_exists(cache, group_index, key, &exists));  \
      if (exists)                                               \
        SVN_ERR(force_write_lock_cache(cache));                 \
      else                                                      \
        break;                                                  \
    }                                                           \
  SVN_ERR(unlock_cache(cache, (expr)));                         \
} while (0)
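/* Usage sketch (mirrors entry_exists() further down in this file): EXPR is
 * evaluated while the lock is held and its error, if any, is chained
 * through unlock_cache():
 *
 *   WITH_READ_LOCK(cache,
 *                  entry_exists_internal(cache, group_index, key, &found));
 */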
/* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
 * been initialized, yet. In that case, this group cannot contain any data.
 * Otherwise, a non-zero value is returned.
 */
static APR_INLINE unsigned char
is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char flags
    = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
  unsigned char bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));

  return flags & bit_mask;
}
/* Initializes the section of the directory in CACHE that contains
 * the entry group identified by GROUP_INDEX. */
static void
initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char bit_mask;
  apr_uint32_t i;

  /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
  apr_uint32_t first_index =
      (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
  apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
  if (last_index > cache->group_count + cache->spare_group_count)
    last_index = cache->group_count + cache->spare_group_count;

  for (i = first_index; i < last_index; ++i)
    {
      group_header_t *header = &cache->directory[i].header;
      header->used = 0;
      header->chain_length = 1;
      header->next = NO_INDEX;
      header->previous = NO_INDEX;
    }

  /* set the "initialized" bit for these groups */
  bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
  cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
    |= bit_mask;
}
/* Return the next available spare group from CACHE and mark it as used.
 * Return NULL, if no spare group is available.
 */
static entry_group_t *
allocate_spare_group(svn_membuffer_t *cache)
{
  entry_group_t *group = NULL;

  /* is there some ready-to-use group? */
  if (cache->first_spare_group != NO_INDEX)
    {
      group = &cache->directory[cache->first_spare_group];
      cache->first_spare_group = group->header.next;
    }

  /* any so far untouched spares available? */
  else if (cache->max_spare_used < cache->spare_group_count)
    {
      apr_uint32_t group_index = cache->group_count + cache->max_spare_used;
      ++cache->max_spare_used;

      if (!is_group_initialized(cache, group_index))
        initialize_group(cache, group_index);

      group = &cache->directory[group_index];
    }

  /* spare groups must be empty */
  assert(!group || !group->header.used);

  return group;
}
/* Mark previously allocated spare group GROUP in CACHE as "unused".
 */
static void
free_spare_group(svn_membuffer_t *cache,
                 entry_group_t *group)
{
  assert(group->header.used == 0);
  assert(group->header.previous != NO_INDEX);
  assert(group - cache->directory >= (apr_ssize_t)cache->group_count);

  /* unchain */
  cache->directory[group->header.previous].header.next = NO_INDEX;
  group->header.chain_length = 0;
  group->header.previous = NO_INDEX;

  /* add to chain of spares */
  group->header.next = cache->first_spare_group;
  cache->first_spare_group = (apr_uint32_t) (group - cache->directory);
}
/* Follow the group chain from GROUP in CACHE to its end and return the last
 * group. May return GROUP.
 */
static entry_group_t *
last_group_in_chain(svn_membuffer_t *cache,
                    entry_group_t *group)
{
  while (group->header.next != NO_INDEX)
    group = &cache->directory[group->header.next];

  return group;
}
/* Return the CHAIN_INDEX-th element in the group chain starting from group
 * START_GROUP_INDEX in CACHE.
 */
static entry_group_t *
get_group(svn_membuffer_t *cache,
          apr_uint32_t start_group_index,
          apr_uint32_t chain_index)
{
  entry_group_t *group = &cache->directory[start_group_index];
  for (; chain_index; --chain_index)
    group = &cache->directory[group->header.next];

  return group;
}
/* Resolve a dictionary entry reference, i.e. return the entry
 * for the given index IDX.
 */
static APR_INLINE entry_t *
get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
{
  return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
}
/* Get the entry index for the given ENTRY.
 */
static APR_INLINE apr_uint32_t
get_index(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t group_index
    = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);

  return (apr_uint32_t)group_index * GROUP_SIZE
       + (apr_uint32_t)(entry - cache->directory[group_index].entries);
}
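/* Illustration: get_index() is the inverse of get_entry(), i.e. for any
 * valid entry index IDX, get_index(cache, get_entry(cache, IDX)) == IDX.
 */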
/* Return the cache level of ENTRY in CACHE.
 */
static cache_level_t *
get_cache_level(svn_membuffer_t *cache, entry_t *entry)
{
  return entry->offset < cache->l1.size ? &cache->l1
                                        : &cache->l2;
}
/* Insert ENTRY to the chain of items that belong to LEVEL in CACHE. IDX
 * is ENTRY's item index and is only given for efficiency. The insertion
 * takes place just before LEVEL->NEXT. *CACHE will not be modified.
 */
static void
chain_entry(svn_membuffer_t *cache,
            cache_level_t *level,
            entry_t *entry,
            apr_uint32_t idx)
{
  /* insert ENTRY before this item */
  entry_t *next = level->next == NO_INDEX
                ? NULL
                : get_entry(cache, level->next);
  assert(idx == get_index(cache, entry));

  /* update entry chain
   */
  entry->next = level->next;
  if (level->first == NO_INDEX)
    {
      /* insert as the first entry and only in the chain
       */
      entry->previous = NO_INDEX;
      level->first = idx;
      level->last = idx;
    }
  else if (next == NULL)
    {
      /* insert as the last entry in the chain.
       * Note that it cannot also be at the beginning of the chain.
       */
      entry->previous = level->last;
      get_entry(cache, level->last)->next = idx;
      level->last = idx;
    }
  else
    {
      /* insert either at the start of a non-empty list or
       * somewhere in the middle
       */
      entry->previous = next->previous;
      next->previous = idx;

      if (entry->previous != NO_INDEX)
        get_entry(cache, entry->previous)->next = idx;
      else
        level->first = idx;
    }
}
/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE. IDX
 * is ENTRY's item index and is only given for efficiency. Please note
 * that neither *CACHE nor *ENTRY will be modified.
 */
static void
unchain_entry(svn_membuffer_t *cache,
              cache_level_t *level,
              entry_t *entry,
              apr_uint32_t idx)
{
  assert(idx == get_index(cache, entry));

  /* update the insertion window, if necessary
   */
  if (level->next == idx)
    level->next = entry->next;

  /* unlink it from the chain of used entries
   */
  if (entry->previous == NO_INDEX)
    level->first = entry->next;
  else
    get_entry(cache, entry->previous)->next = entry->next;

  if (entry->next == NO_INDEX)
    level->last = entry->previous;
  else
    get_entry(cache, entry->next)->previous = entry->previous;
}
/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
 * In contrast to insertion, removal is possible for any entry.
 */
static void
drop_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *last_group
    = last_group_in_chain(cache, &cache->directory[group_index]);
  apr_uint32_t last_in_group
    = (apr_uint32_t) ((last_group - cache->directory) * GROUP_SIZE
                      + last_group->header.used - 1);

  cache_level_t *level = get_cache_level(cache, entry);

  /* update global cache usage counters
   */
  cache->used_entries--;
  cache->data_used -= entry->size;

  /* extend the insertion window, if the entry happens to border it
   */
  if (idx == level->next)
    level->next = entry->next;

  if (entry->next == level->next)
    {
      /* insertion window starts right behind the entry to remove
       */
      if (entry->previous == NO_INDEX)
        {
          /* remove the first entry -> insertion may start at pos 0, now */
          level->current_data = level->start_offset;
        }
      else
        {
          /* insertion may start right behind the previous entry */
          entry_t *previous = get_entry(cache, entry->previous);
          level->current_data = ALIGN_VALUE(  previous->offset
                                            + previous->size);
        }
    }

  /* unlink it from the chain of used entries
   */
  unchain_entry(cache, level, entry, idx);

  /* Move last entry into hole (if the removed one is not the last used).
   * We need to do this since all used entries are at the beginning of
   * the group's entries array.
   */
  if (idx != last_in_group)
    {
      /* copy the last used entry to the removed entry's index
       */
      *entry = last_group->entries[last_group->header.used-1];

      /* this ENTRY may belong to a different cache level than the entry
       * we have just removed */
      level = get_cache_level(cache, entry);

      /* update foreign links to new index
       */
      if (last_in_group == level->next)
        level->next = idx;

      if (entry->previous == NO_INDEX)
        level->first = idx;
      else
        get_entry(cache, entry->previous)->next = idx;

      if (entry->next == NO_INDEX)
        level->last = idx;
      else
        get_entry(cache, entry->next)->previous = idx;
    }

  /* Update the number of used entries.
   */
  last_group->header.used--;

  /* Release the last group in the chain if it is a spare group
   */
  if (!last_group->header.used && last_group->header.previous != NO_INDEX)
    free_spare_group(cache, last_group);
}
/* Insert ENTRY into the chain of used dictionary entries. The entry's
 * offset and size members must already have been initialized. Also,
 * the offset must match the beginning of the insertion window.
 */
static void
insert_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *group = &cache->directory[group_index];
  cache_level_t *level = get_cache_level(cache, entry);

  /* The entry must start at the beginning of the insertion window.
   * It must also be the first unused entry in the group.
   */
  assert(entry->offset == level->current_data);
  assert(idx == group_index * GROUP_SIZE + group->header.used);
  level->current_data = ALIGN_VALUE(entry->offset + entry->size);

  /* update usage counters
   */
  cache->used_entries++;
  cache->data_used += entry->size;
  entry->hit_count = 0;
  group->header.used++;

  /* update entry chain
   */
  chain_entry(cache, level, entry, idx);

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
 * respective item.
 */
static apr_uint32_t
get_group_index(svn_membuffer_t **cache,
                const entry_key_t *key)
{
  svn_membuffer_t *segment0 = *cache;
  apr_uint64_t key0 = key->fingerprint[0];
  apr_uint64_t key1 = key->fingerprint[1];

  /* select the cache segment to use. They all have the same group_count.
   * Since key may not be well-distributed, pre-fold it to a smaller but
   * "denser" range. The modulus is a prime larger than the largest
   * group count. */
  *cache = &segment0[(key1 % APR_UINT64_C(2809637) + (key0 / 37))
                     & (segment0->segment_count - 1)];
  return (key0 % APR_UINT64_C(5030895599)) % segment0->group_count;
}
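/* Illustration of the mapping above (hypothetical values): with 4 segments
 * and 1000 groups per segment, a key is first folded into a segment via
 * (key1 % 2809637 + key0 / 37) & 3 and then into a group of that segment
 * via (key0 % 5030895599) % 1000. Both steps use only the two 64-bit
 * fingerprint words, never the full key.
 */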
/* Reduce the hit count of ENTRY and update the accumulated hit info
 * in CACHE accordingly.
 */
static APR_INLINE void
let_entry_age(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;

  if (hits_removed)
    {
      entry->hit_count -= hits_removed;
    }
  else
    {
      entry->priority /= 2;
    }
}
/* Return whether the keys in LHS and RHS match.
 */
static svn_boolean_t
entry_keys_match(const entry_key_t *lhs,
                 const entry_key_t *rhs)
{
  return (lhs->fingerprint[0] == rhs->fingerprint[0])
      && (lhs->fingerprint[1] == rhs->fingerprint[1])
      && (lhs->key_len == rhs->key_len);
}
/* Given the GROUP_INDEX that shall contain an entry with the hash key
 * TO_FIND, find that entry in the specified group.
 *
 * If FIND_EMPTY is not set, this function will return the one used entry
 * that actually matches the hash or NULL, if no such entry exists.
 *
 * If FIND_EMPTY has been set, this function will return the one used
 * entry that actually matches the hash (dropping its old content so it
 * can be replaced), an unused entry, or a forcibly removed entry (if all
 * group entries are currently in use). The entry's hash value will be
 * initialized with TO_FIND.
 *
 * Note: This function requires the caller to appropriately lock the CACHE.
 * For FIND_EMPTY==FALSE, a read lock is required, for FIND_EMPTY==TRUE,
 * the write lock must have been acquired.
 */
static entry_t *
find_entry(svn_membuffer_t *cache,
           apr_uint32_t group_index,
           const full_key_t *to_find,
           svn_boolean_t find_empty)
{
  entry_group_t *group;
  entry_t *entry = NULL;
  apr_uint32_t i;

  /* get the group that *must* contain the entry
   */
  group = &cache->directory[group_index];

  /* If the entry group has not been initialized, yet, there is no data.
   */
  if (! is_group_initialized(cache, group_index))
    {
      if (find_empty)
        {
          initialize_group(cache, group_index);
          entry = &group->entries[0];

          /* initialize entry for the new key */
          entry->key = to_find->entry_key;
        }

      return entry;
    }
  /* try to find the matching entry
   */
  while (1)
    {
      for (i = 0; i < group->header.used; ++i)
        if (entry_keys_match(&group->entries[i].key, &to_find->entry_key))
          {
            /* This is the only entry that _may_ contain the correct data. */
            entry = &group->entries[i];

            /* If we want to preserve it, check that it is actually a match. */
            if (!find_empty)
              {
                /* If there is no full key to compare, we are done. */
                if (!entry->key.key_len)
                  return entry;

                /* Compare the full key. */
                if (memcmp(to_find->full_key.data,
                           cache->data + entry->offset,
                           entry->key.key_len) == 0)
                  return entry;

                /* Key conflict. The entry to find cannot be anywhere else.
                 * Therefore, it is not cached. */
                return NULL;
              }

            /* need to empty that entry */
            drop_entry(cache, entry);
            if (group->header.used == GROUP_SIZE)
              group = last_group_in_chain(cache, group);
            else if (group->header.chain_length == 0)
              group = last_group_in_chain(cache,
                                          &cache->directory[group_index]);

            /* No entry found (actually, none left to find). */
            break;
          }

      /* leave the lookup loop at the end of the group chain */
      if (group->header.next == NO_INDEX)
        break;

      /* only full groups may chain */
      assert(group->header.used == GROUP_SIZE);
      group = &cache->directory[group->header.next];
    }
  /* None found. Are we looking for a free entry?
   */
  if (find_empty)
    {
      /* There is no empty entry in the chain, try chaining a spare group.
       */
      if (   group->header.used == GROUP_SIZE
          && group->header.chain_length < MAX_GROUP_CHAIN_LENGTH)
        {
          entry_group_t *new_group = allocate_spare_group(cache);
          if (new_group)
            {
              /* chain groups
               */
              new_group->header.chain_length = group->header.chain_length + 1;
              new_group->header.previous = (apr_uint32_t) (group -
                                                           cache->directory);
              new_group->header.next = NO_INDEX;
              group->header.next = (apr_uint32_t) (new_group -
                                                   cache->directory);
              group = new_group;
            }
        }

      /* if GROUP is still filled, we need to remove a random entry */
      if (group->header.used == GROUP_SIZE)
        {
          /* every entry gets the same chance of being removed.
           * Otherwise, we free the first entry, fill it and
           * remove it again on the next occasion without considering
           * the other entries in this group.
           *
           * We hit only one random group instead of processing all
           * groups in the chain.
           */
          cache_level_t *entry_level;
          int to_remove = rand() % (GROUP_SIZE * group->header.chain_length);
          entry_group_t *to_shrink
            = get_group(cache, group_index, to_remove / GROUP_SIZE);

          entry = &to_shrink->entries[to_remove % GROUP_SIZE];
          entry_level = get_cache_level(cache, entry);
          for (i = 0; i < GROUP_SIZE; ++i)
            {
              /* keep L1 entries whenever possible */
              cache_level_t *level
                = get_cache_level(cache, &to_shrink->entries[i]);
              if (   (level != entry_level && entry_level == &cache->l1)
                  || (entry->hit_count > to_shrink->entries[i].hit_count))
                {
                  entry_level = level;
                  entry = &to_shrink->entries[i];
                }
            }

          /* for the entries that have not been removed,
           * reduce their hit counts to put them at a relative
           * disadvantage the next time.
           */
          for (i = 0; i < GROUP_SIZE; ++i)
            if (entry != &to_shrink->entries[i])
              let_entry_age(cache, &to_shrink->entries[i]);

          drop_entry(cache, entry);
        }

      /* initialize entry for the new key
       */
      entry = &group->entries[group->header.used];
      entry->key = to_find->entry_key;
    }

  return entry;
}
/* Move a surviving ENTRY from just behind the insertion window to
 * its beginning and move the insertion window up accordingly.
 */
static void
move_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t size = ALIGN_VALUE(entry->size);
  cache_level_t *level = get_cache_level(cache, entry);

  /* This entry survived this cleansing run. Reset half of its
   * hit count so that its removal gets more likely in the next
   * run unless someone reads / hits this entry in the meantime.
   */
  let_entry_age(cache, entry);

  /* Move the entry to the start of the empty / insertion section
   * (if it isn't there already). Size-aligned moves are legal
   * since all offsets and block sizes share this same alignment.
   * Size-aligned moves tend to be faster than non-aligned ones
   * because no "odd" bytes at the end need special treatment.
   */
  if (entry->offset != level->current_data)
    {
      memmove(cache->data + level->current_data,
              cache->data + entry->offset,
              size);
      entry->offset = level->current_data;
    }

  /* The insertion position is now directly behind this entry.
   */
  level->current_data = entry->offset + size;
  level->next = entry->next;

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Move ENTRY in CACHE from L1 to L2.
 */
static void
promote_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t idx = get_index(cache, entry);
  apr_size_t size = ALIGN_VALUE(entry->size);
  assert(get_cache_level(cache, entry) == &cache->l1);
  assert(idx == cache->l1.next);

  /* copy item from the current location in L1 to the start of L2's
   * insertion window */
  memmove(cache->data + cache->l2.current_data,
          cache->data + entry->offset,
          size);
  entry->offset = cache->l2.current_data;

  /* The insertion position is now directly behind this entry.
   */
  cache->l2.current_data += size;

  /* remove ENTRY from chain of L1 entries and put it into L2
   */
  unchain_entry(cache, &cache->l1, entry, idx);
  chain_entry(cache, &cache->l2, entry, idx);
}
/* This function implements the cache insertion / eviction strategy for L2.
 *
 * If necessary, enlarge the insertion window of CACHE->L2 until it is at
 * least TO_FIT_IN->SIZE bytes long. TO_FIT_IN->SIZE must not exceed the
 * data buffer size allocated to CACHE->L2.
 *
 * Return TRUE if enough room could be found or made. A FALSE result
 * indicates that the respective item shall not be added.
 */
static svn_boolean_t
ensure_data_insertable_l2(svn_membuffer_t *cache,
                          entry_t *to_fit_in)
{
  entry_t *entry;

  /* accumulated size of the entries that have been removed to make
   * room for the new one.
   */
  apr_size_t moved_size = 0;
  /* count the number of entries that got moved. A single large entry
   * being moved is not enough to reject an insertion.
   */
  apr_size_t moved_count = 0;

  /* accumulated "worth" of items dropped so far */
  apr_uint64_t drop_hits = 0;

  /* estimated "worth" of the new entry */
  apr_uint64_t drop_hits_limit = (to_fit_in->hit_count + 1)
                               * (apr_uint64_t)to_fit_in->priority;
  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually:
   *
   * - the incoming entry is small enough to fit into L2
   * - every iteration either frees parts of L2 or counts the moved size
   * - eventually, we either moved too many items with too much total size
   *   to accept the new entry, or made enough room in L2 for the new entry
   *
   * Low-prio items get rejected even sooner.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint64_t end = cache->l2.next == NO_INDEX
                       ? cache->l2.start_offset + cache->l2.size
                       : get_entry(cache, cache->l2.next)->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= to_fit_in->size + cache->l2.current_data)
        return TRUE;
      /* Don't be too eager to cache data. If a lot of data has been moved
       * around, the current item has probably a relatively low priority.
       * We must also limit the effort spent here (even in case of faulty
       * heuristics). Therefore, give up after some time.
       */
      if (moved_size > 4 * to_fit_in->size && moved_count > 7)
        return FALSE;

      /* if the net worth (in weighted hits) of items removed is already
       * larger than what we want to insert, reject TO_FIT_IN because it
       * still does not fit in. */
      if (drop_hits > drop_hits_limit)
        return FALSE;

      /* try to enlarge the insertion window
       */
      if (cache->l2.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l2.current_data = cache->l2.start_offset;
          cache->l2.next = cache->l2.first;
        }
      else
        {
          svn_boolean_t keep;
          entry = get_entry(cache, cache->l2.next);
          if (to_fit_in->priority < SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
            {
              /* Low prio items can be accepted only if the current
               * entry is of even lower prio and has fewer hits.
               */
              if (   entry->priority > to_fit_in->priority
                  || entry->hit_count > to_fit_in->hit_count)
                return FALSE;
            }
          if (entry->priority <= SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
            {
              /* Be quick to remove low-prio entries - even if the incoming
               * one is low-prio as well. This makes room for more important
               * data and replaces existing data with newly read information.
               */
              keep = FALSE;
            }
          else
            {
              /* If the existing data is the same prio as the incoming data,
               * drop the existing entry if it had seen fewer (probably 0)
               * hits than the entry coming in from L1. In case of different
               * priorities, keep the current entry if it has higher prio.
               * The new entry may still find room by ousting other entries.
               */
              keep = to_fit_in->priority == entry->priority
                   ? entry->hit_count >= to_fit_in->hit_count
                   : entry->priority > to_fit_in->priority;
            }
          /* keepers or destroyers? */
          if (keep)
            {
              /* Moving entries around is not for free -> track costs. */
              moved_size += entry->size;
              ++moved_count;
              move_entry(cache, entry);
            }
          else
            {
              /* Drop the entry from the end of the insertion window.
               * Count the "hit importance" such that we are not sacrificing
               * too much of the high-hit contents. However, don't count
               * low-priority hits because higher prio entries will often
               * provide the same data but in a further stage of processing.
               */
              if (entry->priority > SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
                drop_hits += entry->hit_count * (apr_uint64_t)entry->priority;

              drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached. But if it was, "can't insert" was the
   * right answer. */
}
/* This function implements the cache insertion / eviction strategy for L1.
 *
 * If necessary, enlarge the insertion window of CACHE->L1 by promoting
 * entries to L2 until it is at least SIZE bytes long.
 *
 * Return TRUE if enough room could be found or made. A FALSE result
 * indicates that the respective item shall not be added because it is
 * too large.
 */
static svn_boolean_t
ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
{
  /* Guarantees that the while loop will terminate. */
  if (size > cache->l1.size)
    return FALSE;
  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint32_t entry_index = cache->l1.next;
      entry_t *entry = get_entry(cache, entry_index);
      apr_uint64_t end = cache->l1.next == NO_INDEX
                       ? cache->l1.start_offset + cache->l1.size
                       : entry->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= size + cache->l1.current_data)
        return TRUE;

      /* Enlarge the insertion window
       */
      if (cache->l1.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l1.current_data = cache->l1.start_offset;
          cache->l1.next = cache->l1.first;
        }
      else
        {
          /* Remove the entry from the end of insertion window and promote
           * it to L2, if it is important enough.
           */
          svn_boolean_t keep = ensure_data_insertable_l2(cache, entry);

          /* We might have touched the group that contains ENTRY. Recheck. */
          if (entry_index == cache->l1.next)
            {
              if (keep)
                promote_entry(cache, entry);
              else
                drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached. But if it was, "can't insert" was the
   * right answer. */
}
svn_error_t *
svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
                                  apr_size_t total_size,
                                  apr_size_t directory_size,
                                  apr_size_t segment_count,
                                  svn_boolean_t thread_safe,
                                  svn_boolean_t allow_blocking_writes,
                                  apr_pool_t *pool)
{
  svn_membuffer_t *c;
  apr_uint32_t seg;
1658 apr_uint32_t group_count;
1659 apr_uint32_t main_group_count;
1660 apr_uint32_t spare_group_count;
1661 apr_uint32_t group_init_size;
1662 apr_uint64_t data_size;
1663 apr_uint64_t max_entry_size;
  /* Limit the total size (only relevant if we can address > 4GB)
   */
#if APR_SIZEOF_VOIDP > 4
  if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
    total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
#endif

  /* Limit the segment count
   */
  if (segment_count > MAX_SEGMENT_COUNT)
    segment_count = MAX_SEGMENT_COUNT;
  if (segment_count * MIN_SEGMENT_SIZE > total_size)
    segment_count = total_size / MIN_SEGMENT_SIZE;

  /* The segment count must be a power of two. Round it down as necessary.
   */
  while ((segment_count & (segment_count-1)) != 0)
    segment_count &= segment_count-1;
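  /* Illustration: the loop above clears the lowest set bit until a single
   * bit remains, e.g. 13 (1101b) -> 12 (1100b) -> 8 (1000b), i.e. it
   * rounds segment_count down to a power of two.
   */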
  /* if the caller hasn't provided a reasonable segment count or the above
   * limitations set it to 0, derive one from the absolute cache size
   */
  if (segment_count < 1)
    {
      /* Determine a reasonable number of cache segments. Segmentation is
       * only useful for multi-threaded / multi-core servers as it reduces
       * lock contention on these systems.
       *
       * But on these systems, we can assume that ample memory has been
       * allocated to this cache. Smaller caches should not be segmented
       * as this severely limits the maximum size of cachable items.
       *
       * Segments should not be smaller than 32MB and max. cachable item
       * size should grow as fast as segmentation.
       */
      apr_uint32_t segment_count_shift = 0;
      while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
             < total_size)
        ++segment_count_shift;

      segment_count = (apr_size_t)1 << segment_count_shift;
    }

  /* If we have an extremely large cache (>512 GB), the default segment
   * size may exceed the amount allocatable as one chunk. In that case,
   * increase segmentation until we are under the threshold.
   */
  while (   total_size / segment_count > MAX_SEGMENT_SIZE
         && segment_count < MAX_SEGMENT_COUNT)
    segment_count *= 2;

  /* allocate cache as an array of segments / cache objects */
  c = apr_palloc(pool, segment_count * sizeof(*c));
  /* Split total cache size into segments of equal size
   */
  total_size /= segment_count;
  directory_size /= segment_count;

  /* prevent pathological conditions: ensure a certain minimum cache size
   */
  if (total_size < 2 * sizeof(entry_group_t))
    total_size = 2 * sizeof(entry_group_t);

  /* adapt the dictionary size accordingly, if necessary:
   * It must hold at least one group and must not exceed the cache size.
   */
  if (directory_size > total_size - sizeof(entry_group_t))
    directory_size = total_size - sizeof(entry_group_t);
  if (directory_size < 2 * sizeof(entry_group_t))
    directory_size = 2 * sizeof(entry_group_t);
  /* limit the data size to what we can address.
   * Note that this cannot overflow since all values are of size_t.
   * Also, make it a multiple of the item placement granularity to
   * prevent subtle overflows.
   */
  data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;

  /* For cache sizes > 16TB, individual cache segments will be larger
   * than 32GB allowing for >4GB entries. But caching chunks larger
   * than 4GB is simply not supported.
   */
  max_entry_size = data_size / 8 > MAX_ITEM_SIZE
                 ? MAX_ITEM_SIZE
                 : data_size / 8;

  /* to keep the entries small, we use 32 bit indexes only
   * -> we need to ensure that no more than 4G entries exist.
   *
   * Note that this limit could only be exceeded in a very
   * theoretical setup with about 1EB of cache.
   */
  group_count = directory_size / sizeof(entry_group_t)
                    >= (APR_UINT32_MAX / GROUP_SIZE)
              ? (APR_UINT32_MAX / GROUP_SIZE) - 1
              : (apr_uint32_t)(directory_size / sizeof(entry_group_t));

  /* set some of the index directory aside as over-flow (spare) buffers */
  spare_group_count = MAX(group_count / 4, 1);
  main_group_count = group_count - spare_group_count;
  assert(spare_group_count > 0 && main_group_count > 0);
  group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
  for (seg = 0; seg < segment_count; ++seg)
    {
      /* allocate buffers and initialize cache members
       */
      c[seg].segment_count = (apr_uint32_t)segment_count;

      c[seg].group_count = main_group_count;
      c[seg].spare_group_count = spare_group_count;
      c[seg].first_spare_group = NO_INDEX;
      c[seg].max_spare_used = 0;

      c[seg].directory = apr_pcalloc(pool,
                                     group_count * sizeof(entry_group_t));

      /* Allocate and initialize directory entries as "not initialized",
       * hence "unused" */
      c[seg].group_initialized = apr_pcalloc(pool, group_init_size);

      /* Allocate 1/4th of the data buffer to L1
       */
      c[seg].l1.first = NO_INDEX;
      c[seg].l1.last = NO_INDEX;
      c[seg].l1.next = NO_INDEX;
      c[seg].l1.start_offset = 0;
      c[seg].l1.size = ALIGN_VALUE(data_size / 4);
      c[seg].l1.current_data = 0;

      /* The remaining 3/4th will be used as L2
       */
      c[seg].l2.first = NO_INDEX;
      c[seg].l2.last = NO_INDEX;
      c[seg].l2.next = NO_INDEX;
      c[seg].l2.start_offset = c[seg].l1.size;
      c[seg].l2.size = ALIGN_VALUE(data_size) - c[seg].l1.size;
      c[seg].l2.current_data = c[seg].l2.start_offset;

      /* This cast is safe because DATA_SIZE <= MAX_SEGMENT_SIZE. */
      c[seg].data = apr_palloc(pool, (apr_size_t)ALIGN_VALUE(data_size));
      c[seg].data_used = 0;
      c[seg].max_entry_size = max_entry_size;

      c[seg].used_entries = 0;
      c[seg].total_reads = 0;
      c[seg].total_writes = 0;
      c[seg].total_hits = 0;
      /* were allocations successful?
       */
      if (c[seg].data == NULL || c[seg].directory == NULL)
        {
          /* We are OOM. There is no need to proceed with "half a cache".
           */
          return svn_error_wrap_apr(APR_ENOMEM, "OOM");
        }

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
      /* A lock for intra-process synchronization to the cache, or NULL if
       * the cache's creator doesn't feel the cache needs to be
       * synchronized.
       */
      SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool));
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
      /* Same for read-write lock. */
      c[seg].lock = NULL;
      if (thread_safe)
        {
          apr_status_t status =
              apr_thread_rwlock_create(&(c[seg].lock), pool);
          if (status)
            return svn_error_wrap_apr(status, _("Can't create cache mutex"));
        }

      /* Select the behavior of write operations.
       */
      c[seg].allow_blocking_writes = allow_blocking_writes;
#endif
    }

  *cache = c;

  return SVN_NO_ERROR;
}
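/* Usage sketch (parameter values are illustrative only): create a 64MB,
 * thread-safe cache with blocking writes, a directory sized at 1/16th of
 * the total and an automatically derived segment count (0):
 *
 *   svn_membuffer_t *membuffer;
 *   SVN_ERR(svn_cache__membuffer_cache_create(&membuffer,
 *                                             0x4000000,
 *                                             0x400000,
 *                                             0,
 *                                             TRUE,
 *                                             TRUE,
 *                                             pool));
 */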
svn_error_t *
svn_cache__membuffer_clear(svn_membuffer_t *cache)
{
  apr_size_t seg;
  apr_size_t segment_count = cache->segment_count;

  /* Length of the group_initialized array in bytes.
     See also svn_cache__membuffer_cache_create(). */
  apr_size_t group_init_size
    = 1 + (cache->group_count + cache->spare_group_count)
            / (8 * GROUP_INIT_GRANULARITY);

  /* Clear segment by segment. This implies that other threads may read
     and write to other segments after we cleared them and before the
     last segment is done.

     However, that is no different from a write request coming through
     right after we cleared all segments because dependencies between
     cache entries (recursive lookup / access locks) are not allowed.
   */
  for (seg = 0; seg < segment_count; ++seg)
    {
1877 /* Unconditionally acquire the write lock. */
1878 SVN_ERR(force_write_lock_cache(&cache[seg]));
1880 /* Mark all groups as "not initialized", which implies "empty". */
1881 cache[seg].first_spare_group = NO_INDEX;
1882 cache[seg].max_spare_used = 0;
1884 memset(cache[seg].group_initialized, 0, group_init_size);
1886 /* Unlink L1 contents. */
1887 cache[seg].l1.first = NO_INDEX;
1888 cache[seg].l1.last = NO_INDEX;
1889 cache[seg].l1.next = NO_INDEX;
1890 cache[seg].l1.current_data = cache[seg].l1.start_offset;
1892 /* Unlink L2 contents. */
1893 cache[seg].l2.first = NO_INDEX;
1894 cache[seg].l2.last = NO_INDEX;
1895 cache[seg].l2.next = NO_INDEX;
1896 cache[seg].l2.current_data = cache[seg].l2.start_offset;
1898 /* Reset content counters. */
1899 cache[seg].data_used = 0;
1900 cache[seg].used_entries = 0;
1902 /* Segment may be used again. */
1903 SVN_ERR(unlock_cache(&cache[seg], SVN_NO_ERROR));
1907 return SVN_NO_ERROR;
1910 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1911 * by the hash value TO_FIND and set *FOUND accordingly.
1913 * Note: This function requires the caller to serialize access.
1914 * Don't call it directly, call entry_exists instead.
1916 static svn_error_t *
1917 entry_exists_internal(svn_membuffer_t *cache,
1918 apr_uint32_t group_index,
1919 const full_key_t *to_find,
1920 svn_boolean_t *found)
1922 *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1923 return SVN_NO_ERROR;
1926 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1927 * by the hash value TO_FIND and set *FOUND accordingly.
1929 static svn_error_t *
1930 entry_exists(svn_membuffer_t *cache,
1931 apr_uint32_t group_index,
1932 const full_key_t *to_find,
1933 svn_boolean_t *found)
1935 WITH_READ_LOCK(cache,
1936 entry_exists_internal(cache,
1937 group_index,
1938 to_find,
1939 found));
1941 return SVN_NO_ERROR;
1944 /* Given the SIZE and PRIORITY of a new item, return the cache level
1945 (L1 or L2) in segment CACHE that this item shall be inserted into.
1946 If we can neither find nor make enough room for the item, return NULL.
1947 */
1948 static cache_level_t *
1949 select_level(svn_membuffer_t *cache,
1950 apr_size_t size,
1951 apr_uint32_t priority)
1953 if (cache->max_entry_size >= size)
1955 /* Small items go into L1. */
1956 return ensure_data_insertable_l1(cache, size) ? &cache->l1 : NULL;
1960 else if ( cache->l2.size >= size
1961 && MAX_ITEM_SIZE >= size
1962 && priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1964 /* Large but important items go into L2. */
1965 entry_t dummy_entry = { { { 0 } } };
1966 dummy_entry.priority = priority;
1967 dummy_entry.size = size;
1969 return ensure_data_insertable_l2(cache, &dummy_entry) ? &cache->l2 : NULL;
1974 /* Don't cache large, unimportant items. */
1975 return NULL;
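/* To summarize the admission policy above (a reading aid, not extra logic):
 *
 *   size <= max_entry_size                              -> L1 (if room can be made)
 *   size fits L2 and MAX_ITEM_SIZE, priority > default  -> L2 (if room can be made)
 *   otherwise                                           -> NULL, i.e. don't cache
 */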
1978 /* Try to insert the serialized item given in BUFFER with ITEM_SIZE
1979 * into the group GROUP_INDEX of CACHE and uniquely identify it by
1980 * hash value TO_FIND.
1982 * However, there is no guarantee that it will actually be put into
1983 * the cache. If there is already some data associated with TO_FIND,
1984 * it will be removed from the cache even if the new data cannot
1985 * be inserted.
1987 * Note: This function requires the caller to serialize access.
1988 * Don't call it directly, call membuffer_cache_set instead.
1990 static svn_error_t *
1991 membuffer_cache_set_internal(svn_membuffer_t *cache,
1992 const full_key_t *to_find,
1993 apr_uint32_t group_index,
1994 char *buffer,
1995 apr_size_t item_size,
1996 apr_uint32_t priority,
1997 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1998 apr_pool_t *scratch_pool)
2000 cache_level_t *level;
2001 apr_size_t size = item_size + to_find->entry_key.key_len;
2003 /* first, look for a previous entry for the given key */
2004 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2006 /* if there is an old version of that entry and the new data fits into
2007 * the old spot, just re-use that space. */
2008 if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
2010 /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
2011 * lest we run into trouble with 32 bit underflow *not* treated as a
2012 * negative value.
2013 */
2014 cache->data_used += (apr_uint64_t)size - entry->size;
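/* A worked example (hypothetical sizes): with a 32 bit apr_size_t,
 * SIZE == 4096 and ENTRY->SIZE == 8192, the difference computed in 32
 * bits would wrap to 0xFFFFF000 and zero-extend into the 64 bit
 * addition, inflating DATA_USED by almost 4GB. Computed in 64 bits,
 * the wrap-around happens modulo 2^64 and cancels in the addition,
 * correctly shrinking DATA_USED by 4096. */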
2016 entry->priority = priority;
2018 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2020 /* Remember original content, type and key (hashes)
2022 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2023 memcpy(&entry->tag, tag, sizeof(*tag));
2027 if (entry->key.key_len)
2028 memcpy(cache->data + entry->offset, to_find->full_key.data,
2029 entry->key.key_len);
2031 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2032 item_size);
2034 cache->total_writes++;
2035 return SVN_NO_ERROR;
2038 /* if necessary, enlarge the insertion window.
2040 level = buffer ? select_level(cache, size, priority) : NULL;
2043 /* Remove old data for this key, if that exists.
2044 * Get an unused entry for the key and initialize it with
2045 * the serialized item's (future) position within data buffer.
2047 entry = find_entry(cache, group_index, to_find, TRUE);
2048 entry->size = size;
2049 entry->offset = level->current_data;
2050 entry->priority = priority;
2052 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2054 /* Remember original content, type and key (hashes)
2056 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2057 memcpy(&entry->tag, tag, sizeof(*tag));
2061 /* Link the entry properly.
2063 insert_entry(cache, entry);
2065 /* Copy the serialized item data into the cache.
2067 if (entry->key.key_len)
2068 memcpy(cache->data + entry->offset, to_find->full_key.data,
2069 entry->key.key_len);
2071 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2072 item_size);
2074 cache->total_writes++;
2078 /* if there is already an entry for this key, drop it.
2079 * Since ensure_data_insertable may have removed entries from
2080 * ENTRY's group, re-do the lookup.
2082 entry = find_entry(cache, group_index, to_find, FALSE);
2084 drop_entry(cache, entry);
2087 return SVN_NO_ERROR;
2090 /* Try to insert the ITEM and use the KEY to uniquely identify it.
2091 * However, there is no guarantee that it will actually be put into
2092 * the cache. If there is already some data associated with the KEY,
2093 * it will be removed from the cache even if the new data cannot
2094 * be inserted.
2096 * The SERIALIZER is called to transform the ITEM into a single,
2097 * flat data buffer. Temporary allocations may be done in POOL.
2099 static svn_error_t *
2100 membuffer_cache_set(svn_membuffer_t *cache,
2101 const full_key_t *key,
2102 void *item,
2103 svn_cache__serialize_func_t serializer,
2104 apr_uint32_t priority,
2105 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2106 apr_pool_t *scratch_pool)
2108 apr_uint32_t group_index;
2109 void *buffer = NULL;
2110 apr_size_t size = 0;
2112 /* find the entry group that will hold the key.
2114 group_index = get_group_index(&cache, &key->entry_key);
2116 /* Serialize the data, if there is any.
2117 */
2118 if (item)
2119 SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
2121 /* The actual cache data access needs to be sync'ed. */
2123 WITH_WRITE_LOCK(cache,
2124 membuffer_cache_set_internal(cache,
2130 DEBUG_CACHE_MEMBUFFER_TAG
2132 return SVN_NO_ERROR;
2135 /* Count a hit in ENTRY within CACHE.
2138 increment_hit_counters(svn_membuffer_t *cache, entry_t *entry)
2140 /* To minimize the memory footprint of the cache index, we limit local
2141 * hit counters to 32 bits. These may overflow but we don't really
2142 * care because at worst, ENTRY will be dropped from cache once every
2143 * few billion hits. */
2144 svn_atomic_inc(&entry->hit_count);
2146 /* That one is for stats only. */
2147 cache->total_hits++;
2150 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2151 * by the hash value TO_FIND. If no item has been stored for KEY,
2152 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
2153 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
2154 * be done in RESULT_POOL.
2156 * Note: This function requires the caller to serialize access.
2157 * Don't call it directly, call membuffer_cache_get instead.
2159 static svn_error_t *
2160 membuffer_cache_get_internal(svn_membuffer_t *cache,
2161 apr_uint32_t group_index,
2162 const full_key_t *to_find,
2163 char **buffer,
2164 apr_size_t *item_size,
2165 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2166 apr_pool_t *result_pool)
2171 /* The actual cache data access needs to be sync'ed. */
2173 entry = find_entry(cache, group_index, to_find, FALSE);
2174 cache->total_reads++;
2177 /* no such entry found.
2182 return SVN_NO_ERROR;
2185 size = ALIGN_VALUE(entry->size) - entry->key.key_len;
2186 *buffer = apr_palloc(result_pool, size);
2187 memcpy(*buffer, cache->data + entry->offset + entry->key.key_len, size);
2189 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2191 /* Check for overlapping entries.
2193 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2194 entry->offset + size
2195 <= get_entry(cache, entry->next)->offset);
2197 /* Compare original content, type and key (hashes)
2199 SVN_ERR(store_content_part(tag, *buffer, entry->size - entry->key.key_len,
2201 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2205 /* update hit statistics
2207 increment_hit_counters(cache, entry);
2208 *item_size = entry->size - entry->key.key_len;
2210 return SVN_NO_ERROR;
2213 /* Look for the *ITEM identified by KEY. If no item has been stored
2214 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
2215 * to re-construct the proper object from the serialized data.
2216 * Allocations will be done in POOL.
2218 static svn_error_t *
2219 membuffer_cache_get(svn_membuffer_t *cache,
2220 const full_key_t *key,
2221 void **item,
2222 svn_cache__deserialize_func_t deserializer,
2223 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2224 apr_pool_t *result_pool)
2226 apr_uint32_t group_index;
2227 char *buffer;
2228 apr_size_t size;
2230 /* find the entry group that will hold the key.
2232 group_index = get_group_index(&cache, &key->entry_key);
2233 WITH_READ_LOCK(cache,
2234 membuffer_cache_get_internal(cache,
2239 DEBUG_CACHE_MEMBUFFER_TAG
2242 /* re-construct the original data object from its serialized form.
2247 return SVN_NO_ERROR;
2250 return deserializer(item, buffer, size, result_pool);
2253 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2254 * by the hash value TO_FIND. If no item has been stored for KEY, *FOUND
2255 * will be set to FALSE, and to TRUE otherwise.
2257 static svn_error_t *
2258 membuffer_cache_has_key_internal(svn_membuffer_t *cache,
2259 apr_uint32_t group_index,
2260 const full_key_t *to_find,
2261 svn_boolean_t *found)
2263 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2266 /* This will often be called by "block read" when most data is already
2267 in L2 and only a few previously evicted items are added to L1
2268 again. While items in L1 are well protected for a while, L2
2269 items may get evicted soon. Thus, mark them all as "hit" to give
2270 them a higher chance of survival. */
2271 increment_hit_counters(cache, entry);
2279 return SVN_NO_ERROR;
2282 /* Look for an entry identified by KEY. If no item has been stored
2283 * for KEY, *FOUND will be set to FALSE, and to TRUE otherwise.
2285 /* Implements svn_cache__has_key for membuffer caches.
2287 static svn_error_t *
2288 membuffer_cache_has_key(svn_membuffer_t *cache,
2289 const full_key_t *key,
2290 svn_boolean_t *found)
2292 /* find the entry group that will hold the key.
2294 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2295 cache->total_reads++;
2297 WITH_READ_LOCK(cache,
2298 membuffer_cache_has_key_internal(cache,
2303 return SVN_NO_ERROR;
2306 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2307 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
2308 * If not found, *ITEM will be NULL.
2310 * Otherwise, the DESERIALIZER is called with that entry and the BATON
2311 * provided and will extract the desired information. The result is set
2312 * in *ITEM. Allocations will be done in POOL.
2314 * Note: This function requires the caller to serialize access.
2315 * Don't call it directly, call membuffer_cache_get_partial instead.
2317 static svn_error_t *
2318 membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
2319 apr_uint32_t group_index,
2320 const full_key_t *to_find,
2322 svn_boolean_t *found,
2323 svn_cache__partial_getter_func_t deserializer,
2325 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2326 apr_pool_t *result_pool)
2328 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2329 cache->total_reads++;
2335 return SVN_NO_ERROR;
2339 const void *item_data = cache->data + entry->offset + entry->key.key_len;
2340 apr_size_t item_size = entry->size - entry->key.key_len;
2342 increment_hit_counters(cache, entry);
2344 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2346 /* Check for overlapping entries.
2348 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2349 entry->offset + entry->size
2350 <= get_entry(cache, entry->next)->offset);
2352 /* Compare original content, type and key (hashes)
2354 SVN_ERR(store_content_part(tag, item_data, item_size, result_pool));
2355 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2359 return deserializer(item, item_data, item_size, baton, result_pool);
2363 /* Look for the cache entry identified by KEY. FOUND indicates
2364 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
2365 * the DESERIALIZER is called with that entry and the BATON provided
2366 * and will extract the desired information. The result is set in *ITEM.
2367 * Allocations will be done in POOL.
2369 static svn_error_t *
2370 membuffer_cache_get_partial(svn_membuffer_t *cache,
2371 const full_key_t *key,
2373 svn_boolean_t *found,
2374 svn_cache__partial_getter_func_t deserializer,
2376 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2377 apr_pool_t *result_pool)
2379 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2381 WITH_READ_LOCK(cache,
2382 membuffer_cache_get_partial_internal
2383 (cache, group_index, key, item, found,
2384 deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
2387 return SVN_NO_ERROR;
2390 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2391 * by the hash value TO_FIND. If no entry has been found, the function
2392 * returns without modifying the cache.
2394 * Otherwise, FUNC is called with that entry and the BATON provided
2395 * and may modify the cache entry. Allocations will be done in POOL.
2397 * Note: This function requires the caller to serialize access.
2398 * Don't call it directly, call membuffer_cache_set_partial instead.
2400 static svn_error_t *
2401 membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
2402 apr_uint32_t group_index,
2403 const full_key_t *to_find,
2404 svn_cache__partial_setter_func_t func,
2406 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2407 apr_pool_t *scratch_pool)
2409 /* cache item lookup
2411 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2412 cache->total_reads++;
2414 /* this function is a no-op if the item is not in cache
2420 /* access the serialized cache item */
2421 apr_size_t key_len = entry->key.key_len;
2422 void *item_data = cache->data + entry->offset + key_len;
2423 void *orig_data = item_data;
2424 apr_size_t item_size = entry->size - key_len;
2426 increment_hit_counters(cache, entry);
2427 cache->total_writes++;
2429 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2431 /* Check for overlapping entries.
2433 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2434 entry->offset + entry->size
2435 <= get_entry(cache, entry->next)->offset);
2437 /* Compare original content, type and key (hashes)
2439 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2440 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2444 /* modify it, preferably in-situ.
2446 err = func(&item_data, &item_size, baton, scratch_pool);
2450 /* Something somewhere went wrong while FUNC was modifying the
2451 * cache item. Thus, it might have become invalid / corrupted.
2452 * We had better drop it.
2454 drop_entry(cache, entry);
2460 /* if the modification caused a re-allocation, we need to remove
2461 * the old entry and copy the new data back into the cache.
2463 if (item_data != orig_data)
2465 /* Remove the old entry and try to make space for the new one.
2467 drop_entry(cache, entry);
2468 if ( (cache->max_entry_size >= item_size + key_len)
2469 && ensure_data_insertable_l1(cache, item_size + key_len))
2471 /* Write the new entry.
2473 entry = find_entry(cache, group_index, to_find, TRUE);
2474 entry->size = item_size + key_len;
2475 entry->offset = cache->l1.current_data;
2478 memcpy(cache->data + entry->offset,
2479 to_find->full_key.data, key_len);
2481 memcpy(cache->data + entry->offset + key_len, item_data,
2482 item_size);
2484 /* Link the entry properly.
2486 insert_entry(cache, entry);
2490 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2492 /* Remember original content, type and key (hashes)
2494 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2495 memcpy(&entry->tag, tag, sizeof(*tag));
2501 return SVN_NO_ERROR;
2504 /* Look for the cache entry identified by KEY. If no entry
2505 * has been found, the function returns without modifying the cache.
2506 * Otherwise, FUNC is called with that entry and the BATON provided
2507 * and may modify the cache entry. Allocations will be done in POOL.
2509 static svn_error_t *
2510 membuffer_cache_set_partial(svn_membuffer_t *cache,
2511 const full_key_t *key,
2512 svn_cache__partial_setter_func_t func,
2514 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2515 apr_pool_t *scratch_pool)
2517 /* cache item lookup
2519 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2520 WITH_WRITE_LOCK(cache,
2521 membuffer_cache_set_partial_internal
2522 (cache, group_index, key, func, baton,
2523 DEBUG_CACHE_MEMBUFFER_TAG
2526 /* done here -> unlock the cache
2528 return SVN_NO_ERROR;
2531 /* Implement the svn_cache__t interface on top of a shared membuffer cache.
2533 * Because membuffer caches tend to be very large, there will be rather few
2534 * of them (usually only one). Thus, the same instance shall be used as the
2535 * backend to many application-visible svn_cache__t instances. This should
2536 * also achieve global resource usage fairness.
2538 * To accommodate items from multiple resources, the individual keys must be
2539 * unique over all sources. This is achieved by simply adding a prefix key
2540 * that unambiguously identifies the item's context (e.g. path to the
2541 * respective repository). The prefix will be set upon construction of the
2542 * svn_cache__t instance.
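 */

/* For illustration (hypothetical prefixes): two front-ends created with
 * the prefixes "fsfs:/repos/alpha:revprop" and "fsfs:/repos/beta:revprop"
 * can both store an item under the same user-visible key without ever
 * colliding, because each prefix is folded into every combined key
 * before the shared membuffer is consulted.
 */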
2545 /* Internal cache structure (used in svn_cache__t.cache_internal) basically
2546 * holding the additional parameters needed to call the respective membuffer
2549 typedef struct svn_membuffer_cache_t
2551 /* this is where all our data will end up */
2553 svn_membuffer_t *membuffer;
2555 /* use this conversion function when inserting an item into the membuffer. */
2557 svn_cache__serialize_func_t serializer;
2559 /* use this conversion function when reading an item from the membuffer. */
2561 svn_cache__deserialize_func_t deserializer;
2563 /* Prepend this byte sequence to any key passed to us.
2564 * This makes our keys different from all keys used by svn_membuffer_cache_t
2565 * instances that we don't want to share cached data with.
2569 /* length of the keys that will be passed to us through the
2570 * svn_cache__t interface. May be APR_HASH_KEY_STRING.
2572 apr_ssize_t key_len;
2574 /* priority class for all items written through this interface */
2575 apr_uint32_t priority;
2577 /* Temporary buffer containing the hash key for the current access
2579 full_key_t combined_key;
2581 /* if enabled, this will serialize the access to this instance.
2583 svn_mutex__t *mutex;
2584 } svn_membuffer_cache_t;
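/* A rough call trace through such a front-end (a sketch; the public
 * entry points live in cache.c):
 *
 *   svn_cache__get(&value, &found, cache, key, pool)
 *     -> svn_membuffer_cache_get()      (vtable function, below)
 *        -> combine_key()               fold PREFIX + KEY into COMBINED_KEY
 *        -> membuffer_cache_get()       find the entry, copy serialized data
 *           -> deserializer()           re-construct the cached object
 */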
2586 /* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should
2587 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check.
2589 #define ALLOCATIONS_PER_POOL_CLEAR 10
2591 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2592 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2593 * This could replace combine_key() entirely but we actually use it only
2594 * when the quick path fails.
2597 combine_long_key(svn_membuffer_cache_t *cache,
2598 const void *key,
2599 apr_ssize_t key_len)
2601 apr_uint32_t *digest_buffer;
2603 apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2604 apr_size_t aligned_key_len;
2606 /* handle variable-length keys */
2607 if (key_len == APR_HASH_KEY_STRING)
2608 key_len = strlen((const char *) key);
2610 aligned_key_len = ALIGN_VALUE(key_len);
2613 svn_membuf__ensure(&cache->combined_key.full_key,
2614 aligned_key_len + prefix_len);
2616 key_copy = (char *)cache->combined_key.full_key.data + prefix_len;
2617 cache->combined_key.entry_key.key_len = aligned_key_len + prefix_len;
2618 memcpy(key_copy, key, key_len);
2619 memset(key_copy + key_len, 0, aligned_key_len - key_len);
2621 /* Hash key into 16 bytes. */
2622 digest_buffer = (apr_uint32_t *)cache->combined_key.entry_key.fingerprint;
2623 svn__fnv1a_32x4_raw(digest_buffer, key, key_len);
2625 /* Combine with prefix. */
2626 cache->combined_key.entry_key.fingerprint[0]
2627 ^= cache->prefix.entry_key.fingerprint[0];
2628 cache->combined_key.entry_key.fingerprint[1]
2629 ^= cache->prefix.entry_key.fingerprint[1];
2632 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2633 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2636 combine_key(svn_membuffer_cache_t *cache,
2637 const void *key,
2638 apr_ssize_t key_len)
2640 /* short, fixed-size keys are the most common case */
2641 if (key_len != APR_HASH_KEY_STRING && key_len <= 16)
2643 const apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2645 /* Copy of *key, padded with 0.
2646 * We put it just behind the prefix already copied into the COMBINED_KEY.
2647 * The buffer space has been allocated when the cache was created. */
2648 apr_uint64_t *data = (void *)((char *)cache->combined_key.full_key.data +
2650 assert(prefix_len <= cache->combined_key.full_key.size - 16);
2651 cache->combined_key.entry_key.key_len = prefix_len + 16;
2655 memcpy(data, key, key_len);
2657 /* scramble key DATA. All of this must be reversible to prevent key
2658 * collisions. So, we limit ourselves to xor and permutations. */
2659 data[1] = (data[1] << 27) | (data[1] >> 37);
2660 data[1] ^= data[0] & 0xffff;
2661 data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
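/* A sketch of the inverse transformation (never needed at run-time; it
 * merely demonstrates that the scrambling above is a bijection on the
 * 16 padded key bytes and hence cannot introduce collisions):
 *
 *   data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
 *   data[1] ^= data[0] & 0xffff;
 *   data[1] = (data[1] >> 27) | (data[1] << 37);
 */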
2663 /* combine with this cache's namespace */
2664 cache->combined_key.entry_key.fingerprint[0]
2665 = data[0] ^ cache->prefix.entry_key.fingerprint[0];
2666 cache->combined_key.entry_key.fingerprint[1]
2667 = data[1] ^ cache->prefix.entry_key.fingerprint[1];
2671 /* longer or variably sized keys */
2672 combine_long_key(cache, key, key_len);
2676 /* Implement svn_cache__vtable_t.get (not thread-safe)
2678 static svn_error_t *
2679 svn_membuffer_cache_get(void **value_p,
2680 svn_boolean_t *found,
2683 apr_pool_t *result_pool)
2685 svn_membuffer_cache_t *cache = cache_void;
2687 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2695 return SVN_NO_ERROR;
2698 /* construct the full, i.e. globally unique, key by adding
2699 * this cache instance's prefix. */
2701 combine_key(cache, key, cache->key_len);
2703 /* Look the item up. */
2704 SVN_ERR(membuffer_cache_get(cache->membuffer,
2705 &cache->combined_key,
2707 cache->deserializer,
2708 DEBUG_CACHE_MEMBUFFER_TAG
2712 *found = *value_p != NULL;
2714 return SVN_NO_ERROR;
2717 /* Implement svn_cache__vtable_t.has_key (not thread-safe)
2719 static svn_error_t *
2720 svn_membuffer_cache_has_key(svn_boolean_t *found,
2723 apr_pool_t *scratch_pool)
2725 svn_membuffer_cache_t *cache = cache_void;
2732 return SVN_NO_ERROR;
2735 /* construct the full, i.e. globally unique, key by adding
2736 * this cache instance's prefix. */
2738 combine_key(cache, key, cache->key_len);
2740 /* Look the item up. */
2741 SVN_ERR(membuffer_cache_has_key(cache->membuffer,
2742 &cache->combined_key,
2746 return SVN_NO_ERROR;
2749 /* Implement svn_cache__vtable_t.set (not thread-safe)
2751 static svn_error_t *
2752 svn_membuffer_cache_set(void *cache_void,
2755 apr_pool_t *scratch_pool)
2757 svn_membuffer_cache_t *cache = cache_void;
2759 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2763 return SVN_NO_ERROR;
2765 /* construct the full, i.e. globally unique, key by adding
2766 * this cache instance's prefix. */
2768 combine_key(cache, key, cache->key_len);
2770 /* (probably) add the item to the cache. But there is no real guarantee
2771 * that the item will actually be cached afterwards.
2773 return membuffer_cache_set(cache->membuffer,
2774 &cache->combined_key,
2778 DEBUG_CACHE_MEMBUFFER_TAG
2782 /* Implement svn_cache__vtable_t.iter as "not implemented"
2784 static svn_error_t *
2785 svn_membuffer_cache_iter(svn_boolean_t *completed,
2787 svn_iter_apr_hash_cb_t user_cb,
2789 apr_pool_t *scratch_pool)
2791 return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
2792 _("Can't iterate a membuffer-based cache"));
2795 /* Implement svn_cache__vtable_t.get_partial (not thread-safe)
2797 static svn_error_t *
2798 svn_membuffer_cache_get_partial(void **value_p,
2799 svn_boolean_t *found,
2802 svn_cache__partial_getter_func_t func,
2804 apr_pool_t *result_pool)
2806 svn_membuffer_cache_t *cache = cache_void;
2808 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2815 return SVN_NO_ERROR;
2818 combine_key(cache, key, cache->key_len);
2819 SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
2820 &cache->combined_key,
2825 DEBUG_CACHE_MEMBUFFER_TAG
2828 return SVN_NO_ERROR;
2831 /* Implement svn_cache__vtable_t.set_partial (not thread-safe)
2833 static svn_error_t *
2834 svn_membuffer_cache_set_partial(void *cache_void,
2836 svn_cache__partial_setter_func_t func,
2838 apr_pool_t *scratch_pool)
2840 svn_membuffer_cache_t *cache = cache_void;
2842 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2846 combine_key(cache, key, cache->key_len);
2847 SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
2848 &cache->combined_key,
2851 DEBUG_CACHE_MEMBUFFER_TAG
2854 return SVN_NO_ERROR;
2857 /* Implement svn_cache__vtable_t.is_cachable
2858 * (thread-safe even without mutex)
2860 static svn_boolean_t
2861 svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
2863 /* Don't allow extremely large element sizes. Otherwise, the cache
2864 * might be thrashed by a few extremely large entries. And the size
2865 * must be small enough to be stored in a 32 bit value.
2867 svn_membuffer_cache_t *cache = cache_void;
2868 return cache->priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY
2869 ? cache->membuffer->l2.size >= size && MAX_ITEM_SIZE >= size
2870 : size <= cache->membuffer->max_entry_size;
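/* Note that these limits mirror the admission rules in select_level()
 * above: high-priority items may use all of L2, everything else is
 * capped at the segment's MAX_ENTRY_SIZE. */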
2873 /* Add statistics of SEGMENT to INFO. If INCLUDE_HISTOGRAM is TRUE,
2874 * accumulate index bucket fill levels in INFO->HISTOGRAM.
2876 static svn_error_t *
2877 svn_membuffer_get_segment_info(svn_membuffer_t *segment,
2878 svn_cache__info_t *info,
2879 svn_boolean_t include_histogram)
2883 info->data_size += segment->l1.size + segment->l2.size;
2884 info->used_size += segment->data_used;
2885 info->total_size += segment->l1.size + segment->l2.size +
2886 segment->group_count * GROUP_SIZE * sizeof(entry_t);
2888 info->used_entries += segment->used_entries;
2889 info->total_entries += segment->group_count * GROUP_SIZE;
2891 if (include_histogram)
2892 for (i = 0; i < segment->group_count; ++i)
2893 if (is_group_initialized(segment, i))
2895 entry_group_t *chain_end
2896 = last_group_in_chain(segment, &segment->directory[i]);
2897 apr_size_t use
2898 = MIN(chain_end->header.used,
2899 sizeof(info->histogram) / sizeof(info->histogram[0]) - 1);
2900 info->histogram[use]++;
2903 return SVN_NO_ERROR;
2906 /* Implement svn_cache__vtable_t.get_info
2907 * (thread-safe even without mutex)
2909 static svn_error_t *
2910 svn_membuffer_cache_get_info(void *cache_void,
2911 svn_cache__info_t *info,
2912 svn_boolean_t reset,
2913 apr_pool_t *result_pool)
2915 svn_membuffer_cache_t *cache = cache_void;
2918 /* cache front-end specific data */
2920 info->id = apr_pstrdup(result_pool, cache->prefix.full_key.data);
2922 /* collect info from shared cache back-end */
2924 for (i = 0; i < cache->membuffer->segment_count; ++i)
2926 svn_membuffer_t *segment = cache->membuffer + i;
2927 WITH_READ_LOCK(segment,
2928 svn_membuffer_get_segment_info(segment, info, FALSE));
2931 return SVN_NO_ERROR;
2935 /* the v-table for membuffer-based caches (single-threaded access)
2937 static svn_cache__vtable_t membuffer_cache_vtable = {
2938 svn_membuffer_cache_get,
2939 svn_membuffer_cache_has_key,
2940 svn_membuffer_cache_set,
2941 svn_membuffer_cache_iter,
2942 svn_membuffer_cache_is_cachable,
2943 svn_membuffer_cache_get_partial,
2944 svn_membuffer_cache_set_partial,
2945 svn_membuffer_cache_get_info
2948 /* Implement svn_cache__vtable_t.get and serialize all cache access.
2950 static svn_error_t *
2951 svn_membuffer_cache_get_synced(void **value_p,
2952 svn_boolean_t *found,
2955 apr_pool_t *result_pool)
2957 svn_membuffer_cache_t *cache = cache_void;
2958 SVN_MUTEX__WITH_LOCK(cache->mutex,
2959 svn_membuffer_cache_get(value_p,
2965 return SVN_NO_ERROR;
2968 /* Implement svn_cache__vtable_t.has_key and serialize all cache access.
2970 static svn_error_t *
2971 svn_membuffer_cache_has_key_synced(svn_boolean_t *found,
2974 apr_pool_t *result_pool)
2976 svn_membuffer_cache_t *cache = cache_void;
2977 SVN_MUTEX__WITH_LOCK(cache->mutex,
2978 svn_membuffer_cache_has_key(found,
2983 return SVN_NO_ERROR;
2986 /* Implement svn_cache__vtable_t.set and serialize all cache access.
2988 static svn_error_t *
2989 svn_membuffer_cache_set_synced(void *cache_void,
2992 apr_pool_t *scratch_pool)
2994 svn_membuffer_cache_t *cache = cache_void;
2995 SVN_MUTEX__WITH_LOCK(cache->mutex,
2996 svn_membuffer_cache_set(cache_void,
3001 return SVN_NO_ERROR;
3004 /* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
3006 static svn_error_t *
3007 svn_membuffer_cache_get_partial_synced(void **value_p,
3008 svn_boolean_t *found,
3011 svn_cache__partial_getter_func_t func,
3013 apr_pool_t *result_pool)
3015 svn_membuffer_cache_t *cache = cache_void;
3016 SVN_MUTEX__WITH_LOCK(cache->mutex,
3017 svn_membuffer_cache_get_partial(value_p,
3025 return SVN_NO_ERROR;
3028 /* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
3030 static svn_error_t *
3031 svn_membuffer_cache_set_partial_synced(void *cache_void,
3033 svn_cache__partial_setter_func_t func,
3035 apr_pool_t *scratch_pool)
3037 svn_membuffer_cache_t *cache = cache_void;
3038 SVN_MUTEX__WITH_LOCK(cache->mutex,
3039 svn_membuffer_cache_set_partial(cache_void,
3045 return SVN_NO_ERROR;
3048 /* the v-table for membuffer-based caches (multi-threading support)
3050 static svn_cache__vtable_t membuffer_cache_synced_vtable = {
3051 svn_membuffer_cache_get_synced,
3052 svn_membuffer_cache_has_key_synced,
3053 svn_membuffer_cache_set_synced,
3054 svn_membuffer_cache_iter, /* no sync required */
3055 svn_membuffer_cache_is_cachable, /* no sync required */
3056 svn_membuffer_cache_get_partial_synced,
3057 svn_membuffer_cache_set_partial_synced,
3058 svn_membuffer_cache_get_info /* no sync required */
3061 /* standard serialization function for svn_stringbuf_t items.
3062 * Implements svn_cache__serialize_func_t.
3064 static svn_error_t *
3065 serialize_svn_stringbuf(void **buffer,
3066 apr_size_t *buffer_size,
3068 apr_pool_t *result_pool)
3070 svn_stringbuf_t *value_str = item;
3072 *buffer = value_str->data;
3073 *buffer_size = value_str->len + 1;
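/* The "+ 1" deliberately includes the terminating NUL so that
 * deserialize_svn_stringbuf() below can reconstruct the string as
 * LEN = BUFFER_SIZE - 1 while DATA remains a valid C string. */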
3075 return SVN_NO_ERROR;
3078 /* standard de-serialization function for svn_stringbuf_t items.
3079 * Implements svn_cache__deserialize_func_t.
3081 static svn_error_t *
3082 deserialize_svn_stringbuf(void **item,
3084 apr_size_t buffer_size,
3085 apr_pool_t *result_pool)
3087 svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));
3089 value_str->pool = result_pool;
3090 value_str->blocksize = buffer_size;
3091 value_str->data = buffer;
3092 value_str->len = buffer_size - 1;
3093 *item = value_str;
3095 return SVN_NO_ERROR;
3098 /* Construct a svn_cache__t object on top of a shared membuffer cache.
3101 svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
3102 svn_membuffer_t *membuffer,
3103 svn_cache__serialize_func_t serializer,
3104 svn_cache__deserialize_func_t deserializer,
3105 apr_ssize_t klen,
3106 const char *prefix,
3107 apr_uint32_t priority,
3108 svn_boolean_t thread_safe,
3109 apr_pool_t *result_pool,
3110 apr_pool_t *scratch_pool)
3112 svn_checksum_t *checksum;
3113 apr_size_t prefix_len, prefix_orig_len;
3115 /* allocate the cache header structures
3117 svn_cache__t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
3118 svn_membuffer_cache_t *cache = apr_pcalloc(result_pool, sizeof(*cache));
3120 /* initialize our internal cache header
3122 cache->membuffer = membuffer;
3123 cache->serializer = serializer
3124 ? serializer
3125 : serialize_svn_stringbuf;
3126 cache->deserializer = deserializer
3127 ? deserializer
3128 : deserialize_svn_stringbuf;
3129 cache->priority = priority;
3130 cache->key_len = klen;
3132 SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, result_pool));
3134 /* Copy the prefix into the prefix full key. Align it to ITEM_ALIGNMENT.
3135 * Don't forget to include the terminating NUL. */
3136 prefix_orig_len = strlen(prefix) + 1;
3137 prefix_len = ALIGN_VALUE(prefix_orig_len);
3139 svn_membuf__create(&cache->prefix.full_key, prefix_len, result_pool);
3140 memcpy((char *)cache->prefix.full_key.data, prefix, prefix_orig_len);
3141 memset((char *)cache->prefix.full_key.data + prefix_orig_len, 0,
3142 prefix_len - prefix_orig_len);
3144 /* Construct the folded prefix key. */
3145 SVN_ERR(svn_checksum(&checksum,
3150 memcpy(cache->prefix.entry_key.fingerprint, checksum->digest,
3151 sizeof(cache->prefix.entry_key.fingerprint));
3152 cache->prefix.entry_key.key_len = prefix_len;
3154 /* Initialize the combined key. Pre-allocate some extra room in the full
3155 * key such that we probably don't need to re-alloc. */
3156 cache->combined_key.entry_key = cache->prefix.entry_key;
3157 svn_membuf__create(&cache->combined_key.full_key, prefix_len + 200,
3159 memcpy(cache->combined_key.full_key.data, cache->prefix.full_key.data,
3162 /* initialize the generic cache wrapper
3164 wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
3165 : &membuffer_cache_vtable;
3166 wrapper->cache_internal = cache;
3167 wrapper->error_handler = 0;
3168 wrapper->error_baton = 0;
3169 wrapper->pretend_empty = !!getenv("SVN_X_DOES_NOT_MARK_THE_SPOT");
3172 return SVN_NO_ERROR;
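/* A usage sketch (hypothetical namespace string and pools; passing NULL
 * (de-)serializers selects the svn_stringbuf_t defaults above):
 *
 *   svn_cache__t *cache;
 *   SVN_ERR(svn_cache__create_membuffer_cache(
 *             &cache, svn_cache__get_global_membuffer_cache(),
 *             NULL, NULL, APR_HASH_KEY_STRING, "my-app:some-namespace",
 *             SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY, TRUE,
 *             result_pool, scratch_pool));
 */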
3175 static svn_error_t *
3176 svn_membuffer_get_global_segment_info(svn_membuffer_t *segment,
3177 svn_cache__info_t *info)
3179 info->gets += segment->total_reads;
3180 info->sets += segment->total_writes;
3181 info->hits += segment->total_hits;
3183 WITH_READ_LOCK(segment,
3184 svn_membuffer_get_segment_info(segment, info, TRUE));
3186 return SVN_NO_ERROR;
3189 svn_cache__info_t *
3190 svn_cache__membuffer_get_global_info(apr_pool_t *pool)
3194 svn_membuffer_t *membuffer = svn_cache__get_global_membuffer_cache();
3195 svn_cache__info_t *info = apr_pcalloc(pool, sizeof(*info));
3197 /* cache front-end specific data */
3199 info->id = "membuffer globals";
3201 /* collect info from shared cache back-end */
3203 for (i = 0; i < membuffer->segment_count; ++i)
3204 svn_error_clear(svn_membuffer_get_global_segment_info(membuffer + i,