/*
 * cache-membuffer.c: in-memory caching for Subversion
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
#include <assert.h>
#include <apr_md5.h>
#include <apr_thread_rwlock.h>

#include "svn_pools.h"
#include "svn_checksum.h"
#include "svn_private_config.h"
#include "svn_string.h"
#include "svn_sorts.h"  /* get the MIN macro */

#include "private/svn_atomic.h"
#include "private/svn_dep_compat.h"
#include "private/svn_mutex.h"
#include "private/svn_string_private.h"
/*
 * This svn_cache__t implementation actually consists of two parts:
 * a shared (per-process) singleton membuffer cache instance and shallow
 * svn_cache__t front-end instances that each use different key spaces.
 * For data management, they all forward to the singleton membuffer cache.
 *
 * A membuffer cache consists of two parts:
 *
 * 1. A linear data buffer containing cached items in a serialized
 *    representation, prefixed by their full cache keys.  There may be
 *    arbitrary gaps between entries.  This buffer is sub-divided into
 *    (currently two) cache levels.
 *
 * 2. A directory of cache entries.  This is organized similar to CPU
 *    data caches: for every possible key, there is exactly one group
 *    of entries that may contain the header info for an item with
 *    that given key.  The result is a GROUP_SIZE-way associative cache
 *    whose associativity can be dynamically increased.
 *
 * Only the start address of these two data parts is given as a native
 * pointer.  All other references are expressed as offsets to these
 * pointers.  With that design, it is relatively easy to share the same
 * data structure between different processes and / or to persist them
 * on disk.  These out-of-process features have not been implemented, yet.
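 *
 * For illustration, this is how the offset-based addressing resolves to
 * pointers (expressions assembled from the structures defined below, not
 * actual helpers in this file): the full key of an entry starts at
 *
 *   cache->data + entry->offset
 *
 * and, because the full key is stored immediately in front of the item,
 * the serialized item data starts at
 *
 *   cache->data + entry->offset + entry->key.key_len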
 *
 * Superficially, cache levels are being used as usual: insertion happens
 * into L1 and evictions will promote items to L2.  But their whole point
 * is a different one.  L1 uses a circular buffer, i.e. we have perfect
 * caching for the last N bytes where N is the size of L1.  L2 uses a more
 * elaborate scheme based on priorities and hit counts as described below.
 *
 * The data buffer usage information is implicitly given by the directory
 * entries.  Every USED entry has a reference to the previous and the next
 * used dictionary entry, and this doubly-linked list is ordered by the
 * offsets of their item data within the data buffer.  So removing data,
 * for instance, is done simply by unlinking it from the chain, implicitly
 * marking the entry as well as the data buffer section previously
 * associated to it as unused.  The first and last element of that chain
 * are being referenced from the respective cache level.
 *
 * Insertion can occur at only one, sliding position per cache level.  It
 * is marked by its offset in the data buffer and the index of the first
 * used entry at or behind that position.  If this gap is too small to
 * accommodate the new item (plus its full key), the insertion window is
 * extended as described below.  The new entry will always be inserted at
 * the bottom end of the window and since the next used entry is known,
 * properly sorted insertion is possible.
 *
 * To make the cache perform robustly in a wide range of usage scenarios,
 * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
 * details).  Every item holds a read hit counter and there is a global
 * read hit counter.  The more hits an entry has in relation to the
 * average, the more likely it is to be kept using a rand()-based
 * condition.  The test is applied only to the entry following the
 * insertion window.  If it doesn't get evicted, it is moved to the
 * beginning of that window and the window is moved past it.
 *
 * Moreover, the entry's hits get halved to make that entry more likely
 * to be removed the next time the sliding insertion / removal window
 * comes by.  As a result, frequently used entries are unlikely to be
 * dropped until they have gone unused for a while.  Also, even in a
 * cache thrashing situation, about 50% of the content survives every
 * time 50% of the cache gets re-written with new entries.  For details
 * on the fine-tuning involved, see the comments in
 * ensure_data_insertable_l2().
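 *
 * A worked example of that aging scheme (numbers illustrative only): an
 * entry that accumulated 8 hits and then stops being read survives the
 * first sweep of the window with its count halved to 4, then 2, then 1,
 * until a sweep finally finds no hit advantage left and it becomes a
 * likely eviction candidate.  Reads in the meantime increment the
 * counter and delay that decay; the exact keep / evict decision is made
 * in ensure_data_insertable_l2().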
 *
 * Due to the randomized mapping of keys to entry groups, some groups may
 * overflow.  In that case, there are spare groups that can be chained to
 * an already used group to extend it.
 *
 * To limit the entry size and management overhead, not the actual item
 * keys but only their hashed "fingerprint" will be stored.  These are
 * reasonably unique to prevent collisions, so we only need to support
 * up to one entry per entry key.  To guarantee that there are no
 * conflicts, however, we store the actual full key immediately in front
 * of the serialized item data.  That is, the entry offset actually
 * points to the full key and the key length stored in the entry acts as
 * an additional offset to find the actual item data.
 *
 * All access to the cached data needs to be serialized.  Because we want
 * to scale well despite that bottleneck, we simply segment the cache into
 * a number of independent caches (segments).  Items will be multiplexed
 * based on their hash key.
 */
/* APR's read-write lock implementation on Windows is horribly inefficient.
 * Even with very low contention a runtime overhead of 35% has been
 * measured for 'svn-bench null-export' over ra_serf.
 *
 * Use a simple mutex on Windows.  Because there is one mutex per segment,
 * large machines should (and usually can) be configured with large caches
 * such that read contention is kept low.  This is basically the situation
 * we had before 1.8.
 */
#ifdef WIN32
#  define USE_SIMPLE_MUTEX 1
#else
#  define USE_SIMPLE_MUTEX 0
#endif
/* For more efficient copy operations, let's align all data items properly.
 * Must be a power of 2.
 */
#define ITEM_ALIGNMENT 16

/* By default, don't create cache segments smaller than this value unless
 * the total cache size itself is smaller.
 */
#define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)

/* The minimum segment size we will allow for multi-segmented caches.
 */
#define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)

/* The maximum number of segments allowed.  Larger numbers reduce the size
 * of each segment, in turn reducing the max size of a cacheable item.
 * Also, each segment gets its own lock object.  The actual number
 * supported by the OS may therefore be lower and
 * svn_cache__membuffer_cache_create may return an error.
 */
#define MAX_SEGMENT_COUNT 0x10000

/* As of today, APR won't allocate chunks of 4GB or more.  So, limit the
 * segment size to slightly below that.
 */
#define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)

/* We don't mark the initialization status for every group but initialize
 * a number of groups at once.  That will allow for a very small init
 * flags vector that is likely to fit into the CPU caches even for fairly
 * large membuffer caches.  For instance, the default of 32 means 8x32
 * groups per byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group
 * x 40 index bytes/entry x 8 cache bytes/index byte = 1kB init vector /
 * 640MB cache.
 */
#define GROUP_INIT_GRANULARITY 32
/* Invalid index reference value.  Equivalent to (apr_uint32_t)(-1).
 */
#define NO_INDEX APR_UINT32_MAX

/* To save space in our group structure, we only use 32 bit size values
 * and, therefore, limit the size of each entry to just below 4GB.
 * Supporting larger items is not a good idea as the data transfer
 * to and from the cache would block other threads for a very long time.
 */
#define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
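
/* A worked example of the macro above: with ITEM_ALIGNMENT == 16,
 * MAX_ITEM_SIZE evaluates to (apr_uint32_t)(-16) == 0xFFFFFFF0, i.e.
 * 16 bytes short of 4GB and itself a multiple of ITEM_ALIGNMENT.
 */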
/* We use this structure to identify cache entries.  There cannot be two
 * entries with the same entry key.  However unlikely, though, two
 * different full keys (see full_key_t) may have the same entry key.  That
 * is a collision and at most one of them can be stored in the cache at
 * any time.
 */
typedef struct entry_key_t
{
  /* 16 byte finger print of the full key. */
  apr_uint64_t fingerprint[2];

  /* Length of the full key.  This value is aligned to ITEM_ALIGNMENT to
   * make sure the subsequent item content is properly aligned. */
  apr_size_t key_len;
} entry_key_t;

/* A full key, i.e. the combination of the cache's key prefix with some
 * dynamic part appended to it.  It also contains its ENTRY_KEY.
 */
typedef struct full_key_t
{
  /* Reduced form identifying the cache entry (if such an entry exists). */
  entry_key_t entry_key;

  /* This contains the full combination.  Note that the SIZE element may
   * be larger than ENTRY_KEY.KEY_LEN, but only the latter determines the
   * cache entry identity. */
  svn_membuf_t full_key;
} full_key_t;
/* Debugging / corruption detection support.
 * If you define this macro, the getter functions will perform expensive
 * checks on the item data, requested keys and entry types.  If there is
 * a mismatch found in any of them when being compared with the values
 * remembered in the setter function, an error will be returned.
 */
#ifdef SVN_DEBUG_CACHE_MEMBUFFER

/* The prefix passed to svn_cache__create_membuffer_cache() effectively
 * defines the type of all items stored by that cache instance.  We'll
 * take the last 15 bytes + \0 as plaintext for easy identification by
 * the dev.
 */
#define PREFIX_TAIL_LEN 16
/* This record will be attached to any cache entry.  It tracks item data
 * (content), key and type as hash values and is the baseline against
 * which the getters will compare their results to detect inconsistencies.
 */
typedef struct entry_tag_t
{
  /* MD5 checksum over the serialized item data.
   */
  unsigned char content_hash[APR_MD5_DIGESTSIZE];

  /* Hash value of the svn_cache_t instance that wrote the item
   * (i.e. a combination of type and repository).
   */
  unsigned char prefix_hash[APR_MD5_DIGESTSIZE];

  /* Note that this only covers the variable part of the key,
   * i.e. it will be different from the full key hash used for
   * the entry key.
   */
  unsigned char key_hash[APR_MD5_DIGESTSIZE];

  /* Last letters of the key in human readable format
   * (ends with the type identifier, e.g. "DAG").
   */
  char prefix_tail[PREFIX_TAIL_LEN];

  /* Length of the variable key part.
   */
  apr_size_t key_len;
} entry_tag_t;
/* Initialize all members of TAG except for the content hash.
 */
static svn_error_t *store_key_part(entry_tag_t *tag,
                                   const full_key_t *prefix_key,
                                   const void *key,
                                   apr_size_t key_len,
                                   apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  const char *prefix = prefix_key->full_key.data;
  apr_size_t prefix_len = strlen(prefix);

  if (prefix_len > sizeof(tag->prefix_tail))
    {
      prefix += prefix_len - (sizeof(tag->prefix_tail) - 1);
      prefix_len = sizeof(tag->prefix_tail) - 1;
    }

  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       key,
                       key_len,
                       pool));

  memcpy(tag->prefix_hash, prefix_key->entry_key.fingerprint,
         sizeof(tag->prefix_hash));
  memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));

  memset(tag->prefix_tail, 0, sizeof(tag->prefix_tail));
  memcpy(tag->prefix_tail, prefix, prefix_len + 1);

  tag->key_len = key_len;

  return SVN_NO_ERROR;
}
/* Initialize the content hash member of TAG.
 */
static svn_error_t *store_content_part(entry_tag_t *tag,
                                       const void *data,
                                       apr_size_t size,
                                       apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       data,
                       size,
                       pool));

  memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));

  return SVN_NO_ERROR;
}
/* Compare two tags and fail with an assertion upon differences.
 */
static svn_error_t *assert_equal_tags(const entry_tag_t *lhs,
                                      const entry_tag_t *rhs)
{
  SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
                        sizeof(lhs->content_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
                        sizeof(lhs->prefix_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
                        sizeof(lhs->key_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
                        sizeof(lhs->prefix_tail)) == 0);

  SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);

  return SVN_NO_ERROR;
}
/* Recurring code snippets.
 */
#define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,

#define DEBUG_CACHE_MEMBUFFER_TAG tag,

#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)                     \
  entry_tag_t _tag;                                              \
  entry_tag_t *tag = &_tag;                                      \
  if (key)                                                       \
    SVN_ERR(store_key_part(tag,                                  \
                           &cache->prefix,                       \
                           key,                                  \
                           cache->key_len == APR_HASH_KEY_STRING \
                               ? strlen((const char *) key)      \
                               : cache->key_len,                 \
                           pool));

#else

/* Don't generate any checks if consistency checks have not been enabled.
 */
#define DEBUG_CACHE_MEMBUFFER_TAG_ARG
#define DEBUG_CACHE_MEMBUFFER_TAG
#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)

#endif /* SVN_DEBUG_CACHE_MEMBUFFER */
/* A single dictionary entry.  Since all entries will be allocated once
 * during cache creation, those entries might be either used or unused.
 * An entry is used if and only if it is contained in the doubly-linked
 * list of used entries per cache level.
 */
typedef struct entry_t
{
  /* Identifying the data item.  Only valid for used entries.
   */
  entry_key_t key;

  /* The offset of the cached item's serialized data within the cache's
   * data buffer.  Only valid for used entries.
   */
  apr_uint64_t offset;

  /* Size of the serialized item data.  May be 0.  The MAX_ITEM_SIZE
   * macro above ensures that there will be no overflows.
   * Only valid for used entries.
   */
  apr_uint32_t size;

  /* Number of (read) hits for this entry.  Will be reset upon write.
   * Only valid for used entries.
   */
  svn_atomic_t hit_count;

  /* Reference to the next used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.last member.  NO_INDEX also implies that
   * the data buffer is not used beyond offset+size.
   * Only valid for used entries.
   */
  apr_uint32_t next;

  /* Reference to the previous used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.first member.
   * Only valid for used entries.
   */
  apr_uint32_t previous;

  /* Priority of this entry.  This entry will not be replaced by
   * lower-priority items.
   */
  apr_uint32_t priority;

#ifdef SVN_DEBUG_CACHE_MEMBUFFER
  /* Remember type, content and key hashes.
   */
  entry_tag_t tag;
#endif
} entry_t;
/* Group header struct.
 */
typedef struct group_header_t
{
  /* number of entries used [0 .. USED-1] */
  apr_uint32_t used;

  /* next group in the chain or NO_INDEX for the last.
   * For recyclable unused spare groups, this points to the next
   * unused spare group. */
  apr_uint32_t next;

  /* previous group in the chain or NO_INDEX for the first */
  apr_uint32_t previous;

  /* number of elements in the chain from start to here.
   * >= 1 for used groups, 0 for unused spare groups */
  apr_uint32_t chain_length;
} group_header_t;

/* The size of the group struct should be a power of two to make sure it
 * does not cross memory page boundaries.  Since we already access the
 * cache randomly, having two page table lookups instead of one is bad.
 */
#define GROUP_BLOCK_SIZE 512
/* A ~10-way associative cache seems to be a good compromise between
 * performance (worst-case lookups) and efficiency-loss due to collisions.
 *
 * This value may be changed to any positive integer.
 */
#define GROUP_SIZE \
          ((GROUP_BLOCK_SIZE - sizeof(group_header_t)) / sizeof(entry_t))
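
/* Worked example for the macro above (entry sizes are platform-dependent
 * and purely illustrative here): with sizeof(group_header_t) == 16, an
 * entry size of 48 bytes would give (512 - 16) / 48 == 10 entries per
 * group, while 56 bytes would give 8.  Either way, the associativity
 * stays near the "~10-way" target mentioned above.
 */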
/* Maximum number of groups in a chain, i.e. a cache index group can hold
 * up to GROUP_SIZE * MAX_GROUP_CHAIN_LENGTH entries.
 */
#define MAX_GROUP_CHAIN_LENGTH 8

/* We group dictionary entries to make this GROUP_SIZE-way associative.
 */
typedef struct entry_group_t
{
  group_header_t header;

  /* padding and also room for future extensions */
  char padding[GROUP_BLOCK_SIZE - sizeof(group_header_t)
               - sizeof(entry_t) * GROUP_SIZE];

  /* the actual entries */
  entry_t entries[GROUP_SIZE];
} entry_group_t;
/* Per-cache level header structure.  Instances of this are members of
 * svn_membuffer_t and will use non-overlapping sections of its DATA
 * buffer.  All offset values are global / absolute to that whole buffer.
 */
typedef struct cache_level_t
{
  /* Reference to the first (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t first;

  /* Reference to the last (defined by the order of content in the data
   * buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t last;

  /* Reference to the first (defined by the order of content in the data
   * buffer) used dictionary entry behind the insertion position
   * (current_data).  If NO_INDEX, the data buffer is free starting at the
   * current_data offset.
   */
  apr_uint32_t next;

  /* First offset in the cache's DATA buffer that belongs to this level.
   */
  apr_uint64_t start_offset;

  /* Size of data buffer allocated to this level in bytes.  Must be > 0.
   */
  apr_uint64_t size;

  /* Offset in the data buffer where the next insertion shall occur.
   */
  apr_uint64_t current_data;
} cache_level_t;
/* The cache header structure.
 */
struct svn_membuffer_t
{
  /* Number of cache segments.  Must be a power of 2.
     Please note that this structure represents only one such segment
     and that all segments must / will report the same values here. */
  apr_uint32_t segment_count;

  /* The dictionary, GROUP_SIZE * (group_count + spare_group_count)
   * entries long.  Never NULL.
   */
  entry_group_t *directory;

  /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
   * Allows for efficiently marking groups as "not initialized".
   */
  unsigned char *group_initialized;

  /* Size of dictionary in groups.  Must be > 0.
   */
  apr_uint32_t group_count;

  /* Total number of spare groups.
   */
  apr_uint32_t spare_group_count;

  /* First recyclable spare group.
   */
  apr_uint32_t first_spare_group;

  /* Maximum number of spare groups ever used, i.e. group index
   * group_count + max_spare_used is the first unused spare group
   * if first_spare_group is NO_INDEX.
   */
  apr_uint32_t max_spare_used;

  /* Pointer to the data buffer, data_size bytes long.  Never NULL.
   */
  unsigned char *data;

  /* Total number of data buffer bytes in use.
   */
  apr_uint64_t data_used;

  /* Largest entry size that we would accept.  For total cache sizes
   * less than 4TB (sic!), this is determined by the total cache size.
   */
  apr_uint64_t max_entry_size;

  /* The cache levels, organized as sub-buffers.  Since entries in the
   * DIRECTORY use offsets in DATA for addressing, a cache lookup does
   * not need to know the cache level of a specific item.  Cache levels
   * are only used to implement a hybrid insertion / eviction strategy.
   */

  /* First cache level, i.e. most insertions happen here.  Very large
   * items might get inserted directly into L2.  L1 is a strict FIFO
   * ring buffer that does not care about item priorities.  All evicted
   * items get a chance to be promoted to L2.
   */
  cache_level_t l1;

  /* Second cache level, i.e. data evicted from L1 will be added here
   * if the item is "important" enough or the L2 insertion window is
   * large enough.
   */
  cache_level_t l2;

  /* Number of used dictionary entries, i.e. number of cached items.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint32_t used_entries;

  /* Total number of calls to membuffer_cache_get.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_reads;

  /* Total number of calls to membuffer_cache_set.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_writes;

  /* Total number of hits since the cache's creation.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_hits;

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  /* A lock for intra-process synchronization to the cache, or NULL if
   * the cache's creator doesn't feel the cache needs to be thread-safe.
   */
  svn_mutex__t *lock;
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  /* Same for read-write lock. */
  apr_thread_rwlock_t *lock;

  /* If set, write access will wait until they get exclusive access.
   * Otherwise, they will become no-ops if the segment is currently
   * read-locked.  Only used when LOCK is an r/w lock.
   */
  svn_boolean_t allow_blocking_writes;
#endif
};
/* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
 */
#define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)

/* Align POINTER value to the next ITEM_ALIGNMENT boundary.
 */
#define ALIGN_POINTER(pointer) ((void*)ALIGN_VALUE((apr_size_t)(char*)(pointer)))
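
/* A few worked examples, assuming ITEM_ALIGNMENT == 16.  Note that
 * -ITEM_ALIGNMENT equals ~(ITEM_ALIGNMENT - 1) in two's complement,
 * i.e. the mask 0x...FFF0:
 *
 *   ALIGN_VALUE(0)  == 0
 *   ALIGN_VALUE(1)  == 16
 *   ALIGN_VALUE(16) == 16
 *   ALIGN_VALUE(17) == 32
 */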
/* If locking is supported for CACHE, acquire a read lock for it.
 */
static svn_error_t *
read_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
      if (status)
        return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire a write lock for it.
 * Set *SUCCESS to FALSE, if we couldn't acquire the write lock;
 * leave it untouched otherwise.
 */
static svn_error_t *
write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status;
      if (cache->allow_blocking_writes)
        {
          status = apr_thread_rwlock_wrlock(cache->lock);
        }
      else
        {
          status = apr_thread_rwlock_trywrlock(cache->lock);
          if (SVN_LOCK_IS_BUSY(status))
            {
              *success = FALSE;
              status = APR_SUCCESS;
            }
        }

      if (status)
        return svn_error_wrap_apr(status,
                                  _("Can't write-lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, acquire an unconditional write lock
 * for it.
 */
static svn_error_t *
force_write_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
  if (status)
    return svn_error_wrap_apr(status,
                              _("Can't write-lock cache mutex"));

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}
/* If locking is supported for CACHE, release the current lock
 * (read or write) and return ERR.
 */
static svn_error_t *
unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__unlock(cache->lock, err);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
      if (err)
        return err;

      if (status)
        return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
    }

  return err;
#else
  return err;
#endif
}
/* If supported, guard the execution of EXPR with a read lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 */
#define WITH_READ_LOCK(cache, expr)         \
do {                                        \
  SVN_ERR(read_lock_cache(cache));          \
  SVN_ERR(unlock_cache(cache, (expr)));     \
} while (0)
/* If supported, guard the execution of EXPR with a write lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 *
 * The write lock process is complicated if we are not allowed to wait
 * for the lock: If we didn't get the lock, we may still need to remove
 * an existing entry for the given key because that content is now stale.
 * Once we discover such an entry, we unconditionally do a blocking wait
 * for the write lock.  In case no old content could be found, a failing
 * lock attempt is simply a no-op and we exit the macro.
 */
#define WITH_WRITE_LOCK(cache, expr)                            \
do {                                                            \
  svn_boolean_t got_lock = TRUE;                                \
  SVN_ERR(write_lock_cache(cache, &got_lock));                  \
  if (!got_lock)                                                \
    {                                                           \
      svn_boolean_t exists;                                     \
      SVN_ERR(entry_exists(cache, group_index, key, &exists));  \
      if (exists)                                               \
        SVN_ERR(force_write_lock_cache(cache));                 \
      else                                                      \
        break;                                                  \
    }                                                           \
                                                                \
  SVN_ERR(unlock_cache(cache, (expr)));                         \
} while (0)
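
/* An illustrative walk-through of the macro above (not an exact trace):
 * with ALLOW_BLOCKING_WRITES disabled, WITH_WRITE_LOCK first tries a
 * non-blocking write lock.  If that fails while readers hold the
 * segment, the new data is simply not cached (the "break") - unless a
 * stale entry for KEY already exists, in which case we must block until
 * we can remove or replace it to keep the cache consistent.
 */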
/* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
 * been initialized, yet.  In that case, this group cannot hold any data.
 * Otherwise, a non-zero value is returned.
 */
static APR_INLINE unsigned char
is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char flags
    = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
  unsigned char bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));

  return flags & bit_mask;
}
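
/* Worked example for the bit arithmetic above, assuming the default
 * GROUP_INIT_GRANULARITY of 32: group_index 100 lives in init-flags byte
 * 100 / 256 == 0 and is covered by bit (100 / 32) % 8 == 3 of that byte,
 * i.e. bit_mask == 0x08.  That one flag bit covers groups 96..127.
 */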
/* Initializes the section of the directory in CACHE that contains
 * the entry group identified by GROUP_INDEX. */
static void
initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char bit_mask;
  apr_uint32_t i;

  /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
  apr_uint32_t first_index =
      (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
  apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
  if (last_index > cache->group_count + cache->spare_group_count)
    last_index = cache->group_count + cache->spare_group_count;

  for (i = first_index; i < last_index; ++i)
    {
      group_header_t *header = &cache->directory[i].header;
      header->used = 0;
      header->chain_length = 1;
      header->next = NO_INDEX;
      header->previous = NO_INDEX;
    }

  /* set the "initialized" bit for these groups */
  bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
  cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
    |= bit_mask;
}
/* Return the next available spare group from CACHE and mark it as used.
 * May return NULL.
 */
static entry_group_t *
allocate_spare_group(svn_membuffer_t *cache)
{
  entry_group_t *group = NULL;

  /* is there some ready-to-use group? */
  if (cache->first_spare_group != NO_INDEX)
    {
      group = &cache->directory[cache->first_spare_group];
      cache->first_spare_group = group->header.next;
    }

  /* any so far untouched spares available? */
  else if (cache->max_spare_used < cache->spare_group_count)
    {
      apr_uint32_t group_index = cache->group_count + cache->max_spare_used;
      ++cache->max_spare_used;

      if (!is_group_initialized(cache, group_index))
        initialize_group(cache, group_index);

      group = &cache->directory[group_index];
    }

  /* spare groups must be empty */
  assert(!group || !group->header.used);
  return group;
}

/* Mark previously allocated spare group GROUP in CACHE as "unused".
 */
static void
free_spare_group(svn_membuffer_t *cache,
                 entry_group_t *group)
{
  assert(group->header.used == 0);
  assert(group->header.previous != NO_INDEX);
  assert(group - cache->directory >= (apr_ssize_t)cache->group_count);

  /* unchain */
  cache->directory[group->header.previous].header.next = NO_INDEX;
  group->header.chain_length = 0;
  group->header.previous = NO_INDEX;

  /* add to chain of spares */
  group->header.next = cache->first_spare_group;
  cache->first_spare_group = (apr_uint32_t) (group - cache->directory);
}

/* Follow the group chain from GROUP in CACHE to its end and return the
 * last group.  May return GROUP.
 */
static entry_group_t *
last_group_in_chain(svn_membuffer_t *cache,
                    entry_group_t *group)
{
  while (group->header.next != NO_INDEX)
    group = &cache->directory[group->header.next];

  return group;
}

/* Return the CHAIN_INDEX-th element in the group chain starting from group
 * START_GROUP_INDEX in CACHE.
 */
static entry_group_t *
get_group(svn_membuffer_t *cache,
          apr_uint32_t start_group_index,
          apr_uint32_t chain_index)
{
  entry_group_t *group = &cache->directory[start_group_index];
  for (; chain_index; --chain_index)
    group = &cache->directory[group->header.next];

  return group;
}
/* Resolve a dictionary entry reference, i.e. return the entry
 * for the given index IDX in CACHE.
 */
static APR_INLINE entry_t *
get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
{
  return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
}
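
/* Index arithmetic example (GROUP_SIZE is platform-dependent; assume 8
 * here for illustration): idx 21 resolves to group 21 / 8 == 2, entry
 * slot 21 % 8 == 5.  get_index() below inverts this mapping.
 */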
/* Get the entry reference, i.e. index, for the given ENTRY.
 */
static APR_INLINE apr_uint32_t
get_index(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t group_index
    = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);

  return (apr_uint32_t)group_index * GROUP_SIZE
       + (apr_uint32_t)(entry - cache->directory[group_index].entries);
}

/* Return the cache level of ENTRY in CACHE.
 */
static cache_level_t *
get_cache_level(svn_membuffer_t *cache, entry_t *entry)
{
  return entry->offset < cache->l1.size ? &cache->l1
                                        : &cache->l2;
}
/* Insert ENTRY to the chain of items that belong to LEVEL in CACHE.  IDX
 * is ENTRY's item index and is only given for efficiency.  The insertion
 * takes place just before LEVEL->NEXT.  *CACHE will not be modified.
 */
static void
chain_entry(svn_membuffer_t *cache,
            cache_level_t *level,
            entry_t *entry,
            apr_uint32_t idx)
{
  /* insert ENTRY before this item */
  entry_t *next = level->next == NO_INDEX
                ? NULL
                : get_entry(cache, level->next);
  assert(idx == get_index(cache, entry));

  /* update entry chain
   */
  entry->next = level->next;
  if (level->first == NO_INDEX)
    {
      /* insert as the first entry and only in the chain
       */
      entry->previous = NO_INDEX;
      level->first = idx;
      level->last = idx;
    }
  else if (next == NULL)
    {
      /* insert as the last entry in the chain.
       * Note that it cannot also be at the beginning of the chain.
       */
      entry->previous = level->last;
      get_entry(cache, level->last)->next = idx;
      level->last = idx;
    }
  else
    {
      /* insert either at the start of a non-empty list or
       * somewhere in the middle
       */
      entry->previous = next->previous;
      next->previous = idx;

      if (entry->previous != NO_INDEX)
        get_entry(cache, entry->previous)->next = idx;
      else
        level->first = idx;
    }
}

/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE.  IDX
 * is ENTRY's item index and is only given for efficiency.  Please note
 * that neither *CACHE nor *ENTRY will be modified.
 */
static void
unchain_entry(svn_membuffer_t *cache,
              cache_level_t *level,
              entry_t *entry,
              apr_uint32_t idx)
{
  assert(idx == get_index(cache, entry));

  /* update the insertion window reference, if necessary
   */
  if (level->next == idx)
    level->next = entry->next;

  /* unlink it from the chain of used entries
   */
  if (entry->previous == NO_INDEX)
    level->first = entry->next;
  else
    get_entry(cache, entry->previous)->next = entry->next;

  if (entry->next == NO_INDEX)
    level->last = entry->previous;
  else
    get_entry(cache, entry->next)->previous = entry->previous;
}
/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
 * In contrast to insertion, removal is possible for any entry.
 */
static void
drop_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *last_group
    = last_group_in_chain(cache, &cache->directory[group_index]);
  apr_uint32_t last_in_group
    = (apr_uint32_t) ((last_group - cache->directory) * GROUP_SIZE
                      + last_group->header.used - 1);

  cache_level_t *level = get_cache_level(cache, entry);

  /* update global cache usage counters
   */
  cache->used_entries--;
  cache->data_used -= entry->size;

  /* extend the insertion window, if the entry happens to border it
   */
  if (idx == level->next)
    level->next = entry->next;
  else if (entry->next == level->next)
    {
      /* insertion window starts right behind the entry to remove
       */
      if (entry->previous == NO_INDEX)
        {
          /* remove the first entry -> insertion may start at pos 0, now */
          level->current_data = level->start_offset;
        }
      else
        {
          /* insertion may start right behind the previous entry */
          entry_t *previous = get_entry(cache, entry->previous);
          level->current_data = ALIGN_VALUE(  previous->offset
                                            + previous->size);
        }
    }

  /* unlink it from the chain of used entries
   */
  unchain_entry(cache, level, entry, idx);

  /* Move last entry into hole (if the removed one is not the last used).
   * We need to do this since all used entries are at the beginning of
   * the group's entries array.
   */
  if (idx != last_in_group)
    {
      /* copy the last used entry to the removed entry's index
       */
      *entry = last_group->entries[last_group->header.used-1];

      /* this ENTRY may belong to a different cache level than the entry
       * we have just removed */
      level = get_cache_level(cache, entry);

      /* update foreign links to new index
       */
      if (last_in_group == level->next)
        level->next = idx;

      if (entry->previous == NO_INDEX)
        level->first = idx;
      else
        get_entry(cache, entry->previous)->next = idx;

      if (entry->next == NO_INDEX)
        level->last = idx;
      else
        get_entry(cache, entry->next)->previous = idx;
    }

  /* Update the number of used entries.
   */
  last_group->header.used--;

  /* Release the last group in the chain if it is a spare group
   */
  if (!last_group->header.used && last_group->header.previous != NO_INDEX)
    free_spare_group(cache, last_group);
}
/* Insert ENTRY into the chain of used dictionary entries.  The entry's
 * offset and size members must already have been initialized.  Also,
 * the offset must match the beginning of the insertion window.
 */
static void
insert_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *group = &cache->directory[group_index];
  cache_level_t *level = get_cache_level(cache, entry);

  /* The entry must start at the beginning of the insertion window.
   * It must also be the first unused entry in the group.
   */
  assert(entry->offset == level->current_data);
  assert(idx == group_index * GROUP_SIZE + group->header.used);
  level->current_data = ALIGN_VALUE(entry->offset + entry->size);

  /* update usage counters
   */
  cache->used_entries++;
  cache->data_used += entry->size;
  entry->hit_count = 0;
  group->header.used++;

  /* update entry chain
   */
  chain_entry(cache, level, entry, idx);

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
 * respective item.
 */
static apr_uint32_t
get_group_index(svn_membuffer_t **cache,
                const entry_key_t *key)
{
  svn_membuffer_t *segment0 = *cache;
  apr_uint64_t key0 = key->fingerprint[0];
  apr_uint64_t key1 = key->fingerprint[1];

  /* select the cache segment to use. they have all the same group_count.
   * Since key may not be well-distributed, pre-fold it to a smaller but
   * "denser" range.  The modulus is a prime larger than the largest
   * possible segment count. */
  *cache = &segment0[(key1 % APR_UINT64_C(2809637) + (key0 / 37))
                     & (segment0->segment_count - 1)];
  return (key0 % APR_UINT64_C(5030895599)) % segment0->group_count;
}
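
/* Example of the masking above (numbers purely illustrative): with
 * segment_count == 16, the mask (segment_count - 1) == 15 == 0xf keeps
 * just the low 4 bits of the pre-folded key - which is why the segment
 * count must be a power of two.
 */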
/* Reduce the hit count of ENTRY and update the accumulated hit info
 * in CACHE accordingly.
 */
static APR_INLINE void
let_entry_age(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;

  if (hits_removed)
    {
      entry->hit_count -= hits_removed;
    }
  else
    {
      entry->priority /= 2;
    }
}

/* Return whether the keys in LHS and RHS match.
 */
static svn_boolean_t
entry_keys_match(const entry_key_t *lhs,
                 const entry_key_t *rhs)
{
  return (lhs->fingerprint[0] == rhs->fingerprint[0])
      && (lhs->fingerprint[1] == rhs->fingerprint[1])
      && (lhs->key_len == rhs->key_len);
}
/* Given the GROUP_INDEX that shall contain an entry with the hash key
 * TO_FIND, find that entry in the specified group.
 *
 * If FIND_EMPTY is not set, this function will return the one used entry
 * that actually matches the hash or NULL, if no such entry exists.
 *
 * If FIND_EMPTY has been set, this function will return an entry that
 * may hold new content for the given hash: either the one used entry
 * whose hash actually matches (dropped first, i.e. made fit to be
 * replaced with new content), an unused entry, or a forcibly removed
 * entry (if all group entries are currently in use).  The entry's hash
 * value will be initialized with TO_FIND.
 *
 * Note: This function requires the caller to appropriately lock the
 * CACHE.  For FIND_EMPTY==FALSE, a read lock is required, for
 * FIND_EMPTY==TRUE, the write lock must have been acquired.
 */
static entry_t *
find_entry(svn_membuffer_t *cache,
           apr_uint32_t group_index,
           const full_key_t *to_find,
           svn_boolean_t find_empty)
{
  entry_group_t *group;
  entry_t *entry = NULL;
  apr_size_t i;

  /* get the group that *must* contain the entry
   */
  group = &cache->directory[group_index];

  /* If the entry group has not been initialized, yet, there is no data.
   */
  if (! is_group_initialized(cache, group_index))
    {
      if (find_empty)
        {
          initialize_group(cache, group_index);
          entry = &group->entries[0];

          /* initialize entry for the new key */
          entry->key = to_find->entry_key;
        }

      return entry;
    }

  /* try to find the matching entry
   */
  while (1)
    {
      for (i = 0; i < group->header.used; ++i)
        if (entry_keys_match(&group->entries[i].key, &to_find->entry_key))
          {
            /* This is the only entry that _may_ contain the correct data. */
            entry = &group->entries[i];

            /* If we want to preserve it, check that it is an actual match. */
            if (!find_empty)
              {
                /* If there is no full key to compare, we are done. */
                if (!entry->key.key_len)
                  return entry;

                /* Compare the full key. */
                if (memcmp(to_find->full_key.data,
                           cache->data + entry->offset,
                           entry->key.key_len) == 0)
                  return entry;

                /* Key conflict. The entry to find cannot be anywhere else.
                 * Therefore, it is not cached. */
                return NULL;
              }

            /* need to empty that entry */
            drop_entry(cache, entry);
            if (group->header.used == GROUP_SIZE)
              group = last_group_in_chain(cache, group);
            else if (group->header.chain_length == 0)
              group = last_group_in_chain(cache,
                                          &cache->directory[group_index]);

            break;
          }

      /* No entry found (actually, none left to find).
       */
      if (group->header.next == NO_INDEX)
        break;

      /* only full groups may chain */
      assert(group->header.used == GROUP_SIZE);
      group = &cache->directory[group->header.next];
    }

  /* None found. Are we looking for a free entry?
   */
  if (find_empty)
    {
      /* There is no empty entry in the chain, try chaining a spare group.
       */
      if (   group->header.used == GROUP_SIZE
          && group->header.chain_length < MAX_GROUP_CHAIN_LENGTH)
        {
          entry_group_t *new_group = allocate_spare_group(cache);
          if (new_group)
            {
              /* chain groups
               */
              new_group->header.chain_length = group->header.chain_length + 1;
              new_group->header.previous = (apr_uint32_t) (group -
                                                           cache->directory);
              new_group->header.next = NO_INDEX;
              group->header.next = (apr_uint32_t) (new_group -
                                                   cache->directory);
              group = new_group;
            }
        }

      /* if GROUP is still filled, we need to remove a random entry */
      if (group->header.used == GROUP_SIZE)
        {
          /* every entry gets the same chance of being removed.
           * Otherwise, we free the first entry, fill it and
           * remove it again on the next occasion without considering
           * the other entries in this group.
           *
           * We hit only one random group instead of processing all
           * groups in the chain.
           */
          cache_level_t *entry_level;
          int to_remove = rand() % (GROUP_SIZE * group->header.chain_length);
          entry_group_t *to_shrink
            = get_group(cache, group_index, to_remove / GROUP_SIZE);

          entry = &to_shrink->entries[to_remove % GROUP_SIZE];
          entry_level = get_cache_level(cache, entry);
          for (i = 0; i < GROUP_SIZE; ++i)
            {
              /* keep L1 entries whenever possible */
              cache_level_t *level
                = get_cache_level(cache, &to_shrink->entries[i]);
              if (   (level != entry_level && entry_level == &cache->l1)
                  || (entry->hit_count > to_shrink->entries[i].hit_count))
                {
                  entry_level = level;
                  entry = &to_shrink->entries[i];
                }
            }

          /* for the entries that have not been removed,
           * reduce their hit counts to put them at a relative
           * disadvantage the next time.
           */
          for (i = 0; i < GROUP_SIZE; ++i)
            if (entry != &to_shrink->entries[i])
              let_entry_age(cache, &to_shrink->entries[i]);

          drop_entry(cache, entry);
        }

      /* initialize entry for the new key
       */
      entry = &group->entries[group->header.used];
      entry->key = to_find->entry_key;
    }

  return entry;
}
/* Move a surviving ENTRY from just behind the insertion window to
 * its beginning and move the insertion window up accordingly.
 */
static void
move_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t size = ALIGN_VALUE(entry->size);
  cache_level_t *level = get_cache_level(cache, entry);

  /* This entry survived this cleansing run.  Reset half of its
   * hit count so that its removal gets more likely in the next
   * run unless someone read / hit this entry in the meantime.
   */
  let_entry_age(cache, entry);

  /* Move the entry to the start of the empty / insertion section
   * (if it isn't there already).  Size-aligned moves are legal
   * since all offsets and block sizes share this same alignment.
   * Size-aligned moves tend to be faster than non-aligned ones
   * because no "odd" bytes at the end need special treatment.
   */
  if (entry->offset != level->current_data)
    {
      memmove(cache->data + level->current_data,
              cache->data + entry->offset,
              size);
      entry->offset = level->current_data;
    }

  /* The insertion position is now directly behind this entry.
   */
  level->current_data = entry->offset + size;
  level->next = entry->next;

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}
/* Move ENTRY in CACHE from L1 to L2.
 */
static void
promote_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t idx = get_index(cache, entry);
  apr_size_t size = ALIGN_VALUE(entry->size);
  assert(get_cache_level(cache, entry) == &cache->l1);
  assert(idx == cache->l1.next);

  /* copy item from the current location in L1 to the start of L2's
   * insertion window */
  memmove(cache->data + cache->l2.current_data,
          cache->data + entry->offset,
          size);
  entry->offset = cache->l2.current_data;

  /* The insertion position is now directly behind this entry.
   */
  cache->l2.current_data += size;

  /* remove ENTRY from chain of L1 entries and put it into L2
   */
  unchain_entry(cache, &cache->l1, entry, idx);
  chain_entry(cache, &cache->l2, entry, idx);
}
/* This function implements the cache insertion / eviction strategy for L2.
 *
 * If necessary, enlarge the insertion window of CACHE->L2 until it is at
 * least TO_FIT_IN->SIZE bytes long.  TO_FIT_IN->SIZE must not exceed the
 * data buffer size allocated to CACHE->L2.
 *
 * Return TRUE if enough room could be found or made.  A FALSE result
 * indicates that the respective item shall not be added.
 */
static svn_boolean_t
ensure_data_insertable_l2(svn_membuffer_t *cache,
                          entry_t *to_fit_in)
{
  entry_t *entry;

  /* accumulated size of the entries that have been removed to make
   * room for the new one.
   */
  apr_size_t moved_size = 0;

  /* count the number of entries that got moved.  A single large entry
   * being moved is not enough to reject an insertion.
   */
  apr_size_t moved_count = 0;

  /* accumulated "worth" of items dropped so far */
  apr_uint64_t drop_hits = 0;

  /* estimated "worth" of the new entry */
  apr_uint64_t drop_hits_limit = (to_fit_in->hit_count + 1)
                               * (apr_uint64_t)to_fit_in->priority;

  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually:
   *
   * - the incoming entry is small enough to fit into L2
   * - every iteration either frees parts of L2 or counts the moved size
   * - eventually, we either moved too many items with too much total size
   *   to accept the new entry, or made enough room in L2 for the new entry
   *
   * Low-prio items get rejected even sooner.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint64_t end = cache->l2.next == NO_INDEX
                       ? cache->l2.start_offset + cache->l2.size
                       : get_entry(cache, cache->l2.next)->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= to_fit_in->size + cache->l2.current_data)
        return TRUE;

      /* Don't be too eager to cache data.  If a lot of data has been moved
       * around, the current item has probably a relatively low priority.
       * We must also limit the effort spent here, even in case of faulty
       * heuristics.  Therefore, give up after some time.
       */
      if (moved_size > 4 * to_fit_in->size && moved_count > 7)
        return FALSE;

      /* if the net worth (in weighted hits) of items removed is already
       * larger than what we want to insert, reject TO_FIT_IN because it
       * still does not fit in. */
      if (drop_hits > drop_hits_limit)
        return FALSE;

      /* try to enlarge the insertion window
       */
      if (cache->l2.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l2.current_data = cache->l2.start_offset;
          cache->l2.next = cache->l2.first;
        }
      else
        {
          svn_boolean_t keep;
          entry = get_entry(cache, cache->l2.next);

          if (to_fit_in->priority < SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
            {
              /* Low prio items can be accepted only if the current
               * entry is of even lower prio and has fewer hits.
               */
              if (   entry->priority > to_fit_in->priority
                  || entry->hit_count > to_fit_in->hit_count)
                return FALSE;
            }

          if (entry->priority <= SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
            {
              /* Be quick to remove low-prio entries - even if the incoming
               * one is low-prio as well.  This makes room for more important
               * data and replaces existing data with newly read information.
               */
              keep = FALSE;
            }
          else
            {
              /* If the existing data is the same prio as the incoming data,
               * drop the existing entry if it had seen fewer (probably 0)
               * hits than the entry coming in from L1.  In case of different
               * priorities, keep the current entry if it has higher prio.
               * The new entry may still find room by ousting other entries.
               */
              keep = to_fit_in->priority == entry->priority
                   ? entry->hit_count >= to_fit_in->hit_count
                   : entry->priority > to_fit_in->priority;
            }

          /* keepers or destroyers? */
          if (keep)
            {
              /* Moving entries around is not for free -> track costs. */
              moved_size += entry->size;
              moved_count++;
              move_entry(cache, entry);
            }
          else
            {
              /* Drop the entry from the end of the insertion window.
               * Count the "hit importance" such that we are not sacrificing
               * too much of the high-hit contents.  However, don't count
               * low-priority hits because higher prio entries will often
               * provide the same data but in a further stage of processing.
               */
              if (entry->priority > SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
                drop_hits += entry->hit_count * (apr_uint64_t)entry->priority;

              drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached.  But if it was, "can't insert" was the
   * right answer. */
}
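
/* A worked example of the DROP_HITS budget above (numbers illustrative):
 * an incoming entry with hit_count == 3 and priority P gets a budget of
 * (3 + 1) * P.  Evicting two same-priority entries with 1 and 2 hits
 * accumulates 3 * P, which stays within budget; dropping a third entry
 * with 2 hits pushes the total to 5 * P, so the next check exceeds the
 * budget and the insertion gets rejected instead.
 */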
/* This function implements the cache insertion / eviction strategy for L1.
 *
 * If necessary, enlarge the insertion window of CACHE->L1 by promoting
 * entries to L2 until it is at least SIZE bytes long.
 *
 * Return TRUE if enough room could be found or made.  A FALSE result
 * indicates that the respective item shall not be added because it is
 * too large.
 */
static svn_boolean_t
ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
{
  /* Guarantees that the while loop will terminate. */
  if (size > cache->l1.size)
    return FALSE;

  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint32_t entry_index = cache->l1.next;
      entry_t *entry = get_entry(cache, entry_index);
      apr_uint64_t end = cache->l1.next == NO_INDEX
                       ? cache->l1.start_offset + cache->l1.size
                       : entry->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= size + cache->l1.current_data)
        return TRUE;

      /* Enlarge the insertion window
       */
      if (cache->l1.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l1.current_data = cache->l1.start_offset;
          cache->l1.next = cache->l1.first;
        }
      else
        {
          /* Remove the entry from the end of insertion window and promote
           * it to L2, if it is important enough.
           */
          svn_boolean_t keep = ensure_data_insertable_l2(cache, entry);

          /* We might have touched the group that contains ENTRY. Recheck. */
          if (entry_index == cache->l1.next)
            {
              if (keep)
                promote_entry(cache, entry);
              else
                drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached.  But if it was, "can't insert" was the
   * right answer. */
}
/* Mimic apr_pcalloc in APR_POOL_DEBUG mode, i.e. handle failed allocations
 * (e.g. OOM) properly: Allocate at least SIZE bytes from POOL and zero
 * the content of the allocated memory if ZERO has been set.  Return NULL
 * upon failed allocations.
 *
 * Also, satisfy our buffer alignment needs for performance reasons.
 */
static void *secure_aligned_alloc(apr_pool_t *pool,
                                  apr_size_t size,
                                  svn_boolean_t zero)
{
  void *memory = apr_palloc(pool, size + ITEM_ALIGNMENT);
  if (memory != NULL)
    {
      memory = ALIGN_POINTER(memory);
      if (zero)
        memset(memory, 0, size);
    }

  return memory;
}
svn_error_t *
svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
                                  apr_size_t total_size,
                                  apr_size_t directory_size,
                                  apr_size_t segment_count,
                                  svn_boolean_t thread_safe,
                                  svn_boolean_t allow_blocking_writes,
                                  apr_pool_t *pool)
{
  svn_membuffer_t *c;

  apr_size_t seg;
  apr_uint32_t group_count;
  apr_uint32_t main_group_count;
  apr_uint32_t spare_group_count;
  apr_uint32_t group_init_size;
  apr_uint64_t data_size;
  apr_uint64_t max_entry_size;

  /* Limit the total size (only relevant if we can address > 4GB)
   */
#if APR_SIZEOF_VOIDP > 4
  if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
    total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
#endif

  /* Limit the segment count
   */
  if (segment_count > MAX_SEGMENT_COUNT)
    segment_count = MAX_SEGMENT_COUNT;
  if (segment_count * MIN_SEGMENT_SIZE > total_size)
    segment_count = total_size / MIN_SEGMENT_SIZE;

  /* The segment count must be a power of two.  Round it down as
   * necessary.
   */
  while ((segment_count & (segment_count-1)) != 0)
    segment_count &= segment_count-1;
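
  /* The loop above clears the lowest set bit until a single bit remains,
   * e.g. a requested count of 12 (binary 1100) becomes 8 (1000) and 5
   * (101) becomes 4 (100); 0 and 1 pass through unchanged.
   */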
  /* if the caller hasn't provided a reasonable segment count or the above
   * limitations set it to 0, derive one from the absolute cache size
   */
  if (segment_count < 1)
    {
      /* Determine a reasonable number of cache segments.  Segmentation is
       * only useful for multi-threaded / multi-core servers as it reduces
       * lock contention on these systems.
       *
       * But on these systems, we can assume that ample memory has been
       * allocated to this cache.  Smaller caches should not be segmented
       * as this severely limits the maximum size of cacheable items.
       *
       * Segments should not be smaller than 32MB and max. cacheable item
       * size should grow as fast as segmentation.
       */
      apr_uint32_t segment_count_shift = 0;
      while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
             < total_size)
        ++segment_count_shift;

      segment_count = (apr_size_t)1 << segment_count_shift;
    }

  /* If we have an extremely large cache (>512 GB), the default segment
   * size may exceed the amount allocatable as one chunk.  In that case,
   * increase segmentation until we are under the threshold.
   */
  while (   total_size / segment_count > MAX_SEGMENT_SIZE
         && segment_count < MAX_SEGMENT_COUNT)
    segment_count *= 2;
  /* allocate cache as an array of segments / cache objects */
  c = apr_palloc(pool, segment_count * sizeof(*c));

  /* Split total cache size into segments of equal size
   */
  total_size /= segment_count;
  directory_size /= segment_count;

  /* prevent pathological conditions: ensure a certain minimum cache size
   */
  if (total_size < 2 * sizeof(entry_group_t))
    total_size = 2 * sizeof(entry_group_t);

  /* adapt the dictionary size accordingly, if necessary:
   * It must hold at least one group and must not exceed the cache size.
   */
  if (directory_size > total_size - sizeof(entry_group_t))
    directory_size = total_size - sizeof(entry_group_t);
  if (directory_size < 2 * sizeof(entry_group_t))
    directory_size = 2 * sizeof(entry_group_t);

  /* limit the data size to what we can address.
   * Note that this cannot overflow since all values are of size_t.
   * Also, make it a multiple of the item placement granularity to
   * prevent subtle overflows.
   */
  data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;

  /* For cache sizes > 16TB, individual cache segments will be larger
   * than 32GB allowing for >4GB entries.  But caching chunks larger
   * than 4GB is simply not supported.
   */
  max_entry_size = data_size / 8 > MAX_ITEM_SIZE
                 ? MAX_ITEM_SIZE
                 : data_size / 8;

  /* to keep the entries small, we use 32 bit indexes only
   * -> we need to ensure that no more than 4G entries exist.
   *
   * Note, that this limit could only be exceeded in a very
   * theoretical setup with about 1EB of cache.
   */
  group_count = directory_size / sizeof(entry_group_t)
                    >= (APR_UINT32_MAX / GROUP_SIZE)
              ? (APR_UINT32_MAX / GROUP_SIZE) - 1
              : (apr_uint32_t)(directory_size / sizeof(entry_group_t));

  /* set some of the index directory aside as over-flow (spare) buffers */
  spare_group_count = MAX(group_count / 4, 1);
  main_group_count = group_count - spare_group_count;
  assert(spare_group_count > 0 && main_group_count > 0);

  group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
  for (seg = 0; seg < segment_count; ++seg)
    {
      /* allocate buffers and initialize cache members
       */
      c[seg].segment_count = (apr_uint32_t)segment_count;

      c[seg].group_count = main_group_count;
      c[seg].spare_group_count = spare_group_count;
      c[seg].first_spare_group = NO_INDEX;
      c[seg].max_spare_used = 0;

      c[seg].directory = apr_pcalloc(pool,
                                     group_count * sizeof(entry_group_t));

      /* Allocate and initialize directory entries as "not initialized",
         hence "unused" */
      c[seg].group_initialized = apr_pcalloc(pool, group_init_size);

      /* Allocate 1/4th of the data buffer to L1
       */
      c[seg].l1.first = NO_INDEX;
      c[seg].l1.last = NO_INDEX;
      c[seg].l1.next = NO_INDEX;
      c[seg].l1.start_offset = 0;
      c[seg].l1.size = ALIGN_VALUE(data_size / 4);
      c[seg].l1.current_data = 0;

      /* The remaining 3/4th will be used as L2
       */
      c[seg].l2.first = NO_INDEX;
      c[seg].l2.last = NO_INDEX;
      c[seg].l2.next = NO_INDEX;
      c[seg].l2.start_offset = c[seg].l1.size;
      c[seg].l2.size = data_size - c[seg].l1.size;
      c[seg].l2.current_data = c[seg].l2.start_offset;

      c[seg].data = secure_aligned_alloc(pool, (apr_size_t)data_size, FALSE);
      c[seg].data_used = 0;
      c[seg].max_entry_size = max_entry_size;

      c[seg].used_entries = 0;
      c[seg].total_reads = 0;
      c[seg].total_writes = 0;
      c[seg].total_hits = 0;

      /* were allocations successful?
       * If not, initialize a minimal cache structure.
       */
      if (c[seg].data == NULL || c[seg].directory == NULL)
        {
          /* We are OOM.  There is no need to proceed with "half a cache".
           */
          return svn_error_wrap_apr(APR_ENOMEM, "OOM");
        }

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
      /* A lock for intra-process synchronization to the cache, or NULL
       * if the cache's creator doesn't feel the cache needs to be
       * thread-safe.
       */
      SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool));
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
      /* Same for read-write lock. */
      c[seg].lock = NULL;
      if (thread_safe)
        {
          apr_status_t status =
              apr_thread_rwlock_create(&(c[seg].lock), pool);
          if (status)
            return svn_error_wrap_apr(status, _("Can't create cache mutex"));
        }

      /* Select the behavior of write operations.
       */
      c[seg].allow_blocking_writes = allow_blocking_writes;
#endif
    }

  /* done here
   */
  *cache = c;
  return SVN_NO_ERROR;
}
1877 svn_cache__membuffer_clear(svn_membuffer_t *cache)
1880 apr_size_t segment_count = cache->segment_count;
1882 /* Length of the group_initialized array in bytes.
1883 See also svn_cache__membuffer_cache_create(). */
1884 apr_size_t group_init_size
1885 = 1 + (cache->group_count + cache->spare_group_count)
1886 / (8 * GROUP_INIT_GRANULARITY);
1888 /* Clear segment by segment. This implies that other thread may read
1889 and write to other segments after we cleared them and before the
1890 last segment is done.
1892 However, that is no different from a write request coming through
1893 right after we cleared all segments because dependencies between
1894 cache entries (recursive lookup / access locks) are not allowed.
1896 for (seg = 0; seg < segment_count; ++seg)
1898 /* Unconditionally acquire the write lock. */
1899 SVN_ERR(force_write_lock_cache(&cache[seg]));
1901 /* Mark all groups as "not initialized", which implies "empty". */
1902 cache[seg].first_spare_group = NO_INDEX;
1903 cache[seg].max_spare_used = 0;
1905 memset(cache[seg].group_initialized, 0, group_init_size);
1907 /* Unlink L1 contents. */
1908 cache[seg].l1.first = NO_INDEX;
1909 cache[seg].l1.last = NO_INDEX;
1910 cache[seg].l1.next = NO_INDEX;
1911 cache[seg].l1.current_data = cache[seg].l1.start_offset;
1913 /* Unlink L2 contents. */
1914 cache[seg].l2.first = NO_INDEX;
1915 cache[seg].l2.last = NO_INDEX;
1916 cache[seg].l2.next = NO_INDEX;
1917 cache[seg].l2.current_data = cache[seg].l2.start_offset;
1919 /* Reset content counters. */
1920 cache[seg].data_used = 0;
1921 cache[seg].used_entries = 0;
1923 /* Segment may be used again. */
1924 SVN_ERR(unlock_cache(&cache[seg], SVN_NO_ERROR));
1928 return SVN_NO_ERROR;
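/* Editorial note (not in the original source): clearing resets only the
 * index bookkeeping. The stale bytes in the data buffer are left in
 * place; they simply become unreachable and will be overwritten by
 * future insertions.
 */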
1931 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1932 * by the hash value TO_FIND and set *FOUND accordingly.
1934 * Note: This function requires the caller to serialize access.
1935 * Don't call it directly, call entry_exists instead.
1937 static svn_error_t *
1938 entry_exists_internal(svn_membuffer_t *cache,
1939 apr_uint32_t group_index,
1940 const full_key_t *to_find,
1941 svn_boolean_t *found)
1943 *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1944 return SVN_NO_ERROR;
1947 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1948 * by the hash value TO_FIND and set *FOUND accordingly.
1950 static svn_error_t *
1951 entry_exists(svn_membuffer_t *cache,
1952 apr_uint32_t group_index,
1953 const full_key_t *to_find,
1954 svn_boolean_t *found)
1956 WITH_READ_LOCK(cache,
1957 entry_exists_internal(cache,
1962 return SVN_NO_ERROR;
1965 /* Given the SIZE and PRIORITY of a new item, return the cache level
1966 (L1 or L2) in fragment CACHE that this item shall be inserted into.
1967 If we can neither find nor make enough room for the item, return NULL.
1969 static cache_level_t *
1970 select_level(svn_membuffer_t *cache,
1972 apr_uint32_t priority)
1974 if (cache->max_entry_size >= size)
1976 /* Small items go into L1. */
1977 return ensure_data_insertable_l1(cache, size)
1981 else if ( cache->l2.size >= size
1982 && MAX_ITEM_SIZE >= size
1983 && priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1985 /* Large but important items go into L2. */
1986 entry_t dummy_entry = { { { 0 } } };
1987 dummy_entry.priority = priority;
1988 dummy_entry.size = size;
1990 return ensure_data_insertable_l2(cache, &dummy_entry)
1995 /* Don't cache large, unimportant items. */
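/* Editorial summary (not in the original source) of the rules above:
 *
 *   fits max_entry_size                        -> L1, if room can be made
 *   fits l2.size and MAX_ITEM_SIZE,
 *   priority above default                     -> L2, if room can be made
 *   anything else                              -> NULL, i.e. don't cache
 */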
1999 /* Try to insert the serialized item given in BUFFER with ITEM_SIZE
2000 * into the group GROUP_INDEX of CACHE and uniquely identify it by
2001 * hash value TO_FIND.
2003 * However, there is no guarantee that it will actually be put into
2004 * the cache. If there is already some data associated with TO_FIND,
2005 * it will be removed from the cache even if the new data cannot
2008 * Note: This function requires the caller to serialize access.
2009 * Don't call it directly, call membuffer_cache_set instead.
2011 static svn_error_t *
2012 membuffer_cache_set_internal(svn_membuffer_t *cache,
2013 const full_key_t *to_find,
2014 apr_uint32_t group_index,
2016 apr_size_t item_size,
2017 apr_uint32_t priority,
2018 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2019 apr_pool_t *scratch_pool)
2021 cache_level_t *level;
2022 apr_size_t size = item_size + to_find->entry_key.key_len;
2024 /* first, look for a previous entry for the given key */
2025 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2027 /* if there is an old version of that entry and the new data fits into
2028 * the old spot, just re-use that space. */
2029 if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
2031 /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
2032 * lest we run into trouble with 32 bit underflow *not* treated as a
2035 cache->data_used += (apr_uint64_t)size - entry->size;
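/* Editorial note (not in the original source): with a 32-bit apr_size_t,
 * SIZE - ENTRY->SIZE would wrap within 32 bits (e.g. 100 - 200 yields
 * 0xffffff9c) and then get zero-extended, adding a huge bogus value to
 * the wider DATA_USED. Widening SIZE first makes the wrap-around happen
 * modulo 2^64, which on the unsigned 64-bit counter is exactly the
 * intended subtraction. */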
2037 entry->priority = priority;
2039 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2041 /* Remember original content, type and key (hashes)
2043 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2044 memcpy(&entry->tag, tag, sizeof(*tag));
2048 if (entry->key.key_len)
2049 memcpy(cache->data + entry->offset, to_find->full_key.data,
2050 entry->key.key_len);
2052 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2055 cache->total_writes++;
2056 return SVN_NO_ERROR;
2059 /* if necessary, enlarge the insertion window.
2061 level = buffer ? select_level(cache, size, priority) : NULL;
2064 /* Remove old data for this key, if it exists.
2065 * Get an unused entry for the key and initialize it with
2066 * the serialized item's (future) position within the data buffer.
2068 entry = find_entry(cache, group_index, to_find, TRUE);
2070 entry->offset = level->current_data;
2071 entry->priority = priority;
2073 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2075 /* Remember original content, type and key (hashes)
2077 SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2078 memcpy(&entry->tag, tag, sizeof(*tag));
2082 /* Link the entry properly.
2084 insert_entry(cache, entry);
2086 /* Copy the serialized item data into the cache.
2088 if (entry->key.key_len)
2089 memcpy(cache->data + entry->offset, to_find->full_key.data,
2090 entry->key.key_len);
2092 memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2095 cache->total_writes++;
2099 /* if there is already an entry for this key, drop it.
2100 * Since ensure_data_insertable may have removed entries from
2101 * ENTRY's group, re-do the lookup.
2103 entry = find_entry(cache, group_index, to_find, FALSE);
2105 drop_entry(cache, entry);
2108 return SVN_NO_ERROR;
2111 /* Try to insert the ITEM and use the KEY to uniquely identify it.
2112 * However, there is no guarantee that it will actually be put into
2113 * the cache. If there is already some data associated with the KEY,
2114 * it will be removed from the cache even if the new data cannot
2117 * The SERIALIZER is called to transform the ITEM into a single,
2118 * flat data buffer. Temporary allocations may be done in POOL.
2120 static svn_error_t *
2121 membuffer_cache_set(svn_membuffer_t *cache,
2122 const full_key_t *key,
2124 svn_cache__serialize_func_t serializer,
2125 apr_uint32_t priority,
2126 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2127 apr_pool_t *scratch_pool)
2129 apr_uint32_t group_index;
2130 void *buffer = NULL;
2131 apr_size_t size = 0;
2133 /* find the entry group that will hold the key.
2135 group_index = get_group_index(&cache, &key->entry_key);
2137 /* Serialize the data.
2140 SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
2142 /* The actual cache data access needs to be synchronized.
2144 WITH_WRITE_LOCK(cache,
2145 membuffer_cache_set_internal(cache,
2151 DEBUG_CACHE_MEMBUFFER_TAG
2153 return SVN_NO_ERROR;
2156 /* Count a hit in ENTRY within CACHE.
2159 increment_hit_counters(svn_membuffer_t *cache, entry_t *entry)
2161 /* To minimize the memory footprint of the cache index, we limit local
2162 * hit counters to 32 bits. These may overflow but we don't really
2163 * care because at worst, ENTRY will be dropped from cache once every
2164 * few billion hits. */
2165 svn_atomic_inc(&entry->hit_count);
2167 /* That one is for stats only. */
2168 cache->total_hits++;
2171 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2172 * by the hash value TO_FIND. If no item has been stored for KEY,
2173 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
2174 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
2177 * Note: This function requires the caller to serialize access.
2178 * Don't call it directly, call membuffer_cache_get instead.
2180 static svn_error_t *
2181 membuffer_cache_get_internal(svn_membuffer_t *cache,
2182 apr_uint32_t group_index,
2183 const full_key_t *to_find,
2185 apr_size_t *item_size,
2186 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2187 apr_pool_t *result_pool)
2192 /* The actual cache data access needs to be synchronized.
2194 entry = find_entry(cache, group_index, to_find, FALSE);
2195 cache->total_reads++;
2198 /* no such entry found.
2203 return SVN_NO_ERROR;
2206 size = ALIGN_VALUE(entry->size) - entry->key.key_len;
2207 *buffer = ALIGN_POINTER(apr_palloc(result_pool, size + ITEM_ALIGNMENT-1));
2208 memcpy(*buffer, cache->data + entry->offset + entry->key.key_len, size);
2210 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2212 /* Check for overlapping entries.
2214 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2215 entry->offset + size
2216 <= get_entry(cache, entry->next)->offset);
2218 /* Compare original content, type and key (hashes)
2220 SVN_ERR(store_content_part(tag, *buffer, entry->size - entry->key.key_len,
2222 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2226 /* update hit statistics
2228 increment_hit_counters(cache, entry);
2229 *item_size = entry->size - entry->key.key_len;
2231 return SVN_NO_ERROR;
2234 /* Look for the *ITEM identified by KEY. If no item has been stored
2235 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
2236 * to re-construct the proper object from the serialized data.
2237 * Allocations will be done in POOL.
2239 static svn_error_t *
2240 membuffer_cache_get(svn_membuffer_t *cache,
2241 const full_key_t *key,
2243 svn_cache__deserialize_func_t deserializer,
2244 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2245 apr_pool_t *result_pool)
2247 apr_uint32_t group_index;
2251 /* find the entry group that will hold the key.
2253 group_index = get_group_index(&cache, &key->entry_key);
2254 WITH_READ_LOCK(cache,
2255 membuffer_cache_get_internal(cache,
2260 DEBUG_CACHE_MEMBUFFER_TAG
2263 /* re-construct the original data object from its serialized form.
2268 return SVN_NO_ERROR;
2271 return deserializer(item, buffer, size, result_pool);
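/* Editorial note (not in the original source): the serialized item is
 * copied out under the read lock in membuffer_cache_get_internal(), but
 * the potentially expensive DESERIALIZER call above runs after the lock
 * has been released, keeping lock hold times short. */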
2274 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2275 * by the hash value TO_FIND. If no item has been stored for KEY, *FOUND
2276 * will be set to FALSE, and to TRUE otherwise.
2278 static svn_error_t *
2279 membuffer_cache_has_key_internal(svn_membuffer_t *cache,
2280 apr_uint32_t group_index,
2281 const full_key_t *to_find,
2282 svn_boolean_t *found)
2284 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2287 /* This will often be called by a "block read" when most data is already
2288 in L2 and only a few previously evicted items are added to L1
2289 again. While items in L1 are well protected for a while, L2
2290 items may get evicted soon. Thus, mark them all as "hit" to give
2291 them a higher chance of survival. */
2292 increment_hit_counters(cache, entry);
2300 return SVN_NO_ERROR;
2303 /* Look for an entry identified by KEY. If no item has been stored
2304 * for KEY, *FOUND will be set to FALSE, and to TRUE otherwise.
2306 /* Implements svn_cache__has_key for membuffer caches.
2308 static svn_error_t *
2309 membuffer_cache_has_key(svn_membuffer_t *cache,
2310 const full_key_t *key,
2311 svn_boolean_t *found)
2313 /* find the entry group that will hold the key.
2315 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2316 cache->total_reads++;
2318 WITH_READ_LOCK(cache,
2319 membuffer_cache_has_key_internal(cache,
2324 return SVN_NO_ERROR;
2327 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2328 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
2329 * If not found, *ITEM will be NULL.
2331 * Otherwise, the DESERIALIZER is called with that entry and the BATON
2332 * provided and will extract the desired information. The result is set
2333 * in *ITEM. Allocations will be done in POOL.
2335 * Note: This function requires the caller to serialize access.
2336 * Don't call it directly, call membuffer_cache_get_partial instead.
2338 static svn_error_t *
2339 membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
2340 apr_uint32_t group_index,
2341 const full_key_t *to_find,
2343 svn_boolean_t *found,
2344 svn_cache__partial_getter_func_t deserializer,
2346 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2347 apr_pool_t *result_pool)
2349 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2350 cache->total_reads++;
2356 return SVN_NO_ERROR;
2360 const void *item_data = cache->data + entry->offset + entry->key.key_len;
2361 apr_size_t item_size = entry->size - entry->key.key_len;
2363 increment_hit_counters(cache, entry);
2365 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2367 /* Check for overlapping entries.
2369 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2370 entry->offset + entry->size
2371 <= get_entry(cache, entry->next)->offset);
2373 /* Compare original content, type and key (hashes)
2375 SVN_ERR(store_content_part(tag, item_data, item_size, result_pool));
2376 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2380 return deserializer(item, item_data, item_size, baton, result_pool);
2384 /* Look for the cache entry identified by KEY. FOUND indicates
2385 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
2386 * the DESERIALIZER is called with that entry and the BATON provided
2387 * and will extract the desired information. The result is set in *ITEM.
2388 * Allocations will be done in POOL.
2390 static svn_error_t *
2391 membuffer_cache_get_partial(svn_membuffer_t *cache,
2392 const full_key_t *key,
2394 svn_boolean_t *found,
2395 svn_cache__partial_getter_func_t deserializer,
2397 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2398 apr_pool_t *result_pool)
2400 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2402 WITH_READ_LOCK(cache,
2403 membuffer_cache_get_partial_internal
2404 (cache, group_index, key, item, found,
2405 deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
2408 return SVN_NO_ERROR;
2411 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2412 * by the hash value TO_FIND. If no entry has been found, the function
2413 * returns without modifying the cache.
2415 * Otherwise, FUNC is called with that entry and the BATON provided
2416 * and may modify the cache entry. Allocations will be done in POOL.
2418 * Note: This function requires the caller to serialize access.
2419 * Don't call it directly, call membuffer_cache_set_partial instead.
2421 static svn_error_t *
2422 membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
2423 apr_uint32_t group_index,
2424 const full_key_t *to_find,
2425 svn_cache__partial_setter_func_t func,
2427 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2428 apr_pool_t *scratch_pool)
2430 /* cache item lookup
2432 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2433 cache->total_reads++;
2435 /* this function is a no-op if the item is not in the cache
2441 /* access the serialized cache item */
2442 apr_size_t key_len = entry->key.key_len;
2443 void *item_data = cache->data + entry->offset + key_len;
2444 void *orig_data = item_data;
2445 apr_size_t item_size = entry->size - key_len;
2447 increment_hit_counters(cache, entry);
2448 cache->total_writes++;
2450 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2452 /* Check for overlapping entries.
2454 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2455 entry->offset + entry->size
2456 <= get_entry(cache, entry->next)->offset);
2458 /* Compare original content, type and key (hashes)
2460 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2461 SVN_ERR(assert_equal_tags(&entry->tag, tag));
2465 /* modify it, preferably in-situ.
2467 err = func(&item_data, &item_size, baton, scratch_pool);
2471 /* Something went wrong while FUNC was modifying the cached
2472 * item. Thus, it might have become invalid / corrupted.
2473 * We had better drop it.
2475 drop_entry(cache, entry);
2481 /* if the modification caused a re-allocation, we need to remove
2482 * the old entry and copy the new data back into the cache.
2484 if (item_data != orig_data)
2486 /* Remove the old entry and try to make space for the new one.
2488 drop_entry(cache, entry);
2489 if ( (cache->max_entry_size >= item_size + key_len)
2490 && ensure_data_insertable_l1(cache, item_size + key_len))
2492 /* Write the new entry.
2494 entry = find_entry(cache, group_index, to_find, TRUE);
2495 entry->size = item_size + key_len;
2496 entry->offset = cache->l1.current_data;
2499 memcpy(cache->data + entry->offset,
2500 to_find->full_key.data, key_len);
2502 memcpy(cache->data + entry->offset + key_len, item_data,
2505 /* Link the entry properly.
2507 insert_entry(cache, entry);
2511 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2513 /* Remember original content, type and key (hashes)
2515 SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2516 memcpy(&entry->tag, tag, sizeof(*tag));
2522 return SVN_NO_ERROR;
2525 /* Look for the cache entry identified by KEY. If no entry
2526 * has been found, the function returns without modifying the cache.
2527 * Otherwise, FUNC is called with that entry and the BATON provided
2528 * and may modify the cache entry. Allocations will be done in POOL.
2530 static svn_error_t *
2531 membuffer_cache_set_partial(svn_membuffer_t *cache,
2532 const full_key_t *key,
2533 svn_cache__partial_setter_func_t func,
2535 DEBUG_CACHE_MEMBUFFER_TAG_ARG
2536 apr_pool_t *scratch_pool)
2538 /* cache item lookup
2540 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2541 WITH_WRITE_LOCK(cache,
2542 membuffer_cache_set_partial_internal
2543 (cache, group_index, key, func, baton,
2544 DEBUG_CACHE_MEMBUFFER_TAG
2547 /* done here -> unlock the cache
2549 return SVN_NO_ERROR;
2552 /* Implement the svn_cache__t interface on top of a shared membuffer cache.
2554 * Because membuffer caches tend to be very large, there will be rather few
2555 * of them (usually only one). Thus, the same instance shall be used as the
2556 * backend to many application-visible svn_cache__t instances. This should
2557 * also achieve global resource usage fairness.
2559 * To accommodate items from multiple sources, the individual keys must be
2560 * unique over all of them. This is achieved by simply adding a prefix key
2561 * that unambiguously identifies the item's context (e.g. path to the
2562 * respective repository). The prefix will be set upon construction of the
2563 * svn_cache__t instance.
2566 /* Internal cache structure (used in svn_cache__t.cache_internal) basically
2567 * holding the additional parameters needed to call the respective membuffer
2570 typedef struct svn_membuffer_cache_t
2572 /* this is where all our data will end up.
2574 svn_membuffer_t *membuffer;
2576 /* use this conversion function when inserting an item into the membuffer
2578 svn_cache__serialize_func_t serializer;
2580 /* use this conversion function when reading an item from the membuffer
2582 svn_cache__deserialize_func_t deserializer;
2584 /* Prepend this byte sequence to any key passed to us.
2585 * This makes our keys different from all keys used by svn_membuffer_cache_t
2586 * instances that we don't want to share cached data with.
2590 /* length of the keys that will be passed to us through the
2591 * svn_cache__t interface. May be APR_HASH_KEY_STRING.
2593 apr_ssize_t key_len;
2595 /* priority class for all items written through this interface */
2596 apr_uint32_t priority;
2598 /* Temporary buffer containing the hash key for the current access
2600 full_key_t combined_key;
2602 /* if enabled, this will serialize the access to this instance.
2604 svn_mutex__t *mutex;
2605 } svn_membuffer_cache_t;
2607 /* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should
2608 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check.
2610 #define ALLOCATIONS_PER_POOL_CLEAR 10
2612 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2613 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2614 * This could replace combine_key() entirely but we actually use it only
2615 * when the quick path fails.
2618 combine_long_key(svn_membuffer_cache_t *cache,
2620 apr_ssize_t key_len)
2622 apr_uint32_t *digest_buffer;
2624 apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2625 apr_size_t aligned_key_len;
2627 /* handle variable-length keys */
2628 if (key_len == APR_HASH_KEY_STRING)
2629 key_len = strlen((const char *) key);
2631 aligned_key_len = ALIGN_VALUE(key_len);
2634 svn_membuf__ensure(&cache->combined_key.full_key,
2635 aligned_key_len + prefix_len);
2637 key_copy = (char *)cache->combined_key.full_key.data + prefix_len;
2638 cache->combined_key.entry_key.key_len = aligned_key_len + prefix_len;
2639 memcpy(key_copy, key, key_len);
2640 memset(key_copy + key_len, 0, aligned_key_len - key_len);
2642 /* Hash key into 16 bytes. */
2643 digest_buffer = (apr_uint32_t *)cache->combined_key.entry_key.fingerprint;
2644 svn__fnv1a_32x4_raw(digest_buffer, key, key_len);
2646 /* Combine with prefix. */
2647 cache->combined_key.entry_key.fingerprint[0]
2648 ^= cache->prefix.entry_key.fingerprint[0];
2649 cache->combined_key.entry_key.fingerprint[1]
2650 ^= cache->prefix.entry_key.fingerprint[1];
2653 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2654 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2657 combine_key(svn_membuffer_cache_t *cache,
2659 apr_ssize_t key_len)
2661 /* short, fixed-size keys are the most common case */
2662 if (key_len != APR_HASH_KEY_STRING && key_len <= 16)
2664 const apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2666 /* Copy of *key, padded with 0.
2667 * We put it just behind the prefix already copied into the COMBINED_KEY.
2668 * The buffer space was allocated when the cache was created. */
2669 apr_uint64_t *data = (void *)((char *)cache->combined_key.full_key.data +
2671 assert(prefix_len <= cache->combined_key.full_key.size - 16);
2672 cache->combined_key.entry_key.key_len = prefix_len + 16;
2676 memcpy(data, key, key_len);
2678 /* scramble key DATA. All of this must be reversible to prevent key
2679 * collisions. So, we limit ourselves to xor and permutations. */
2680 data[1] = (data[1] << 27) | (data[1] >> 37);
2681 data[1] ^= data[0] & 0xffff;
2682 data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
2684 /* combine with this cache's namespace */
2685 cache->combined_key.entry_key.fingerprint[0]
2686 = data[0] ^ cache->prefix.entry_key.fingerprint[0];
2687 cache->combined_key.entry_key.fingerprint[1]
2688 = data[1] ^ cache->prefix.entry_key.fingerprint[1];
2692 /* longer or variably sized keys */
2693 combine_long_key(cache, key, key_len);
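/* Editorial sketch (not in the original source): the scramble above is a
 * bijection on the 16 key bytes, so two distinct short keys can never
 * collide after mixing. For illustration, its inverse (never needed by
 * the code itself) would be:
 *
 *   data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
 *   data[1] ^= data[0] & 0xffff;
 *   data[1] = (data[1] >> 27) | (data[1] << 37);
 *
 * Each step is a rotation or an xor with bits that the step itself does
 * not modify, hence each step, and the whole sequence, is reversible. */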
2697 /* Implement svn_cache__vtable_t.get (not thread-safe)
2699 static svn_error_t *
2700 svn_membuffer_cache_get(void **value_p,
2701 svn_boolean_t *found,
2704 apr_pool_t *result_pool)
2706 svn_membuffer_cache_t *cache = cache_void;
2708 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2716 return SVN_NO_ERROR;
2719 /* construct the full, i.e. globally unique, key by adding
2720 * this cache instance's prefix
2722 combine_key(cache, key, cache->key_len);
2724 /* Look the item up. */
2725 SVN_ERR(membuffer_cache_get(cache->membuffer,
2726 &cache->combined_key,
2728 cache->deserializer,
2729 DEBUG_CACHE_MEMBUFFER_TAG
2733 *found = *value_p != NULL;
2735 return SVN_NO_ERROR;
2738 /* Implement svn_cache__vtable_t.has_key (not thread-safe)
2740 static svn_error_t *
2741 svn_membuffer_cache_has_key(svn_boolean_t *found,
2744 apr_pool_t *scratch_pool)
2746 svn_membuffer_cache_t *cache = cache_void;
2753 return SVN_NO_ERROR;
2756 /* construct the full, i.e. globally unique, key by adding
2757 * this cache instance's prefix
2759 combine_key(cache, key, cache->key_len);
2761 /* Look the item up. */
2762 SVN_ERR(membuffer_cache_has_key(cache->membuffer,
2763 &cache->combined_key,
2767 return SVN_NO_ERROR;
2770 /* Implement svn_cache__vtable_t.set (not thread-safe)
2772 static svn_error_t *
2773 svn_membuffer_cache_set(void *cache_void,
2776 apr_pool_t *scratch_pool)
2778 svn_membuffer_cache_t *cache = cache_void;
2780 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2784 return SVN_NO_ERROR;
2786 /* construct the full, i.e. globally unique, key by adding
2787 * this cache instance's prefix
2789 combine_key(cache, key, cache->key_len);
2791 /* (probably) add the item to the cache. But there is no real guarantee
2792 * that the item will actually be cached afterwards.
2794 return membuffer_cache_set(cache->membuffer,
2795 &cache->combined_key,
2799 DEBUG_CACHE_MEMBUFFER_TAG
2803 /* Implement svn_cache__vtable_t.iter as "not implemented"
2805 static svn_error_t *
2806 svn_membuffer_cache_iter(svn_boolean_t *completed,
2808 svn_iter_apr_hash_cb_t user_cb,
2810 apr_pool_t *scratch_pool)
2812 return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
2813 _("Can't iterate a membuffer-based cache"));
2816 /* Implement svn_cache__vtable_t.get_partial (not thread-safe)
2818 static svn_error_t *
2819 svn_membuffer_cache_get_partial(void **value_p,
2820 svn_boolean_t *found,
2823 svn_cache__partial_getter_func_t func,
2825 apr_pool_t *result_pool)
2827 svn_membuffer_cache_t *cache = cache_void;
2829 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2836 return SVN_NO_ERROR;
2839 combine_key(cache, key, cache->key_len);
2840 SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
2841 &cache->combined_key,
2846 DEBUG_CACHE_MEMBUFFER_TAG
2849 return SVN_NO_ERROR;
2852 /* Implement svn_cache__vtable_t.set_partial (not thread-safe)
2854 static svn_error_t *
2855 svn_membuffer_cache_set_partial(void *cache_void,
2857 svn_cache__partial_setter_func_t func,
2859 apr_pool_t *scratch_pool)
2861 svn_membuffer_cache_t *cache = cache_void;
2863 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2867 combine_key(cache, key, cache->key_len);
2868 SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
2869 &cache->combined_key,
2872 DEBUG_CACHE_MEMBUFFER_TAG
2875 return SVN_NO_ERROR;
2878 /* Implement svn_cache__vtable_t.is_cachable
2879 * (thread-safe even without mutex)
2881 static svn_boolean_t
2882 svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
2884 /* Don't allow extremely large element sizes. Otherwise, the cache
2885 * might be thrashed by a few extremely large entries. And the size
2886 * must be small enough to be stored in a 32 bit value.
2888 svn_membuffer_cache_t *cache = cache_void;
2889 return cache->priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY
2890 ? cache->membuffer->l2.size >= size && MAX_ITEM_SIZE >= size
2891 : size <= cache->membuffer->max_entry_size;
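/* Editorial note (not in the original source): this mirrors
 * select_level(). High-priority front-ends may cache anything that fits
 * into L2 (and stays below MAX_ITEM_SIZE); all others are limited by
 * the segment's MAX_ENTRY_SIZE, matching the L1 admission rule. */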
2894 /* Add statistics of SEGMENT to INFO. If INCLUDE_HISTOGRAM is TRUE,
2895 * accumulate index bucket fill levels in INFO->HISTOGRAM.
2897 static svn_error_t *
2898 svn_membuffer_get_segment_info(svn_membuffer_t *segment,
2899 svn_cache__info_t *info,
2900 svn_boolean_t include_histogram)
2904 info->data_size += segment->l1.size + segment->l2.size;
2905 info->used_size += segment->data_used;
2906 info->total_size += segment->l1.size + segment->l2.size +
2907 segment->group_count * GROUP_SIZE * sizeof(entry_t);
2909 info->used_entries += segment->used_entries;
2910 info->total_entries += segment->group_count * GROUP_SIZE;
2912 if (include_histogram)
2913 for (i = 0; i < segment->group_count; ++i)
2914 if (is_group_initialized(segment, i))
2916 entry_group_t *chain_end
2917 = last_group_in_chain(segment, &segment->directory[i]);
2919 = MIN(chain_end->header.used,
2920 sizeof(info->histogram) / sizeof(info->histogram[0]) - 1);
2921 info->histogram[use]++;
2924 return SVN_NO_ERROR;
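/* Editorial note (not in the original source): HISTOGRAM[N] ends up
 * counting the initialized groups whose last group in the chain holds N
 * entries, with the final bucket absorbing all higher fill levels. */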
2927 /* Implement svn_cache__vtable_t.get_info
2928 * (thread-safe even without mutex)
2930 static svn_error_t *
2931 svn_membuffer_cache_get_info(void *cache_void,
2932 svn_cache__info_t *info,
2933 svn_boolean_t reset,
2934 apr_pool_t *result_pool)
2936 svn_membuffer_cache_t *cache = cache_void;
2939 /* cache front-end specific data */
2941 info->id = apr_pstrdup(result_pool, cache->prefix.full_key.data);
2943 /* collect info from shared cache back-end */
2945 for (i = 0; i < cache->membuffer->segment_count; ++i)
2947 svn_membuffer_t *segment = cache->membuffer + i;
2948 WITH_READ_LOCK(segment,
2949 svn_membuffer_get_segment_info(segment, info, FALSE));
2952 return SVN_NO_ERROR;
2956 /* the v-table for membuffer-based caches (single-threaded access)
2958 static svn_cache__vtable_t membuffer_cache_vtable = {
2959 svn_membuffer_cache_get,
2960 svn_membuffer_cache_has_key,
2961 svn_membuffer_cache_set,
2962 svn_membuffer_cache_iter,
2963 svn_membuffer_cache_is_cachable,
2964 svn_membuffer_cache_get_partial,
2965 svn_membuffer_cache_set_partial,
2966 svn_membuffer_cache_get_info
2969 /* Implement svn_cache__vtable_t.get and serialize all cache access.
2971 static svn_error_t *
2972 svn_membuffer_cache_get_synced(void **value_p,
2973 svn_boolean_t *found,
2976 apr_pool_t *result_pool)
2978 svn_membuffer_cache_t *cache = cache_void;
2979 SVN_MUTEX__WITH_LOCK(cache->mutex,
2980 svn_membuffer_cache_get(value_p,
2986 return SVN_NO_ERROR;
2989 /* Implement svn_cache__vtable_t.has_key and serialize all cache access.
2991 static svn_error_t *
2992 svn_membuffer_cache_has_key_synced(svn_boolean_t *found,
2995 apr_pool_t *result_pool)
2997 svn_membuffer_cache_t *cache = cache_void;
2998 SVN_MUTEX__WITH_LOCK(cache->mutex,
2999 svn_membuffer_cache_has_key(found,
3004 return SVN_NO_ERROR;
3007 /* Implement svn_cache__vtable_t.set and serialize all cache access.
3009 static svn_error_t *
3010 svn_membuffer_cache_set_synced(void *cache_void,
3013 apr_pool_t *scratch_pool)
3015 svn_membuffer_cache_t *cache = cache_void;
3016 SVN_MUTEX__WITH_LOCK(cache->mutex,
3017 svn_membuffer_cache_set(cache_void,
3022 return SVN_NO_ERROR;
3025 /* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
3027 static svn_error_t *
3028 svn_membuffer_cache_get_partial_synced(void **value_p,
3029 svn_boolean_t *found,
3032 svn_cache__partial_getter_func_t func,
3034 apr_pool_t *result_pool)
3036 svn_membuffer_cache_t *cache = cache_void;
3037 SVN_MUTEX__WITH_LOCK(cache->mutex,
3038 svn_membuffer_cache_get_partial(value_p,
3046 return SVN_NO_ERROR;
3049 /* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
3051 static svn_error_t *
3052 svn_membuffer_cache_set_partial_synced(void *cache_void,
3054 svn_cache__partial_setter_func_t func,
3056 apr_pool_t *scratch_pool)
3058 svn_membuffer_cache_t *cache = cache_void;
3059 SVN_MUTEX__WITH_LOCK(cache->mutex,
3060 svn_membuffer_cache_set_partial(cache_void,
3066 return SVN_NO_ERROR;
3069 /* the v-table for membuffer-based caches (with multi-threading support)
3071 static svn_cache__vtable_t membuffer_cache_synced_vtable = {
3072 svn_membuffer_cache_get_synced,
3073 svn_membuffer_cache_has_key_synced,
3074 svn_membuffer_cache_set_synced,
3075 svn_membuffer_cache_iter, /* no sync required */
3076 svn_membuffer_cache_is_cachable, /* no sync required */
3077 svn_membuffer_cache_get_partial_synced,
3078 svn_membuffer_cache_set_partial_synced,
3079 svn_membuffer_cache_get_info /* no sync required */
3082 /* standard serialization function for svn_stringbuf_t items.
3083 * Implements svn_cache__serialize_func_t.
3085 static svn_error_t *
3086 serialize_svn_stringbuf(void **buffer,
3087 apr_size_t *buffer_size,
3089 apr_pool_t *result_pool)
3091 svn_stringbuf_t *value_str = item;
3093 *buffer = value_str->data;
3094 *buffer_size = value_str->len + 1;
3096 return SVN_NO_ERROR;
3099 /* standard de-serialization function for svn_stringbuf_t items.
3100 * Implements svn_cache__deserialize_func_t.
3102 static svn_error_t *
3103 deserialize_svn_stringbuf(void **item,
3105 apr_size_t buffer_size,
3106 apr_pool_t *result_pool)
3108 svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));
3110 value_str->pool = result_pool;
3111 value_str->blocksize = buffer_size;
3112 value_str->data = buffer;
3113 value_str->len = buffer_size-1;
3116 return SVN_NO_ERROR;
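/* Editorial round-trip sketch (not in the original source): serializing
 * the svn_stringbuf_t "abc" stores the 4 bytes "abc\0", i.e. LEN + 1.
 * Deserializing wraps the buffer returned by the cache lookup in place:
 * DATA points at that buffer, LEN becomes BUFFER_SIZE - 1 == 3 and
 * BLOCKSIZE == 4; no further copy of the string data is made. */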
3119 /* Construct a svn_cache__t object on top of a shared membuffer cache.
3122 svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
3123 svn_membuffer_t *membuffer,
3124 svn_cache__serialize_func_t serializer,
3125 svn_cache__deserialize_func_t deserializer,
3128 apr_uint32_t priority,
3129 svn_boolean_t thread_safe,
3130 apr_pool_t *result_pool,
3131 apr_pool_t *scratch_pool)
3133 svn_checksum_t *checksum;
3134 apr_size_t prefix_len, prefix_orig_len;
3136 /* allocate the cache header structures
3138 svn_cache__t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
3139 svn_membuffer_cache_t *cache = apr_pcalloc(result_pool, sizeof(*cache));
3141 /* initialize our internal cache header
3143 cache->membuffer = membuffer;
3144 cache->serializer = serializer
3146 : serialize_svn_stringbuf;
3147 cache->deserializer = deserializer
3149 : deserialize_svn_stringbuf;
3150 cache->priority = priority;
3151 cache->key_len = klen;
3153 SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, result_pool));
3155 /* Copy the prefix into the prefix full key. Align it to ITEM_ALIGNMENT.
3156 * Don't forget to include the terminating NUL. */
3157 prefix_orig_len = strlen(prefix) + 1;
3158 prefix_len = ALIGN_VALUE(prefix_orig_len);
3160 svn_membuf__create(&cache->prefix.full_key, prefix_len, result_pool);
3161 memcpy((char *)cache->prefix.full_key.data, prefix, prefix_orig_len);
3162 memset((char *)cache->prefix.full_key.data + prefix_orig_len, 0,
3163 prefix_len - prefix_orig_len);
3165 /* Construct the folded prefix key. */
3166 SVN_ERR(svn_checksum(&checksum,
3171 memcpy(cache->prefix.entry_key.fingerprint, checksum->digest,
3172 sizeof(cache->prefix.entry_key.fingerprint));
3173 cache->prefix.entry_key.key_len = prefix_len;
3175 /* Initialize the combined key. Pre-allocate some extra room in the full
3176 * key such that we probably don't need to re-alloc. */
3177 cache->combined_key.entry_key = cache->prefix.entry_key;
3178 svn_membuf__create(&cache->combined_key.full_key, prefix_len + 200,
3180 memcpy(cache->combined_key.full_key.data, cache->prefix.full_key.data,
3183 /* initialize the generic cache wrapper
3185 wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
3186 : &membuffer_cache_vtable;
3187 wrapper->cache_internal = cache;
3188 wrapper->error_handler = 0;
3189 wrapper->error_baton = 0;
3190 wrapper->pretend_empty = !!getenv("SVN_X_DOES_NOT_MARK_THE_SPOT");
3193 return SVN_NO_ERROR;
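/* Editorial usage sketch (not in the original source; the prefix, pools
 * and priority shown are hypothetical). Passing NULL for both conversion
 * functions selects the svn_stringbuf_t handlers above:
 *
 *   svn_cache__t *cache;
 *   SVN_ERR(svn_cache__create_membuffer_cache(
 *             &cache,
 *             svn_cache__get_global_membuffer_cache(),
 *             NULL, NULL,                  [stringbuf (de-)serialization]
 *             APR_HASH_KEY_STRING,         [variable-length string keys]
 *             "my-repo:rev-data",          [hypothetical namespace prefix]
 *             SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY,
 *             TRUE,                        [thread-safe front-end]
 *             result_pool, scratch_pool));
 */

/* Add the global statistics of SEGMENT to INFO, including the index
 * fill histogram.
 */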
3196 static svn_error_t *
3197 svn_membuffer_get_global_segment_info(svn_membuffer_t *segment,
3198 svn_cache__info_t *info)
3200 info->gets += segment->total_reads;
3201 info->sets += segment->total_writes;
3202 info->hits += segment->total_hits;
3204 WITH_READ_LOCK(segment,
3205 svn_membuffer_get_segment_info(segment, info, TRUE));
3207 return SVN_NO_ERROR;
3211 svn_cache__membuffer_get_global_info(apr_pool_t *pool)
3215 svn_membuffer_t *membuffer = svn_cache__get_global_membuffer_cache();
3216 svn_cache__info_t *info = apr_pcalloc(pool, sizeof(*info));
3218 /* cache front-end specific data */
3220 info->id = "membuffer globals";
3222 /* collect info from shared cache back-end */
3224 for (i = 0; i < membuffer->segment_count; ++i)
3225 svn_error_clear(svn_membuffer_get_global_segment_info(membuffer + i,