2 * cache-membuffer.c: in-memory caching for Subversion
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
26 #include <apr_thread_rwlock.h>
28 #include "svn_pools.h"
29 #include "svn_checksum.h"
31 #include "svn_private_config.h"
33 #include "svn_string.h"
34 #include "private/svn_dep_compat.h"
35 #include "private/svn_mutex.h"
36 #include "private/svn_pseudo_md5.h"
39 * This svn_cache__t implementation actually consists of two parts:
40 * a shared (per-process) singleton membuffer cache instance and shallow
41 * svn_cache__t front-end instances that each use different key spaces.
42 * For data management, they all forward to the singleton membuffer cache.
44 * A membuffer cache consists of two parts:
46 * 1. A linear data buffer containing cached items in a serialized
47 * representation. There may be arbitrary gaps between entries.
48 * 2. A directory of cache entries. This is organized similar to CPU
49 * data caches: for every possible key, there is exactly one group
50 * of entries that may contain the header info for an item with
51 * that given key. The result is a GROUP_SIZE-way associative cache.
53 * Only the start address of these two data parts are given as a native
54 * pointer. All other references are expressed as offsets to these pointers.
55 * With that design, it is relatively easy to share the same data structure
56 * between different processes and / or to persist them on disk. These
57 * out-of-process features have not been implemented, yet.
59 * The data buffer usage information is implicitly given by the directory
60 * entries. Every USED entry has a reference to the previous and the next
61 * used dictionary entry and this doubly-linked list is ordered by the
62 * offsets of their item data within the data buffer. So removing data,
63 * for instance, is done simply by unlinking it from the chain, implicitly
64 * marking the entry as well as the data buffer section previously
65 * associated to it as unused.
67 * Insertion can occur at only one, sliding position. It is marked by its
68 * offset in the data buffer plus the index of the first used entry at or
69 * behind that position. If this gap is too small to accommodate the new
70 * item, the insertion window is extended as described below. The new entry
71 * will always be inserted at the bottom end of the window and since the
72 * next used entry is known, properly sorted insertion is possible.
74 * To make the cache perform robustly in a wide range of usage scenarios,
75 * a randomized variant of LFU is used (see ensure_data_insertable for
76 * details). Every item holds a read hit counter and there is a global read
77 * hit counter. The more hits an entry has in relation to the average, the
78 * more it is likely to be kept using a rand()-based condition. The test is
79 * applied only to the entry following the insertion window. If it doesn't
80 * get evicted, it is moved to the beginning of that window and the window is moved up accordingly.
83 * Moreover, the entry's hits get halved to make that entry more likely to
84 * be removed the next time the sliding insertion / removal window comes by.
85 * As a result, frequently used entries are likely not to be dropped until
86 * they have not been used for a while. Also, even in a cache thrashing situation,
87 * about 50% of the content survives every time 50% of the cache is re-written
88 * with new entries. For details on the fine-tuning involved, see the
89 * comments in ensure_data_insertable().
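 *
 * As an illustration only, the keep-or-evict test made by
 * ensure_data_insertable() (assuming the global hit count exceeds the
 * number of used entries) condenses to something like:
 *
 *   average   = cache->hit_count / cache->used_entries;
 *   threshold = (average + 1) * (rand() % 4096) / 2048;   [0 .. ~2x average]
 *   keep      = entry->hit_count >= threshold;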
91 * To limit the entry size and management overhead, not the actual item keys
92 * but only their MD5 checksums will be stored. This is reasonably safe
93 * to do since users have only limited control over the full keys, even if
94 * these contain folder paths. So, it is very hard to deliberately construct
95 * colliding keys. Random checksum collisions can be shown to be extremely unlikely.
98 * All access to the cached data needs to be serialized. Because we want
99 * to scale well despite that bottleneck, we simply segment the cache into
100 * a number of independent caches (segments). Items will be multiplexed based on their hash key.
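 *
 * A rough sketch of that multiplexing (it mirrors get_group_index() further
 * down; illustration only): with the 16 byte key viewed as two 64 bit words,
 *
 *   segment     = &segments[key[0] & (segment_count - 1)];   [power-of-2 count]
 *   group_index = key[1] % segment->group_count;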
104 /* A 16-way associative cache seems to be a good compromise between
105 * performance (worst-case lookups) and efficiency-loss due to collisions.
107 * This value may be changed to any positive integer.
109 #define GROUP_SIZE 16
111 /* For more efficient copy operations, let's align all data items properly.
112 * Must be a power of 2.
114 #define ITEM_ALIGNMENT 16
116 /* By default, don't create cache segments smaller than this value unless
117 * the total cache size itself is smaller.
119 #define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)
121 /* The minimum segment size we will allow for multi-segmented caches
123 #define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)
125 /* The maximum number of segments allowed. Larger numbers reduce the size
126 * of each segment, in turn reducing the max size of a cachable item.
127 * Also, each segment gets its own lock object. The actual number supported
128 * by the OS may therefore be lower and svn_cache__membuffer_cache_create
129 * may return an error.
131 #define MAX_SEGMENT_COUNT 0x10000
133 /* As of today, APR won't allocate chunks of 4GB or more. So, limit the
134 * segment size to slightly below that.
136 #define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)
138 /* We don't mark the initialization status for every group but initialize
139 * a number of groups at once. That will allow for a very small init flags
140 * vector that is likely to fit into the CPU caches even for fairly large
141 * membuffer caches. For instance, the default of 32 means 8x32 groups per
142 * byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group x 40 index
143 * bytes/entry x 8 cache bytes/index byte = 1kB init vector / 640MB cache.
145 #define GROUP_INIT_GRANULARITY 32
147 /* Invalid index reference value. Equivalent to APR_UINT32_T(-1)
149 #define NO_INDEX APR_UINT32_MAX
151 /* To save space in our group structure, we only use 32 bit size values
152 * and, therefore, limit the size of each entry to just below 4GB.
153 * Supporting larger items is not a good idea as the data transfer
154 * to and from the cache would block other threads for a very long time.
156 #define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
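/* Illustration: with ITEM_ALIGNMENT == 16, MAX_ITEM_SIZE evaluates to
 * (apr_uint32_t)(0 - 16) == 0xfffffff0, i.e. 16 bytes short of 4GB.
 */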
158 /* A 16 byte key type. We use that to identify cache entries.
159 * Representing the key as just two integer values will cause many compilers
160 * to create better code.
162 typedef apr_uint64_t entry_key_t[2];
164 /* Debugging / corruption detection support.
165 * If you define this macro, the getter functions will perform expensive
166 * checks on the item data, requested keys and entry types. If there is
167 * a mismatch found in any of them when being compared with the values
168 * remembered in the setter function, an error will be returned.
170 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
172 /* The prefix passed to svn_cache__create_membuffer_cache() effectively
173 * defines the type of all items stored by that cache instance. We'll take
174 * the last 7 bytes + \0 as plaintext for easy identification by the dev.
176 #define PREFIX_TAIL_LEN 8
178 /* This record will be attached to any cache entry. It tracks item data
179 * (content), key and type as hash values and is the baseline against which
180 * the getters will compare their results to detect inconsistencies.
182 typedef struct entry_tag_t
184 /* MD5 checksum over the serialized item data.
186 unsigned char content_hash [APR_MD5_DIGESTSIZE];
188 /* Hash value of the svn_cache_t instance that wrote the item
189 * (i.e. a combination of type and repository)
191 unsigned char prefix_hash [APR_MD5_DIGESTSIZE];
193 /* Note that this only covers the variable part of the key,
194 * i.e. it will be different from the full key hash used for
197 unsigned char key_hash [APR_MD5_DIGESTSIZE];
199 /* Last letters of the key in human readable format
200 * (ends with the type identifier, e.g. "DAG")
202 char prefix_tail[PREFIX_TAIL_LEN];
204 /* Length of the variable key part.
210 /* Per svn_cache_t instance initialization helper.
212 static void get_prefix_tail(const char *prefix, char *prefix_tail)
214 apr_size_t len = strlen(prefix);
215 apr_size_t to_copy = len > PREFIX_TAIL_LEN-1 ? PREFIX_TAIL_LEN-1 : len;
217 memset(prefix_tail, 0, PREFIX_TAIL_LEN);
218 memcpy(prefix_tail, prefix + len - to_copy, to_copy);
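/* Example (hypothetical prefix, for illustration only): for a prefix such as
 * "fsfs:/repos:DAG", the last PREFIX_TAIL_LEN-1 characters are copied,
 * yielding the NUL-terminated tail "pos:DAG".
 */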
221 /* Initialize all members of TAG except for the content hash.
223 static svn_error_t *store_key_part(entry_tag_t *tag,
224 entry_key_t prefix_hash,
230 svn_checksum_t *checksum;
231 SVN_ERR(svn_checksum(&checksum,
237 memcpy(tag->prefix_hash, prefix_hash, sizeof(tag->prefix_hash));
238 memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));
239 memcpy(tag->prefix_tail, prefix_tail, sizeof(tag->prefix_tail));
241 tag->key_len = key_len;
246 /* Initialize the content hash member of TAG.
248 static svn_error_t* store_content_part(entry_tag_t *tag,
253 svn_checksum_t *checksum;
254 SVN_ERR(svn_checksum(&checksum,
260 memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));
265 /* Compare two tags and fail with an assertion upon differences.
267 static svn_error_t* assert_equal_tags(const entry_tag_t *lhs,
268 const entry_tag_t *rhs)
270 SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
271 sizeof(lhs->content_hash)) == 0);
272 SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
273 sizeof(lhs->prefix_hash)) == 0);
274 SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
275 sizeof(lhs->key_hash)) == 0);
276 SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
277 sizeof(lhs->prefix_tail)) == 0);
279 SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);
284 /* Recurring code snippets.
287 #define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,
289 #define DEBUG_CACHE_MEMBUFFER_TAG tag,
291 #define DEBUG_CACHE_MEMBUFFER_INIT_TAG \
293 entry_tag_t *tag = &_tag; \
294 SVN_ERR(store_key_part(tag, \
296 cache->prefix_tail, \
298 cache->key_len == APR_HASH_KEY_STRING \
299 ? strlen((const char *) key) \
305 /* Don't generate any checks if consistency checks have not been enabled.
307 #define DEBUG_CACHE_MEMBUFFER_TAG_ARG
308 #define DEBUG_CACHE_MEMBUFFER_TAG
309 #define DEBUG_CACHE_MEMBUFFER_INIT_TAG
311 #endif /* SVN_DEBUG_CACHE_MEMBUFFER */
313 /* A single dictionary entry. Since all entries will be allocated once
314 * during cache creation, those entries might be either used or unused.
315 * An entry is used if and only if it is contained in the doubly-linked
316 * list of used entries.
318 typedef struct entry_t
320 /* Identifying the data item. Only valid for used entries.
324 /* The offset of the cached item's serialized data within the data buffer.
328 /* Size of the serialized item data. May be 0.
329 * Only valid for used entries.
333 /* Number of (read) hits for this entry. Will be reset upon write.
334 * Only valid for used entries.
336 apr_uint32_t hit_count;
338 /* Reference to the next used entry in the order defined by offset.
339 * NO_INDEX indicates the end of the list; this entry must be referenced
340 * by the cache's svn_membuffer_t.last member. NO_INDEX also implies
341 * that the data buffer is not used beyond offset+size.
342 * Only valid for used entries.
346 /* Reference to the previous used entry in the order defined by offset.
347 * NO_INDEX indicates the end of the list; this entry must be referenced
348 * by the cache's svn_membuffer_t.first member.
349 * Only valid for used entries.
351 apr_uint32_t previous;
353 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
354 /* Remember type, content and key hashes.
360 /* We group dictionary entries to make this GROUP_SIZE-way associative.
362 typedef struct entry_group_t
364 /* number of entries used [0 .. USED-1] */
367 /* the actual entries */
368 entry_t entries[GROUP_SIZE];
371 /* The cache header structure.
373 struct svn_membuffer_t
375 /* Number of cache segments. Must be a power of 2.
376 Please note that this structure represents only one such segment
377 and that all segments must / will report the same values here. */
378 apr_uint32_t segment_count;
380 /* The dictionary, GROUP_SIZE * group_count entries long. Never NULL.
382 entry_group_t *directory;
384 /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
385 * Allows for efficiently marking groups as "not initialized".
387 unsigned char *group_initialized;
389 /* Size of dictionary in groups. Must be > 0.
391 apr_uint32_t group_count;
393 /* Reference to the first (defined by the order of the content in the data
394 * buffer) dictionary entry used by any data item.
395 * NO_INDEX for an empty cache.
399 /* Reference to the last (defined by the order of the content in the data
400 * buffer) dictionary entry used by any data item.
401 * NO_INDEX for an empty cache.
405 /* Reference to the first (defined by the order of the content in the data
406 * buffer) used dictionary entry behind the insertion position
407 * (current_data). If NO_INDEX, the data buffer is free starting at the
408 * current_data offset.
413 /* Pointer to the data buffer, data_size bytes long. Never NULL.
417 /* Size of data buffer in bytes. Must be > 0.
419 apr_uint64_t data_size;
421 /* Offset in the data buffer where the next insertion shall occur.
423 apr_uint64_t current_data;
425 /* Total number of data buffer bytes in use.
427 apr_uint64_t data_used;
429 /* Largest entry size that we would accept. For total cache sizes
430 * less than 4TB (sic!), this is determined by the total cache size.
432 apr_uint64_t max_entry_size;
435 /* Number of used dictionary entries, i.e. number of cached items.
436 * In conjunction with hit_count, this is used to calculate the average
437 * hit count as part of the randomized LFU algorithm.
439 apr_uint32_t used_entries;
441 /* Sum of (read) hit counts of all used dictionary entries.
442 * In conjunction with used_entries, this is used to calculate
443 * the average hit count as part of the randomized LFU algorithm.
445 apr_uint64_t hit_count;
448 /* Total number of calls to membuffer_cache_get.
449 * Purely statistical information that may be used for profiling.
451 apr_uint64_t total_reads;
453 /* Total number of calls to membuffer_cache_set.
454 * Purely statistical information that may be used for profiling.
456 apr_uint64_t total_writes;
458 /* Total number of hits since the cache's creation.
459 * Purely statistical information that may be used for profiling.
461 apr_uint64_t total_hits;
464 /* A lock for intra-process synchronization to the cache, or NULL if
465 * the cache's creator doesn't feel the cache needs to be
468 apr_thread_rwlock_t *lock;
470 /* If set, write accesses will wait until they get exclusive access.
471 * Otherwise, they will become no-ops if the segment is currently read-locked.
474 svn_boolean_t allow_blocking_writes;
478 /* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
480 #define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)
482 /* Align POINTER value to the next ITEM_ALIGNMENT boundary.
484 #define ALIGN_POINTER(pointer) ((void*)ALIGN_VALUE((apr_size_t)(char*)(pointer)))
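/* Worked example (illustration only): with ITEM_ALIGNMENT == 16,
 * ALIGN_VALUE(33) == (33 + 15) & -16 == 48, while already aligned values
 * stay unchanged, e.g. ALIGN_VALUE(64) == 64.
 */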
486 /* If locking is supported for CACHE, acquire a read lock for it.
489 read_lock_cache(svn_membuffer_t *cache)
494 apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
496 return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
502 /* If locking is supported for CACHE, acquire a write lock for it.
505 write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
511 if (cache->allow_blocking_writes)
513 status = apr_thread_rwlock_wrlock(cache->lock);
517 status = apr_thread_rwlock_trywrlock(cache->lock);
518 if (SVN_LOCK_IS_BUSY(status))
521 status = APR_SUCCESS;
526 return svn_error_wrap_apr(status,
527 _("Can't write-lock cache mutex"));
533 /* If locking is supported for CACHE, acquire an unconditional write lock for it.
537 force_write_lock_cache(svn_membuffer_t *cache)
540 apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
542 return svn_error_wrap_apr(status,
543 _("Can't write-lock cache mutex"));
548 /* If locking is supported for CACHE, release the current lock
552 unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
557 apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
562 return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
568 /* If supported, guard the execution of EXPR with a read lock on CACHE.
569 * Macro has been modeled after SVN_MUTEX__WITH_LOCK.
571 #define WITH_READ_LOCK(cache, expr) \
573 SVN_ERR(read_lock_cache(cache)); \
574 SVN_ERR(unlock_cache(cache, (expr))); \
577 /* If supported, guard the execution of EXPR with a write lock on CACHE.
578 * Macro has been modeled after SVN_MUTEX__WITH_LOCK.
580 * The write lock process is complicated if we don't allow waiting for
581 * the lock: If we didn't get the lock, we may still need to remove an
582 * existing entry for the given key because that content is now stale.
583 * Once we discovered such an entry, we unconditionally do a blocking
584 * wait for the write lock. In case no old content could be found, a
585 * failing lock attempt is simply a no-op and we exit the macro.
587 #define WITH_WRITE_LOCK(cache, expr) \
589 svn_boolean_t got_lock = TRUE; \
590 SVN_ERR(write_lock_cache(cache, &got_lock)); \
593 svn_boolean_t exists; \
594 SVN_ERR(entry_exists(cache, group_index, key, &exists)); \
596 SVN_ERR(force_write_lock_cache(cache)); \
600 SVN_ERR(unlock_cache(cache, (expr))); \
603 /* Resolve a dictionary entry reference, i.e. return the entry for the given IDX.
606 static APR_INLINE entry_t *
607 get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
609 return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
612 /* Get the index (entry reference) of the given ENTRY within CACHE.
614 static APR_INLINE apr_uint32_t
615 get_index(svn_membuffer_t *cache, entry_t *entry)
617 apr_size_t group_index
618 = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);
620 return (apr_uint32_t)group_index * GROUP_SIZE
621 + (apr_uint32_t)(entry - cache->directory[group_index].entries);
624 /* Remove the used ENTRY from the CACHE, i.e. make it "unused".
625 * In contrast to insertion, removal is possible for any entry.
628 drop_entry(svn_membuffer_t *cache, entry_t *entry)
630 /* the group that ENTRY belongs to plus a number of useful index values
632 apr_uint32_t idx = get_index(cache, entry);
633 apr_uint32_t group_index = idx / GROUP_SIZE;
634 entry_group_t *group = &cache->directory[group_index];
635 apr_uint32_t last_in_group = group_index * GROUP_SIZE + group->used - 1;
637 /* Only valid to be called for used entries.
639 assert(idx <= last_in_group);
641 /* update global cache usage counters
643 cache->used_entries--;
644 cache->hit_count -= entry->hit_count;
645 cache->data_used -= entry->size;
647 /* extend the insertion window, if the entry happens to border it
649 if (idx == cache->next)
650 cache->next = entry->next;
652 if (entry->next == cache->next)
654 /* insertion window starts right behind the entry to remove
656 if (entry->previous == NO_INDEX)
658 /* remove the first entry -> insertion may start at pos 0, now */
659 cache->current_data = 0;
663 /* insertion may start right behind the previous entry */
664 entry_t *previous = get_entry(cache, entry->previous);
665 cache->current_data = ALIGN_VALUE( previous->offset
670 /* unlink it from the chain of used entries
672 if (entry->previous == NO_INDEX)
673 cache->first = entry->next;
675 get_entry(cache, entry->previous)->next = entry->next;
677 if (entry->next == NO_INDEX)
678 cache->last = entry->previous;
680 get_entry(cache, entry->next)->previous = entry->previous;
682 /* Move last entry into hole (if the removed one is not the last used).
683 * We need to do this since all used entries are at the beginning of
684 * the group's entries array.
686 if (idx < last_in_group)
688 /* copy the last used entry to the removed entry's index
690 *entry = group->entries[group->used-1];
692 /* update foreign links to new index
694 if (last_in_group == cache->next)
697 if (entry->previous == NO_INDEX)
700 get_entry(cache, entry->previous)->next = idx;
702 if (entry->next == NO_INDEX)
705 get_entry(cache, entry->next)->previous = idx;
708 /* Update the number of used entries.
713 /* Insert ENTRY into the chain of used dictionary entries. The entry's
714 * offset and size members must already have been initialized. Also,
715 * the offset must match the beginning of the insertion window.
718 insert_entry(svn_membuffer_t *cache, entry_t *entry)
720 /* the group that ENTRY belongs to plus a number of useful index values
722 apr_uint32_t idx = get_index(cache, entry);
723 apr_uint32_t group_index = idx / GROUP_SIZE;
724 entry_group_t *group = &cache->directory[group_index];
725 entry_t *next = cache->next == NO_INDEX
727 : get_entry(cache, cache->next);
729 /* The entry must start at the beginning of the insertion window.
730 * It must also be the first unused entry in the group.
732 assert(entry->offset == cache->current_data);
733 assert(idx == group_index * GROUP_SIZE + group->used);
734 cache->current_data = ALIGN_VALUE(entry->offset + entry->size);
736 /* update usage counters
738 cache->used_entries++;
739 cache->data_used += entry->size;
740 entry->hit_count = 0;
743 /* update entry chain
745 entry->next = cache->next;
746 if (cache->first == NO_INDEX)
748 /* insert as the first entry and only in the chain
750 entry->previous = NO_INDEX;
754 else if (next == NULL)
756 /* insert as the last entry in the chain.
757 * Note that it cannot also be at the beginning of the chain.
759 entry->previous = cache->last;
760 get_entry(cache, cache->last)->next = idx;
765 /* insert either at the start of a non-empty list or
766 * somewhere in the middle
768 entry->previous = next->previous;
769 next->previous = idx;
771 if (entry->previous != NO_INDEX)
772 get_entry(cache, entry->previous)->next = idx;
777 /* The current insertion position must never point outside our
780 assert(cache->current_data <= cache->data_size);
783 /* Map a KEY of 16 bytes to the CACHE and group that shall contain the respective item.
787 get_group_index(svn_membuffer_t **cache,
790 svn_membuffer_t *segment0 = *cache;
792 /* select the cache segment to use. They all have the same group_count. */
793 *cache = &segment0[key[0] & (segment0->segment_count -1)];
794 return key[1] % segment0->group_count;
797 /* Reduce the hit count of ENTRY and update the accumulated hit info
798 * in CACHE accordingly.
800 static APR_INLINE void
801 let_entry_age(svn_membuffer_t *cache, entry_t *entry)
803 apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;
805 cache->hit_count -= hits_removed;
806 entry->hit_count -= hits_removed;
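/* Example (illustration only): an entry with hit_count == 5 gets
 * hits_removed == 3, leaving entry->hit_count == 2 and reducing the
 * cache-global hit_count by 3; an entry with hit_count == 0 stays at 0.
 */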
809 /* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
810 * been initialized, yet. In that case, this group cannot contain any data. Otherwise,
811 * a non-zero value is returned.
813 static APR_INLINE unsigned char
814 is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
817 = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
818 unsigned char bit_mask
819 = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
821 return flags & bit_mask;
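/* Worked example (illustration only): with GROUP_INIT_GRANULARITY == 32,
 * group index 1000 maps to flag byte 1000 / 256 == 3 and to bit
 * (1000 / 32) % 8 == 7, i.e. bit_mask == 0x80 within that byte.
 */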
824 /* Initializes the section of the directory in CACHE that contains
825 * the entry group identified by GROUP_INDEX. */
827 initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
829 unsigned char bit_mask;
832 /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
833 apr_uint32_t first_index =
834 (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
835 apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
836 if (last_index > cache->group_count)
837 last_index = cache->group_count;
839 for (i = first_index; i < last_index; ++i)
840 cache->directory[i].used = 0;
842 /* set the "initialized" bit for these groups */
844 = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
845 cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
849 /* Given the GROUP_INDEX that shall contain an entry with the hash key
850 * TO_FIND, find that entry in the specified group.
852 * If FIND_EMPTY is not set, this function will return the one used entry
853 * that actually matches the hash or NULL, if no such entry exists.
855 * If FIND_EMPTY has been set, this function will return an entry that may
856 * be filled with new content: either the used entry that matches the hash
857 * (its old content is dropped), an unused entry, or a forcibly removed
858 * entry (if all group entries are currently in use). The entry's hash
859 * value will be initialized with TO_FIND.
862 find_entry(svn_membuffer_t *cache,
863 apr_uint32_t group_index,
864 const apr_uint64_t to_find[2],
865 svn_boolean_t find_empty)
867 entry_group_t *group;
868 entry_t *entry = NULL;
871 /* get the group that *must* contain the entry
873 group = &cache->directory[group_index];
875 /* If the entry group has not been initialized, yet, there is no data.
877 if (! is_group_initialized(cache, group_index))
881 initialize_group(cache, group_index);
882 entry = &group->entries[0];
884 /* initialize entry for the new key */
885 entry->key[0] = to_find[0];
886 entry->key[1] = to_find[1];
892 /* try to find the matching entry
894 for (i = 0; i < group->used; ++i)
895 if ( to_find[0] == group->entries[i].key[0]
896 && to_find[1] == group->entries[i].key[1])
900 entry = &group->entries[i];
902 drop_entry(cache, entry);
907 /* None found. Are we looking for a free entry?
911 /* if there is no empty entry, delete the oldest entry
913 if (group->used == GROUP_SIZE)
915 /* every entry gets the same chance of being removed.
916 * Otherwise, we free the first entry, fill it and
917 * remove it again on the next occasion without considering
918 * the other entries in this group.
920 entry = &group->entries[rand() % GROUP_SIZE];
921 for (i = 1; i < GROUP_SIZE; ++i)
922 if (entry->hit_count > group->entries[i].hit_count)
923 entry = &group->entries[i];
925 * for the entries that have not been removed,
926 * reduce their hit counts to put them at a relative
927 * disadvantage the next time.
929 for (i = 0; i < GROUP_SIZE; ++i)
930 if (entry != &group->entries[i])
931 let_entry_age(cache, &group->entries[i]);
933 drop_entry(cache, entry);
936 /* initialize entry for the new key
938 entry = &group->entries[group->used];
939 entry->key[0] = to_find[0];
940 entry->key[1] = to_find[1];
946 /* Move a surviving ENTRY from just behind the insertion window to
947 * its beginning and move the insertion window up accordingly.
950 move_entry(svn_membuffer_t *cache, entry_t *entry)
952 apr_size_t size = ALIGN_VALUE(entry->size);
954 /* This entry survived this cleansing run. Reset half of its
955 * hit count so that its removal gets more likely in the next
956 * run unless someone read / hit this entry in the meantime.
958 let_entry_age(cache, entry);
960 /* Move the entry to the start of the empty / insertion section
961 * (if it isn't there already). Size-aligned moves are legal
962 * since all offsets and block sizes share this same alignment.
963 * Size-aligned moves tend to be faster than non-aligned ones
964 * because no "odd" bytes at the end need special treatment.
966 if (entry->offset != cache->current_data)
968 memmove(cache->data + cache->current_data,
969 cache->data + entry->offset,
971 entry->offset = cache->current_data;
974 /* The insertion position is now directly behind this entry.
976 cache->current_data = entry->offset + size;
977 cache->next = entry->next;
979 /* The current insertion position must never point outside our
982 assert(cache->current_data <= cache->data_size);
985 /* If necessary, enlarge the insertion window until it is at least
986 * SIZE bytes long. SIZE must not exceed the data buffer size.
987 * Return TRUE if enough room could be found or made. A FALSE result
988 * indicates that the respective item shall not be added.
991 ensure_data_insertable(svn_membuffer_t *cache, apr_size_t size)
994 apr_uint64_t average_hit_value;
995 apr_uint64_t threshold;
997 /* accumulated size of the entries that have been removed to make
998 * room for the new one.
1000 apr_size_t drop_size = 0;
1002 /* This loop will eventually terminate because every cache entry
1003 * would get dropped eventually:
1004 * - hit counts become 0 after they got kept for 32 full scans
1005 * - larger elements get dropped as soon as their hit count is 0
1006 * - smaller and smaller elements get removed as the average
1007 * entry size drops (average drops by a factor of 8 per scan)
1008 * - after no more than 43 full scans, all elements would be removed
1010 * Since SIZE is less than a quarter of the cache size and about 50% of all
1011 * entries get removed by a scan, it is very unlikely that more
1012 * than a fractional scan will be necessary.
1016 /* first offset behind the insertion window
1018 apr_uint64_t end = cache->next == NO_INDEX
1020 : get_entry(cache, cache->next)->offset;
1022 /* leave function as soon as the insertion window is large enough
1024 if (end >= size + cache->current_data)
1027 /* Don't be too eager to cache data. Smaller items will fit into
1028 * the cache after dropping a single item. Of the larger ones, we
1029 * will only accept about 50%. They are also likely to get evicted
1030 * soon due to their notoriously low hit counts.
1032 * As long as enough similarly or even larger sized entries already
1033 * exist in the cache, many fewer insert requests will be rejected.
1035 if (2 * drop_size > size)
1038 /* try to enlarge the insertion window
1040 if (cache->next == NO_INDEX)
1042 /* We reached the end of the data buffer; restart at the beginning.
1043 * Due to the randomized nature of our LFU implementation, very
1044 * large data items may require multiple passes. Therefore, SIZE
1045 * should be restricted to significantly less than data_size.
1047 cache->current_data = 0;
1048 cache->next = cache->first;
1052 entry = get_entry(cache, cache->next);
1054 /* Keep entries that are very small. Those are likely to be data
1055 * headers or similar management structures. So, they are probably
1056 * important while not occupying much space.
1057 * But keep them only as long as they are a minority.
1059 if ( (apr_uint64_t)entry->size * cache->used_entries
1060 < cache->data_used / 8)
1062 move_entry(cache, entry);
1068 if (cache->hit_count > cache->used_entries)
1070 /* Roll the dice and determine a threshold somewhere from 0 up
1071 * to 2 times the average hit count.
1073 average_hit_value = cache->hit_count / cache->used_entries;
1074 threshold = (average_hit_value+1) * (rand() % 4096) / 2048;
1076 keep = entry->hit_count >= threshold;
1080 /* general hit count is low. Keep everything that got hit
1081 * at all and assign some 50% survival chance to everything else.
1084 keep = (entry->hit_count > 0) || (rand() & 1);
1087 /* keepers or destroyers? */
1090 move_entry(cache, entry);
1094 /* Drop the entry from the end of the insertion window, if it
1095 * has been hit less than the threshold. Otherwise, keep it and
1096 * move the insertion window one entry further.
1098 drop_size += entry->size;
1099 drop_entry(cache, entry);
1105 /* This will never be reached. But if it was, "can't insert" was the right answer.
1109 /* Mimic apr_pcalloc in APR_POOL_DEBUG mode, i.e. handle failed allocations
1110 * (e.g. OOM) properly: Allocate at least SIZE bytes from POOL and zero
1111 * the content of the allocated memory if ZERO has been set. Return NULL
1112 * upon failed allocations.
1114 * Also, satisfy our buffer alignment needs for performance reasons.
1116 static void* secure_aligned_alloc(apr_pool_t *pool,
1120 void* memory = apr_palloc(pool, size + ITEM_ALIGNMENT);
1123 memory = ALIGN_POINTER(memory);
1125 memset(memory, 0, size);
1132 svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
1133 apr_size_t total_size,
1134 apr_size_t directory_size,
1135 apr_size_t segment_count,
1136 svn_boolean_t thread_safe,
1137 svn_boolean_t allow_blocking_writes,
1143 apr_uint32_t group_count;
1144 apr_uint32_t group_init_size;
1145 apr_uint64_t data_size;
1146 apr_uint64_t max_entry_size;
1148 /* Limit the total size (only relevant if we can address > 4GB)
1150 #if APR_SIZEOF_VOIDP > 4
1151 if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
1152 total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
1155 /* Limit the segment count
1157 if (segment_count > MAX_SEGMENT_COUNT)
1158 segment_count = MAX_SEGMENT_COUNT;
1159 if (segment_count * MIN_SEGMENT_SIZE > total_size)
1160 segment_count = total_size / MIN_SEGMENT_SIZE;
1162 /* The segment count must be a power of two. Round it down as necessary.
1164 while ((segment_count & (segment_count-1)) != 0)
1165 segment_count &= segment_count-1;
1167 /* if the caller hasn't provided a reasonable segment count or the above
1168 * limitations set it to 0, derive one from the absolute cache size
1170 if (segment_count < 1)
1172 /* Determine a reasonable number of cache segments. Segmentation is
1173 * only useful for multi-threaded / multi-core servers as it reduces
1174 * lock contention on these systems.
1176 * But on these systems, we can assume that ample memory has been
1177 * allocated to this cache. Smaller caches should not be segmented
1178 * as this severely limits the maximum size of cachable items.
1180 * Segments should not be smaller than 32MB and max. cachable item
1181 * size should grow as fast as segmentation.
1184 apr_uint32_t segment_count_shift = 0;
1185 while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
1187 ++segment_count_shift;
1189 segment_count = (apr_size_t)1 << segment_count_shift;
1192 /* If we have an extremely large cache (>512 GB), the default segment
1193 * size may exceed the amount allocatable as one chunk. In that case,
1194 * increase segmentation until we are under the threshold.
1196 while ( total_size / segment_count > MAX_SEGMENT_SIZE
1197 && segment_count < MAX_SEGMENT_COUNT)
1200 /* allocate cache as an array of segments / cache objects */
1201 c = apr_palloc(pool, segment_count * sizeof(*c));
1203 /* Split total cache size into segments of equal size
1205 total_size /= segment_count;
1206 directory_size /= segment_count;
1208 /* prevent pathological conditions: ensure a certain minimum cache size
1210 if (total_size < 2 * sizeof(entry_group_t))
1211 total_size = 2 * sizeof(entry_group_t);
1213 /* adapt the dictionary size accordingly, if necessary:
1214 * It must hold at least one group and must not exceed the cache size.
1216 if (directory_size > total_size - sizeof(entry_group_t))
1217 directory_size = total_size - sizeof(entry_group_t);
1218 if (directory_size < sizeof(entry_group_t))
1219 directory_size = sizeof(entry_group_t);
1221 /* limit the data size to what we can address.
1222 * Note that this cannot overflow since all values are of size_t.
1223 * Also, make it a multiple of the item placement granularity to
1224 * prevent subtle overflows.
1226 data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;
1228 /* For cache sizes > 4TB, individual cache segments will be larger
1229 * than 16GB allowing for >4GB entries. But caching chunks larger
1230 * than 4GB is simply not supported.
1232 max_entry_size = data_size / 4 > MAX_ITEM_SIZE
1236 /* to keep the entries small, we use 32 bit indexes only
1237 * -> we need to ensure that no more than 4G entries exist.
1239 * Note that this limit could only be exceeded in a very
1240 * theoretical setup with about 1EB of cache.
1242 group_count = directory_size / sizeof(entry_group_t)
1243 >= (APR_UINT32_MAX / GROUP_SIZE)
1244 ? (APR_UINT32_MAX / GROUP_SIZE) - 1
1245 : (apr_uint32_t)(directory_size / sizeof(entry_group_t));
1247 group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
1248 for (seg = 0; seg < segment_count; ++seg)
1250 /* allocate buffers and initialize cache members
1252 c[seg].segment_count = (apr_uint32_t)segment_count;
1254 c[seg].group_count = group_count;
1255 c[seg].directory = apr_pcalloc(pool,
1256 group_count * sizeof(entry_group_t));
1258 /* Allocate and initialize directory entries as "not initialized",
1260 c[seg].group_initialized = apr_pcalloc(pool, group_init_size);
1262 c[seg].first = NO_INDEX;
1263 c[seg].last = NO_INDEX;
1264 c[seg].next = NO_INDEX;
1266 c[seg].data_size = data_size;
1267 c[seg].data = secure_aligned_alloc(pool, (apr_size_t)data_size, FALSE);
1268 c[seg].current_data = 0;
1269 c[seg].data_used = 0;
1270 c[seg].max_entry_size = max_entry_size;
1272 c[seg].used_entries = 0;
1273 c[seg].hit_count = 0;
1274 c[seg].total_reads = 0;
1275 c[seg].total_writes = 0;
1276 c[seg].total_hits = 0;
1278 /* were allocations successful?
1279 * If not, initialize a minimal cache structure.
1281 if (c[seg].data == NULL || c[seg].directory == NULL)
1283 /* We are OOM. There is no need to proceed with "half a cache".
1285 return svn_error_wrap_apr(APR_ENOMEM, "OOM");
1289 /* A lock for intra-process synchronization to the cache, or NULL if
1290 * the cache's creator doesn't feel the cache needs to be
1296 apr_status_t status =
1297 apr_thread_rwlock_create(&(c[seg].lock), pool);
1299 return svn_error_wrap_apr(status, _("Can't create cache mutex"));
1302 /* Select the behavior of write operations.
1304 c[seg].allow_blocking_writes = allow_blocking_writes;
1311 return SVN_NO_ERROR;
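/* Usage sketch (illustration only; the trailing parameter is assumed to be
 * the result pool, its declaration line not being shown above):
 *
 *   svn_membuffer_t *membuffer;
 *   SVN_ERR(svn_cache__membuffer_cache_create(&membuffer,
 *                                             0x10000000,   [256 MB total]
 *                                             0x1000000,    [16 MB directory]
 *                                             4,            [segment hint]
 *                                             TRUE,         [thread_safe]
 *                                             TRUE,         [blocking writes]
 *                                             pool));
 */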
1314 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1315 * by the hash value TO_FIND and set *FOUND accordingly.
1317 * Note: This function requires the caller to serialize access.
1318 * Don't call it directly, call entry_exists instead.
1320 static svn_error_t *
1321 entry_exists_internal(svn_membuffer_t *cache,
1322 apr_uint32_t group_index,
1323 entry_key_t to_find,
1324 svn_boolean_t *found)
1326 *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1327 return SVN_NO_ERROR;
1330 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1331 * by the hash value TO_FIND and set *FOUND accordingly.
1333 static svn_error_t *
1334 entry_exists(svn_membuffer_t *cache,
1335 apr_uint32_t group_index,
1336 entry_key_t to_find,
1337 svn_boolean_t *found)
1339 WITH_READ_LOCK(cache,
1340 entry_exists_internal(cache,
1345 return SVN_NO_ERROR;
1349 /* Try to insert the serialized item given in BUFFER with SIZE into
1350 * the group GROUP_INDEX of CACHE and uniquely identify it by hash
1353 * However, there is no guarantee that it will actually be put into
1354 * the cache. If there is already some data associated with TO_FIND,
1355 * it will be removed from the cache even if the new data cannot be cached.
1358 * Note: This function requires the caller to serialize access.
1359 * Don't call it directly, call membuffer_cache_set instead.
1361 static svn_error_t *
1362 membuffer_cache_set_internal(svn_membuffer_t *cache,
1363 entry_key_t to_find,
1364 apr_uint32_t group_index,
1367 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1368 apr_pool_t *scratch_pool)
1370 /* first, look for a previous entry for the given key */
1371 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
1373 /* if there is an old version of that entry and the new data fits into
1374 * the old spot, just re-use that space. */
1375 if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
1377 /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
1378 * lest we run into trouble with 32 bit underflow *not* treated as a negative value.
1381 cache->data_used += (apr_uint64_t)size - entry->size;
1384 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1386 /* Remember original content, type and key (hashes)
1388 SVN_ERR(store_content_part(tag, buffer, size, scratch_pool));
1389 memcpy(&entry->tag, tag, sizeof(*tag));
1394 memcpy(cache->data + entry->offset, buffer, size);
1396 cache->total_writes++;
1397 return SVN_NO_ERROR;
1400 /* if necessary, enlarge the insertion window.
1403 && cache->max_entry_size >= size
1404 && ensure_data_insertable(cache, size))
1406 /* Remove old data for this key, if that exists.
1407 * Get an unused entry for the key and initialize it with
1408 * the serialized item's (future) position within data buffer.
1410 entry = find_entry(cache, group_index, to_find, TRUE);
1412 entry->offset = cache->current_data;
1414 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1416 /* Remember original content, type and key (hashes)
1418 SVN_ERR(store_content_part(tag, buffer, size, scratch_pool));
1419 memcpy(&entry->tag, tag, sizeof(*tag));
1423 /* Link the entry properly.
1425 insert_entry(cache, entry);
1427 /* Copy the serialized item data into the cache.
1430 memcpy(cache->data + entry->offset, buffer, size);
1432 cache->total_writes++;
1436 /* if there is already an entry for this key, drop it.
1437 * Since ensure_data_insertable may have removed entries from
1438 * ENTRY's group, re-do the lookup.
1440 entry = find_entry(cache, group_index, to_find, FALSE);
1442 drop_entry(cache, entry);
1445 return SVN_NO_ERROR;
1448 /* Try to insert the ITEM and use the KEY to uniquely identify it.
1449 * However, there is no guarantee that it will actually be put into
1450 * the cache. If there is already some data associated with the KEY,
1451 * it will be removed from the cache even if the new data cannot be cached.
1454 * The SERIALIZER is called to transform the ITEM into a single,
1455 * flat data buffer. Temporary allocations may be done in POOL.
1457 static svn_error_t *
1458 membuffer_cache_set(svn_membuffer_t *cache,
1461 svn_cache__serialize_func_t serializer,
1462 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1463 apr_pool_t *scratch_pool)
1465 apr_uint32_t group_index;
1466 void *buffer = NULL;
1467 apr_size_t size = 0;
1469 /* find the entry group that will hold the key.
1471 group_index = get_group_index(&cache, key);
1473 /* Serialize the data.
1476 SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
1478 /* The actual cache data access needs to be sync'ed
1480 WITH_WRITE_LOCK(cache,
1481 membuffer_cache_set_internal(cache,
1486 DEBUG_CACHE_MEMBUFFER_TAG
1488 return SVN_NO_ERROR;
1491 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1492 * by the hash value TO_FIND. If no item has been stored for KEY,
1493 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
1494 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
1497 * Note: This function requires the caller to serialize access.
1498 * Don't call it directly, call membuffer_cache_get instead.
1500 static svn_error_t *
1501 membuffer_cache_get_internal(svn_membuffer_t *cache,
1502 apr_uint32_t group_index,
1503 entry_key_t to_find,
1505 apr_size_t *item_size,
1506 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1507 apr_pool_t *result_pool)
1512 /* The actual cache data access needs to be sync'ed
1514 entry = find_entry(cache, group_index, to_find, FALSE);
1515 cache->total_reads++;
1518 /* no such entry found.
1523 return SVN_NO_ERROR;
1526 size = ALIGN_VALUE(entry->size);
1527 *buffer = ALIGN_POINTER(apr_palloc(result_pool, size + ITEM_ALIGNMENT-1));
1528 memcpy(*buffer, (const char*)cache->data + entry->offset, size);
1530 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1532 /* Check for overlapping entries.
1534 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
1535 entry->offset + size
1536 <= get_entry(cache, entry->next)->offset);
1538 /* Compare original content, type and key (hashes)
1540 SVN_ERR(store_content_part(tag, *buffer, entry->size, result_pool));
1541 SVN_ERR(assert_equal_tags(&entry->tag, tag));
1545 /* update hit statistics
1549 cache->total_hits++;
1551 *item_size = entry->size;
1553 return SVN_NO_ERROR;
1556 /* Look for the *ITEM identified by KEY. If no item has been stored
1557 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
1558 * to re-construct the proper object from the serialized data.
1559 * Allocations will be done in POOL.
1561 static svn_error_t *
1562 membuffer_cache_get(svn_membuffer_t *cache,
1565 svn_cache__deserialize_func_t deserializer,
1566 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1567 apr_pool_t *result_pool)
1569 apr_uint32_t group_index;
1573 /* find the entry group that will hold the key.
1575 group_index = get_group_index(&cache, key);
1576 WITH_READ_LOCK(cache,
1577 membuffer_cache_get_internal(cache,
1582 DEBUG_CACHE_MEMBUFFER_TAG
1585 /* re-construct the original data object from its serialized form.
1590 return SVN_NO_ERROR;
1593 return deserializer(item, buffer, size, result_pool);
1596 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1597 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
1598 * If not found, *ITEM will be NULL.
1600 * Otherwise, the DESERIALIZER is called with that entry and the BATON
1601 * provided and will extract the desired information. The result is set
1602 * in *ITEM. Allocations will be done in POOL.
1604 * Note: This function requires the caller to serialize access.
1605 * Don't call it directly, call membuffer_cache_get_partial instead.
1607 static svn_error_t *
1608 membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
1609 apr_uint32_t group_index,
1610 entry_key_t to_find,
1612 svn_boolean_t *found,
1613 svn_cache__partial_getter_func_t deserializer,
1615 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1616 apr_pool_t *result_pool)
1618 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
1619 cache->total_reads++;
1625 return SVN_NO_ERROR;
1633 cache->total_hits++;
1635 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1637 /* Check for overlapping entries.
1639 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
1640 entry->offset + entry->size
1641 <= get_entry(cache, entry->next)->offset);
1643 /* Compare original content, type and key (hashes)
1645 SVN_ERR(store_content_part(tag,
1646 (const char*)cache->data + entry->offset,
1649 SVN_ERR(assert_equal_tags(&entry->tag, tag));
1653 return deserializer(item,
1654 (const char*)cache->data + entry->offset,
1661 /* Look for the cache entry identified by KEY. FOUND indicates
1662 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
1663 * the DESERIALIZER is called with that entry and the BATON provided
1664 * and will extract the desired information. The result is set in *ITEM.
1665 * Allocations will be done in POOL.
1667 static svn_error_t *
1668 membuffer_cache_get_partial(svn_membuffer_t *cache,
1671 svn_boolean_t *found,
1672 svn_cache__partial_getter_func_t deserializer,
1674 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1675 apr_pool_t *result_pool)
1677 apr_uint32_t group_index = get_group_index(&cache, key);
1679 WITH_READ_LOCK(cache,
1680 membuffer_cache_get_partial_internal
1681 (cache, group_index, key, item, found,
1682 deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
1685 return SVN_NO_ERROR;
1688 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1689 * by the hash value TO_FIND. If no entry has been found, the function
1690 * returns without modifying the cache.
1692 * Otherwise, FUNC is called with that entry and the BATON provided
1693 * and may modify the cache entry. Allocations will be done in POOL.
1695 * Note: This function requires the caller to serialize access.
1696 * Don't call it directly, call membuffer_cache_set_partial instead.
1698 static svn_error_t *
1699 membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
1700 apr_uint32_t group_index,
1701 entry_key_t to_find,
1702 svn_cache__partial_setter_func_t func,
1704 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1705 apr_pool_t *scratch_pool)
1707 /* cache item lookup
1709 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
1710 cache->total_reads++;
1712 /* this function is a no-op if the item is not in cache
1718 /* access the serialized cache item */
1719 char *data = (char*)cache->data + entry->offset;
1720 char *orig_data = data;
1721 apr_size_t size = entry->size;
1725 cache->total_writes++;
1727 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1729 /* Check for overlapping entries.
1731 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
1732 entry->offset + size
1733 <= get_entry(cache, entry->next)->offset);
1735 /* Compare original content, type and key (hashes)
1737 SVN_ERR(store_content_part(tag, data, size, scratch_pool));
1738 SVN_ERR(assert_equal_tags(&entry->tag, tag));
1742 /* modify it, preferably in-situ.
1744 err = func((void **)&data, &size, baton, scratch_pool);
1748 /* Something somewhere went wrong while FUNC was modifying the
1749 * changed item. Thus, it might have become invalid / corrupted.
1750 * We better drop it.
1752 drop_entry(cache, entry);
1756 /* if the modification caused a re-allocation, we need to remove
1757 * the old entry and to copy the new data back into cache.
1759 if (data != orig_data)
1761 /* Remove the old entry and try to make space for the new one.
1763 drop_entry(cache, entry);
1764 if ( (cache->max_entry_size >= size)
1765 && ensure_data_insertable(cache, size))
1767 /* Write the new entry.
1769 entry = find_entry(cache, group_index, to_find, TRUE);
1771 entry->offset = cache->current_data;
1773 memcpy(cache->data + entry->offset, data, size);
1775 /* Link the entry properly.
1777 insert_entry(cache, entry);
1781 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1783 /* Remember original content, type and key (hashes)
1785 SVN_ERR(store_content_part(tag, data, size, scratch_pool));
1786 memcpy(&entry->tag, tag, sizeof(*tag));
1792 return SVN_NO_ERROR;
1795 /* Look for the cache entry identified by KEY. If no entry
1796 * has been found, the function returns without modifying the cache.
1797 * Otherwise, FUNC is called with that entry and the BATON provided
1798 * and may modify the cache entry. Allocations will be done in POOL.
1800 static svn_error_t *
1801 membuffer_cache_set_partial(svn_membuffer_t *cache,
1803 svn_cache__partial_setter_func_t func,
1805 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1806 apr_pool_t *scratch_pool)
1808 /* cache item lookup
1810 apr_uint32_t group_index = get_group_index(&cache, key);
1811 WITH_WRITE_LOCK(cache,
1812 membuffer_cache_set_partial_internal
1813 (cache, group_index, key, func, baton,
1814 DEBUG_CACHE_MEMBUFFER_TAG
1817 /* done here -> unlock the cache
1819 return SVN_NO_ERROR;
1822 /* Implement the svn_cache__t interface on top of a shared membuffer cache.
1824 * Because membuffer caches tend to be very large, there will be rather few
1825 * of them (usually only one). Thus, the same instance shall be used as the
1826 * backend to many application-visible svn_cache__t instances. This should
1827 * also achieve global resource usage fairness.
1829 * To accommodate items from multiple resources, the individual keys must be
1830 * unique over all sources. This is achieved by simply adding a prefix key
1831 * that unambiguously identifies the item's context (e.g. path to the
1832 * respective repository). The prefix will be set upon construction of the
1833 * svn_cache__t instance.
1836 /* Internal cache structure (used in svn_cache__t.cache_internal) basically
1837 * holding the additional parameters needed to call the respective membuffer
1840 typedef struct svn_membuffer_cache_t
1842 /* this is where all our data will end up
1844 svn_membuffer_t *membuffer;
1846 /* use this conversion function when inserting an item into the membuffer cache
1848 svn_cache__serialize_func_t serializer;
1850 /* use this conversion function when reading an item from the membuffer cache
1852 svn_cache__deserialize_func_t deserializer;
1854 /* Prepend this byte sequence to any key passed to us.
1855 * This makes (very likely) our keys different from all keys used
1856 * by other svn_membuffer_cache_t instances.
1860 /* A copy of the unmodified prefix. It is being used as a user-visible
1861 * ID for this cache instance.
1863 const char* full_prefix;
1865 /* length of the keys that will be passed to us through the
1866 * svn_cache_t interface. May be APR_HASH_KEY_STRING.
1868 apr_ssize_t key_len;
1870 /* Temporary buffer containing the hash key for the current access
1872 entry_key_t combined_key;
1874 /* a pool for temporary allocations during get() and set()
1878 /* an internal counter that is used to clear the pool from time to time
1879 * but not too frequently.
1883 /* if enabled, this will serialize the access to this instance.
1885 svn_mutex__t *mutex;
1886 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1888 /* Invariant tag info for all items stored by this cache instance.
1890 char prefix_tail[PREFIX_TAIL_LEN];
1893 } svn_membuffer_cache_t;
1895 /* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should
1896 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check.
1898 #define ALLOCATIONS_PER_POOL_CLEAR 10
1901 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
1902 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
1905 combine_key(svn_membuffer_cache_t *cache,
1907 apr_ssize_t key_len)
1909 if (key_len == APR_HASH_KEY_STRING)
1910 key_len = strlen((const char *) key);
1914 apr_uint32_t data[4] = { 0 };
1915 memcpy(data, key, key_len);
1917 svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, data);
1919 else if (key_len < 32)
1921 apr_uint32_t data[8] = { 0 };
1922 memcpy(data, key, key_len);
1924 svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, data);
1926 else if (key_len < 64)
1928 apr_uint32_t data[16] = { 0 };
1929 memcpy(data, key, key_len);
1931 svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, data);
1935 apr_md5((unsigned char*)cache->combined_key, key, key_len);
1938 cache->combined_key[0] ^= cache->prefix[0];
1939 cache->combined_key[1] ^= cache->prefix[1];
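/* Example (illustration only): a 20 byte key falls into the "< 32" branch
 * above, is zero-padded into data[8] and hashed with svn__pseudo_md5_31();
 * the two resulting 64 bit words are then XOR'ed with the cache's prefix so
 * that identical keys used by different cache instances stay distinct.
 */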
1942 /* Implement svn_cache__vtable_t.get (not thread-safe)
1944 static svn_error_t *
1945 svn_membuffer_cache_get(void **value_p,
1946 svn_boolean_t *found,
1949 apr_pool_t *result_pool)
1951 svn_membuffer_cache_t *cache = cache_void;
1953 DEBUG_CACHE_MEMBUFFER_INIT_TAG
1961 return SVN_NO_ERROR;
1964 /* construct the full, i.e. globally unique, key by adding
1965 * this cache instance's prefix
1967 combine_key(cache, key, cache->key_len);
1969 /* Look the item up. */
1970 SVN_ERR(membuffer_cache_get(cache->membuffer,
1971 cache->combined_key,
1973 cache->deserializer,
1974 DEBUG_CACHE_MEMBUFFER_TAG
1978 *found = *value_p != NULL;
1979 return SVN_NO_ERROR;
1982 /* Implement svn_cache__vtable_t.set (not thread-safe)
1984 static svn_error_t *
1985 svn_membuffer_cache_set(void *cache_void,
1988 apr_pool_t *scratch_pool)
1990 svn_membuffer_cache_t *cache = cache_void;
1992 DEBUG_CACHE_MEMBUFFER_INIT_TAG
1996 return SVN_NO_ERROR;
1998 /* we do some allocations below, so increase the allocation counter
1999 * by a slightly larger amount. Free allocated memory every now and then.
2001 cache->alloc_counter += 3;
2002 if (cache->alloc_counter > ALLOCATIONS_PER_POOL_CLEAR)
2004 svn_pool_clear(cache->pool);
2005 cache->alloc_counter = 0;
2008 /* construct the full, i.e. globally unique, key by adding
2009 * this cache instance's prefix
2011 combine_key(cache, key, cache->key_len);
2013 /* (probably) add the item to the cache. But there is no real guarantee
2014 * that the item will actually be cached afterwards.
2016 return membuffer_cache_set(cache->membuffer,
2017 cache->combined_key,
2020 DEBUG_CACHE_MEMBUFFER_TAG
2024 /* Implement svn_cache__vtable_t.iter as "not implemented"
2026 static svn_error_t *
2027 svn_membuffer_cache_iter(svn_boolean_t *completed,
2029 svn_iter_apr_hash_cb_t user_cb,
2031 apr_pool_t *scratch_pool)
2033 return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
2034 _("Can't iterate a membuffer-based cache"));
2037 /* Implement svn_cache__vtable_t.get_partial (not thread-safe)
2039 static svn_error_t *
2040 svn_membuffer_cache_get_partial(void **value_p,
2041 svn_boolean_t *found,
2044 svn_cache__partial_getter_func_t func,
2046 apr_pool_t *result_pool)
2048 svn_membuffer_cache_t *cache = cache_void;
2050 DEBUG_CACHE_MEMBUFFER_INIT_TAG
2057 return SVN_NO_ERROR;
2060 combine_key(cache, key, cache->key_len);
2061 SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
2062 cache->combined_key,
2067 DEBUG_CACHE_MEMBUFFER_TAG
2070 return SVN_NO_ERROR;
2073 /* Implement svn_cache__vtable_t.set_partial (not thread-safe)
2075 static svn_error_t *
2076 svn_membuffer_cache_set_partial(void *cache_void,
2078 svn_cache__partial_setter_func_t func,
2080 apr_pool_t *scratch_pool)
2082 svn_membuffer_cache_t *cache = cache_void;
2084 DEBUG_CACHE_MEMBUFFER_INIT_TAG
2088 combine_key(cache, key, cache->key_len);
2089 SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
2090 cache->combined_key,
2093 DEBUG_CACHE_MEMBUFFER_TAG
2096 return SVN_NO_ERROR;
2099 /* Implement svn_cache__vtable_t.is_cachable
2100 * (thread-safe even without mutex)
2102 static svn_boolean_t
2103 svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
2105 /* Don't allow extremely large element sizes. Otherwise, the cache
2106 * might be thrashed by a few extremely large entries. And the size
2107 * must be small enough to be stored in a 32 bit value.
2109 svn_membuffer_cache_t *cache = cache_void;
2110 return size <= cache->membuffer->max_entry_size;
2113 /* Add statistics of SEGMENT to INFO.
2115 static svn_error_t *
2116 svn_membuffer_get_segment_info(svn_membuffer_t *segment,
2117 svn_cache__info_t *info)
2119 info->data_size += segment->data_size;
2120 info->used_size += segment->data_used;
2121 info->total_size += segment->data_size +
2122 segment->group_count * GROUP_SIZE * sizeof(entry_t);
2124 info->used_entries += segment->used_entries;
2125 info->total_entries += segment->group_count * GROUP_SIZE;
2127 return SVN_NO_ERROR;
2130 /* Implement svn_cache__vtable_t.get_info
2131 * (thread-safe even without mutex)
2133 static svn_error_t *
2134 svn_membuffer_cache_get_info(void *cache_void,
2135 svn_cache__info_t *info,
2136 svn_boolean_t reset,
2137 apr_pool_t *result_pool)
2139 svn_membuffer_cache_t *cache = cache_void;
2142 /* cache front-end specific data */
2144 info->id = apr_pstrdup(result_pool, cache->full_prefix);
2146 /* collect info from shared cache back-end */
2148 info->data_size = 0;
2149 info->used_size = 0;
2150 info->total_size = 0;
2152 info->used_entries = 0;
2153 info->total_entries = 0;
2155 for (i = 0; i < cache->membuffer->segment_count; ++i)
2157 svn_membuffer_t *segment = cache->membuffer + i;
2158 WITH_READ_LOCK(segment,
2159 svn_membuffer_get_segment_info(segment, info));
2162 return SVN_NO_ERROR;
2166 /* the v-table for membuffer-based caches (single-threaded access)
2168 static svn_cache__vtable_t membuffer_cache_vtable = {
2169 svn_membuffer_cache_get,
2170 svn_membuffer_cache_set,
2171 svn_membuffer_cache_iter,
2172 svn_membuffer_cache_is_cachable,
2173 svn_membuffer_cache_get_partial,
2174 svn_membuffer_cache_set_partial,
2175 svn_membuffer_cache_get_info
2178 /* Implement svn_cache__vtable_t.get and serialize all cache access.
2180 static svn_error_t *
2181 svn_membuffer_cache_get_synced(void **value_p,
2182 svn_boolean_t *found,
2185 apr_pool_t *result_pool)
2187 svn_membuffer_cache_t *cache = cache_void;
2188 SVN_MUTEX__WITH_LOCK(cache->mutex,
2189 svn_membuffer_cache_get(value_p,
2195 return SVN_NO_ERROR;
2198 /* Implement svn_cache__vtable_t.set and serialize all cache access.
2200 static svn_error_t *
2201 svn_membuffer_cache_set_synced(void *cache_void,
2204 apr_pool_t *scratch_pool)
2206 svn_membuffer_cache_t *cache = cache_void;
2207 SVN_MUTEX__WITH_LOCK(cache->mutex,
2208 svn_membuffer_cache_set(cache_void,
2213 return SVN_NO_ERROR;
2216 /* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
2218 static svn_error_t *
2219 svn_membuffer_cache_get_partial_synced(void **value_p,
2220 svn_boolean_t *found,
2223 svn_cache__partial_getter_func_t func,
2225 apr_pool_t *result_pool)
2227 svn_membuffer_cache_t *cache = cache_void;
2228 SVN_MUTEX__WITH_LOCK(cache->mutex,
2229 svn_membuffer_cache_get_partial(value_p,
2237 return SVN_NO_ERROR;
2240 /* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
2242 static svn_error_t *
2243 svn_membuffer_cache_set_partial_synced(void *cache_void,
2245 svn_cache__partial_setter_func_t func,
2247 apr_pool_t *scratch_pool)
2249 svn_membuffer_cache_t *cache = cache_void;
2250 SVN_MUTEX__WITH_LOCK(cache->mutex,
2251 svn_membuffer_cache_set_partial(cache_void,
2257 return SVN_NO_ERROR;
2260 /* the v-table for membuffer-based caches (with multi-threading support)
2262 static svn_cache__vtable_t membuffer_cache_synced_vtable = {
2263 svn_membuffer_cache_get_synced,
2264 svn_membuffer_cache_set_synced,
2265 svn_membuffer_cache_iter, /* no sync required */
2266 svn_membuffer_cache_is_cachable, /* no sync required */
2267 svn_membuffer_cache_get_partial_synced,
2268 svn_membuffer_cache_set_partial_synced,
2269 svn_membuffer_cache_get_info /* no sync required */
2272 /* standard serialization function for svn_stringbuf_t items.
2273 * Implements svn_cache__serialize_func_t.
2275 static svn_error_t *
2276 serialize_svn_stringbuf(void **buffer,
2277 apr_size_t *buffer_size,
2279 apr_pool_t *result_pool)
2281 svn_stringbuf_t *value_str = item;
2283 *buffer = value_str->data;
2284 *buffer_size = value_str->len + 1;
2286 return SVN_NO_ERROR;
2289 /* standard de-serialization function for svn_stringbuf_t items.
2290 * Implements svn_cache__deserialize_func_t.
2292 static svn_error_t *
2293 deserialize_svn_stringbuf(void **item,
2295 apr_size_t buffer_size,
2296 apr_pool_t *result_pool)
2298 svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));
2300 value_str->pool = result_pool;
2301 value_str->blocksize = buffer_size;
2302 value_str->data = buffer;
2303 value_str->len = buffer_size-1;
2306 return SVN_NO_ERROR;
2309 /* Construct a svn_cache__t object on top of a shared membuffer cache.
2312 svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
2313 svn_membuffer_t *membuffer,
2314 svn_cache__serialize_func_t serializer,
2315 svn_cache__deserialize_func_t deserializer,
2318 svn_boolean_t thread_safe,
2321 svn_checksum_t *checksum;
2323 /* allocate the cache header structures
2325 svn_cache__t *wrapper = apr_pcalloc(pool, sizeof(*wrapper));
2326 svn_membuffer_cache_t *cache = apr_palloc(pool, sizeof(*cache));
2328 /* initialize our internal cache header
2330 cache->membuffer = membuffer;
2331 cache->serializer = serializer
2333 : serialize_svn_stringbuf;
2334 cache->deserializer = deserializer
2336 : deserialize_svn_stringbuf;
2337 cache->full_prefix = apr_pstrdup(pool, prefix);
2338 cache->key_len = klen;
2339 cache->pool = svn_pool_create(pool);
2340 cache->alloc_counter = 0;
2342 SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, pool));
2344 /* for performance reasons, we don't actually store the full prefix but a hash of it.
2347 SVN_ERR(svn_checksum(&checksum,
2352 memcpy(cache->prefix, checksum->digest, sizeof(cache->prefix));
2354 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
2356 /* Initialize cache debugging support.
2358 get_prefix_tail(prefix, cache->prefix_tail);
2362 /* initialize the generic cache wrapper
2364 wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
2365 : &membuffer_cache_vtable;
2366 wrapper->cache_internal = cache;
2367 wrapper->error_handler = 0;
2368 wrapper->error_baton = 0;
2371 return SVN_NO_ERROR;
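/* Usage sketch (illustration only; the elided parameters are assumed to be
 * the key length, the prefix string and the result pool, and the generic
 * svn_cache__get()/svn_cache__set() wrappers are assumed as declared in
 * private/svn_cache.h; the prefix below is hypothetical):
 *
 *   svn_cache__t *cache;
 *   svn_stringbuf_t *val = svn_stringbuf_create("cached text", pool);
 *   void *result;
 *   svn_boolean_t found;
 *
 *   SVN_ERR(svn_cache__create_membuffer_cache(&cache, membuffer,
 *                                             NULL, NULL,   [stringbuf defaults]
 *                                             APR_HASH_KEY_STRING,
 *                                             "fsfs:/repos:rev-root",
 *                                             TRUE, pool));
 *   SVN_ERR(svn_cache__set(cache, "some-key", val, pool));
 *   SVN_ERR(svn_cache__get(&result, &found, cache, "some-key", pool));
 */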