2 * cache-membuffer.c: in-memory caching for Subversion
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
26 #include <apr_thread_rwlock.h>
28 #include "svn_pools.h"
29 #include "svn_checksum.h"
31 #include "svn_private_config.h"
33 #include "svn_string.h"
34 #include "private/svn_dep_compat.h"
35 #include "private/svn_mutex.h"
36 #include "private/svn_pseudo_md5.h"
39 * This svn_cache__t implementation actually consists of two parts:
40 * a shared (per-process) singleton membuffer cache instance and shallow
41 * svn_cache__t front-end instances that each use different key spaces.
42 * For data management, they all forward to the singleton membuffer cache.
44 * A membuffer cache consists of two parts:
46 * 1. A linear data buffer containing cached items in a serialized
47 * representation. There may be arbitrary gaps between entries.
48 * 2. A directory of cache entries. This is organized similar to CPU
49 * data caches: for every possible key, there is exactly one group
50 * of entries that may contain the header info for an item with
51 * that given key. The result is a GROUP_SIZE-way associative cache.
53 * Only the start addresses of these two data parts are given as native
54 * pointers. All other references are expressed as offsets to these pointers.
55 * With that design, it is relatively easy to share the same data structure
56 * between different processes and / or to persist them on disk. These
57 * out-of-process features have not been implemented, yet.
59 * The data buffer usage information is implicitly given by the directory
60 * entries. Every USED entry has a reference to the previous and the next
61 * used dictionary entry and this doubly-linked list is ordered by the
62 * offsets of their item data within the data buffer. So removing data,
63 * for instance, is done simply by unlinking it from the chain, implicitly
64 * marking the entry as well as the data buffer section previously
65 * associated to it as unused.
67 * Insertion can occur at only one, sliding position. It is marked by its
68 * offset in the data buffer plus the index of the first used entry at or
69 * behind that position. If this gap is too small to accommodate the new
70 * item, the insertion window is extended as described below. The new entry
71 * will always be inserted at the bottom end of the window and since the
72 * next used entry is known, properly sorted insertion is possible.
74 * To make the cache perform robustly in a wide range of usage scenarios,
75 * a randomized variant of LFU is used (see ensure_data_insertable for
76 * details). Every item holds a read hit counter and there is a global read
77 * hit counter. The more hits an entry has in relation to the average, the
78 * more likely it is to be kept, using a rand()-based condition. The test is
79 * applied only to the entry following the insertion window. If it doesn't
80 * get evicted, it is moved to the beginning of that window and the window is moved up accordingly.
83 * Moreover, the entry's hits get halved to make that entry more likely to
84 * be removed the next time the sliding insertion / removal window comes by.
85 * As a result, frequently used entries are unlikely to be dropped until
86 * they have not been used for a while. Also, even in a cache thrashing
87 * situation, about 50% of the content survives every time 50% of the cache
88 * gets re-written with new entries. For details on the fine-tuning involved, see the
89 * comments in ensure_data_insertable().
91 * To limit the entry size and management overhead, not the actual item keys
92 * but only their MD5 checksums will be stored. This is reasonably safe
93 * to do since users have only limited control over the full keys, even if
94 * these contain folder paths. So, it is very hard to deliberately construct
95 * colliding keys. Random checksum collisions can be shown to be extremely unlikely.
98 * All access to the cached data needs to be serialized. Because we want
99 * to scale well despite that bottleneck, we simply segment the cache into
100 * a number of independent caches (segments). Items will be multiplexed based on their hash key.
104 /* APR's read-write lock implementation on Windows is horribly inefficient.
105 * Even with very low contention a runtime overhead of 35% has been
106 * measured for 'svn-bench null-export' over ra_serf.
108 * Use a simple mutex on Windows. Because there is one mutex per segment,
109 * large machines should (and usually can) be configured with large caches
110 * such that read contention is kept low. This is basically the situation
111 * we had before 1.8.
114 # define USE_SIMPLE_MUTEX 1
116 # define USE_SIMPLE_MUTEX 0
119 /* A 16-way associative cache seems to be a good compromise between
120 * performance (worst-case lookups) and efficiency-loss due to collisions.
122 * This value may be changed to any positive integer.
124 #define GROUP_SIZE 16
126 /* For more efficient copy operations, let's align all data items properly.
127 * Must be a power of 2.
129 #define ITEM_ALIGNMENT 16
131 /* By default, don't create cache segments smaller than this value unless
132 * the total cache size itself is smaller.
134 #define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)
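/* (For reference: 0x2000000 bytes equals 32MB, matching the 32MB minimum
 * segment size mentioned in svn_cache__membuffer_cache_create below.) */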
136 /* The minimum segment size we will allow for multi-segmented caches
138 #define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)
140 /* The maximum number of segments allowed. Larger numbers reduce the size
141 * of each segment, in turn reducing the max size of a cachable item.
142 * Also, each segment gets its own lock object. The actual number supported
143 * by the OS may therefore be lower and svn_cache__membuffer_cache_create
144 * may return an error.
146 #define MAX_SEGMENT_COUNT 0x10000
148 /* As of today, APR won't allocate chunks of 4GB or more. So, limit the
149 * segment size to slightly below that.
151 #define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)
153 /* We don't mark the initialization status for every group but initialize
154 * a number of groups at once. That will allow for a very small init flags
155 * vector that is likely to fit into the CPU caches even for fairly large
156 * membuffer caches. For instance, the default of 32 means 8x32 groups per
157 * byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group x 40 index
158 * bytes/entry x 8 cache bytes/index byte = 1kB init vector / 640MB cache.
160 #define GROUP_INIT_GRANULARITY 32
162 /* Invalid index reference value. Equivalent to (apr_uint32_t)(-1)
164 #define NO_INDEX APR_UINT32_MAX
166 /* To save space in our group structure, we only use 32 bit size values
167 * and, therefore, limit the size of each entry to just below 4GB.
168 * Supporting larger items is not a good idea as the data transfer
169 * to and from the cache would block other threads for a very long time.
171 #define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
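/* For illustration: with the default ITEM_ALIGNMENT of 16, the expression
 * above evaluates to 0xfffffff0, i.e. 16 bytes short of 4GB. */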
173 /* A 16 byte key type. We use that to identify cache entries.
174 * Representing it as just two integer values allows many compilers
175 * to generate better code.
177 typedef apr_uint64_t entry_key_t[2];
179 /* Debugging / corruption detection support.
180 * If you define this macro, the getter functions will perform expensive
181 * checks on the item data, requested keys and entry types. If there is
182 * a mismatch found in any of them when being compared with the values
183 * remembered in the setter function, an error will be returned.
185 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
187 /* The prefix passed to svn_cache__create_membuffer_cache() effectively
188 * defines the type of all items stored by that cache instance. We'll take
189 * the last 7 bytes + \0 as plaintext for easy identification by the dev.
191 #define PREFIX_TAIL_LEN 8
193 /* This record will be attached to any cache entry. It tracks item data
194 * (content), key and type as hash values and is the baseline against which
195 * the getters will compare their results to detect inconsistencies.
197 typedef struct entry_tag_t
199 /* MD5 checksum over the serialized item data.
201 unsigned char content_hash [APR_MD5_DIGESTSIZE];
203 /* Hash value of the svn_cache_t instance that wrote the item
204 * (i.e. a combination of type and repository)
206 unsigned char prefix_hash [APR_MD5_DIGESTSIZE];
208 /* Note that this only covers the variable part of the key,
209 * i.e. it will be different from the full key hash used to locate the entry in the cache.
212 unsigned char key_hash [APR_MD5_DIGESTSIZE];
214 /* Last letters of the key in human readable format
215 * (ends with the type identifier, e.g. "DAG")
217 char prefix_tail[PREFIX_TAIL_LEN];
219 /* Length of the variable key part.
225 /* Per svn_cache_t instance initialization helper.
227 static void get_prefix_tail(const char *prefix, char *prefix_tail)
229 apr_size_t len = strlen(prefix);
230 apr_size_t to_copy = len > PREFIX_TAIL_LEN-1 ? PREFIX_TAIL_LEN-1 : len;
232 memset(prefix_tail, 0, PREFIX_TAIL_LEN);
233 memcpy(prefix_tail, prefix + len - to_copy, to_copy);
236 /* Initialize all members of TAG except for the content hash.
238 static svn_error_t *store_key_part(entry_tag_t *tag,
239 entry_key_t prefix_hash,
245 svn_checksum_t *checksum;
246 SVN_ERR(svn_checksum(&checksum,
252 memcpy(tag->prefix_hash, prefix_hash, sizeof(tag->prefix_hash));
253 memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));
254 memcpy(tag->prefix_tail, prefix_tail, sizeof(tag->prefix_tail));
256 tag->key_len = key_len;
261 /* Initialize the content hash member of TAG.
263 static svn_error_t* store_content_part(entry_tag_t *tag,
268 svn_checksum_t *checksum;
269 SVN_ERR(svn_checksum(&checksum,
275 memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));
280 /* Compare two tags and fail with an assertion upon differences.
282 static svn_error_t* assert_equal_tags(const entry_tag_t *lhs,
283 const entry_tag_t *rhs)
285 SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
286 sizeof(lhs->content_hash)) == 0);
287 SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
288 sizeof(lhs->prefix_hash)) == 0);
289 SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
290 sizeof(lhs->key_hash)) == 0);
291 SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
292 sizeof(lhs->prefix_tail)) == 0);
294 SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);
299 /* Recurring code snippets.
302 #define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,
304 #define DEBUG_CACHE_MEMBUFFER_TAG tag,
306 #define DEBUG_CACHE_MEMBUFFER_INIT_TAG \
308 entry_tag_t *tag = &_tag; \
309 SVN_ERR(store_key_part(tag, \
311 cache->prefix_tail, \
313 cache->key_len == APR_HASH_KEY_STRING \
314 ? strlen((const char *) key) \
320 /* Don't generate any checks if consistency checks have not been enabled.
322 #define DEBUG_CACHE_MEMBUFFER_TAG_ARG
323 #define DEBUG_CACHE_MEMBUFFER_TAG
324 #define DEBUG_CACHE_MEMBUFFER_INIT_TAG
326 #endif /* SVN_DEBUG_CACHE_MEMBUFFER */
328 /* A single dictionary entry. Since all entries will be allocated once
329 * during cache creation, those entries might be either used or unused.
330 * An entry is used if and only if it is contained in the doubly-linked
331 * list of used entries.
333 typedef struct entry_t
335 /* Identifying the data item. Only valid for used entries.
339 /* The offset of the cached item's serialized data within the data buffer.
343 /* Size of the serialized item data. May be 0.
344 * Only valid for used entries.
348 /* Number of (read) hits for this entry. Will be reset upon write.
349 * Only valid for used entries.
351 apr_uint32_t hit_count;
353 /* Reference to the next used entry in the order defined by offset.
354 * NO_INDEX indicates the end of the list; this entry must be referenced
355 * by the cache's svn_membuffer_t.last member. NO_INDEX also implies
356 * that the data buffer is not used beyond offset+size.
357 * Only valid for used entries.
361 /* Reference to the previous used entry in the order defined by offset.
362 * NO_INDEX indicates the end of the list; this entry must be referenced
363 * by the cache's svn_membuffer_t.first member.
364 * Only valid for used entries.
366 apr_uint32_t previous;
368 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
369 /* Remember type, content and key hashes.
375 /* We group dictionary entries to make this GROUP_SIZE-way associative.
377 typedef struct entry_group_t
379 /* number of entries used [0 .. USED-1] */
382 /* the actual entries */
383 entry_t entries[GROUP_SIZE];
386 /* The cache header structure.
388 struct svn_membuffer_t
390 /* Number of cache segments. Must be a power of 2.
391 Please note that this structure represents only one such segment
392 and that all segments must / will report the same values here. */
393 apr_uint32_t segment_count;
395 /* The dictionary, GROUP_SIZE * group_count entries long. Never NULL.
397 entry_group_t *directory;
399 /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
400 * Allows for efficiently marking groups as "not initialized".
402 unsigned char *group_initialized;
404 /* Size of dictionary in groups. Must be > 0.
406 apr_uint32_t group_count;
408 /* Reference to the first (defined by the order of content in the data
409 * buffer) dictionary entry used by any data item.
410 * NO_INDEX for an empty cache.
414 /* Reference to the last (defined by the order of content in the data
415 * buffer) dictionary entry used by any data item.
416 * NO_INDEX for an empty cache.
420 /* Reference to the first (defined by the order of content in the data
421 * buffer) used dictionary entry behind the insertion position
422 * (current_data). If NO_INDEX, the data buffer is free starting at the
423 * current_data offset.
428 /* Pointer to the data buffer, data_size bytes long. Never NULL.
432 /* Size of data buffer in bytes. Must be > 0.
434 apr_uint64_t data_size;
436 /* Offset in the data buffer where the next insertion shall occur.
438 apr_uint64_t current_data;
440 /* Total number of data buffer bytes in use.
442 apr_uint64_t data_used;
444 /* Largest entry size that we would accept. For total cache sizes
445 * less than 4TB (sic!), this is determined by the total cache size.
447 apr_uint64_t max_entry_size;
450 /* Number of used dictionary entries, i.e. number of cached items.
451 * In conjunction with hit_count, this is used to calculate the average
452 * hit count as part of the randomized LFU algorithm.
454 apr_uint32_t used_entries;
456 /* Sum of (read) hit counts of all used dictionary entries.
457 * In conjunction with used_entries, this is used to calculate
458 * the average hit count as part of the randomized LFU algorithm.
460 apr_uint64_t hit_count;
463 /* Total number of calls to membuffer_cache_get.
464 * Purely statistical information that may be used for profiling.
466 apr_uint64_t total_reads;
468 /* Total number of calls to membuffer_cache_set.
469 * Purely statistical information that may be used for profiling.
471 apr_uint64_t total_writes;
473 /* Total number of hits since the cache's creation.
474 * Purely statistical information that may be used for profiling.
476 apr_uint64_t total_hits;
479 /* A lock for intra-process synchronization to the cache, or NULL if
480 * the cache's creator doesn't feel the cache needs to be synchronized.
483 # if USE_SIMPLE_MUTEX
486 apr_thread_rwlock_t *lock;
489 /* If set, write accesses will wait until they get exclusive access.
490 * Otherwise, they will become no-ops if the segment is currently
491 * read-locked. Only used when LOCK is an r/w lock.
493 svn_boolean_t allow_blocking_writes;
497 /* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
499 #define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)
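/* For illustration, assuming the default ITEM_ALIGNMENT of 16:
 * ALIGN_VALUE(0) == 0, ALIGN_VALUE(1) == 16, ALIGN_VALUE(16) == 16 and
 * ALIGN_VALUE(17) == 32, i.e. VALUE gets rounded up to the next multiple
 * of ITEM_ALIGNMENT. */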
501 /* Align POINTER value to the next ITEM_ALIGNMENT boundary.
503 #define ALIGN_POINTER(pointer) ((void*)ALIGN_VALUE((apr_size_t)(char*)(pointer)))
505 /* If locking is supported for CACHE, acquire a read lock for it.
508 read_lock_cache(svn_membuffer_t *cache)
511 # if USE_SIMPLE_MUTEX
512 return svn_mutex__lock(cache->lock);
516 apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
518 return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
525 /* If locking is supported for CACHE, acquire a write lock for it.
528 write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
531 # if USE_SIMPLE_MUTEX
533 return svn_mutex__lock(cache->lock);
540 if (cache->allow_blocking_writes)
542 status = apr_thread_rwlock_wrlock(cache->lock);
546 status = apr_thread_rwlock_trywrlock(cache->lock);
547 if (SVN_LOCK_IS_BUSY(status))
550 status = APR_SUCCESS;
555 return svn_error_wrap_apr(status,
556 _("Can't write-lock cache mutex"));
564 /* If locking is supported for CACHE, acquire an unconditional write lock
568 force_write_lock_cache(svn_membuffer_t *cache)
571 # if USE_SIMPLE_MUTEX
573 return svn_mutex__lock(cache->lock);
577 apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
579 return svn_error_wrap_apr(status,
580 _("Can't write-lock cache mutex"));
587 /* If locking is supported for CACHE, release the current lock
591 unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
594 # if USE_SIMPLE_MUTEX
596 return svn_mutex__unlock(cache->lock, err);
602 apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
607 return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
615 /* If supported, guard the execution of EXPR with a read lock to cache.
616 * Macro has been modeled after SVN_MUTEX__WITH_LOCK.
618 #define WITH_READ_LOCK(cache, expr) \
620 SVN_ERR(read_lock_cache(cache)); \
621 SVN_ERR(unlock_cache(cache, (expr))); \
624 /* If supported, guard the execution of EXPR with a write lock to cache.
625 * Macro has been modeled after SVN_MUTEX__WITH_LOCK.
627 * The write lock process is complicated if we don't allow waiting for
628 * the lock: If we didn't get the lock, we may still need to remove an
629 * existing entry for the given key because that content is now stale.
630 * Once we discover such an entry, we unconditionally do a blocking
631 * wait for the write lock. In case no old content could be found, a
632 * failing lock attempt is simply a no-op and we exit the macro.
634 #define WITH_WRITE_LOCK(cache, expr) \
636 svn_boolean_t got_lock = TRUE; \
637 SVN_ERR(write_lock_cache(cache, &got_lock)); \
640 svn_boolean_t exists; \
641 SVN_ERR(entry_exists(cache, group_index, key, &exists)); \
643 SVN_ERR(force_write_lock_cache(cache)); \
647 SVN_ERR(unlock_cache(cache, (expr))); \
650 /* Resolve a dictionary entry reference, i.e. return the entry that the given index refers to.
653 static APR_INLINE entry_t *
654 get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
656 return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
659 /* Get the index (entry reference) for the given ENTRY.
661 static APR_INLINE apr_uint32_t
662 get_index(svn_membuffer_t *cache, entry_t *entry)
664 apr_size_t group_index
665 = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);
667 return (apr_uint32_t)group_index * GROUP_SIZE
668 + (apr_uint32_t)(entry - cache->directory[group_index].entries);
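/* Note: get_index is the inverse of get_entry, i.e. for every valid
 * index IDX, get_index(cache, get_entry(cache, IDX)) == IDX. */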
671 /* Remove the used ENTRY from the CACHE, i.e. make it "unused".
672 * In contrast to insertion, removal is possible for any entry.
675 drop_entry(svn_membuffer_t *cache, entry_t *entry)
677 /* the group that ENTRY belongs to plus a number of useful index values
679 apr_uint32_t idx = get_index(cache, entry);
680 apr_uint32_t group_index = idx / GROUP_SIZE;
681 entry_group_t *group = &cache->directory[group_index];
682 apr_uint32_t last_in_group = group_index * GROUP_SIZE + group->used - 1;
684 /* Only valid to be called for used entries.
686 assert(idx <= last_in_group);
688 /* update global cache usage counters
690 cache->used_entries--;
691 cache->hit_count -= entry->hit_count;
692 cache->data_used -= entry->size;
694 /* extend the insertion window, if the entry happens to border it
696 if (idx == cache->next)
697 cache->next = entry->next;
699 if (entry->next == cache->next)
701 /* insertion window starts right behind the entry to remove
703 if (entry->previous == NO_INDEX)
705 /* remove the first entry -> insertion may start at pos 0, now */
706 cache->current_data = 0;
710 /* insertion may start right behind the previous entry */
711 entry_t *previous = get_entry(cache, entry->previous);
712 cache->current_data = ALIGN_VALUE( previous->offset
717 /* unlink it from the chain of used entries
719 if (entry->previous == NO_INDEX)
720 cache->first = entry->next;
722 get_entry(cache, entry->previous)->next = entry->next;
724 if (entry->next == NO_INDEX)
725 cache->last = entry->previous;
727 get_entry(cache, entry->next)->previous = entry->previous;
729 /* Move last entry into hole (if the removed one is not the last used).
730 * We need to do this since all used entries are at the beginning of
731 * the group's entries array.
733 if (idx < last_in_group)
735 /* copy the last used entry to the removed entry's index
737 *entry = group->entries[group->used-1];
739 /* update foreign links to new index
741 if (last_in_group == cache->next)
744 if (entry->previous == NO_INDEX)
747 get_entry(cache, entry->previous)->next = idx;
749 if (entry->next == NO_INDEX)
752 get_entry(cache, entry->next)->previous = idx;
755 /* Update the number of used entries.
760 /* Insert ENTRY into the chain of used dictionary entries. The entry's
761 * offset and size members must already have been initialized. Also,
762 * the offset must match the beginning of the insertion window.
765 insert_entry(svn_membuffer_t *cache, entry_t *entry)
767 /* the group that ENTRY belongs to plus a number of useful index values
769 apr_uint32_t idx = get_index(cache, entry);
770 apr_uint32_t group_index = idx / GROUP_SIZE;
771 entry_group_t *group = &cache->directory[group_index];
772 entry_t *next = cache->next == NO_INDEX
774 : get_entry(cache, cache->next);
776 /* The entry must start at the beginning of the insertion window.
777 * It must also be the first unused entry in the group.
779 assert(entry->offset == cache->current_data);
780 assert(idx == group_index * GROUP_SIZE + group->used);
781 cache->current_data = ALIGN_VALUE(entry->offset + entry->size);
783 /* update usage counters
785 cache->used_entries++;
786 cache->data_used += entry->size;
787 entry->hit_count = 0;
790 /* update entry chain
792 entry->next = cache->next;
793 if (cache->first == NO_INDEX)
795 /* insert as the first and only entry in the chain
797 entry->previous = NO_INDEX;
801 else if (next == NULL)
803 /* insert as the last entry in the chain.
804 * Note that it cannot also be at the beginning of the chain.
806 entry->previous = cache->last;
807 get_entry(cache, cache->last)->next = idx;
812 /* insert either at the start of a non-empty list or
813 * somewhere in the middle
815 entry->previous = next->previous;
816 next->previous = idx;
818 if (entry->previous != NO_INDEX)
819 get_entry(cache, entry->previous)->next = idx;
824 /* The current insertion position must never point outside our data buffer.
827 assert(cache->current_data <= cache->data_size);
830 /* Map a KEY of 16 bytes to the CACHE and group that shall contain the respective item.
834 get_group_index(svn_membuffer_t **cache,
837 svn_membuffer_t *segment0 = *cache;
839 /* select the cache segment to use. they all have the same group_count */
840 *cache = &segment0[key[0] & (segment0->segment_count -1)];
841 return key[1] % segment0->group_count;
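/* For illustration (hypothetical numbers): with 4 segments and 1000 groups
 * per segment, a key with key[0] == 6 selects segment 6 & 3 == 2 and group
 * key[1] % 1000 within that segment. The bit mask works because
 * segment_count is always a power of two. */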
844 /* Reduce the hit count of ENTRY and update the accumulated hit info
845 * in CACHE accordingly.
847 static APR_INLINE void
848 let_entry_age(svn_membuffer_t *cache, entry_t *entry)
850 apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;
852 cache->hit_count -= hits_removed;
853 entry->hit_count -= hits_removed;
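/* Example: an entry with 7 hits loses (7+1) >> 1 == 4 of them, leaving 3;
 * an entry with a single hit drops to 0. */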
856 /* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
857 * been initialized, yet. In that case, this group cannot contain any data. Otherwise,
858 * a non-zero value is returned.
860 static APR_INLINE unsigned char
861 is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
864 = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
865 unsigned char bit_mask
866 = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
868 return flags & bit_mask;
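/* Worked example: with GROUP_INIT_GRANULARITY == 32, group index 100 is
 * covered by byte 100 / 256 == 0 of the flag vector and bit
 * (100 / 32) % 8 == 3 within that byte. */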
871 /* Initializes the section of the directory in CACHE that contains
872 * the entry group identified by GROUP_INDEX. */
874 initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
876 unsigned char bit_mask;
879 /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
880 apr_uint32_t first_index =
881 (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
882 apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
883 if (last_index > cache->group_count)
884 last_index = cache->group_count;
886 for (i = first_index; i < last_index; ++i)
887 cache->directory[i].used = 0;
889 /* set the "initialized" bit for these groups */
891 = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
892 cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
896 /* Given the GROUP_INDEX that shall contain an entry with the hash key
897 * TO_FIND, find that entry in the specified group.
899 * If FIND_EMPTY is not set, this function will return the one used entry
900 * that actually matches the hash or NULL, if no such entry exists.
902 * If FIND_EMPTY has been set, this function will return an unused entry
903 * that can receive new content. Any used entry that matches the hash gets
904 * dropped first (so its content can be replaced), and if all group entries
905 * are currently in use, one of them gets forcibly evicted. The returned
906 * entry's hash value will be initialized with TO_FIND.
909 find_entry(svn_membuffer_t *cache,
910 apr_uint32_t group_index,
911 const apr_uint64_t to_find[2],
912 svn_boolean_t find_empty)
914 entry_group_t *group;
915 entry_t *entry = NULL;
918 /* get the group that *must* contain the entry
920 group = &cache->directory[group_index];
922 /* If the entry group has not been initialized, yet, there is no data.
924 if (! is_group_initialized(cache, group_index))
928 initialize_group(cache, group_index);
929 entry = &group->entries[0];
931 /* initialize entry for the new key */
932 entry->key[0] = to_find[0];
933 entry->key[1] = to_find[1];
939 /* try to find the matching entry
941 for (i = 0; i < group->used; ++i)
942 if ( to_find[0] == group->entries[i].key[0]
943 && to_find[1] == group->entries[i].key[1])
947 entry = &group->entries[i];
949 drop_entry(cache, entry);
954 /* None found. Are we looking for a free entry?
958 /* if there is no empty entry, delete the oldest entry
960 if (group->used == GROUP_SIZE)
962 /* Give every entry a fair chance of being removed.
963 * Otherwise, we would always free the first entry, fill it and
964 * remove it again on the next occasion without ever considering
965 * the other entries in this group.
967 entry = &group->entries[rand() % GROUP_SIZE];
968 for (i = 1; i < GROUP_SIZE; ++i)
969 if (entry->hit_count > group->entries[i].hit_count)
970 entry = &group->entries[i];
972 /* for the entries that have not been removed,
973 * reduce their hit counts to put them at a relative
974 * disadvantage the next time.
976 for (i = 0; i < GROUP_SIZE; ++i)
977 if (entry != &group->entries[i])
978 let_entry_age(cache, &group->entries[i]);
980 drop_entry(cache, entry);
983 /* initialize entry for the new key
985 entry = &group->entries[group->used];
986 entry->key[0] = to_find[0];
987 entry->key[1] = to_find[1];
993 /* Move a surviving ENTRY from just behind the insertion window to
994 * its beginning and move the insertion window up accordingly.
997 move_entry(svn_membuffer_t *cache, entry_t *entry)
999 apr_size_t size = ALIGN_VALUE(entry->size);
1001 /* This entry survived this cleansing run. Halve its hit count so
1002 * that its removal becomes more likely in the next run, unless
1003 * someone reads / hits this entry in the meantime.
1005 let_entry_age(cache, entry);
1007 /* Move the entry to the start of the empty / insertion section
1008 * (if it isn't there already). Size-aligned moves are legal
1009 * since all offsets and block sizes share this same alignment.
1010 * Size-aligned moves tend to be faster than non-aligned ones
1011 * because no "odd" bytes at the end need special treatment.
1013 if (entry->offset != cache->current_data)
1015 memmove(cache->data + cache->current_data,
1016 cache->data + entry->offset,
1018 entry->offset = cache->current_data;
1021 /* The insertion position is now directly behind this entry.
1023 cache->current_data = entry->offset + size;
1024 cache->next = entry->next;
1026 /* The current insertion position must never point outside our data buffer.
1029 assert(cache->current_data <= cache->data_size);
1032 /* If necessary, enlarge the insertion window until it is at least
1033 * SIZE bytes long. SIZE must not exceed the data buffer size.
1034 * Return TRUE if enough room could be found or made. A FALSE result
1035 * indicates that the respective item shall not be added.
1037 static svn_boolean_t
1038 ensure_data_insertable(svn_membuffer_t *cache, apr_size_t size)
1041 apr_uint64_t average_hit_value;
1042 apr_uint64_t threshold;
1044 /* accumulated size of the entries that have been removed to make
1045 * room for the new one.
1047 apr_size_t drop_size = 0;
1049 /* This loop will eventually terminate because every cache entry
1050 * would get dropped eventually:
1051 * - hit counts become 0 after they got kept for 32 full scans
1052 * - larger elements get dropped as soon as their hit count is 0
1053 * - smaller and smaller elements get removed as the average
1054 * entry size drops (average drops by a factor of 8 per scan)
1055 * - after no more than 43 full scans, all elements would be removed
1057 * Since SIZE is less than a quarter of the cache size and about 50% of all
1058 * entries get removed by a scan, it is very unlikely that more
1059 * than a fractional scan will be necessary.
1063 /* first offset behind the insertion window
1065 apr_uint64_t end = cache->next == NO_INDEX
1067 : get_entry(cache, cache->next)->offset;
1069 /* leave function as soon as the insertion window is large enough
1071 if (end >= size + cache->current_data)
1074 /* Don't be too eager to cache data. Smaller items will fit into
1075 * the cache after dropping a single item. Of the larger ones, we
1076 * will only accept about 50%. They are also likely to get evicted
1077 * soon due to their notoriously low hit counts.
1079 * As long as enough similarly or even larger sized entries already
1080 * exist in the cache, far fewer insert requests will be rejected.
1082 if (2 * drop_size > size)
1085 /* try to enlarge the insertion window
1087 if (cache->next == NO_INDEX)
1089 /* We reached the end of the data buffer; restart at the beginning.
1090 * Due to the randomized nature of our LFU implementation, very
1091 * large data items may require multiple passes. Therefore, SIZE
1092 * should be restricted to significantly less than data_size.
1094 cache->current_data = 0;
1095 cache->next = cache->first;
1099 entry = get_entry(cache, cache->next);
1101 /* Keep entries that are very small. Those are likely to be data
1102 * headers or similar management structures. So, they are probably
1103 * important while not occupying much space.
1104 * But keep them only as long as they are a minority.
1106 if ( (apr_uint64_t)entry->size * cache->used_entries
1107 < cache->data_used / 8)
1109 move_entry(cache, entry);
1115 if (cache->hit_count > cache->used_entries)
1117 /* Roll the dice and determine a threshold somewhere from 0 up
1118 * to 2 times the average hit count.
1120 average_hit_value = cache->hit_count / cache->used_entries;
1121 threshold = (average_hit_value+1) * (rand() % 4096) / 2048;
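/* (rand() % 4096) / 2048 spans the range [0, 2), so THRESHOLD ends up
 * between 0 and just below 2 * (average_hit_value + 1). Hence, an entry
 * with at least 2 * (average_hit_value + 1) hits always survives, while
 * one with roughly average hits survives only about half of the time. */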
1123 keep = entry->hit_count >= threshold;
1127 /* general hit count is low. Keep everything that got hit
1128 * at all and assign some 50% survival chance to everything else.
1131 keep = (entry->hit_count > 0) || (rand() & 1);
1134 /* keepers or destroyers? */
1137 move_entry(cache, entry);
1141 /* Drop the entry from the end of the insertion window, if it
1142 * has been hit less than the threshold. Otherwise, keep it and
1143 * move the insertion window one entry further.
1145 drop_size += entry->size;
1146 drop_entry(cache, entry);
1152 /* This will never be reached. But if it was, "can't insert" was the right answer.
1156 /* Mimic apr_pcalloc in APR_POOL_DEBUG mode, i.e. handle failed allocations
1157 * (e.g. OOM) properly: Allocate at least SIZE bytes from POOL and zero
1158 * the content of the allocated memory if ZERO has been set. Return NULL
1159 * upon failed allocations.
1161 * Also, satisfy our buffer alignment needs for performance reasons.
1163 static void* secure_aligned_alloc(apr_pool_t *pool,
1167 void* memory = apr_palloc(pool, size + ITEM_ALIGNMENT);
1170 memory = ALIGN_POINTER(memory);
1172 memset(memory, 0, size);
1179 svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
1180 apr_size_t total_size,
1181 apr_size_t directory_size,
1182 apr_size_t segment_count,
1183 svn_boolean_t thread_safe,
1184 svn_boolean_t allow_blocking_writes,
1190 apr_uint32_t group_count;
1191 apr_uint32_t group_init_size;
1192 apr_uint64_t data_size;
1193 apr_uint64_t max_entry_size;
1195 /* Limit the total size (only relevant if we can address > 4GB)
1197 #if APR_SIZEOF_VOIDP > 4
1198 if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
1199 total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
1202 /* Limit the segment count
1204 if (segment_count > MAX_SEGMENT_COUNT)
1205 segment_count = MAX_SEGMENT_COUNT;
1206 if (segment_count * MIN_SEGMENT_SIZE > total_size)
1207 segment_count = total_size / MIN_SEGMENT_SIZE;
1209 /* The segment count must be a power of two. Round it down as necessary.
1211 while ((segment_count & (segment_count-1)) != 0)
1212 segment_count &= segment_count-1;
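/* Each iteration clears the lowest set bit, e.g. a requested count of
 * 12 (binary 1100) becomes 8 (binary 1000). */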
1214 /* if the caller hasn't provided a reasonable segment count or the above
1215 * limitations set it to 0, derive one from the absolute cache size
1217 if (segment_count < 1)
1219 /* Determine a reasonable number of cache segments. Segmentation is
1220 * only useful for multi-threaded / multi-core servers as it reduces
1221 * lock contention on these systems.
1223 * But on these systems, we can assume that ample memory has been
1224 * allocated to this cache. Smaller caches should not be segmented
1225 * as this severely limits the maximum size of cachable items.
1227 * Segments should not be smaller than 32MB and max. cachable item
1228 * size should grow as fast as segmentation.
1231 apr_uint32_t segment_count_shift = 0;
1232 while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
1234 ++segment_count_shift;
1236 segment_count = (apr_size_t)1 << segment_count_shift;
1239 /* If we have an extremely large cache (>512 GB), the default segment
1240 * size may exceed the amount allocatable as one chunk. In that case,
1241 * increase segmentation until we are under the threshold.
1243 while ( total_size / segment_count > MAX_SEGMENT_SIZE
1244 && segment_count < MAX_SEGMENT_COUNT)
1247 /* allocate cache as an array of segments / cache objects */
1248 c = apr_palloc(pool, segment_count * sizeof(*c));
1250 /* Split total cache size into segments of equal size
1252 total_size /= segment_count;
1253 directory_size /= segment_count;
1255 /* prevent pathological conditions: ensure a certain minimum cache size
1257 if (total_size < 2 * sizeof(entry_group_t))
1258 total_size = 2 * sizeof(entry_group_t);
1260 /* adapt the dictionary size accordingly, if necessary:
1261 * It must hold at least one group and must not exceed the cache size.
1263 if (directory_size > total_size - sizeof(entry_group_t))
1264 directory_size = total_size - sizeof(entry_group_t);
1265 if (directory_size < sizeof(entry_group_t))
1266 directory_size = sizeof(entry_group_t);
1268 /* limit the data size to what we can address.
1269 * Note that this cannot overflow since all values are of size_t.
1270 * Also, make it a multiple of the item placement granularity to
1271 * prevent subtle overflows.
1273 data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;
1275 /* For cache sizes > 4TB, individual cache segments will be larger
1276 * than 16GB allowing for >4GB entries. But caching chunks larger
1277 * than 4GB is simply not supported.
1279 max_entry_size = data_size / 4 > MAX_ITEM_SIZE
1283 /* to keep the entries small, we use 32 bit indexes only
1284 * -> we need to ensure that no more than 4G entries exist.
1286 * Note that this limit could only be exceeded in a very
1287 * theoretical setup with about 1EB of cache.
1289 group_count = directory_size / sizeof(entry_group_t)
1290 >= (APR_UINT32_MAX / GROUP_SIZE)
1291 ? (APR_UINT32_MAX / GROUP_SIZE) - 1
1292 : (apr_uint32_t)(directory_size / sizeof(entry_group_t));
1294 group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
1295 for (seg = 0; seg < segment_count; ++seg)
1297 /* allocate buffers and initialize cache members
1299 c[seg].segment_count = (apr_uint32_t)segment_count;
1301 c[seg].group_count = group_count;
1302 c[seg].directory = apr_pcalloc(pool,
1303 group_count * sizeof(entry_group_t));
1305 /* Allocate and initialize directory entries as "not initialized",
1307 c[seg].group_initialized = apr_pcalloc(pool, group_init_size);
1309 c[seg].first = NO_INDEX;
1310 c[seg].last = NO_INDEX;
1311 c[seg].next = NO_INDEX;
1313 c[seg].data_size = data_size;
1314 c[seg].data = secure_aligned_alloc(pool, (apr_size_t)data_size, FALSE);
1315 c[seg].current_data = 0;
1316 c[seg].data_used = 0;
1317 c[seg].max_entry_size = max_entry_size;
1319 c[seg].used_entries = 0;
1320 c[seg].hit_count = 0;
1321 c[seg].total_reads = 0;
1322 c[seg].total_writes = 0;
1323 c[seg].total_hits = 0;
1325 /* were allocations successful?
1326 * If not, bail out with an error.
1328 if (c[seg].data == NULL || c[seg].directory == NULL)
1330 /* We are OOM. There is no need to proceed with "half a cache".
1332 return svn_error_wrap_apr(APR_ENOMEM, "OOM");
1336 /* A lock for intra-process synchronization to the cache, or NULL if
1337 * the cache's creator doesn't feel the cache needs to be synchronized.
1340 # if USE_SIMPLE_MUTEX
1342 SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool));
1349 apr_status_t status =
1350 apr_thread_rwlock_create(&(c[seg].lock), pool);
1352 return svn_error_wrap_apr(status, _("Can't create cache mutex"));
1357 /* Select the behavior of write operations.
1359 c[seg].allow_blocking_writes = allow_blocking_writes;
1366 return SVN_NO_ERROR;
1369 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1370 * by the hash value TO_FIND and set *FOUND accordingly.
1372 * Note: This function requires the caller to serialize access.
1373 * Don't call it directly, call entry_exists instead.
1375 static svn_error_t *
1376 entry_exists_internal(svn_membuffer_t *cache,
1377 apr_uint32_t group_index,
1378 entry_key_t to_find,
1379 svn_boolean_t *found)
1381 *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1382 return SVN_NO_ERROR;
1385 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1386 * by the hash value TO_FIND and set *FOUND accordingly.
1388 static svn_error_t *
1389 entry_exists(svn_membuffer_t *cache,
1390 apr_uint32_t group_index,
1391 entry_key_t to_find,
1392 svn_boolean_t *found)
1394 WITH_READ_LOCK(cache,
1395 entry_exists_internal(cache,
1400 return SVN_NO_ERROR;
1404 /* Try to insert the serialized item given in BUFFER with SIZE into
1405 * the group GROUP_INDEX of CACHE and uniquely identify it by the hash value TO_FIND.
1408 * However, there is no guarantee that it will actually be put into
1409 * the cache. If there is already some data associated with TO_FIND,
1410 * it will be removed from the cache even if the new data cannot be inserted.
1413 * Note: This function requires the caller to serialize access.
1414 * Don't call it directly, call membuffer_cache_set instead.
1416 static svn_error_t *
1417 membuffer_cache_set_internal(svn_membuffer_t *cache,
1418 entry_key_t to_find,
1419 apr_uint32_t group_index,
1422 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1423 apr_pool_t *scratch_pool)
1425 /* first, look for a previous entry for the given key */
1426 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
1428 /* if there is an old version of that entry and the new data fits into
1429 * the old spot, just re-use that space. */
1430 if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
1432 /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
1433 * lest we run into trouble with 32 bit underflow *not* treated as a negative value.
1436 cache->data_used += (apr_uint64_t)size - entry->size;
1439 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1441 /* Remember original content, type and key (hashes)
1443 SVN_ERR(store_content_part(tag, buffer, size, scratch_pool));
1444 memcpy(&entry->tag, tag, sizeof(*tag));
1449 memcpy(cache->data + entry->offset, buffer, size);
1451 cache->total_writes++;
1452 return SVN_NO_ERROR;
1455 /* if necessary, enlarge the insertion window.
1458 && cache->max_entry_size >= size
1459 && ensure_data_insertable(cache, size))
1461 /* Remove old data for this key, if that exists.
1462 * Get an unused entry for the key and initialize it with
1463 * the serialized item's (future) position within data buffer.
1465 entry = find_entry(cache, group_index, to_find, TRUE);
1467 entry->offset = cache->current_data;
1469 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1471 /* Remember original content, type and key (hashes)
1473 SVN_ERR(store_content_part(tag, buffer, size, scratch_pool));
1474 memcpy(&entry->tag, tag, sizeof(*tag));
1478 /* Link the entry properly.
1480 insert_entry(cache, entry);
1482 /* Copy the serialized item data into the cache.
1485 memcpy(cache->data + entry->offset, buffer, size);
1487 cache->total_writes++;
1491 /* if there is already an entry for this key, drop it.
1492 * Since ensure_data_insertable may have removed entries from
1493 * ENTRY's group, re-do the lookup.
1495 entry = find_entry(cache, group_index, to_find, FALSE);
1497 drop_entry(cache, entry);
1500 return SVN_NO_ERROR;
1503 /* Try to insert the ITEM and use the KEY to uniquely identify it.
1504 * However, there is no guarantee that it will actually be put into
1505 * the cache. If there is already some data associated with the KEY,
1506 * it will be removed from the cache even if the new data cannot be inserted.
1509 * The SERIALIZER is called to transform the ITEM into a single,
1510 * flat data buffer. Temporary allocations may be done in POOL.
1512 static svn_error_t *
1513 membuffer_cache_set(svn_membuffer_t *cache,
1516 svn_cache__serialize_func_t serializer,
1517 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1518 apr_pool_t *scratch_pool)
1520 apr_uint32_t group_index;
1521 void *buffer = NULL;
1522 apr_size_t size = 0;
1524 /* find the entry group that will hold the key.
1526 group_index = get_group_index(&cache, key);
1528 /* Serialize the data.
1531 SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
1533 /* The actual cache data access needs to be sync'ed
1535 WITH_WRITE_LOCK(cache,
1536 membuffer_cache_set_internal(cache,
1541 DEBUG_CACHE_MEMBUFFER_TAG
1543 return SVN_NO_ERROR;
1546 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1547 * by the hash value TO_FIND. If no item has been stored for KEY,
1548 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
1549 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will be done in RESULT_POOL.
1552 * Note: This function requires the caller to serialize access.
1553 * Don't call it directly, call membuffer_cache_get instead.
1555 static svn_error_t *
1556 membuffer_cache_get_internal(svn_membuffer_t *cache,
1557 apr_uint32_t group_index,
1558 entry_key_t to_find,
1560 apr_size_t *item_size,
1561 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1562 apr_pool_t *result_pool)
1567 /* The actual cache data access needs to be sync'ed
1569 entry = find_entry(cache, group_index, to_find, FALSE);
1570 cache->total_reads++;
1573 /* no such entry found.
1578 return SVN_NO_ERROR;
1581 size = ALIGN_VALUE(entry->size);
1582 *buffer = ALIGN_POINTER(apr_palloc(result_pool, size + ITEM_ALIGNMENT-1));
1583 memcpy(*buffer, (const char*)cache->data + entry->offset, size);
1585 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1587 /* Check for overlapping entries.
1589 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
1590 entry->offset + size
1591 <= get_entry(cache, entry->next)->offset);
1593 /* Compare original content, type and key (hashes)
1595 SVN_ERR(store_content_part(tag, *buffer, entry->size, result_pool));
1596 SVN_ERR(assert_equal_tags(&entry->tag, tag));
1600 /* update hit statistics
1604 cache->total_hits++;
1606 *item_size = entry->size;
1608 return SVN_NO_ERROR;
1611 /* Look for the *ITEM identified by KEY. If no item has been stored
1612 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
1613 * to re-construct the proper object from the serialized data.
1614 * Allocations will be done in POOL.
1616 static svn_error_t *
1617 membuffer_cache_get(svn_membuffer_t *cache,
1620 svn_cache__deserialize_func_t deserializer,
1621 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1622 apr_pool_t *result_pool)
1624 apr_uint32_t group_index;
1628 /* find the entry group that will hold the key.
1630 group_index = get_group_index(&cache, key);
1631 WITH_READ_LOCK(cache,
1632 membuffer_cache_get_internal(cache,
1637 DEBUG_CACHE_MEMBUFFER_TAG
1640 /* re-construct the original data object from its serialized form.
1645 return SVN_NO_ERROR;
1648 return deserializer(item, buffer, size, result_pool);
1651 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1652 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
1653 * If not found, *ITEM will be NULL.
1655 * Otherwise, the DESERIALIZER is called with that entry and the BATON
1656 * provided and will extract the desired information. The result is set
1657 * in *ITEM. Allocations will be done in POOL.
1659 * Note: This function requires the caller to serialize access.
1660 * Don't call it directly, call membuffer_cache_get_partial instead.
1662 static svn_error_t *
1663 membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
1664 apr_uint32_t group_index,
1665 entry_key_t to_find,
1667 svn_boolean_t *found,
1668 svn_cache__partial_getter_func_t deserializer,
1670 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1671 apr_pool_t *result_pool)
1673 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
1674 cache->total_reads++;
1680 return SVN_NO_ERROR;
1688 cache->total_hits++;
1690 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1692 /* Check for overlapping entries.
1694 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
1695 entry->offset + entry->size
1696 <= get_entry(cache, entry->next)->offset);
1698 /* Compare original content, type and key (hashes)
1700 SVN_ERR(store_content_part(tag,
1701 (const char*)cache->data + entry->offset,
1704 SVN_ERR(assert_equal_tags(&entry->tag, tag));
1708 return deserializer(item,
1709 (const char*)cache->data + entry->offset,
1716 /* Look for the cache entry identified by KEY. FOUND indicates
1717 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
1718 * the DESERIALIZER is called with that entry and the BATON provided
1719 * and will extract the desired information. The result is set in *ITEM.
1720 * Allocations will be done in POOL.
1722 static svn_error_t *
1723 membuffer_cache_get_partial(svn_membuffer_t *cache,
1726 svn_boolean_t *found,
1727 svn_cache__partial_getter_func_t deserializer,
1729 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1730 apr_pool_t *result_pool)
1732 apr_uint32_t group_index = get_group_index(&cache, key);
1734 WITH_READ_LOCK(cache,
1735 membuffer_cache_get_partial_internal
1736 (cache, group_index, key, item, found,
1737 deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
1740 return SVN_NO_ERROR;
1743 /* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1744 * by the hash value TO_FIND. If no entry has been found, the function
1745 * returns without modifying the cache.
1747 * Otherwise, FUNC is called with that entry and the BATON provided
1748 * and may modify the cache entry. Allocations will be done in POOL.
1750 * Note: This function requires the caller to serialize access.
1751 * Don't call it directly, call membuffer_cache_set_partial instead.
1753 static svn_error_t *
1754 membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
1755 apr_uint32_t group_index,
1756 entry_key_t to_find,
1757 svn_cache__partial_setter_func_t func,
1759 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1760 apr_pool_t *scratch_pool)
1762 /* cache item lookup
1764 entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
1765 cache->total_reads++;
1767 /* this function is a no-op if the item is not in cache
1773 /* access the serialized cache item */
1774 char *data = (char*)cache->data + entry->offset;
1775 char *orig_data = data;
1776 apr_size_t size = entry->size;
1780 cache->total_writes++;
1782 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1784 /* Check for overlapping entries.
1786 SVN_ERR_ASSERT(entry->next == NO_INDEX ||
1787 entry->offset + size
1788 <= get_entry(cache, entry->next)->offset);
1790 /* Compare original content, type and key (hashes)
1792 SVN_ERR(store_content_part(tag, data, size, scratch_pool));
1793 SVN_ERR(assert_equal_tags(&entry->tag, tag));
1797 /* modify it, preferably in-situ.
1799 err = func((void **)&data, &size, baton, scratch_pool);
1803 /* Something somewhere went wrong while FUNC was modifying the
1804 * changed item. Thus, it might have become invalid / corrupted.
1805 * We had better drop it.
1807 drop_entry(cache, entry);
1811 /* if the modification caused a re-allocation, we need to remove
1812 * the old entry and copy the new data back into the cache.
1814 if (data != orig_data)
1816 /* Remove the old entry and try to make space for the new one.
1818 drop_entry(cache, entry);
1819 if ( (cache->max_entry_size >= size)
1820 && ensure_data_insertable(cache, size))
1822 /* Write the new entry.
1824 entry = find_entry(cache, group_index, to_find, TRUE);
1826 entry->offset = cache->current_data;
1828 memcpy(cache->data + entry->offset, data, size);
1830 /* Link the entry properly.
1832 insert_entry(cache, entry);
1836 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1838 /* Remember original content, type and key (hashes)
1840 SVN_ERR(store_content_part(tag, data, size, scratch_pool));
1841 memcpy(&entry->tag, tag, sizeof(*tag));
1847 return SVN_NO_ERROR;
1850 /* Look for the cache entry identified by KEY. If no entry
1851 * has been found, the function returns without modifying the cache.
1852 * Otherwise, FUNC is called with that entry and the BATON provided
1853 * and may modify the cache entry. Allocations will be done in POOL.
1855 static svn_error_t *
1856 membuffer_cache_set_partial(svn_membuffer_t *cache,
1858 svn_cache__partial_setter_func_t func,
1860 DEBUG_CACHE_MEMBUFFER_TAG_ARG
1861 apr_pool_t *scratch_pool)
1863 /* cache item lookup
1865 apr_uint32_t group_index = get_group_index(&cache, key);
1866 WITH_WRITE_LOCK(cache,
1867 membuffer_cache_set_partial_internal
1868 (cache, group_index, key, func, baton,
1869 DEBUG_CACHE_MEMBUFFER_TAG
1872 /* done here -> unlock the cache
1874 return SVN_NO_ERROR;
1877 /* Implement the svn_cache__t interface on top of a shared membuffer cache.
1879 * Because membuffer caches tend to be very large, there will be rather few
1880 * of them (usually only one). Thus, the same instance shall be used as the
1881 * backend to many application-visible svn_cache__t instances. This should
1882 * also achieve global resource usage fairness.
1884 * To accommodate items from multiple resources, the individual keys must be
1885 * unique over all sources. This is achieved by simply adding a prefix key
1886 * that unambiguously identifies the item's context (e.g. path to the
1887 * respective repository). The prefix will be set upon construction of the
1888 * svn_cache__t instance.
1891 /* Internal cache structure (used in svn_cache__t.cache_internal) basically
1892 * holding the additional parameters needed to call the respective membuffer
1895 typedef struct svn_membuffer_cache_t
1897 /* this is where all our data will end up
1899 svn_membuffer_t *membuffer;
1901 /* use this conversion function when inserting an item into the memcache
1903 svn_cache__serialize_func_t serializer;
1905 /* use this conversion function when reading an item from the memcache
1907 svn_cache__deserialize_func_t deserializer;
1909 /* Prepend this byte sequence to any key passed to us.
1910 * This (very likely) makes our keys different from all keys used
1911 * by other svn_membuffer_cache_t instances.
1915 /* A copy of the unmodified prefix. It is being used as a user-visible
1916 * ID for this cache instance.
1918 const char* full_prefix;
1920 /* length of the keys that will be passed to us through the
1921 * svn_cache_t interface. May be APR_HASH_KEY_STRING.
1923 apr_ssize_t key_len;
1925 /* Temporary buffer containing the hash key for the current access
1927 entry_key_t combined_key;
1929 /* a pool for temporary allocations during get() and set()
1933 /* an internal counter that is used to clear the pool from time to time
1934 * but not too frequently.
1938 /* if enabled, this will serialize the access to this instance.
1940 svn_mutex__t *mutex;
1941 #ifdef SVN_DEBUG_CACHE_MEMBUFFER
1943 /* Invariant tag info for all items stored by this cache instance.
1945 char prefix_tail[PREFIX_TAIL_LEN];
1948 } svn_membuffer_cache_t;
1950 /* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should
1951 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check.
1953 #define ALLOCATIONS_PER_POOL_CLEAR 10
1956 /* Basically calculate a hash value for KEY of length KEY_LEN, combine it
1957 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
1960 combine_key(svn_membuffer_cache_t *cache,
1962 apr_ssize_t key_len)
1964 if (key_len == APR_HASH_KEY_STRING)
1965 key_len = strlen((const char *) key);
1969 apr_uint32_t data[4] = { 0 };
1970 memcpy(data, key, key_len);
1972 svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, data);
1974 else if (key_len < 32)
1976 apr_uint32_t data[8] = { 0 };
1977 memcpy(data, key, key_len);
1979 svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, data);
1981 else if (key_len < 64)
1983 apr_uint32_t data[16] = { 0 };
1984 memcpy(data, key, key_len);
1986 svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, data);
1990 apr_md5((unsigned char*)cache->combined_key, key, key_len);
1993 cache->combined_key[0] ^= cache->prefix[0];
1994 cache->combined_key[1] ^= cache->prefix[1];
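/* XOR-ing in the prefix hash binds the combined key to this particular
 * cache instance, so identical user keys passed to different front-end
 * caches (very likely) map to distinct membuffer keys. */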
1997 /* Implement svn_cache__vtable_t.get (not thread-safe)
1999 static svn_error_t *
2000 svn_membuffer_cache_get(void **value_p,
2001 svn_boolean_t *found,
2004 apr_pool_t *result_pool)
2006 svn_membuffer_cache_t *cache = cache_void;
2008 DEBUG_CACHE_MEMBUFFER_INIT_TAG
2016 return SVN_NO_ERROR;
2019 /* construct the full, i.e. globally unique, key by adding
2020 * this cache instance's prefix
2022 combine_key(cache, key, cache->key_len);
2024 /* Look the item up. */
2025 SVN_ERR(membuffer_cache_get(cache->membuffer,
2026 cache->combined_key,
2028 cache->deserializer,
2029 DEBUG_CACHE_MEMBUFFER_TAG
2033 *found = *value_p != NULL;
2034 return SVN_NO_ERROR;
2037 /* Implement svn_cache__vtable_t.set (not thread-safe)
2039 static svn_error_t *
2040 svn_membuffer_cache_set(void *cache_void,
2043 apr_pool_t *scratch_pool)
2045 svn_membuffer_cache_t *cache = cache_void;
2047 DEBUG_CACHE_MEMBUFFER_INIT_TAG
2051 return SVN_NO_ERROR;
2053 /* we do some allocations below, so increase the allocation counter
2054 * by a slightly larger amount. Free allocated memory every now and then.
2056 cache->alloc_counter += 3;
2057 if (cache->alloc_counter > ALLOCATIONS_PER_POOL_CLEAR)
2059 svn_pool_clear(cache->pool);
2060 cache->alloc_counter = 0;
2063 /* construct the full, i.e. globally unique, key by adding
2064 * this cache instance's prefix
2066 combine_key(cache, key, cache->key_len);
2068 /* (probably) add the item to the cache. But there is no real guarantee
2069 * that the item will actually be cached afterwards.
2071 return membuffer_cache_set(cache->membuffer,
2072 cache->combined_key,
2075 DEBUG_CACHE_MEMBUFFER_TAG
/* Implement svn_cache__vtable_t.iter as "not implemented"
 */
static svn_error_t *
svn_membuffer_cache_iter(svn_boolean_t *completed,
                         void *cache_void,
                         svn_iter_apr_hash_cb_t user_cb,
                         void *user_baton,
                         apr_pool_t *scratch_pool)
{
  return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
                          _("Can't iterate a membuffer-based cache"));
}
/* Implement svn_cache__vtable_t.get_partial (not thread-safe)
 */
static svn_error_t *
svn_membuffer_cache_get_partial(void **value_p,
                                svn_boolean_t *found,
                                void *cache_void,
                                const void *key,
                                svn_cache__partial_getter_func_t func,
                                void *baton,
                                apr_pool_t *result_pool)
{
  svn_membuffer_cache_t *cache = cache_void;
  DEBUG_CACHE_MEMBUFFER_INIT_TAG

  if (key == NULL)
    {
      *value_p = NULL;
      *found = FALSE;
      return SVN_NO_ERROR;
    }

  combine_key(cache, key, cache->key_len);
  SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
                                      cache->combined_key,
                                      value_p, found, func, baton,
                                      DEBUG_CACHE_MEMBUFFER_TAG
                                      result_pool));
  return SVN_NO_ERROR;
}
/* Implement svn_cache__vtable_t.set_partial (not thread-safe)
 */
static svn_error_t *
svn_membuffer_cache_set_partial(void *cache_void,
                                const void *key,
                                svn_cache__partial_setter_func_t func,
                                void *baton,
                                apr_pool_t *scratch_pool)
{
  svn_membuffer_cache_t *cache = cache_void;
  DEBUG_CACHE_MEMBUFFER_INIT_TAG

  if (key != NULL)
    {
      combine_key(cache, key, cache->key_len);
      SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
                                          cache->combined_key,
                                          func, baton,
                                          DEBUG_CACHE_MEMBUFFER_TAG
                                          scratch_pool));
    }
  return SVN_NO_ERROR;
}
/* Implement svn_cache__vtable_t.is_cachable
 * (thread-safe even without mutex)
 */
static svn_boolean_t
svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
{
  /* Don't allow extremely large element sizes. Otherwise, the cache
   * might be thrashed by a few extremely large entries. And the size
   * must be small enough to be stored in a 32 bit value.
   */
  svn_membuffer_cache_t *cache = cache_void;
  return size <= cache->membuffer->max_entry_size;
}
/* Add statistics of SEGMENT to INFO.
 */
static svn_error_t *
svn_membuffer_get_segment_info(svn_membuffer_t *segment,
                               svn_cache__info_t *info)
{
  info->data_size += segment->data_size;
  info->used_size += segment->data_used;
  info->total_size += segment->data_size +
      segment->group_count * GROUP_SIZE * sizeof(entry_t);

  info->used_entries += segment->used_entries;
  info->total_entries += segment->group_count * GROUP_SIZE;

  return SVN_NO_ERROR;
}
/* Implement svn_cache__vtable_t.get_info
 * (thread-safe even without mutex)
 */
static svn_error_t *
svn_membuffer_cache_get_info(void *cache_void,
                             svn_cache__info_t *info,
                             svn_boolean_t reset,
                             apr_pool_t *result_pool)
{
  svn_membuffer_cache_t *cache = cache_void;
  apr_uint32_t i;

  /* cache front-end specific data */
  info->id = apr_pstrdup(result_pool, cache->full_prefix);

  /* collect info from shared cache back-end */
  info->data_size = 0;
  info->used_size = 0;
  info->total_size = 0;
  info->used_entries = 0;
  info->total_entries = 0;

  for (i = 0; i < cache->membuffer->segment_count; ++i)
    {
      svn_membuffer_t *segment = cache->membuffer + i;
      WITH_READ_LOCK(segment,
                     svn_membuffer_get_segment_info(segment, info));
    }

  return SVN_NO_ERROR;
}
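/* Illustrative sketch, deliberately kept out of the build: reading the
 * aggregated statistics through the generic cache front-end.  CACHE and
 * POOL are hypothetical; the fields printed are the ones filled in by
 * svn_membuffer_cache_get_info above, summed over all segments.
 */
#if 0
  svn_cache__info_t info;
  SVN_ERR(svn_cache__get_info(cache, &info, FALSE /* reset */, pool));
  printf("%s: %" APR_UINT64_T_FMT " of %" APR_UINT64_T_FMT " entries used\n",
         info.id, info.used_entries, info.total_entries);
#endif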
/* the v-table for membuffer-based caches (single-threaded access)
 */
static svn_cache__vtable_t membuffer_cache_vtable = {
  svn_membuffer_cache_get,
  svn_membuffer_cache_set,
  svn_membuffer_cache_iter,
  svn_membuffer_cache_is_cachable,
  svn_membuffer_cache_get_partial,
  svn_membuffer_cache_set_partial,
  svn_membuffer_cache_get_info
};
/* Implement svn_cache__vtable_t.get and serialize all cache access.
 */
static svn_error_t *
svn_membuffer_cache_get_synced(void **value_p,
                               svn_boolean_t *found,
                               void *cache_void,
                               const void *key,
                               apr_pool_t *result_pool)
{
  svn_membuffer_cache_t *cache = cache_void;
  SVN_MUTEX__WITH_LOCK(cache->mutex,
                       svn_membuffer_cache_get(value_p, found, cache_void,
                                               key, result_pool));
  return SVN_NO_ERROR;
}
/* Implement svn_cache__vtable_t.set and serialize all cache access.
 */
static svn_error_t *
svn_membuffer_cache_set_synced(void *cache_void,
                               const void *key,
                               void *value,
                               apr_pool_t *scratch_pool)
{
  svn_membuffer_cache_t *cache = cache_void;
  SVN_MUTEX__WITH_LOCK(cache->mutex,
                       svn_membuffer_cache_set(cache_void, key, value,
                                               scratch_pool));
  return SVN_NO_ERROR;
}
/* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
 */
static svn_error_t *
svn_membuffer_cache_get_partial_synced(void **value_p,
                                       svn_boolean_t *found,
                                       void *cache_void,
                                       const void *key,
                                       svn_cache__partial_getter_func_t func,
                                       void *baton,
                                       apr_pool_t *result_pool)
{
  svn_membuffer_cache_t *cache = cache_void;
  SVN_MUTEX__WITH_LOCK(cache->mutex,
                       svn_membuffer_cache_get_partial(value_p, found,
                                                       cache_void, key,
                                                       func, baton,
                                                       result_pool));
  return SVN_NO_ERROR;
}
/* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
 */
static svn_error_t *
svn_membuffer_cache_set_partial_synced(void *cache_void,
                                       const void *key,
                                       svn_cache__partial_setter_func_t func,
                                       void *baton,
                                       apr_pool_t *scratch_pool)
{
  svn_membuffer_cache_t *cache = cache_void;
  SVN_MUTEX__WITH_LOCK(cache->mutex,
                       svn_membuffer_cache_set_partial(cache_void, key,
                                                       func, baton,
                                                       scratch_pool));
  return SVN_NO_ERROR;
}
/* the v-table for membuffer-based caches with multi-threading support
 */
static svn_cache__vtable_t membuffer_cache_synced_vtable = {
  svn_membuffer_cache_get_synced,
  svn_membuffer_cache_set_synced,
  svn_membuffer_cache_iter,               /* no sync required */
  svn_membuffer_cache_is_cachable,        /* no sync required */
  svn_membuffer_cache_get_partial_synced,
  svn_membuffer_cache_set_partial_synced,
  svn_membuffer_cache_get_info            /* no sync required */
};
/* standard serialization function for svn_stringbuf_t items.
 * Implements svn_cache__serialize_func_t.
 */
static svn_error_t *
serialize_svn_stringbuf(void **buffer,
                        apr_size_t *buffer_size,
                        void *item,
                        apr_pool_t *result_pool)
{
  svn_stringbuf_t *value_str = item;

  *buffer = value_str->data;
  *buffer_size = value_str->len + 1;

  return SVN_NO_ERROR;
}
/* standard de-serialization function for svn_stringbuf_t items.
 * Implements svn_cache__deserialize_func_t.
 */
static svn_error_t *
deserialize_svn_stringbuf(void **item,
                          void *buffer,
                          apr_size_t buffer_size,
                          apr_pool_t *result_pool)
{
  svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));

  value_str->pool = result_pool;
  value_str->blocksize = buffer_size;
  value_str->data = buffer;
  value_str->len = buffer_size - 1;
  *item = value_str;

  return SVN_NO_ERROR;
}
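/* Illustrative sketch, deliberately kept out of the build: the round trip
 * performed by the two default conversion functions above.  Serialization
 * simply exposes the NUL-terminated character buffer (len + 1 bytes);
 * de-serialization wraps a data buffer of that size into a fresh
 * svn_stringbuf_t.  All variables are hypothetical.
 */
#if 0
  svn_stringbuf_t *original = svn_stringbuf_create("some cached text", pool);
  void *buffer;
  apr_size_t buffer_size;
  void *restored;

  SVN_ERR(serialize_svn_stringbuf(&buffer, &buffer_size, original, pool));
  /* buffer_size == original->len + 1; buffer aliases original->data */

  SVN_ERR(deserialize_svn_stringbuf(&restored, buffer, buffer_size, pool));
  /* ((svn_stringbuf_t *)restored)->len == original->len */
#endif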
/* Construct a svn_cache__t object on top of a shared membuffer cache.
 */
svn_error_t *
svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
                                  svn_membuffer_t *membuffer,
                                  svn_cache__serialize_func_t serializer,
                                  svn_cache__deserialize_func_t deserializer,
                                  apr_ssize_t klen,
                                  const char *prefix,
                                  svn_boolean_t thread_safe,
                                  apr_pool_t *pool)
{
  svn_checksum_t *checksum;

  /* allocate the cache header structures
   */
  svn_cache__t *wrapper = apr_pcalloc(pool, sizeof(*wrapper));
  svn_membuffer_cache_t *cache = apr_palloc(pool, sizeof(*cache));

  /* initialize our internal cache header
   */
  cache->membuffer = membuffer;
  cache->serializer = serializer
                    ? serializer
                    : serialize_svn_stringbuf;
  cache->deserializer = deserializer
                      ? deserializer
                      : deserialize_svn_stringbuf;
  cache->full_prefix = apr_pstrdup(pool, prefix);
  cache->key_len = klen;
  cache->pool = svn_pool_create(pool);
  cache->alloc_counter = 0;

  SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, pool));

  /* for performance reasons, we don't actually store the full prefix but
   * only a hash value of it
   */
  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       prefix,
                       strlen(prefix),
                       pool));
  memcpy(cache->prefix, checksum->digest, sizeof(cache->prefix));

#ifdef SVN_DEBUG_CACHE_MEMBUFFER

  /* Initialize cache debugging support.
   */
  get_prefix_tail(prefix, cache->prefix_tail);

#endif

  /* initialize the generic cache wrapper
   */
  wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
                                : &membuffer_cache_vtable;
  wrapper->cache_internal = cache;
  wrapper->error_handler = 0;
  wrapper->error_baton = 0;

  *cache_p = wrapper;
  return SVN_NO_ERROR;
}
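/* Illustrative sketch, deliberately kept out of the build: creating a
 * thread-safe front-end on top of the shared membuffer singleton and using
 * it through the generic svn_cache__t API.  The prefix string, the POOL
 * variable and the key are hypothetical; passing NULL for serializer and
 * deserializer selects the svn_stringbuf_t defaults above.
 */
#if 0
  svn_cache__t *my_cache;
  svn_stringbuf_t *value;
  svn_boolean_t found;

  SVN_ERR(svn_cache__create_membuffer_cache(
            &my_cache,
            svn_cache__get_global_membuffer_cache(),
            NULL, NULL,                /* default stringbuf (de-)serializer */
            APR_HASH_KEY_STRING,       /* NUL-terminated string keys */
            "my-app:example-cache:",   /* unique key space prefix */
            TRUE,                      /* thread_safe */
            pool));

  SVN_ERR(svn_cache__set(my_cache, "some-key",
                         svn_stringbuf_create("some value", pool), pool));
  SVN_ERR(svn_cache__get((void **)&value, &found, my_cache, "some-key", pool));
#endif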