2 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions
6 * 1. Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * 2. Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * 4. Neither the name of the University nor the names of its contributors
12 * may be used to endorse or promote products derived from this software
13 * without specific prior written permission.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include "opt_vmpage.h"
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker_set.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
39 #include <sys/sysctl.h>
41 #include <sys/vmmeter.h>
42 #include <sys/vnode.h>
45 #include <vm/vm_param.h>
46 #include <vm/vm_kern.h>
47 #include <vm/vm_object.h>
48 #include <vm/vm_page.h>
49 #include <vm/vm_pageout.h>
50 #include <vm/vm_pager.h>
51 #include <vm/vm_extern.h>
/*
 * File-scope declarations for the page-queue code: forward declarations,
 * the queue table, the cache parameters used for page coloring, and the
 * read-only sysctl knobs that publish those values.
 */
53 static void vm_coloring_init(void);
/*
 * Prototype only; no definition is visible in this listing.  It is called
 * from vm_coloring_init() with the addresses of pq_cachesize and
 * pq_cachenways.
 */
54 void setPQL2(int *const size, int *const ways);
/* Table of page queues, indexed by queue number; PQ_MAXCOUNT entries. */
56 struct vpgqueues vm_page_queues[PQ_MAXCOUNT];
/*
 * NOTE(review): presumably the storage behind the PQ_NUMCOLORS, PQ_PRIME1,
 * PQ_PRIME2, ... names that vm_coloring_init() assigns to -- confirm
 * against vm_page.h.
 */
57 struct pq_coloring page_queue_coloring;
59 static int pq_cachesize = 0; /* size of the cache in KB */
60 static int pq_cachenways = 0; /* associativity of the cache */
/*
 * Sysctl tree: a "pagequeue" node under _vm_stats, followed by integer
 * entries flagged CTLFLAG_RD (read-only) for the number of colors, the
 * cache size and associativity, and the two tuning primes.
 */
62 SYSCTL_DECL(_vm_stats);
63 SYSCTL_NODE(_vm_stats, OID_AUTO, pagequeue, CTLFLAG_RW, 0, "VM meter stats");
64 SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, page_colors, CTLFLAG_RD,
65 &(PQ_NUMCOLORS), 0, "Number of colors in the page queue");
66 SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, cachesize, CTLFLAG_RD,
67 &pq_cachesize, 0, "Size of the processor cache in KB");
68 SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, cachenways, CTLFLAG_RD,
69 &pq_cachenways, 0, "Associativity of the processor cache");
70 SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, prime1, CTLFLAG_RD,
71 &(PQ_PRIME1), 0, "Cache tuning value");
72 SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, prime2, CTLFLAG_RD,
73 &(PQ_PRIME2), 0, "Cache tuning value");
/*
 * vm_coloring_init:
 *
 *	Choose the number of page colors (PQ_NUMCOLORS), derive the queue
 *	index layout from it, and set up the entries of vm_page_queues[].
 *
 *	NOTE(review): the embedded line numbers in this listing have gaps
 *	(for example 76 -> 79, 87 -> 92, 97 -> 100 and 133 -> 143), so
 *	braces, preprocessor conditionals, else-branches and several
 *	assignments are not visible.  In particular, the queue set-up loops
 *	at the end may belong to a separate function whose header was lost.
 *	Confirm against the complete file before relying on this summary.
 */
76 vm_coloring_init(void)
/* Single-color setting; the condition that selects it is not visible. */
79 PQ_NUMCOLORS = PQ_PRIME1 = PQ_PRIME2 = 1;
/* Let setPQL2() fill in the cache size (KB) and the associativity. */
82 setPQL2(&pq_cachesize, &pq_cachenways);
/* Compile-time guard: PAGE_SIZE/1024 is used as a divisor just below. */
84 CTASSERT(PAGE_SIZE/1024 > 0);
/*
 * With both cache parameters known, colors = cache KB / page KB / <the
 * divisor on the continuation line, which is missing from this listing>.
 * The value used otherwise is not visible.  The result is then capped at
 * PQ_MAXCOLORS, with a console message when the cap is applied.
 */
86 if (pq_cachesize > 0 && pq_cachenways > 0)
87 PQ_NUMCOLORS = pq_cachesize / (PAGE_SIZE/1024) / \
92 if (PQ_MAXCOLORS < PQ_NUMCOLORS) {
93 printf("VM-PQ color limit (PQ_MAXCOLORS=%u) exceeded (%u), see vm_page.h", PQ_MAXCOLORS, PQ_NUMCOLORS);
94 PQ_NUMCOLORS = PQ_MAXCOLORS;
/*
 * Threshold ladder on the color count (>= 128, >= 64, >= 32, >= 16).  The
 * body of each branch is not visible; the final assignment below is
 * presumably the fall-through case, reverting to a single color.
 */
97 if (PQ_NUMCOLORS >= 128) {
100 } else if (PQ_NUMCOLORS >= 64) {
103 } else if (PQ_NUMCOLORS >= 32) {
106 } else if (PQ_NUMCOLORS >= 16) {
110 PQ_NUMCOLORS = PQ_PRIME1 = PQ_PRIME2 = 1;
114 * PQ_CACHE represents a
115 * PQ_NUMCOLORS consecutive queue.
/*
 * Queue numbering derived from N = PQ_NUMCOLORS: inactive at 1 + N, active
 * at 2 + N, the N cache queues at 3 + N .. 2 + 2N, hold at 3 + 2N, and
 * PQ_COUNT = 4 + 2N queues in total.  PQ_COLORMASK = N - 1 is later used
 * as a bit mask, which assumes N is a power of two -- nothing visible here
 * enforces that; TODO confirm.
 */
117 PQ_COLORMASK = PQ_NUMCOLORS - 1;
118 PQ_INACTIVE = 1 + PQ_NUMCOLORS;
119 PQ_ACTIVE = 2 + PQ_NUMCOLORS;
120 PQ_CACHE = 3 + PQ_NUMCOLORS;
121 PQ_HOLD = 3 + 2 * PQ_NUMCOLORS;
122 PQ_COUNT = 4 + 2 * PQ_NUMCOLORS;
123 PQ_MAXLENGTH = PQ_NUMCOLORS / 3 + PQ_PRIME1;
/*
 * NOTE(review): the #error below is presumably inside a disabled
 * preprocessor region whose guard lines are missing from this listing.
 */
126 /* XXX: is it possible to allocate vm_page_queues[PQ_COUNT] here? */
127 #error XXX: vm_page_queues = malloc(PQ_COUNT * sizeof(struct vpgqueues));
/* Report the chosen tuning only when coloring is actually in use. */
131 if (PQ_NUMCOLORS > 1)
132 printf("Using %d colors for the VM-PQ tuning (%d, %d)\n",
133 PQ_NUMCOLORS, pq_cachesize, pq_cachenways);
/*
 * Point each queue's counter at the matching field of the global cnt
 * structure: all free-color queues share v_free_count and all cache-color
 * queues share v_cache_count.  The hold queue is pointed at
 * v_active_count, the same counter as the active queue.
 */
143 for (i = 0; i < PQ_NUMCOLORS; ++i) {
144 vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
146 for (i = 0; i < PQ_NUMCOLORS; ++i) {
147 vm_page_queues[PQ_CACHE + i].cnt = &cnt.v_cache_count;
149 vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
150 vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
151 vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;
/* Give every queue in use (indices 0 .. PQ_COUNT - 1) an empty list. */
153 for (i = 0; i < PQ_COUNT; i++) {
154 TAILQ_INIT(&vm_page_queues[i].pl);
/*
 * vm_pageq_requeue:
 *
 *	Move page m to the tail of the queue it is currently on.  The queue
 *	is looked up with VM_PAGE_GETQUEUE(); when that yields PQ_NONE the
 *	visible code does nothing.  The page's queue assignment itself is
 *	not changed, only its position in the list.
 *
 *	NOTE(review): the return-type line and braces are missing from this
 *	listing.
 */
159 vm_pageq_requeue(vm_page_t m)
161 int queue = VM_PAGE_GETQUEUE(m);
162 struct vpgqueues *vpq;
164 if (queue != PQ_NONE) {
165 vpq = &vm_page_queues[queue];
/* Remove and re-append: the page ends up last in the same list. */
166 TAILQ_REMOVE(&vpq->pl, m, pageq);
167 TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
/*
 * vm_pageq_enqueue:
 *
 *	Put page m on queue number `queue': record the queue on the page
 *	with VM_PAGE_SETQUEUE2() and append the page to the tail of
 *	vm_page_queues[queue].
 *
 *	NOTE(review): the embedded numbering jumps after the insert (181 ->
 *	189), so any statement that follows it -- for example an update of
 *	the queue's counter -- is not visible in this listing.
 */
175 vm_pageq_enqueue(int queue, vm_page_t m)
177 struct vpgqueues *vpq;
179 vpq = &vm_page_queues[queue];
180 VM_PAGE_SETQUEUE2(m, queue);
181 TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
189 * Add a new page to the freelist for use by the system.
/*
 * vm_pageq_add_new_page:
 *
 *	Hand the physical page at address pa to the free lists.  The page
 *	is first checked against the "vm.blacklist" environment tunable; a
 *	match is reported with a "Skipping page" message.  Otherwise the
 *	global page count is bumped, the page's color is computed, and the
 *	page is placed on the free queue of that color.
 *
 *	NOTE(review): the declarations of `bad' and `m', the bodies of the
 *	separator test and of the blacklist match, the printf arguments and
 *	the return statements are missing from this listing.  What happens
 *	after a blacklist match is therefore not visible.
 */
192 vm_pageq_add_new_page(vm_paddr_t pa)
196 char *cp, *list, *pos;
199 * See if a physical address in this page has been listed
200 * in the blacklist tunable. Entries in the tunable are
201 * separated by spaces or commas. If an invalid integer is
202 * encountered then the rest of the string is skipped.
204 if (testenv("vm.blacklist")) {
205 list = getenv("vm.blacklist");
206 for (pos = list; *pos != '\0'; pos = cp) {
207 bad = strtoq(pos, &cp, 0);
209 if (*cp == ' ' || *cp == ',') {
216 if (pa == trunc_page(bad)) {
217 printf("Skipping page with pa 0x%jx\n",
/* Account for the page, then look up its vm_page structure. */
226 atomic_add_int(&cnt.v_page_count, 1);
227 m = PHYS_TO_VM_PAGE(pa);
/* Color = physical page number masked with PQ_COLORMASK. */
230 m->pc = (pa >> PAGE_SHIFT) & PQ_COLORMASK;
/* Insert on the free queue for that color under the free-queue spin mutex. */
232 mtx_lock_spin(&vm_page_queue_free_mtx);
233 vm_pageq_enqueue(m->pc + PQ_FREE, m);
234 mtx_unlock_spin(&vm_page_queue_free_mtx);
239 * vm_pageq_remove_nowakeup:
241 * vm_page_unqueue() without any wakeup
243 * The queue containing the given page must be locked.
244 * This routine may not block.
/*
 * Take page m off whatever queue it is on, if any: the page's queue field
 * is reset to PQ_NONE and the page is unlinked from that queue's list.
 * Nothing is done when the page is already on no queue (PQ_NONE).
 *
 * NOTE(review): lines following the TAILQ_REMOVE are missing from this
 * listing (the numbering jumps 255 -> 264), so any further bookkeeping is
 * not visible.
 */
247 vm_pageq_remove_nowakeup(vm_page_t m)
249 int queue = VM_PAGE_GETQUEUE(m);
250 struct vpgqueues *pq;
252 if (queue != PQ_NONE) {
253 pq = &vm_page_queues[queue];
254 VM_PAGE_SETQUEUE2(m, PQ_NONE);
255 TAILQ_REMOVE(&pq->pl, m, pageq);
264 * Remove a page from its queue.
266 * The queue containing the given page must be locked.
267 * This routine may not block.
/*
 * Take page m off whatever queue it is on, if any, resetting its queue
 * field to PQ_NONE.  Unlike vm_pageq_remove_nowakeup(), this variant then
 * checks whether the page came from a cache queue
 * (VM_PAGE_RESOLVEQUEUE(m, queue) == PQ_CACHE) and, if so, consults
 * vm_paging_needed().
 *
 * NOTE(review): the statement executed when vm_paging_needed() is true is
 * missing from this listing; the function name of the sibling suggests a
 * wakeup, but that is not visible here -- confirm against the full file.
 */
270 vm_pageq_remove(vm_page_t m)
272 int queue = VM_PAGE_GETQUEUE(m);
273 struct vpgqueues *pq;
275 if (queue != PQ_NONE) {
276 VM_PAGE_SETQUEUE2(m, PQ_NONE);
277 pq = &vm_page_queues[queue];
278 TAILQ_REMOVE(&pq->pl, m, pageq);
/* `queue' still holds the index the page was on before the reset above. */
281 if (VM_PAGE_RESOLVEQUEUE(m, queue) == PQ_CACHE) {
282 if (vm_paging_needed())
293 * Find a page on the specified queue with color optimization.
295 * The page coloring optimization attempts to locate a page
296 * that does not overload other nearby pages in the object in
297 * the cpu's L2 cache. We need this optimization because cpu
298 * caches tend to be physical caches, while object spaces tend to be virtual.
301 * The specified queue must be locked.
302 * This routine may not block.
304 * This routine may only be called from the vm_pageq_find()
305 * function in this file.
/*
 * Search the colored queues that start at vm_page_queues[basequeue] for a
 * page, probing colors at increasing distance on both sides of `index':
 * for each offset i, the head of queue (index + i) & PQ_COLORMASK and then
 * the head of queue (index - i) & PQ_COLORMASK is examined.
 *
 * NOTE(review): both probe conditions end in a line continuation whose
 * second half is missing from this listing, as are the declarations of
 * `i' and `m', the handling of a hit, and the return statement.  The
 * trailing #endif closes a conditional whose opening line is also not
 * visible.
 */
307 static inline vm_page_t
308 _vm_pageq_find(int basequeue, int index)
312 struct vpgqueues *pq;
/* pq[c] below is the queue for color c within this group of queues. */
314 pq = &vm_page_queues[basequeue];
317 * Note that for the first loop, index+i and index-i wind up at the
318 * same place. Even though this is not totally optimal, we've already
319 * blown it by missing the cache case so we do not care.
321 for (i = PQ_NUMCOLORS / 2; i > 0; --i) {
322 if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_COLORMASK].pl)) \
326 if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_COLORMASK].pl)) \
332 #endif /* PQ_NOOPT */
335 vm_pageq_find(int basequeue, int index, boolean_t prefer_zero)
340 if (PQ_NUMCOLORS > 1) {
342 m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, \
345 m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl);
348 m = _vm_pageq_find(basequeue, index);
353 m = TAILQ_LAST(&vm_page_queues[basequeue].pl, pglist);
355 m = TAILQ_FIRST(&vm_page_queues[basequeue].pl);