/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 */

/*-
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/linker_set.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

static int
vm_contig_launder_page(vm_page_t m)
{
	vm_object_t object;
	vm_page_t m_tmp;
	struct vnode *vp;

	object = m->object;
	if (!VM_OBJECT_TRYLOCK(object))
		return (EAGAIN);
	if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) {
		VM_OBJECT_UNLOCK(object);
		vm_page_lock_queues();
		return (EBUSY);
	}
	vm_page_test_dirty(m);
	if (m->dirty == 0 && m->hold_count == 0)
		pmap_remove_all(m);
	if (m->dirty) {
		if (object->type == OBJT_VNODE) {
			vm_page_unlock_queues();
			vp = object->handle;
			VM_OBJECT_UNLOCK(object);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
			VM_OBJECT_LOCK(object);
			vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
			VM_OBJECT_UNLOCK(object);
			VOP_UNLOCK(vp, 0, curthread);
			vm_page_lock_queues();
			return (0);
		} else if (object->type == OBJT_SWAP ||
		    object->type == OBJT_DEFAULT) {
			m_tmp = m;
			vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC);
			VM_OBJECT_UNLOCK(object);
			return (0);
		}
	} else if (m->hold_count == 0)
		vm_page_cache(m);
	VM_OBJECT_UNLOCK(object);
	return (0);
}
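
/*
 * vm_contig_launder() below interprets the return value of
 * vm_contig_launder_page() as follows: 0 means the page was cleaned,
 * cached or left alone and the scan made progress; EAGAIN means the
 * object lock could not be acquired and the page is simply skipped;
 * EBUSY means we slept waiting on a busy page, so the queue scan is
 * stale and must be abandoned.
 */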
static int
vm_contig_launder(int queue)
{
	vm_page_t m, next;
	int error;

	for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
		next = TAILQ_NEXT(m, pageq);

		/* Skip marker pages */
		if ((m->flags & PG_MARKER) != 0)
			continue;

		KASSERT(VM_PAGE_INQUEUE2(m, queue),
		    ("vm_contig_launder: page %p's queue is not %d", m, queue));
		error = vm_contig_launder_page(m);
		if (error == 0)
			return (TRUE);
		if (error == EBUSY)
			return (FALSE);
	}
	return (FALSE);
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
static void *
contigmalloc1(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary,
	vm_map_t map)
{
	int i, start, pass;
	int pqtype;
	vm_object_t object;
	vm_offset_t addr, tmp_addr;
	vm_paddr_t phys;
	int inactl, actl, inactmax, actmax;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");
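	/*
	 * The checks above use the usual bit trick: a nonzero x is a power
	 * of two exactly when (x & (x - 1)) == 0.  For example,
	 * 0x1000 & 0x0fff == 0, while 0x1800 & 0x17ff == 0x1000 != 0.
	 */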
	start = 0;
	for (pass = 2; pass >= 0; pass--) {
		vm_page_lock_queues();
again0:
		mtx_lock_spin(&vm_page_queue_free_mtx);
again:
		/*
		 * Find first page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 */
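		/*
		 * The boundary test below works because two addresses in the
		 * same boundary-sized chunk agree in every bit above the
		 * boundary mask, so XORing the first and last byte of the
		 * region and masking off the low bits must give zero.  For
		 * example, with boundary 0x10000, a 0x2000-byte region at
		 * phys 0x1f000 ends at 0x20fff: 0x1f000 ^ 0x20fff = 0x3ffff,
		 * and 0x3ffff & ~0xffff = 0x30000 != 0, so it crosses.  Moved
		 * to phys 0x20000 it ends at 0x21fff, and 0x20000 ^ 0x21fff =
		 * 0x1fff masks to zero.
		 */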
		for (i = start; i < cnt.v_page_count; i++) {
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			mtx_unlock_spin(&vm_page_queue_free_mtx);
			/*
			 * Instead of racing to empty the inactive/active
			 * queues, give up, even with more left to free,
			 * if we try more than the initial amount of pages.
			 *
			 * There's no point attempting this on the last pass.
			 */
			if (pass > 0) {
				inactl = actl = 0;
				inactmax = vm_page_queues[PQ_INACTIVE].lcnt;
				actmax = vm_page_queues[PQ_ACTIVE].lcnt;
again1:
				if (inactl < inactmax &&
				    vm_contig_launder(PQ_INACTIVE)) {
					inactl++;
					goto again1;
				}
				if (actl < actmax &&
				    vm_contig_launder(PQ_ACTIVE)) {
					actl++;
					goto again1;
				}
			}
			vm_page_unlock_queues();
			continue;
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}
		mtx_unlock_spin(&vm_page_queue_free_mtx);
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];

			if (VM_PAGE_INQUEUE1(m, PQ_CACHE)) {
				if (m->hold_count != 0) {
					start++;
					goto again0;
				}
				object = m->object;
				if (!VM_OBJECT_TRYLOCK(object)) {
					start++;
					goto again0;
				}
				if ((m->flags & PG_BUSY) || m->busy != 0) {
					VM_OBJECT_UNLOCK(object);
					start++;
					goto again0;
				}
				vm_page_free(m);
				VM_OBJECT_UNLOCK(object);
			}
		}
		mtx_lock_spin(&vm_page_queue_free_mtx);
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			pqtype = pga[i].queue - pga[i].pc;
			if (pqtype != PQ_FREE) {
				start++;
				goto again;
			}
		}
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_pageq_remove_nowakeup(m);
			m->valid = VM_PAGE_BITS_ALL;
			if (m->flags & PG_ZERO)
				vm_page_zero_count--;
			/* Don't clear the PG_ZERO flag; we'll need it later. */
			m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
			KASSERT(m->dirty == 0,
			    ("contigmalloc1: page %p was dirty", m));
			m->wire_count = 0;
			m->busy = 0;
			m->object = NULL;
		}
		mtx_unlock_spin(&vm_page_queue_free_mtx);
		vm_page_unlock_queues();
		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to
		 * it and return kernel VM pointer.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map), size, &addr) !=
		    KERN_SUCCESS) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			vm_map_unlock(map);
			return (NULL);
		}
		vm_object_reference(kernel_object);
		vm_map_insert(map, kernel_object, addr - VM_MIN_KERNEL_ADDRESS,
		    addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
		vm_map_unlock(map);

		tmp_addr = addr;
		VM_OBJECT_LOCK(kernel_object);
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
			    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
				pmap_zero_page(m);
			tmp_addr += PAGE_SIZE;
		}
		VM_OBJECT_UNLOCK(kernel_object);
		vm_map_wire(map, addr, addr + size,
		    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);

		return ((void *)addr);
	}
	return (NULL);
}

static void
vm_page_release_contigl(vm_page_t m, vm_pindex_t count)
{
	while (count--) {
		vm_page_free_toq(m);
		m++;
	}
}

void
vm_page_release_contig(vm_page_t m, vm_pindex_t count)
{
	vm_page_lock_queues();
	vm_page_release_contigl(m, count);
	vm_page_unlock_queues();
}

static int
vm_contig_unqueue_free(vm_page_t m)
{
	int error = 0;

	mtx_lock_spin(&vm_page_queue_free_mtx);
	if ((m->queue - m->pc) == PQ_FREE)
		vm_pageq_remove_nowakeup(m);
	else
		error = EAGAIN;
	mtx_unlock_spin(&vm_page_queue_free_mtx);
	if (error)
		return (error);
	m->valid = VM_PAGE_BITS_ALL;
	if (m->flags & PG_ZERO)
		vm_page_zero_count--;
	/* Don't clear the PG_ZERO flag; we'll need it later. */
	m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
	KASSERT(m->dirty == 0,
	    ("contigmalloc2: page %p was dirty", m));
	m->wire_count = 0;
	m->busy = 0;
	m->object = NULL;
	return (error);
}

static vm_page_t
vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
	    vm_offset_t alignment, vm_offset_t boundary)
{
	vm_object_t object;
	vm_offset_t size;
	vm_paddr_t phys;
	vm_page_t pga = vm_page_array;
	static vm_pindex_t np = 0;
	static vm_pindex_t start = 0;
	int i, pass, pqtype;

	size = npages << PAGE_SHIFT;
	if (size == 0)
		panic("vm_page_alloc_contig: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_page_alloc_contig: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_page_alloc_contig: boundary must be a power of 2");
	/*
	 * Two simple optimizations. First, don't scan high ordered pages
	 * if they are outside of the requested address range. Second, cache
	 * the starting page index across calls and reuse it instead of
	 * restarting the scan from the top. This is conditional on the
	 * requested number of pages being the same or greater than the
	 * cached amount.
	 */
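	/*
	 * For example, on a machine whose vm_page_array covers 65536 4KB
	 * pages, a request with high = 16MB starts the scan at page index
	 * atop(16MB) - npages + 1 = 4096 - npages + 1 instead of at the top
	 * of the array, and a later request for at least as many pages
	 * resumes from the cached start index rather than rescanning the
	 * pages above it.
	 */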
	for (pass = 0; pass < 2; pass++) {
		if ((np == 0) || (np > npages)) {
			if (atop(high) < vm_page_array_size)
				start = atop(high) - npages + 1;
			else
				start = vm_page_array_size - npages + 1;
			np = npages;
		}
		vm_page_lock_queues();
retry:
		start--;
		/*
		 * Find last page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 */
		for (i = start; i >= 0; i--) {
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (pass == 0) {
				if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
					continue;
			} else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
			    pga[i].queue != PQ_ACTIVE &&
			    pga[i].queue != PQ_INACTIVE)
				continue;
			if (phys >= low && phys + size <= high &&
			    ((phys & (alignment - 1)) == 0) &&
			    ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)
				break;
		}
		/* There are no candidates at all. */
		if (i == -1) {
			vm_page_unlock_queues();
			continue;
		}
		start = i;
		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + npages - 1; i > start; i--) {
			pqtype = pga[i].queue - pga[i].pc;
			if (VM_PAGE_TO_PHYS(&pga[i]) !=
			    VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE) {
				start = i - npages + 1;
				goto retry;
			}
			if (pass == 0) {
				if (pqtype != PQ_FREE && pqtype != PQ_CACHE) {
					start = i - npages + 1;
					goto retry;
				}
			} else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
			    pga[i].queue != PQ_ACTIVE &&
			    pga[i].queue != PQ_INACTIVE) {
				start = i - npages + 1;
				goto retry;
			}
		}
		for (i = start + npages - 1; i >= start; i--) {
			vm_page_t m = &pga[i];

retry_page:
			pqtype = m->queue - m->pc;
			if (pass != 0 && pqtype != PQ_FREE &&
			    pqtype != PQ_CACHE) {
				if (m->queue == PQ_ACTIVE ||
				    m->queue == PQ_INACTIVE) {
					if (vm_contig_launder_page(m) != 0)
						goto cleanup_freed;
					pqtype = m->queue - m->pc;
					if (pqtype != PQ_FREE &&
					    pqtype != PQ_CACHE)
						goto cleanup_freed;
				} else {
cleanup_freed:
					vm_page_release_contigl(&pga[i + 1],
					    start + npages - 1 - i);
					start = i - npages + 1;
					goto retry;
				}
			}
			if (pqtype == PQ_CACHE) {
				if (m->hold_count != 0)
					goto cleanup_freed;
				object = m->object;
				if (!VM_OBJECT_TRYLOCK(object))
					goto cleanup_freed;
				if ((m->flags & PG_BUSY) || m->busy != 0) {
					VM_OBJECT_UNLOCK(object);
					goto cleanup_freed;
				}
				vm_page_free(m);
				VM_OBJECT_UNLOCK(object);
			}
			/*
			 * There is no good API for freeing a page
			 * directly to PQ_NONE on our behalf, so spin.
			 */
			if (vm_contig_unqueue_free(m) != 0)
				goto retry_page;
		}
		vm_page_unlock_queues();
		/*
		 * We've found a contiguous chunk that meets our requirements.
		 */
		return (&pga[start]);
	}
	return (NULL);
}
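
/*
 * Map a run of physical pages, claimed above by vm_page_alloc_contig(),
 * into the kernel address space.  Note the two-pass structure above:
 * pass 0 considers only free and cached pages, while pass 1 will also
 * try to launder active and inactive pages into reach.
 */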
static void *
contigmalloc2(vm_page_t m, vm_pindex_t npages, int flags)
{
	vm_object_t object = kernel_object;
	vm_map_t map = kernel_map;
	vm_offset_t addr, tmp_addr;
	vm_pindex_t i;

	/*
	 * Allocate kernel VM, unfree and assign the physical pages to
	 * it and return kernel VM pointer.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), npages << PAGE_SHIFT, &addr)
	    != KERN_SUCCESS) {
		vm_map_unlock(map);
		return (NULL);
	}
	vm_object_reference(object);
	vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS,
	    addr, addr + (npages << PAGE_SHIFT), VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	tmp_addr = addr;
	VM_OBJECT_LOCK(object);
	for (i = 0; i < npages; i++) {
		vm_page_insert(&m[i], object,
		    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
		if ((flags & M_ZERO) && !(m[i].flags & PG_ZERO))
			pmap_zero_page(&m[i]);
		tmp_addr += PAGE_SIZE;
	}
	VM_OBJECT_UNLOCK(object);
	vm_map_wire(map, addr, addr + (npages << PAGE_SHIFT),
	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
	return ((void *)addr);
}

static int vm_old_contigmalloc = 0;
SYSCTL_INT(_vm, OID_AUTO, old_contigmalloc,
    CTLFLAG_RW, &vm_old_contigmalloc, 0, "Use the old contigmalloc algorithm");
TUNABLE_INT("vm.old_contigmalloc", &vm_old_contigmalloc);
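
/*
 * The old allocator can be selected at boot time by setting
 * vm.old_contigmalloc=1 in loader.conf(5), or at run time with
 * "sysctl vm.old_contigmalloc=1"; with the default of 0, contigmalloc()
 * below uses the newer vm_page_alloc_contig() path.
 */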
void *
contigmalloc(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary)
{
	void *ret;
	vm_page_t pages;
	vm_pindex_t npgs;

	npgs = round_page(size) >> PAGE_SHIFT;
	if (vm_old_contigmalloc) {
		ret = contigmalloc1(size, type, flags, low, high, alignment,
		    boundary, kernel_map);
	} else {
		pages = vm_page_alloc_contig(npgs, low, high,
		    alignment, boundary);
		if (pages == NULL) {
			ret = NULL;
		} else {
			ret = contigmalloc2(pages, npgs, flags);
			if (ret == NULL)
				vm_page_release_contig(pages, npgs);
		}
	}
	malloc_type_allocated(type, ret == NULL ? 0 : npgs << PAGE_SHIFT);
	return (ret);
}

void
contigfree(void *addr, unsigned long size, struct malloc_type *type)
{
	vm_pindex_t npgs;

	npgs = round_page(size) >> PAGE_SHIFT;
	kmem_free(kernel_map, (vm_offset_t)addr, size);
	malloc_type_freed(type, npgs << PAGE_SHIFT);
}
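
/*
 * Usage sketch (not part of this file): a driver needing a 64KB DMA
 * buffer below 16MB, page-aligned and not crossing a 64KB boundary,
 * might do:
 *
 *	void *buf;
 *
 *	buf = contigmalloc(64 * 1024, M_DEVBUF, M_NOWAIT, 0,
 *	    16 * 1024 * 1024, PAGE_SIZE, 64 * 1024);
 *	if (buf == NULL)
 *		return (ENOMEM);
 *	...
 *	contigfree(buf, 64 * 1024, M_DEVBUF);
 *
 * M_DEVBUF and M_NOWAIT are the stock malloc(9) type and flag; any
 * malloc type may be used, and the size passed to contigfree() must
 * match the size originally allocated.
 */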