sys/vm/vm_contig.c

   1 /*-
   2  * Copyright (c) 1991 Regents of the University of California.
   3  * All rights reserved.
   4  *
   5  * This code is derived from software contributed to Berkeley by
   6  * The Mach Operating System project at Carnegie-Mellon University.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  * 4. Neither the name of the University nor the names of its contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  30  * SUCH DAMAGE.
  31  *
  32  *      from: @(#)vm_page.c     7.4 (Berkeley) 5/7/91
  33  */
  34
  35 /*-
  36  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  37  * All rights reserved.
  38  *
  39  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  40  *
  41  * Permission to use, copy, modify and distribute this software and
  42  * its documentation is hereby granted, provided that both the copyright
  43  * notice and this permission notice appear in all copies of the
  44  * software, derivative works or modified versions, and any portions
  45  * thereof, and that both notices appear in supporting documentation.
  46  *
  47  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  48  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  49  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  50  *
  51  * Carnegie Mellon requests users of this software to return to
  52  *
  53  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  54  *  School of Computer Science
  55  *  Carnegie Mellon University
  56  *  Pittsburgh PA 15213-3890
  57  *
  58  * any improvements or extensions that they make and grant Carnegie the
  59  * rights to redistribute these changes.
  60  */
  61
  62 #include <sys/cdefs.h>
  63 __FBSDID("$FreeBSD$");
  64
  65 #include <sys/param.h>
  66 #include <sys/systm.h>
  67 #include <sys/eventhandler.h>
  68 #include <sys/lock.h>
  69 #include <sys/mount.h>
  70 #include <sys/mutex.h>
  71 #include <sys/proc.h>
  72 #include <sys/kernel.h>
  73 #include <sys/sysctl.h>
  74 #include <sys/vmmeter.h>
  75 #include <sys/vnode.h>
  76
  77 #include <vm/vm.h>
  78 #include <vm/vm_param.h>
  79 #include <vm/vm_kern.h>
  80 #include <vm/pmap.h>
  81 #include <vm/vm_map.h>
  82 #include <vm/vm_object.h>
  83 #include <vm/vm_page.h>
  84 #include <vm/vm_pageout.h>
  85 #include <vm/vm_pager.h>
  86 #include <vm/vm_phys.h>
  87 #include <vm/vm_extern.h>
  88
  89 static int
  90 vm_contig_launder_page(vm_page_t m, vm_page_t *next, int tries)
  91 {
  92         vm_object_t object;
  93         vm_page_t m_tmp;
  94         struct vnode *vp;
  95         struct mount *mp;
  96         int vfslocked;
  97
  98         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
  99         if (!vm_pageout_page_lock(m, next) || m->hold_count != 0) {
 100                 vm_page_unlock(m);
 101                 return (EAGAIN);
 102         }
 103         object = m->object;
 104         if (!VM_OBJECT_TRYLOCK(object) &&
 105             (!vm_pageout_fallback_object_lock(m, next) || m->hold_count != 0)) {
 106                 vm_page_unlock(m);
 107                 VM_OBJECT_UNLOCK(object);
 108                 return (EAGAIN);
 109         }
 110         if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) {
 111                 if (tries == 0) {
 112                         vm_page_unlock(m);
 113                         VM_OBJECT_UNLOCK(object);
 114                         return (EAGAIN);
 115                 }
 116                 vm_page_sleep(m, "vpctw0");
 117                 VM_OBJECT_UNLOCK(object);
 118                 vm_page_lock_queues();
 119                 return (EBUSY);
 120         }
 121         vm_page_test_dirty(m);
 122         if (m->dirty == 0)
 123                 pmap_remove_all(m);
 124         if (m->dirty != 0) {
 125                 vm_page_unlock(m);
 126                 if (tries == 0 || (object->flags & OBJ_DEAD) != 0) {
 127                         VM_OBJECT_UNLOCK(object);
 128                         return (EAGAIN);
 129                 }
 130                 if (object->type == OBJT_VNODE) {
 131                         vm_page_unlock_queues();
 132                         vp = object->handle;
 133                         vm_object_reference_locked(object);
 134                         VM_OBJECT_UNLOCK(object);
 135                         (void) vn_start_write(vp, &mp, V_WAIT);
 136                         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 137                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 138                         VM_OBJECT_LOCK(object);
 139                         vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
 140                         VM_OBJECT_UNLOCK(object);
 141                         VOP_UNLOCK(vp, 0);
 142                         VFS_UNLOCK_GIANT(vfslocked);
 143                         vm_object_deallocate(object);
 144                         vn_finished_write(mp);
 145                         vm_page_lock_queues();
 146                         return (0);
 147                 } else if (object->type == OBJT_SWAP ||
 148                            object->type == OBJT_DEFAULT) {
 149                         vm_page_unlock_queues();
 150                         m_tmp = m;
 151                         vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0,
 152                             NULL, NULL);
 153                         VM_OBJECT_UNLOCK(object);
 154                         vm_page_lock_queues();
 155                         return (0);
 156                 }
 157         } else {
 158                 vm_page_cache(m);
 159                 vm_page_unlock(m);
 160         }
 161         VM_OBJECT_UNLOCK(object);
 162         return (EAGAIN);
 163 }
 164
 165 static int
 166 vm_contig_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high)
 167 {
 168         vm_page_t m, next;
 169         vm_paddr_t pa;
 170         int error;
 171
 172         TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) {
 173                 KASSERT(m->queue == queue,
 174                     ("vm_contig_launder: page %p's queue is not %d", m, queue));
 175                 if ((m->flags & PG_MARKER) != 0)
 176                         continue;
 177                 pa = VM_PAGE_TO_PHYS(m);
 178                 if (pa < low || pa + PAGE_SIZE > high)
 179                         continue;
 180                 error = vm_contig_launder_page(m, &next, tries);
 181                 if (error == 0)
 182                         return (TRUE);
 183                 if (error == EBUSY)
 184                         return (FALSE);
 185         }
 186         return (FALSE);
 187 }
 188
 189 /*
 190  *      Frees the given physically contiguous pages.
 191  *
 192  *      N.B.: Any pages with PG_ZERO set must, in fact, be zero filled.
 193  */
 194 static void
 195 vm_page_release_contig(vm_page_t m, vm_pindex_t count)
 196 {
 197
 198         while (count--) {
 199                 /* Leave PG_ZERO unchanged. */
 200                 vm_page_free_toq(m);
 201                 m++;
 202         }
 203 }
 204
 205 /*
 206  * Increase the number of cached pages.  The specified value, "tries",
 207  * determines which categories of pages are cached:
 208  *
 209  *  0: All clean, inactive pages within the specified physical address range
 210  *     are cached.  Will not sleep.
 211  *  1: The vm_lowmem handlers are called.  All inactive pages within
 212  *     the specified physical address range are cached.  May sleep.
 213  *  2: The vm_lowmem handlers are called.  All inactive and active pages
 214  *     within the specified physical address range are cached.  May sleep.
 215  */
 216 void
 217 vm_contig_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
 218 {
 219         int actl, actmax, inactl, inactmax;
 220
 221         if (tries > 0) {
 222                 /*
 223                  * Decrease registered cache sizes.  The vm_lowmem handlers
 224                  * may acquire locks and/or sleep, so they can only be invoked
 225                  * when "tries" is greater than zero.
 226                  */
 227                 EVENTHANDLER_INVOKE(vm_lowmem, 0);
 228
 229                 /*
 230                  * We do this explicitly after the caches have been drained
 231                  * above.
 232                  */
 233                 uma_reclaim();
 234         }
 235         vm_page_lock_queues();
 236         inactl = 0;
 237         inactmax = cnt.v_inactive_count;
 238         actl = 0;
 239         actmax = tries < 2 ? 0 : cnt.v_active_count;
 240 again:
 241         if (inactl < inactmax && vm_contig_launder(PQ_INACTIVE, tries, low,
 242             high)) {
 243                 inactl++;
 244                 goto again;
 245         }
 246         if (actl < actmax && vm_contig_launder(PQ_ACTIVE, tries, low, high)) {
 247                 actl++;
 248                 goto again;
 249         }
 250         vm_page_unlock_queues();
 251 }
 252
 253 /*
 254  * Allocates a region from the kernel address map and pages within the
 255  * specified physical address range to the kernel object, creates a wired
 256  * mapping from the region to these pages, and returns the region's starting
 257  * virtual address.  The allocated pages are not necessarily physically
 258  * contiguous.  If M_ZERO is specified through the given flags, then the pages
 259  * are zeroed before they are mapped.
 260  */
 261 vm_offset_t
 262 kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
 263     vm_paddr_t high, vm_memattr_t memattr)
 264 {
 265         vm_object_t object = kernel_object;
 266         vm_offset_t addr, i, offset;
 267         vm_page_t m;
 268         int tries;
 269
 270         size = round_page(size);
 271         vm_map_lock(map);
 272         if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
 273                 vm_map_unlock(map);
 274                 return (0);
 275         }
 276         offset = addr - VM_MIN_KERNEL_ADDRESS;
 277         vm_object_reference(object);
 278         vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
 279             VM_PROT_ALL, 0);
 280         VM_OBJECT_LOCK(object);
 281         for (i = 0; i < size; i += PAGE_SIZE) {
 282                 tries = 0;
 283 retry:
 284                 m = vm_phys_alloc_contig(1, low, high, PAGE_SIZE, 0);
 285                 if (m == NULL) {
 286                         if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
 287                                 VM_OBJECT_UNLOCK(object);
 288                                 vm_map_unlock(map);
 289                                 vm_contig_grow_cache(tries, low, high);
 290                                 vm_map_lock(map);
 291                                 VM_OBJECT_LOCK(object);
 292                                 tries++;
 293                                 goto retry;
 294                         }
 295                         while (i != 0) {
 296                                 i -= PAGE_SIZE;
 297                                 m = vm_page_lookup(object, OFF_TO_IDX(offset +
 298                                     i));
 299                                 vm_page_free(m);
 300                         }
 301                         VM_OBJECT_UNLOCK(object);
 302                         vm_map_delete(map, addr, addr + size);
 303                         vm_map_unlock(map);
 304                         return (0);
 305                 }
 306                 if (memattr != VM_MEMATTR_DEFAULT)
 307                         pmap_page_set_memattr(m, memattr);
 308                 vm_page_insert(m, object, OFF_TO_IDX(offset + i));
 309                 if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 310                         pmap_zero_page(m);
 311                 m->valid = VM_PAGE_BITS_ALL;
 312         }
 313         VM_OBJECT_UNLOCK(object);
 314         vm_map_unlock(map);
 315         vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
 316             VM_MAP_WIRE_NOHOLES);
 317         return (addr);
 318 }
 319
 320 /*
 321  *      Allocates a region from the kernel address map, inserts the
 322  *      given physically contiguous pages into the kernel object,
 323  *      creates a wired mapping from the region to the pages, and
 324  *      returns the region's starting virtual address.  If M_ZERO is
 325  *      specified through the given flags, then the pages are zeroed
 326  *      before they are mapped.
 327  */
 328 static vm_offset_t
 329 contigmapping(vm_map_t map, vm_size_t size, vm_page_t m, vm_memattr_t memattr,
 330     int flags)
 331 {
 332         vm_object_t object = kernel_object;
 333         vm_offset_t addr, tmp_addr;
 334
 335         vm_map_lock(map);
 336         if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
 337                 vm_map_unlock(map);
 338                 return (0);
 339         }
 340         vm_object_reference(object);
 341         vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS,
 342             addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
 343         vm_map_unlock(map);
 344         VM_OBJECT_LOCK(object);
 345         for (tmp_addr = addr; tmp_addr < addr + size; tmp_addr += PAGE_SIZE) {
 346                 if (memattr != VM_MEMATTR_DEFAULT)
 347                         pmap_page_set_memattr(m, memattr);
 348                 vm_page_insert(m, object,
 349                     OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
 350                 if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 351                         pmap_zero_page(m);
 352                 m->valid = VM_PAGE_BITS_ALL;
 353                 m++;
 354         }
 355         VM_OBJECT_UNLOCK(object);
 356         vm_map_wire(map, addr, addr + size,
 357             VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
 358         return (addr);
 359 }
 360
 361 vm_offset_t
 362 kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
 363     vm_paddr_t high, unsigned long alignment, unsigned long boundary,
 364     vm_memattr_t memattr)
 365 {
 366         vm_offset_t ret;
 367         vm_page_t pages;
 368         unsigned long npgs;
 369         int tries;
 370
 371         size = round_page(size);
 372         npgs = size >> PAGE_SHIFT;
 373         tries = 0;
 374 retry:
 375         pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
 376         if (pages == NULL) {
 377                 if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
 378                         vm_contig_grow_cache(tries, low, high);
 379                         tries++;
 380                         goto retry;
 381                 }
 382                 ret = 0;
 383         } else {
 384                 ret = contigmapping(map, size, pages, memattr, flags);
 385                 if (ret == 0)
 386                         vm_page_release_contig(pages, npgs);
 387         }
 388         return (ret);
 389 }