sys/vm/vm_reserv.c

   1 /*-
   2  * Copyright (c) 2002-2006 Rice University
   3  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
   4  * All rights reserved.
   5  *
   6  * This software was developed for the FreeBSD Project by Alan L. Cox,
   7  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  28  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29  * POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 /*
  33  *      Superpage reservation management module
  34  */
  35
  36 #include <sys/cdefs.h>
  37 __FBSDID("$FreeBSD$");
  38
  39 #include "opt_vm.h"
  40
  41 #include <sys/param.h>
  42 #include <sys/kernel.h>
  43 #include <sys/lock.h>
  44 #include <sys/malloc.h>
  45 #include <sys/mutex.h>
  46 #include <sys/queue.h>
  47 #include <sys/sbuf.h>
  48 #include <sys/sysctl.h>
  49 #include <sys/systm.h>
  50
  51 #include <vm/vm.h>
  52 #include <vm/vm_param.h>
  53 #include <vm/vm_object.h>
  54 #include <vm/vm_page.h>
  55 #include <vm/vm_phys.h>
  56 #include <vm/vm_reserv.h>
  57
  58 /*
  59  * The reservation system supports the speculative allocation of large physical
  60  * pages ("superpages").  Speculative allocation enables the fully-automatic
  61  * utilization of superpages by the virtual memory system.  In other words, no
  62  * programmatic directives are required to use superpages.
  63  */
  64
  65 #if VM_NRESERVLEVEL > 0
  66
/*
 * The number of small pages that are contained in a level 0 reservation.
 * Because it is computed as a shift of one, it is always a power of two,
 * which the masking in VM_RESERV_INDEX() below relies upon.
 */
#define VM_LEVEL_0_NPAGES       (1 << VM_LEVEL_0_ORDER)

/*
 * The number of bits by which a physical address is shifted to obtain the
 * reservation number
 */
#define VM_LEVEL_0_SHIFT        (VM_LEVEL_0_ORDER + PAGE_SHIFT)

/*
 * The size of a level 0 reservation in bytes
 */
#define VM_LEVEL_0_SIZE         (1 << VM_LEVEL_0_SHIFT)

/*
 * Computes the index of the small page underlying the given (object, pindex)
 * within the reservation's array of small pages.  The object's "pg_color" is
 * added to the page index and the sum is reduced to the range
 * [0, VM_LEVEL_0_NPAGES) by masking off the high-order bits.
 */
#define VM_RESERV_INDEX(object, pindex) \
    (((object)->pg_color + (pindex)) & (VM_LEVEL_0_NPAGES - 1))
  89
/*
 * The reservation structure
 *
 * A reservation structure is constructed whenever a large physical page is
 * speculatively allocated to an object.  The reservation provides the small
 * physical pages for the range [pindex, pindex + VM_LEVEL_0_NPAGES) of offsets
 * within that object.  The reservation's "popcnt" tracks the number of these
 * small physical pages that are in use at any given time.  When and if the
 * reservation is not fully utilized, it appears in the queue of partially-
 * populated reservations.  The reservation always appears on the containing
 * object's list of reservations.
 *
 * A partially-populated reservation can be broken and reclaimed at any time.
 */
struct vm_reserv {
        TAILQ_ENTRY(vm_reserv) partpopq;        /* vm_rvq_partpop linkage */
        LIST_ENTRY(vm_reserv) objq;             /* object's "rvq" linkage */
        vm_object_t     object;                 /* containing object */
        vm_pindex_t     pindex;                 /* offset within object */
        vm_page_t       pages;                  /* first page of a superpage */
        int             popcnt;                 /* # of pages in use */
        char            inpartpopq;             /* TRUE iff on vm_rvq_partpop */
};
 113
 114 /*
 115  * The reservation array
 116  *
  117  * This array is analogous in function to vm_page_array.  It differs in the
 118  * respect that it may contain a greater number of useful reservation
 119  * structures than there are (physical) superpages.  These "invalid"
 120  * reservation structures exist to trade-off space for time in the
 121  * implementation of vm_reserv_from_page().  Invalid reservation structures are
 122  * distinguishable from "valid" reservation structures by inspecting the
 123  * reservation's "pages" field.  Invalid reservation structures have a NULL
 124  * "pages" field.
 125  *
 126  * vm_reserv_from_page() maps a small (physical) page to an element of this
 127  * array by computing a physical reservation number from the page's physical
 128  * address.  The physical reservation number is used as the array index.
 129  *
 130  * An "active" reservation is a valid reservation structure that has a non-NULL
 131  * "object" field and a non-zero "popcnt" field.  In other words, every active
 132  * reservation belongs to a particular object.  Moreover, every active
 133  * reservation has an entry in the containing object's list of reservations.
 134  */
 135 static vm_reserv_t vm_reserv_array;
 136
/*
 * The partially-populated reservation queue
 *
 * This queue enables the fast recovery of an unused cached or free small page
 * from a partially-populated reservation.  The head of this queue is either
 * the least-recently-populated or most-recently-depopulated reservation:
 * vm_reserv_populate() inserts at the tail, vm_reserv_depopulate() inserts at
 * the head, and vm_reserv_reclaim() breaks the reservation at the head.
 *
 * Access to this queue is synchronized by the free page queue lock.
 */
static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
                            TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);

/*
 * The "vm.reserv" sysctl node, under which the read-only statistics below are
 * published.
 */
static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");

/*
 * Counts the reservations broken by vm_reserv_break_all() and
 * vm_reserv_reactivate_page().
 */
static long vm_reserv_broken;
SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
    &vm_reserv_broken, 0, "Cumulative number of broken reservations");

/*
 * Counts the reservations whose population count dropped to zero in
 * vm_reserv_depopulate().
 */
static long vm_reserv_freed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
    &vm_reserv_freed, 0, "Cumulative number of freed reservations");

static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);

/*
 * A string-valued OID whose contents are generated on demand by
 * sysctl_vm_reserv_partpopq().
 */
SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
    sysctl_vm_reserv_partpopq, "A", "Partially-populated reservation queues");

/*
 * Counts the reservations broken by vm_reserv_reclaim().
 */
static long vm_reserv_reclaimed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
    &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");

/*
 * Prototypes for the file-local helpers that are defined below.
 */
static void             vm_reserv_depopulate(vm_reserv_t rv);
static vm_reserv_t      vm_reserv_from_page(vm_page_t m);
static boolean_t        vm_reserv_has_pindex(vm_reserv_t rv,
                            vm_pindex_t pindex);
static void             vm_reserv_populate(vm_reserv_t rv);
 173
 174 /*
 175  * Describes the current state of the partially-populated reservation queue.
 176  */
 177 static int
 178 sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
 179 {
 180         struct sbuf sbuf;
 181         vm_reserv_t rv;
 182         char *cbuf;
 183         const int cbufsize = (VM_NRESERVLEVEL + 1) * 81;
 184         int counter, error, level, unused_pages;
 185
 186         cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
 187         sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
 188         sbuf_printf(&sbuf, "\nLEVEL     SIZE  NUMBER\n\n");
 189         for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
 190                 counter = 0;
 191                 unused_pages = 0;
 192                 mtx_lock(&vm_page_queue_free_mtx);
 193                 TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) {
 194                         counter++;
 195                         unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
 196                 }
 197                 mtx_unlock(&vm_page_queue_free_mtx);
 198                 sbuf_printf(&sbuf, "%5.5d: %6.6dK, %6.6d\n", level,
 199                     unused_pages * (PAGE_SIZE / 1024), counter);
 200         }
 201         sbuf_finish(&sbuf);
 202         error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
 203         sbuf_delete(&sbuf);
 204         free(cbuf, M_TEMP);
 205         return (error);
 206 }
 207
 208 /*
 209  * Reduces the given reservation's population count.  If the population count
 210  * becomes zero, the reservation is destroyed.  Additionally, moves the
 211  * reservation to the head of the partially-populated reservations queue if the
 212  * population count is non-zero.
 213  *
 214  * The free page queue lock must be held.
 215  */
 216 static void
 217 vm_reserv_depopulate(vm_reserv_t rv)
 218 {
 219
 220         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 221         KASSERT(rv->object != NULL,
 222             ("vm_reserv_depopulate: reserv %p is free", rv));
 223         KASSERT(rv->popcnt > 0,
 224             ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
 225         if (rv->inpartpopq) {
 226                 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
 227                 rv->inpartpopq = FALSE;
 228         }
 229         rv->popcnt--;
 230         if (rv->popcnt == 0) {
 231                 LIST_REMOVE(rv, objq);
 232                 rv->object = NULL;
 233                 vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
 234                 vm_reserv_freed++;
 235         } else {
 236                 rv->inpartpopq = TRUE;
 237                 TAILQ_INSERT_HEAD(&vm_rvq_partpop, rv, partpopq);
 238         }
 239 }
 240
 241 /*
 242  * Returns the reservation to which the given page might belong.
 243  */
 244 static __inline vm_reserv_t
 245 vm_reserv_from_page(vm_page_t m)
 246 {
 247
 248         return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
 249 }
 250
 251 /*
 252  * Returns TRUE if the given reservation contains the given page index and
 253  * FALSE otherwise.
 254  */
 255 static __inline boolean_t
 256 vm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex)
 257 {
 258
 259         return (((pindex - rv->pindex) & ~(VM_LEVEL_0_NPAGES - 1)) == 0);
 260 }
 261
 262 /*
 263  * Increases the given reservation's population count.  Moves the reservation
 264  * to the tail of the partially-populated reservation queue.
 265  *
 266  * The free page queue must be locked.
 267  */
 268 static void
 269 vm_reserv_populate(vm_reserv_t rv)
 270 {
 271
 272         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 273         KASSERT(rv->object != NULL,
 274             ("vm_reserv_populate: reserv %p is free", rv));
 275         KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
 276             ("vm_reserv_populate: reserv %p is already full", rv));
 277         if (rv->inpartpopq) {
 278                 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
 279                 rv->inpartpopq = FALSE;
 280         }
 281         rv->popcnt++;
 282         if (rv->popcnt < VM_LEVEL_0_NPAGES) {
 283                 rv->inpartpopq = TRUE;
 284                 TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
 285         }
 286 }
 287
 288 /*
 289  * Allocates a page from an existing or newly-created reservation.
 290  *
 291  * The object and free page queue must be locked.
 292  */
 293 vm_page_t
 294 vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex)
 295 {
 296         vm_page_t m, mpred, msucc;
 297         vm_pindex_t first, leftcap, rightcap;
 298         vm_reserv_t rv;
 299
 300         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 301
 302         /*
 303          * Is a reservation fundamentally not possible?
 304          */
 305         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 306         if (pindex < VM_RESERV_INDEX(object, pindex) ||
 307             pindex >= object->size)
 308                 return (NULL);
 309
 310         /*
 311          * Look for an existing reservation.
 312          */
 313         msucc = NULL;
 314         mpred = object->root;
 315         while (mpred != NULL) {
 316                 KASSERT(mpred->pindex != pindex,
 317                     ("vm_reserv_alloc_page: pindex already allocated"));
 318                 rv = vm_reserv_from_page(mpred);
 319                 if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) {
 320                         m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
 321                         /* Handle vm_page_rename(m, new_object, ...). */
 322                         if ((m->flags & (PG_CACHED | PG_FREE)) == 0)
 323                                 return (NULL);
 324                         vm_reserv_populate(rv);
 325                         return (m);
 326                 } else if (mpred->pindex < pindex) {
 327                         if (msucc != NULL ||
 328                             (msucc = TAILQ_NEXT(mpred, listq)) == NULL)
 329                                 break;
 330                         KASSERT(msucc->pindex != pindex,
 331                             ("vm_reserv_alloc_page: pindex already allocated"));
 332                         rv = vm_reserv_from_page(msucc);
 333                         if (rv->object == object &&
 334                             vm_reserv_has_pindex(rv, pindex)) {
 335                                 m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
 336                                 /* Handle vm_page_rename(m, new_object, ...). */
 337                                 if ((m->flags & (PG_CACHED | PG_FREE)) == 0)
 338                                         return (NULL);
 339                                 vm_reserv_populate(rv);
 340                                 return (m);
 341                         } else if (pindex < msucc->pindex)
 342                                 break;
 343                 } else if (msucc == NULL) {
 344                         msucc = mpred;
 345                         mpred = TAILQ_PREV(msucc, pglist, listq);
 346                         continue;
 347                 }
 348                 msucc = NULL;
 349                 mpred = object->root = vm_page_splay(pindex, object->root);
 350         }
 351
 352         /*
 353          * Determine the first index to the left that can be used.
 354          */
 355         if (mpred == NULL)
 356                 leftcap = 0;
 357         else if ((rv = vm_reserv_from_page(mpred))->object != object)
 358                 leftcap = mpred->pindex + 1;
 359         else
 360                 leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
 361
 362         /*
 363          * Determine the first index to the right that cannot be used.
 364          */
 365         if (msucc == NULL)
 366                 rightcap = pindex + VM_LEVEL_0_NPAGES;
 367         else if ((rv = vm_reserv_from_page(msucc))->object != object)
 368                 rightcap = msucc->pindex;
 369         else
 370                 rightcap = rv->pindex;
 371
 372         /*
 373          * Determine if a reservation fits between the first index to
 374          * the left that can be used and the first index to the right
 375          * that cannot be used.
 376          */
 377         first = pindex - VM_RESERV_INDEX(object, pindex);
 378         if (first < leftcap || first + VM_LEVEL_0_NPAGES > rightcap)
 379                 return (NULL);
 380
 381         /*
 382          * Would a new reservation extend past the end of the given object?
 383          */
 384         if (object->size < first + VM_LEVEL_0_NPAGES) {
 385                 /*
 386                  * Don't allocate a new reservation if the object is a vnode or
 387                  * backed by another object that is a vnode.
 388                  */
 389                 if (object->type == OBJT_VNODE ||
 390                     (object->backing_object != NULL &&
 391                     object->backing_object->type == OBJT_VNODE))
 392                         return (NULL);
 393                 /* Speculate that the object may grow. */
 394         }
 395
 396         /*
 397          * Allocate a new reservation.
 398          */
 399         m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
 400         if (m != NULL) {
 401                 rv = vm_reserv_from_page(m);
 402                 KASSERT(rv->pages == m,
 403                     ("vm_reserv_alloc_page: reserv %p's pages is corrupted",
 404                     rv));
 405                 KASSERT(rv->object == NULL,
 406                     ("vm_reserv_alloc_page: reserv %p isn't free", rv));
 407                 LIST_INSERT_HEAD(&object->rvq, rv, objq);
 408                 rv->object = object;
 409                 rv->pindex = first;
 410                 KASSERT(rv->popcnt == 0,
 411                     ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted",
 412                     rv));
 413                 KASSERT(!rv->inpartpopq,
 414                     ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE",
 415                     rv));
 416                 vm_reserv_populate(rv);
 417                 m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
 418         }
 419         return (m);
 420 }
 421
 422 /*
 423  * Breaks all reservations belonging to the given object.
 424  */
 425 void
 426 vm_reserv_break_all(vm_object_t object)
 427 {
 428         vm_reserv_t rv;
 429         int i;
 430
 431         mtx_lock(&vm_page_queue_free_mtx);
 432         while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
 433                 KASSERT(rv->object == object,
 434                     ("vm_reserv_break_all: reserv %p is corrupted", rv));
 435                 if (rv->inpartpopq) {
 436                         TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
 437                         rv->inpartpopq = FALSE;
 438                 }
 439                 LIST_REMOVE(rv, objq);
 440                 rv->object = NULL;
 441                 for (i = 0; i < VM_LEVEL_0_NPAGES; i++) {
 442                         if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
 443                                 vm_phys_free_pages(&rv->pages[i], 0);
 444                         else
 445                                 rv->popcnt--;
 446                 }
 447                 KASSERT(rv->popcnt == 0,
 448                     ("vm_reserv_break_all: reserv %p's popcnt is corrupted",
 449                     rv));
 450                 vm_reserv_broken++;
 451         }
 452         mtx_unlock(&vm_page_queue_free_mtx);
 453 }
 454
 455 /*
 456  * Frees the given page if it belongs to a reservation.  Returns TRUE if the
 457  * page is freed and FALSE otherwise.
 458  *
 459  * The free page queue lock must be held.
 460  */
 461 boolean_t
 462 vm_reserv_free_page(vm_page_t m)
 463 {
 464         vm_reserv_t rv;
 465
 466         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 467         rv = vm_reserv_from_page(m);
 468         if (rv->object != NULL) {
 469                 vm_reserv_depopulate(rv);
 470                 return (TRUE);
 471         }
 472         return (FALSE);
 473 }
 474
 475 /*
 476  * Initializes the reservation management system.  Specifically, initializes
 477  * the reservation array.
 478  *
 479  * Requires that vm_page_array and first_page are initialized!
 480  */
 481 void
 482 vm_reserv_init(void)
 483 {
 484         vm_paddr_t paddr;
 485         int i;
 486
 487         /*
 488          * Initialize the reservation array.  Specifically, initialize the
 489          * "pages" field for every element that has an underlying superpage.
 490          */
 491         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
 492                 paddr = roundup2(phys_avail[i], VM_LEVEL_0_SIZE);
 493                 while (paddr + VM_LEVEL_0_SIZE <= phys_avail[i + 1]) {
 494                         vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
 495                             PHYS_TO_VM_PAGE(paddr);
 496                         paddr += VM_LEVEL_0_SIZE;
 497                 }
 498         }
 499 }
 500
 501 /*
 502  * Returns a reservation level if the given page belongs to a fully-populated
 503  * reservation and -1 otherwise.
 504  */
 505 int
 506 vm_reserv_level_iffullpop(vm_page_t m)
 507 {
 508         vm_reserv_t rv;
 509
 510         rv = vm_reserv_from_page(m);
 511         return (rv->popcnt == VM_LEVEL_0_NPAGES ? 0 : -1);
 512 }
 513
 514 /*
 515  * Prepare for the reactivation of a cached page.
 516  *
 517  * First, suppose that the given page "m" was allocated individually, i.e., not
 518  * as part of a reservation, and cached.  Then, suppose a reservation
 519  * containing "m" is allocated by the same object.  Although "m" and the
 520  * reservation belong to the same object, "m"'s pindex may not match the
 521  * reservation's.
 522  *
 523  * The free page queue must be locked.
 524  */
 525 boolean_t
 526 vm_reserv_reactivate_page(vm_page_t m)
 527 {
 528         vm_reserv_t rv;
 529         int i, m_index;
 530
 531         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 532         rv = vm_reserv_from_page(m);
 533         if (rv->object == NULL)
 534                 return (FALSE);
 535         KASSERT((m->flags & PG_CACHED) != 0,
 536             ("vm_reserv_uncache_page: page %p is not cached", m));
 537         if (m->object == rv->object &&
 538             m->pindex - rv->pindex == VM_RESERV_INDEX(m->object, m->pindex))
 539                 vm_reserv_populate(rv);
 540         else {
 541                 KASSERT(rv->inpartpopq,
 542                     ("vm_reserv_uncache_page: reserv %p's inpartpopq is FALSE",
 543                     rv));
 544                 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
 545                 rv->inpartpopq = FALSE;
 546                 LIST_REMOVE(rv, objq);
 547                 rv->object = NULL;
 548                 /* Don't vm_phys_free_pages(m, 0). */
 549                 m_index = m - rv->pages;
 550                 for (i = 0; i < m_index; i++) {
 551                         if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
 552                                 vm_phys_free_pages(&rv->pages[i], 0);
 553                         else
 554                                 rv->popcnt--;
 555                 }
 556                 for (i++; i < VM_LEVEL_0_NPAGES; i++) {
 557                         if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
 558                                 vm_phys_free_pages(&rv->pages[i], 0);
 559                         else
 560                                 rv->popcnt--;
 561                 }
 562                 KASSERT(rv->popcnt == 0,
 563                     ("vm_reserv_uncache_page: reserv %p's popcnt is corrupted",
 564                     rv));
 565                 vm_reserv_broken++;
 566         }
 567         return (TRUE);
 568 }
 569
 570 /*
 571  * Breaks the reservation at the head of the partially-populated reservation
 572  * queue, releasing its cached and free pages to the physical memory
 573  * allocator.  Returns TRUE if a reservation is broken and FALSE otherwise.
 574  *
 575  * The free page queue lock must be held.
 576  */
 577 boolean_t
 578 vm_reserv_reclaim(void)
 579 {
 580         vm_reserv_t rv;
 581         int i;
 582
 583         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 584         if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
 585                 KASSERT(rv->inpartpopq,
 586                     ("vm_reserv_reclaim: reserv %p's inpartpopq is corrupted",
 587                     rv));
 588                 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
 589                 rv->inpartpopq = FALSE;
 590                 KASSERT(rv->object != NULL,
 591                     ("vm_reserv_reclaim: reserv %p is free", rv));
 592                 LIST_REMOVE(rv, objq);
 593                 rv->object = NULL;
 594                 for (i = 0; i < VM_LEVEL_0_NPAGES; i++) {
 595                         if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
 596                                 vm_phys_free_pages(&rv->pages[i], 0);
 597                         else
 598                                 rv->popcnt--;
 599                 }
 600                 KASSERT(rv->popcnt == 0,
 601                     ("vm_reserv_reclaim: reserv %p's popcnt is corrupted",
 602                     rv));
 603                 vm_reserv_reclaimed++;
 604                 return (TRUE);
 605         }
 606         return (FALSE);
 607 }
 608
 609 /*
 610  * Transfers the reservation underlying the given page to a new object.
 611  *
 612  * The object must be locked.
 613  */
 614 void
 615 vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
 616     vm_pindex_t old_object_offset)
 617 {
 618         vm_reserv_t rv;
 619
 620         VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
 621         rv = vm_reserv_from_page(m);
 622         if (rv->object == old_object) {
 623                 mtx_lock(&vm_page_queue_free_mtx);
 624                 if (rv->object == old_object) {
 625                         LIST_REMOVE(rv, objq);
 626                         LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
 627                         rv->object = new_object;
 628                         rv->pindex -= old_object_offset;
 629                 }
 630                 mtx_unlock(&vm_page_queue_free_mtx);
 631         }
 632 }
 633
 634 /*
 635  * Allocates the virtual and physical memory required by the reservation
 636  * management system's data structures, in particular, the reservation array.
 637  */
 638 vm_paddr_t
 639 vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
 640 {
 641         vm_paddr_t new_end;
 642         size_t size;
 643
 644         /*
 645          * Calculate the size (in bytes) of the reservation array.  Round up
 646          * from "high_water" because every small page is mapped to an element
 647          * in the reservation array based on its physical address.  Thus, the
 648          * number of elements in the reservation array can be greater than the
 649          * number of superpages.
 650          */
 651         size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv);
 652
 653         /*
 654          * Allocate and map the physical memory for the reservation array.  The
 655          * next available virtual address is returned by reference.
 656          */
 657         new_end = end - round_page(size);
 658         vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end,
 659             VM_PROT_READ | VM_PROT_WRITE);
 660         bzero(vm_reserv_array, size);
 661
 662         /*
 663          * Return the next available physical address.
 664          */
 665         return (new_end);
 666 }
 667
 668 #endif  /* VM_NRESERVLEVEL > 0 */