1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2002-2006 Rice University
5  * Copyright (c) 2007-2011 Alan L. Cox <alc@cs.rice.edu>
6  * All rights reserved.
7  *
8  * This software was developed for the FreeBSD Project by Alan L. Cox,
9  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
24  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
27  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
30  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 /*
35  *      Superpage reservation management module
36  *
37  * Any external functions defined by this module are only to be used by the
38  * virtual memory system.
39  */
40
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43
44 #include "opt_vm.h"
45
46 #include <sys/param.h>
47 #include <sys/kernel.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mutex.h>
51 #include <sys/queue.h>
52 #include <sys/rwlock.h>
53 #include <sys/sbuf.h>
54 #include <sys/sysctl.h>
55 #include <sys/systm.h>
56 #include <sys/vmmeter.h>
57
58 #include <vm/vm.h>
59 #include <vm/vm_param.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pageout.h>
63 #include <vm/vm_phys.h>
64 #include <vm/vm_pagequeue.h>
65 #include <vm/vm_radix.h>
66 #include <vm/vm_reserv.h>
67
68 /*
69  * The reservation system supports the speculative allocation of large physical
70  * pages ("superpages").  Speculative allocation enables the fully automatic
71  * utilization of superpages by the virtual memory system.  In other words, no
72  * programmatic directives are required to use superpages.
73  */
74
75 #if VM_NRESERVLEVEL > 0
76
77 /*
78  * The number of small pages that are contained in a level 0 reservation
79  */
80 #define VM_LEVEL_0_NPAGES       (1 << VM_LEVEL_0_ORDER)
81
82 /*
83  * The number of bits by which a physical address is shifted to obtain the
84  * reservation number
85  */
86 #define VM_LEVEL_0_SHIFT        (VM_LEVEL_0_ORDER + PAGE_SHIFT)
87
88 /*
89  * The size of a level 0 reservation in bytes
90  */
91 #define VM_LEVEL_0_SIZE         (1 << VM_LEVEL_0_SHIFT)
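/*
 * For example, on amd64, where VM_LEVEL_0_ORDER is 9 and PAGE_SHIFT is 12,
 * the definitions above work out to VM_LEVEL_0_NPAGES = 512,
 * VM_LEVEL_0_SHIFT = 21, and VM_LEVEL_0_SIZE = 2MB, so one level 0
 * reservation covers one 2MB superpage.  Other architectures define
 * VM_LEVEL_0_ORDER in their own <machine/vmparam.h>.
 */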
92
93 /*
94  * Computes the index of the small page underlying the given (object, pindex)
95  * within the reservation's array of small pages.
96  */
97 #define VM_RESERV_INDEX(object, pindex) \
98     (((object)->pg_color + (pindex)) & (VM_LEVEL_0_NPAGES - 1))
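/*
 * For example, with VM_LEVEL_0_NPAGES = 512 and an object whose pg_color is
 * 7, the page at pindex 1030 has index (7 + 1030) & 511 = 13 within its
 * reservation.  The pg_color term biases the index so that the physical
 * layout of the reservation can line up with the object's expected virtual
 * alignment.
 */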
99
100 /*
101  * The size of a population map entry
102  */
103 typedef u_long          popmap_t;
104
105 /*
106  * The number of bits in a population map entry
107  */
108 #define NBPOPMAP        (NBBY * sizeof(popmap_t))
109
110 /*
111  * The number of population map entries in a reservation
112  */
113 #define NPOPMAP         howmany(VM_LEVEL_0_NPAGES, NBPOPMAP)
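/*
 * For example, on an LP64 platform popmap_t is 64 bits wide (NBPOPMAP = 64),
 * so a 512-page reservation needs NPOPMAP = 8 population map entries.
 */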
114
115 /*
116  * Clear a bit in the population map.
117  */
118 static __inline void
119 popmap_clear(popmap_t popmap[], int i)
120 {
121
122         popmap[i / NBPOPMAP] &= ~(1UL << (i % NBPOPMAP));
123 }
124
125 /*
126  * Set a bit in the population map.
127  */
128 static __inline void
129 popmap_set(popmap_t popmap[], int i)
130 {
131
132         popmap[i / NBPOPMAP] |= 1UL << (i % NBPOPMAP);
133 }
134
135 /*
136  * Is a bit in the population map clear?
137  */
138 static __inline boolean_t
139 popmap_is_clear(popmap_t popmap[], int i)
140 {
141
142         return ((popmap[i / NBPOPMAP] & (1UL << (i % NBPOPMAP))) == 0);
143 }
144
145 /*
146  * Is a bit in the population map set?
147  */
148 static __inline boolean_t
149 popmap_is_set(popmap_t popmap[], int i)
150 {
151
152         return ((popmap[i / NBPOPMAP] & (1UL << (i % NBPOPMAP))) != 0);
153 }
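/*
 * For example, assuming NBPOPMAP is 64, popmap_set(popmap, 70) sets bit
 * 70 % 64 = 6 of popmap[70 / 64] = popmap[1]; the other helpers index the
 * population map in the same way.
 */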
154
155 /*
156  * The reservation structure
157  *
158  * A reservation structure is constructed whenever a large physical page is
159  * speculatively allocated to an object.  The reservation provides the small
160  * physical pages for the range [pindex, pindex + VM_LEVEL_0_NPAGES) of offsets
161  * within that object.  The reservation's "popcnt" tracks the number of these
162  * small physical pages that are in use at any given time.  When and if the
163  * reservation is not fully utilized, it appears in the queue of partially
164  * populated reservations.  The reservation always appears on the containing
165  * object's list of reservations.
166  *
167  * A partially populated reservation can be broken and reclaimed at any time.
168  *
169  * f - vm_domain_free_lock
170  * o - vm_reserv_object_lock
171  * c - constant after boot
172  */
173 struct vm_reserv {
174         TAILQ_ENTRY(vm_reserv) partpopq;        /* (f) per-domain queue. */
175         LIST_ENTRY(vm_reserv) objq;             /* (o, f) object queue */
176         vm_object_t     object;                 /* (o, f) containing object */
177         vm_pindex_t     pindex;                 /* (o, f) offset in object */
178         vm_page_t       pages;                  /* (c) first page  */
179         int             domain;                 /* (c) NUMA domain. */
180         int             popcnt;                 /* (f) # of pages in use */
181         char            inpartpopq;             /* (f) */
182         popmap_t        popmap[NPOPMAP];        /* (f) bit vector, used pages */
183 };
184
185 /*
186  * The reservation array
187  *
188  * This array is analogous in function to vm_page_array.  It differs in the
189  * respect that it may contain more reservation structures than there are
190  * (physical) superpages.  These "invalid" reservation structures exist to
191  * trade off space for time in the
192  * implementation of vm_reserv_from_page().  Invalid reservation structures are
193  * distinguishable from "valid" reservation structures by inspecting the
194  * reservation's "pages" field.  Invalid reservation structures have a NULL
195  * "pages" field.
196  *
197  * vm_reserv_from_page() maps a small (physical) page to an element of this
198  * array by computing a physical reservation number from the page's physical
199  * address.  The physical reservation number is used as the array index.
200  *
201  * An "active" reservation is a valid reservation structure that has a non-NULL
202  * "object" field and a non-zero "popcnt" field.  In other words, every active
203  * reservation belongs to a particular object.  Moreover, every active
204  * reservation has an entry in the containing object's list of reservations.  
205  */
206 static vm_reserv_t vm_reserv_array;
207
208 /*
209  * The partially populated reservation queue
210  *
211  * This queue enables the fast recovery of an unused free small page from a
212  * partially populated reservation.  The reservation at the head of this queue
213  * is the least recently changed, partially populated reservation.
214  *
215  * Access to this queue is synchronized by the free page queue lock.
216  */
217 static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDOM];
218
219 static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
220
221 static long vm_reserv_broken;
222 SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
223     &vm_reserv_broken, 0, "Cumulative number of broken reservations");
224
225 static long vm_reserv_freed;
226 SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
227     &vm_reserv_freed, 0, "Cumulative number of freed reservations");
228
229 static int sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS);
230
231 SYSCTL_PROC(_vm_reserv, OID_AUTO, fullpop, CTLTYPE_INT | CTLFLAG_RD, NULL, 0,
232     sysctl_vm_reserv_fullpop, "I", "Current number of full reservations");
233
234 static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);
235
236 SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
237     sysctl_vm_reserv_partpopq, "A", "Partially populated reservation queues");
238
239 static long vm_reserv_reclaimed;
240 SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
241     &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
242
243 /*
244  * The object lock pool is used to synchronize the rvq.  We cannot use a
245  * pool mutex because this lock is needed before malloc(9) is functional.
246  *
247  * The "hash" function could be made faster without divide and modulo.
248  */
249 #define VM_RESERV_OBJ_LOCK_COUNT        MAXCPU
250
251 struct mtx_padalign vm_reserv_object_mtx[VM_RESERV_OBJ_LOCK_COUNT];
252
253 #define vm_reserv_object_lock_idx(object)                       \
254             (((uintptr_t)object / sizeof(*object)) % VM_RESERV_OBJ_LOCK_COUNT)
255 #define vm_reserv_object_lock_ptr(object)                       \
256             &vm_reserv_object_mtx[vm_reserv_object_lock_idx((object))]
257 #define vm_reserv_object_lock(object)                           \
258             mtx_lock(vm_reserv_object_lock_ptr((object)))
259 #define vm_reserv_object_unlock(object)                         \
260             mtx_unlock(vm_reserv_object_lock_ptr((object)))
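/*
 * For example, vm_reserv_object_lock_idx() divides the object's address by
 * sizeof(*object) before taking it modulo VM_RESERV_OBJ_LOCK_COUNT, so
 * objects that are adjacent in memory tend to hash to different locks
 * instead of contending on the same one.
 */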
261
262 static void             vm_reserv_break(vm_reserv_t rv, vm_page_t m);
263 static void             vm_reserv_depopulate(vm_reserv_t rv, int index);
264 static vm_reserv_t      vm_reserv_from_page(vm_page_t m);
265 static boolean_t        vm_reserv_has_pindex(vm_reserv_t rv,
266                             vm_pindex_t pindex);
267 static void             vm_reserv_populate(vm_reserv_t rv, int index);
268 static void             vm_reserv_reclaim(vm_reserv_t rv);
269
270 /*
271  * Returns the current number of full reservations.
272  *
273  * Since the number of full reservations is computed without acquiring the
274  * free page queue lock, the returned value may be inexact.
275  */
276 static int
277 sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS)
278 {
279         vm_paddr_t paddr;
280         struct vm_phys_seg *seg;
281         vm_reserv_t rv;
282         int fullpop, segind;
283
284         fullpop = 0;
285         for (segind = 0; segind < vm_phys_nsegs; segind++) {
286                 seg = &vm_phys_segs[segind];
287                 paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
288                 while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
289                         rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
290                         fullpop += rv->popcnt == VM_LEVEL_0_NPAGES;
291                         paddr += VM_LEVEL_0_SIZE;
292                 }
293         }
294         return (sysctl_handle_int(oidp, &fullpop, 0, req));
295 }
296
297 /*
298  * Describes the current state of the partially populated reservation queue.
299  */
300 static int
301 sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
302 {
303         struct sbuf sbuf;
304         vm_reserv_t rv;
305         int counter, error, domain, level, unused_pages;
306
307         error = sysctl_wire_old_buffer(req, 0);
308         if (error != 0)
309                 return (error);
310         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
311         sbuf_printf(&sbuf, "\nDOMAIN    LEVEL     SIZE  NUMBER\n\n");
312         for (domain = 0; domain < vm_ndomains; domain++) {
313                 for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
314                         counter = 0;
315                         unused_pages = 0;
316                         vm_domain_free_lock(VM_DOMAIN(domain));
317                         TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
318                                 counter++;
319                                 unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
320                         }
321                         vm_domain_free_unlock(VM_DOMAIN(domain));
322                         sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
323                             domain, level,
324                             unused_pages * ((int)PAGE_SIZE / 1024), counter);
325                 }
326         }
327         error = sbuf_finish(&sbuf);
328         sbuf_delete(&sbuf);
329         return (error);
330 }
331
332 /*
333  * Remove a reservation from the object's objq.
334  */
335 static void
336 vm_reserv_remove(vm_reserv_t rv)
337 {
338         vm_object_t object;
339
340         KASSERT(rv->object != NULL,
341             ("vm_reserv_remove: reserv %p is free", rv));
342         KASSERT(!rv->inpartpopq,
343             ("vm_reserv_remove: reserv %p's inpartpopq is TRUE", rv));
344         object = rv->object;
345         vm_reserv_object_lock(object);
346         LIST_REMOVE(rv, objq);
347         rv->object = NULL;
348         vm_reserv_object_unlock(object);
349 }
350
351 /*
352  * Insert a new reservation into the object's objq.
353  */
354 static void
355 vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
356 {
357         int i;
358
359         KASSERT(rv->object == NULL,
360             ("vm_reserv_insert: reserv %p isn't free", rv));
361         KASSERT(rv->popcnt == 0,
362             ("vm_reserv_insert: reserv %p's popcnt is corrupted", rv));
363         KASSERT(!rv->inpartpopq,
364             ("vm_reserv_insert: reserv %p's inpartpopq is TRUE", rv));
365         for (i = 0; i < NPOPMAP; i++)
366                 KASSERT(rv->popmap[i] == 0,
367                     ("vm_reserv_insert: reserv %p's popmap is corrupted", rv));
368         vm_reserv_object_lock(object);
369         rv->pindex = pindex;
370         rv->object = object;
371         LIST_INSERT_HEAD(&object->rvq, rv, objq);
372         vm_reserv_object_unlock(object);
373 }
374
375 /*
376  * Reduces the given reservation's population count.  If the population count
377  * becomes zero, the reservation is destroyed.  Additionally, moves the
378  * reservation to the tail of the partially populated reservation queue if the
379  * population count is non-zero.
380  *
381  * The free page queue lock must be held.
382  */
383 static void
384 vm_reserv_depopulate(vm_reserv_t rv, int index)
385 {
386
387         vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
388         KASSERT(rv->object != NULL,
389             ("vm_reserv_depopulate: reserv %p is free", rv));
390         KASSERT(popmap_is_set(rv->popmap, index),
391             ("vm_reserv_depopulate: reserv %p's popmap[%d] is clear", rv,
392             index));
393         KASSERT(rv->popcnt > 0,
394             ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
395         KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
396             ("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
397             rv, rv->domain));
398         if (rv->inpartpopq) {
399                 TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
400                 rv->inpartpopq = FALSE;
401         } else {
402                 KASSERT(rv->pages->psind == 1,
403                     ("vm_reserv_depopulate: reserv %p is already demoted",
404                     rv));
405                 rv->pages->psind = 0;
406         }
407         popmap_clear(rv->popmap, index);
408         rv->popcnt--;
409         if (rv->popcnt == 0) {
410                 vm_reserv_remove(rv);
411                 vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
412                 vm_reserv_freed++;
413         } else {
414                 rv->inpartpopq = TRUE;
415                 TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
416         }
417 }
418
419 /*
420  * Returns the reservation to which the given page might belong.
421  */
422 static __inline vm_reserv_t
423 vm_reserv_from_page(vm_page_t m)
424 {
425
426         return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
427 }
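/*
 * For example, assuming a 2MB reservation size (VM_LEVEL_0_SHIFT = 21), a
 * page at physical address 0x40321000 maps to
 * vm_reserv_array[0x40321000 >> 21], i.e., entry 513, the same entry as
 * every other page in that 2MB-aligned physical range.
 */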
428
429 /*
430  * Returns an existing reservation or NULL, and initializes *msuccp.
431  */
432 static vm_reserv_t
433 vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex,
434     vm_page_t mpred, vm_page_t *msuccp)
435 {
436         vm_reserv_t rv;
437         vm_page_t msucc;
438
439         msucc = NULL;
440         if (mpred != NULL) {
441                 KASSERT(mpred->object == object,
442                     ("vm_reserv_from_object: object doesn't contain mpred"));
443                 KASSERT(mpred->pindex < pindex,
444                     ("vm_reserv_from_object: mpred doesn't precede pindex"));
445                 rv = vm_reserv_from_page(mpred);
446                 if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
447                         goto found;
448                 msucc = TAILQ_NEXT(mpred, listq);
449         } else
450                 msucc = TAILQ_FIRST(&object->memq);
451         if (msucc != NULL) {
452                 KASSERT(msucc->pindex > pindex,
453                     ("vm_reserv_from_object: msucc doesn't succeed pindex"));
454                 rv = vm_reserv_from_page(msucc);
455                 if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
456                         goto found;
457         }
458         rv = NULL;
459
460 found:
461         *msuccp = msucc;
462
463         return (rv);
464 }
465
466 /*
467  * Returns TRUE if the given reservation contains the given page index and
468  * FALSE otherwise.
469  */
470 static __inline boolean_t
471 vm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex)
472 {
473
474         return (((pindex - rv->pindex) & ~(VM_LEVEL_0_NPAGES - 1)) == 0);
475 }
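/*
 * For example, with VM_LEVEL_0_NPAGES = 512, a reservation whose pindex is
 * 1024 contains page indices 1024 through 1535: for exactly those values the
 * difference (pindex - rv->pindex) has no bits set above bit 8, so masking
 * it with ~(VM_LEVEL_0_NPAGES - 1) yields zero.
 */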
476
477 /*
478  * Increases the given reservation's population count.  Moves the reservation
479  * to the tail of the partially populated reservation queue.
480  *
481  * The free page queue must be locked.
482  */
483 static void
484 vm_reserv_populate(vm_reserv_t rv, int index)
485 {
486
487         vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
488         KASSERT(rv->object != NULL,
489             ("vm_reserv_populate: reserv %p is free", rv));
490         KASSERT(popmap_is_clear(rv->popmap, index),
491             ("vm_reserv_populate: reserv %p's popmap[%d] is set", rv,
492             index));
493         KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
494             ("vm_reserv_populate: reserv %p is already full", rv));
495         KASSERT(rv->pages->psind == 0,
496             ("vm_reserv_populate: reserv %p is already promoted", rv));
497         KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
498             ("vm_reserv_populate: reserv %p's domain is corrupted %d",
499             rv, rv->domain));
500         if (rv->inpartpopq) {
501                 TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
502                 rv->inpartpopq = FALSE;
503         }
504         popmap_set(rv->popmap, index);
505         rv->popcnt++;
506         if (rv->popcnt < VM_LEVEL_0_NPAGES) {
507                 rv->inpartpopq = TRUE;
508                 TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
509         } else
510                 rv->pages->psind = 1;
511 }
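/*
 * Within this module, "psind" on the reservation's first page is set to 1
 * only here, once the reservation becomes fully populated, and is cleared
 * again in vm_reserv_depopulate() as soon as one of its pages is freed.
 */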
512
513 /*
514  * Attempts to allocate a contiguous set of physical pages of the given size
515  * "npages" from an existing reservation.  All of the physical pages
516  * must be at or above the given physical address "low" and below the given
517  * physical address "high".  The given value "alignment" determines the
518  * alignment of the first physical page in the set.  If the given value
519  * "boundary" is non-zero, then the set of physical pages cannot cross any
520  * physical address boundary that is a multiple of that value.  Both
521  * "alignment" and "boundary" must be a power of two.
522  *
523  * The page "mpred" must immediately precede the offset "pindex" within the
524  * specified object.
525  *
526  * The object and free page queue must be locked.
527  */
528 vm_page_t
529 vm_reserv_extend_contig(int req, vm_object_t object, vm_pindex_t pindex,
530     int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
531     u_long alignment, vm_paddr_t boundary, vm_page_t mpred)
532 {
533         struct vm_domain *vmd;
534         vm_paddr_t pa, size;
535         vm_page_t m, msucc;
536         vm_reserv_t rv;
537         int i, index;
538
539         VM_OBJECT_ASSERT_WLOCKED(object);
540         KASSERT(npages != 0, ("vm_reserv_extend_contig: npages is 0"));
541
542         /*
543          * Is a reservation fundamentally impossible?
544          */
545         if (pindex < VM_RESERV_INDEX(object, pindex) ||
546             pindex + npages > object->size || object->resident_page_count == 0)
547                 return (NULL);
548
549         /*
550          * All reservations of a particular size have the same alignment.
551          * Assuming that the first page is allocated from a reservation, the
552          * least significant bits of its physical address can be determined
553          * from its offset from the beginning of the reservation and the size
554          * of the reservation.
555          *
556          * Could the specified index within a reservation of the smallest
557          * possible size satisfy the alignment and boundary requirements?
558          */
559         pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT;
560         if ((pa & (alignment - 1)) != 0)
561                 return (NULL);
562         size = npages << PAGE_SHIFT;
563         if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
564                 return (NULL);
565
566         /*
567          * Look for an existing reservation.
568          */
569         rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
570         if (rv == NULL)
571                 return (NULL);
572         KASSERT(object != kernel_object || rv->domain == domain,
573             ("vm_reserv_extend_contig: Domain mismatch from reservation."));
574         index = VM_RESERV_INDEX(object, pindex);
575         /* Does the allocation fit within the reservation? */
576         if (index + npages > VM_LEVEL_0_NPAGES)
577                 return (NULL);
578         domain = rv->domain;
579         vmd = VM_DOMAIN(domain);
580         vm_domain_free_lock(vmd);
581         if (rv->object != object || !vm_domain_available(vmd, req, npages)) {
582                 m = NULL;
583                 goto out;
584         }
585         m = &rv->pages[index];
586         pa = VM_PAGE_TO_PHYS(m);
587         if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
588             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
589                 m = NULL;
590                 goto out;
591         }
592         /* Handle vm_page_rename(m, new_object, ...). */
593         for (i = 0; i < npages; i++) {
594                 if (popmap_is_set(rv->popmap, index + i)) {
595                         m = NULL;
596                         goto out;
597                 }
598         }
599         for (i = 0; i < npages; i++)
600                 vm_reserv_populate(rv, index + i);
601         vm_domain_freecnt_adj(vmd, -npages);
602 out:
603         vm_domain_free_unlock(vmd);
604         return (m);
605 }
606
607 /*
608  * Allocates a contiguous set of physical pages of the given size "npages"
609  * from existing or newly created reservations.  All of the physical pages
610  * must be at or above the given physical address "low" and below the given
611  * physical address "high".  The given value "alignment" determines the
612  * alignment of the first physical page in the set.  If the given value
613  * "boundary" is non-zero, then the set of physical pages cannot cross any
614  * physical address boundary that is a multiple of that value.  Both
615  * "alignment" and "boundary" must be a power of two.
616  *
617  * The page "mpred" must immediately precede the offset "pindex" within the
618  * specified object.
619  *
620  * The object and free page queue must be locked.
621  */
622 vm_page_t
623 vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
624     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
625     vm_paddr_t boundary, vm_page_t mpred)
626 {
627         vm_paddr_t pa, size;
628         vm_page_t m, m_ret, msucc;
629         vm_pindex_t first, leftcap, rightcap;
630         vm_reserv_t rv;
631         u_long allocpages, maxpages, minpages;
632         int i, index, n;
633
634         vm_domain_free_assert_locked(VM_DOMAIN(domain));
635         VM_OBJECT_ASSERT_WLOCKED(object);
636         KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
637
638         /*
639          * Is a reservation fundamentally impossible?
640          */
641         if (pindex < VM_RESERV_INDEX(object, pindex) ||
642             pindex + npages > object->size)
643                 return (NULL);
644
645         /*
646          * All reservations of a particular size have the same alignment.
647          * Assuming that the first page is allocated from a reservation, the
648          * least significant bits of its physical address can be determined
649          * from its offset from the beginning of the reservation and the size
650          * of the reservation.
651          *
652          * Could the specified index within a reservation of the smallest
653          * possible size satisfy the alignment and boundary requirements?
654          */
655         pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT;
656         if ((pa & (alignment - 1)) != 0)
657                 return (NULL);
658         size = npages << PAGE_SHIFT;
659         if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
660                 return (NULL);
661
662         /*
663          * Callers should've extended an existing reservation prior to
664          * calling this function.  If a reservation exists it is
665          * incompatible with the allocation.
666          */
667         rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
668         if (rv != NULL)
669                 return (NULL);
670
671         /*
672          * Could at least one reservation fit between the first index to the
673          * left that can be used ("leftcap") and the first index to the right
674          * that cannot be used ("rightcap")?
675          *
676          * We must synchronize with the reserv object lock to protect the
677          * pindex/object of the resulting reservations against rename while
678          * we are inspecting.
679          */
680         first = pindex - VM_RESERV_INDEX(object, pindex);
681         minpages = VM_RESERV_INDEX(object, pindex) + npages;
682         maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
683         allocpages = maxpages;
684         vm_reserv_object_lock(object);
685         if (mpred != NULL) {
686                 if ((rv = vm_reserv_from_page(mpred))->object != object)
687                         leftcap = mpred->pindex + 1;
688                 else
689                         leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
690                 if (leftcap > first) {
691                         vm_reserv_object_unlock(object);
692                         return (NULL);
693                 }
694         }
695         if (msucc != NULL) {
696                 if ((rv = vm_reserv_from_page(msucc))->object != object)
697                         rightcap = msucc->pindex;
698                 else
699                         rightcap = rv->pindex;
700                 if (first + maxpages > rightcap) {
701                         if (maxpages == VM_LEVEL_0_NPAGES) {
702                                 vm_reserv_object_unlock(object);
703                                 return (NULL);
704                         }
705
706                         /*
707                          * At least one reservation will fit between "leftcap"
708                          * and "rightcap".  However, a reservation for the
709                          * last of the requested pages will not fit.  Reduce
710                          * the size of the upcoming allocation accordingly.
711                          */
712                         allocpages = minpages;
713                 }
714         }
715         vm_reserv_object_unlock(object);
716
717         /*
718          * Would the last new reservation extend past the end of the object?
719          */
720         if (first + maxpages > object->size) {
721                 /*
722                  * Don't allocate the last new reservation if the object is a
723                  * vnode or backed by another object that is a vnode. 
724                  */
725                 if (object->type == OBJT_VNODE ||
726                     (object->backing_object != NULL &&
727                     object->backing_object->type == OBJT_VNODE)) {
728                         if (maxpages == VM_LEVEL_0_NPAGES)
729                                 return (NULL);
730                         allocpages = minpages;
731                 }
732                 /* Speculate that the object may grow. */
733         }
734
735         /*
736          * Allocate the physical pages.  The alignment and boundary specified
737          * for this allocation may be different from the alignment and
738          * boundary specified for the requested pages.  For instance, the
739          * specified index may not be the first page within the first new
740          * reservation.
741          */
742         m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
743             VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
744         if (m == NULL)
745                 return (NULL);
746         KASSERT(vm_phys_domain(m) == domain,
747             ("vm_reserv_alloc_contig: Page domain does not match requested."));
748
749         /*
750          * The allocated physical pages always begin at a reservation
751          * boundary, but they do not always end at a reservation boundary.
752          * Initialize every reservation that is completely covered by the
753          * allocated physical pages.
754          */
755         m_ret = NULL;
756         index = VM_RESERV_INDEX(object, pindex);
757         do {
758                 rv = vm_reserv_from_page(m);
759                 KASSERT(rv->pages == m,
760                     ("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
761                     rv));
762                 vm_reserv_insert(rv, object, first);
763                 n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
764                 for (i = 0; i < n; i++)
765                         vm_reserv_populate(rv, index + i);
766                 npages -= n;
767                 if (m_ret == NULL) {
768                         m_ret = &rv->pages[index];
769                         index = 0;
770                 }
771                 m += VM_LEVEL_0_NPAGES;
772                 first += VM_LEVEL_0_NPAGES;
773                 allocpages -= VM_LEVEL_0_NPAGES;
774         } while (allocpages >= VM_LEVEL_0_NPAGES);
775         return (m_ret);
776 }
777
778 /*
779  * Attempts to extend an existing reservation and allocate the page to the
780  * object.
781  *
782  * The page "mpred" must immediately precede the offset "pindex" within the
783  * specified object.
784  *
785  * The object must be locked.
786  */
787 vm_page_t
788 vm_reserv_extend(int req, vm_object_t object, vm_pindex_t pindex, int domain,
789     vm_page_t mpred)
790 {
791         struct vm_domain *vmd;
792         vm_page_t m, msucc;
793         vm_reserv_t rv;
794         int index, free_count;
795
796         VM_OBJECT_ASSERT_WLOCKED(object);
797
798         /*
799          * Could a reservation currently exist?
800          */
801         if (pindex < VM_RESERV_INDEX(object, pindex) ||
802             pindex >= object->size || object->resident_page_count == 0)
803                 return (NULL);
804
805         /*
806          * Look for an existing reservation.
807          */
808         rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
809         if (rv == NULL)
810                 return (NULL);
811
812         KASSERT(object != kernel_object || rv->domain == domain,
813             ("vm_reserv_extend: Domain mismatch from reservation."));
814         domain = rv->domain;
815         vmd = VM_DOMAIN(domain);
816         index = VM_RESERV_INDEX(object, pindex);
817         m = &rv->pages[index];
818         vm_domain_free_lock(vmd);
819         if (vm_domain_available(vmd, req, 1) == 0 ||
820             /* Handle reclaim race. */
821             rv->object != object ||
822             /* Handle vm_page_rename(m, new_object, ...). */
823             popmap_is_set(rv->popmap, index))
824                 m = NULL;
825         if (m != NULL) {
826                 vm_reserv_populate(rv, index);
827                 free_count = vm_domain_freecnt_adj(vmd, -1);
828         } else
829                 free_count = vmd->vmd_free_count;
830         vm_domain_free_unlock(vmd);
831
832         if (vm_paging_needed(vmd, free_count))
833                 pagedaemon_wakeup(domain);
834
835         return (m);
836 }
837
838 /*
839  * Allocates a page from an existing reservation.
840  *
841  * The page "mpred" must immediately precede the offset "pindex" within the
842  * specified object.
843  *
844  * The object and free page queue must be locked.
845  */
846 vm_page_t
847 vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
848     vm_page_t mpred)
849 {
850         vm_page_t m, msucc;
851         vm_pindex_t first, leftcap, rightcap;
852         vm_reserv_t rv;
853         int index;
854
855         vm_domain_free_assert_locked(VM_DOMAIN(domain));
856         VM_OBJECT_ASSERT_WLOCKED(object);
857
858         /*
859          * Is a reservation fundamentally impossible?
860          */
861         if (pindex < VM_RESERV_INDEX(object, pindex) ||
862             pindex >= object->size)
863                 return (NULL);
864
865         /*
866          * Callers should've extended an existing reservation prior to
867          * calling this function.  If a reservation exists it is
868          * incompatible with the allocation.
869          */
870         rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
871         if (rv != NULL)
872                 return (NULL);
873
874         /*
875          * Could a reservation fit between the first index to the left that
876          * can be used and the first index to the right that cannot be used?
877          *
878          * We must synchronize with the reserv object lock to protect the
879          * pindex/object of the resulting reservations against rename while
880          * we are inspecting.
881          */
882         first = pindex - VM_RESERV_INDEX(object, pindex);
883         vm_reserv_object_lock(object);
884         if (mpred != NULL) {
885                 if ((rv = vm_reserv_from_page(mpred))->object != object)
886                         leftcap = mpred->pindex + 1;
887                 else
888                         leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
889                 if (leftcap > first) {
890                         vm_reserv_object_unlock(object);
891                         return (NULL);
892                 }
893         }
894         if (msucc != NULL) {
895                 if ((rv = vm_reserv_from_page(msucc))->object != object)
896                         rightcap = msucc->pindex;
897                 else
898                         rightcap = rv->pindex;
899                 if (first + VM_LEVEL_0_NPAGES > rightcap) {
900                         vm_reserv_object_unlock(object);
901                         return (NULL);
902                 }
903         }
904         vm_reserv_object_unlock(object);
905
906         /*
907          * Would a new reservation extend past the end of the object? 
908          */
909         if (first + VM_LEVEL_0_NPAGES > object->size) {
910                 /*
911                  * Don't allocate a new reservation if the object is a vnode or
912                  * backed by another object that is a vnode. 
913                  */
914                 if (object->type == OBJT_VNODE ||
915                     (object->backing_object != NULL &&
916                     object->backing_object->type == OBJT_VNODE))
917                         return (NULL);
918                 /* Speculate that the object may grow. */
919         }
920
921         /*
922          * Allocate and populate the new reservation.
923          */
924         m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
925         if (m == NULL)
926                 return (NULL);
927         rv = vm_reserv_from_page(m);
928         KASSERT(rv->pages == m,
929             ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
930         vm_reserv_insert(rv, object, first);
931         index = VM_RESERV_INDEX(object, pindex);
932         vm_reserv_populate(rv, index);
933         return (&rv->pages[index]);
934 }
935
936 /*
937  * Breaks the given reservation.  Except for the specified free page, all free
938  * pages in the reservation are returned to the physical memory allocator.
939  * The reservation's population count and map are reset to their initial
940  * state.
941  *
942  * The given reservation must not be in the partially populated reservation
943  * queue.  The free page queue lock must be held.
944  */
945 static void
946 vm_reserv_break(vm_reserv_t rv, vm_page_t m)
947 {
948         int begin_zeroes, hi, i, lo;
949
950         vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
951         vm_reserv_remove(rv);
952         if (m != NULL) {
953                 /*
954                  * Since the reservation is being broken, there is no harm in
955                  * abusing the population map to stop "m" from being returned
956                  * to the physical memory allocator.
957                  */
958                 i = m - rv->pages;
959                 KASSERT(popmap_is_clear(rv->popmap, i),
960                     ("vm_reserv_break: reserv %p's popmap is corrupted", rv));
961                 popmap_set(rv->popmap, i);
962                 rv->popcnt++;
963         }
964         i = hi = 0;
965         do {
966                 /* Find the next 0 bit.  Any previous 0 bits are < "hi". */
967                 lo = ffsl(~(((1UL << hi) - 1) | rv->popmap[i]));
968                 if (lo == 0) {
969                         /* Redundantly clears bits < "hi". */
970                         rv->popmap[i] = 0;
971                         rv->popcnt -= NBPOPMAP - hi;
972                         while (++i < NPOPMAP) {
973                                 lo = ffsl(~rv->popmap[i]);
974                                 if (lo == 0) {
975                                         rv->popmap[i] = 0;
976                                         rv->popcnt -= NBPOPMAP;
977                                 } else
978                                         break;
979                         }
980                         if (i == NPOPMAP)
981                                 break;
982                         hi = 0;
983                 }
984                 KASSERT(lo > 0, ("vm_reserv_break: lo is %d", lo));
985                 /* Convert from ffsl() to ordinary bit numbering. */
986                 lo--;
987                 if (lo > 0) {
988                         /* Redundantly clears bits < "hi". */
989                         rv->popmap[i] &= ~((1UL << lo) - 1);
990                         rv->popcnt -= lo - hi;
991                 }
992                 begin_zeroes = NBPOPMAP * i + lo;
993                 /* Find the next 1 bit. */
994                 do
995                         hi = ffsl(rv->popmap[i]);
996                 while (hi == 0 && ++i < NPOPMAP);
997                 if (i != NPOPMAP)
998                         /* Convert from ffsl() to ordinary bit numbering. */
999                         hi--;
1000                 vm_phys_free_contig(&rv->pages[begin_zeroes], NBPOPMAP * i +
1001                     hi - begin_zeroes);
1002         } while (i < NPOPMAP);
1003         KASSERT(rv->popcnt == 0,
1004             ("vm_reserv_break: reserv %p's popcnt is corrupted", rv));
1005         vm_reserv_broken++;
1006 }
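/*
 * The scan above walks the population map word by word, using ffsl() on the
 * complemented word to find the start of each run of clear (free) bits and
 * ffsl() on the word itself to find where the run ends, handing each run to
 * vm_phys_free_contig() as one contiguous block.  For example, if a popmap
 * word held 0xffffffffffffff0f and the remaining words were fully populated,
 * only pages 4 through 7 of that word would be freed, as a single 4-page
 * block.
 */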
1007
1008 /*
1009  * Breaks all reservations belonging to the given object.
1010  */
1011 void
1012 vm_reserv_break_all(vm_object_t object)
1013 {
1014         vm_reserv_t rv;
1015         struct vm_domain *vmd;
1016
1017         /*
1018          * This access of object->rvq is unsynchronized so that the
1019          * object rvq lock can nest after the domain_free lock.  We
1020          * must check for races in the results.  However, the object
1021          * lock prevents new additions, so we are guaranteed that when
1022          * it returns NULL the object is properly empty.
1023          */
1024         vmd = NULL;
1025         while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
1026                 if (vmd != VM_DOMAIN(rv->domain)) {
1027                         if (vmd != NULL)
1028                                 vm_domain_free_unlock(vmd);
1029                         vmd = VM_DOMAIN(rv->domain);
1030                         vm_domain_free_lock(vmd);
1031                 }
1032                 /* Reclaim race. */
1033                 if (rv->object != object)
1034                         continue;
1035                 KASSERT(rv->object == object,
1036                     ("vm_reserv_break_all: reserv %p is corrupted", rv));
1037                 if (rv->inpartpopq) {
1038                         TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
1039                         rv->inpartpopq = FALSE;
1040                 }
1041                 vm_reserv_break(rv, NULL);
1042         }
1043         if (vmd != NULL)
1044                 vm_domain_free_unlock(vmd);
1045 }
1046
1047 /*
1048  * Frees the given page if it belongs to a reservation.  Returns TRUE if the
1049  * page is freed and FALSE otherwise.
1050  *
1051  * The free page queue lock must be held.
1052  */
1053 boolean_t
1054 vm_reserv_free_page(vm_page_t m)
1055 {
1056         vm_reserv_t rv;
1057
1058         rv = vm_reserv_from_page(m);
1059         vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
1060         if (rv->object == NULL)
1061                 return (FALSE);
1062         vm_reserv_depopulate(rv, m - rv->pages);
1063         return (TRUE);
1064 }
1065
1066 /*
1067  * Initializes the reservation management system.  Specifically, initializes
1068  * the reservation array.
1069  *
1070  * Requires that vm_page_array and first_page are initialized!
1071  */
1072 void
1073 vm_reserv_init(void)
1074 {
1075         vm_paddr_t paddr;
1076         struct vm_phys_seg *seg;
1077         int i, segind;
1078
1079         /*
1080          * Initialize the reservation array.  Specifically, initialize the
1081          * "pages" field for every element that has an underlying superpage.
1082          */
1083         for (segind = 0; segind < vm_phys_nsegs; segind++) {
1084                 seg = &vm_phys_segs[segind];
1085                 paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
1086                 while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
1087                         vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
1088                             PHYS_TO_VM_PAGE(paddr);
1089                         vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain =
1090                             seg->domain;
1091                         paddr += VM_LEVEL_0_SIZE;
1092                 }
1093         }
1094         for (i = 0; i < MAXMEMDOM; i++)
1095                 TAILQ_INIT(&vm_rvq_partpop[i]);
1096 }
1097
1098 /*
1099  * Returns true if the given page belongs to a reservation and that page is
1100  * free.  Otherwise, returns false.
1101  */
1102 bool
1103 vm_reserv_is_page_free(vm_page_t m)
1104 {
1105         vm_reserv_t rv;
1106
1107         rv = vm_reserv_from_page(m);
1108         vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
1109         if (rv->object == NULL)
1110                 return (false);
1111         return (popmap_is_clear(rv->popmap, m - rv->pages));
1112 }
1113
1114 /*
1115  * If the given page belongs to a reservation, returns the level of that
1116  * reservation.  Otherwise, returns -1.
1117  */
1118 int
1119 vm_reserv_level(vm_page_t m)
1120 {
1121         vm_reserv_t rv;
1122
1123         rv = vm_reserv_from_page(m);
1124         return (rv->object != NULL ? 0 : -1);
1125 }
1126
1127 /*
1128  * Returns a reservation level if the given page belongs to a fully populated
1129  * reservation and -1 otherwise.
1130  */
1131 int
1132 vm_reserv_level_iffullpop(vm_page_t m)
1133 {
1134         vm_reserv_t rv;
1135
1136         rv = vm_reserv_from_page(m);
1137         return (rv->popcnt == VM_LEVEL_0_NPAGES ? 0 : -1);
1138 }
1139
1140 /*
1141  * Breaks the given partially populated reservation, releasing its free pages
1142  * to the physical memory allocator.
1143  *
1144  * The free page queue lock must be held.
1145  */
1146 static void
1147 vm_reserv_reclaim(vm_reserv_t rv)
1148 {
1149
1150         vm_domain_free_assert_locked(VM_DOMAIN(rv->domain));
1151         KASSERT(rv->inpartpopq,
1152             ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
1153         KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
1154             ("vm_reserv_reclaim: reserv %p's domain is corrupted %d",
1155             rv, rv->domain));
1156         TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
1157         rv->inpartpopq = FALSE;
1158         vm_reserv_break(rv, NULL);
1159         vm_reserv_reclaimed++;
1160 }
1161
1162 /*
1163  * Breaks the reservation at the head of the partially populated reservation
1164  * queue, releasing its free pages to the physical memory allocator.  Returns
1165  * TRUE if a reservation is broken and FALSE otherwise.
1166  *
1167  * The free page queue lock must be held.
1168  */
1169 boolean_t
1170 vm_reserv_reclaim_inactive(int domain)
1171 {
1172         vm_reserv_t rv;
1173
1174         vm_domain_free_assert_locked(VM_DOMAIN(domain));
1175         if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
1176                 vm_reserv_reclaim(rv);
1177                 return (TRUE);
1178         }
1179         return (FALSE);
1180 }
1181
1182 /*
1183  * Searches the partially populated reservation queue for the least recently
1184  * changed reservation with free pages that satisfy the given request for
1185  * contiguous physical memory.  If a satisfactory reservation is found, it is
1186  * broken.  Returns TRUE if a reservation is broken and FALSE otherwise.
1187  *
1188  * The free page queue lock must be held.
1189  */
1190 boolean_t
1191 vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
1192     vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
1193 {
1194         vm_paddr_t pa, size;
1195         vm_reserv_t rv;
1196         int hi, i, lo, low_index, next_free;
1197
1198         vm_domain_free_assert_locked(VM_DOMAIN(domain));
1199         if (npages > VM_LEVEL_0_NPAGES - 1)
1200                 return (FALSE);
1201         size = npages << PAGE_SHIFT;
1202         TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
1203                 pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
1204                 if (pa + PAGE_SIZE - size < low) {
1205                         /* This entire reservation is too low; go to next. */
1206                         continue;
1207                 }
1208                 pa = VM_PAGE_TO_PHYS(&rv->pages[0]);
1209                 if (pa + size > high) {
1210                         /* This entire reservation is too high; go to next. */
1211                         continue;
1212                 }
1213                 if (pa < low) {
1214                         /* Start the search for free pages at "low". */
1215                         low_index = (low + PAGE_MASK - pa) >> PAGE_SHIFT;
1216                         i = low_index / NBPOPMAP;
1217                         hi = low_index % NBPOPMAP;
1218                 } else
1219                         i = hi = 0;
1220                 do {
1221                         /* Find the next free page. */
1222                         lo = ffsl(~(((1UL << hi) - 1) | rv->popmap[i]));
1223                         while (lo == 0 && ++i < NPOPMAP)
1224                                 lo = ffsl(~rv->popmap[i]);
1225                         if (i == NPOPMAP)
1226                                 break;
1227                         /* Convert from ffsl() to ordinary bit numbering. */
1228                         lo--;
1229                         next_free = NBPOPMAP * i + lo;
1230                         pa = VM_PAGE_TO_PHYS(&rv->pages[next_free]);
1231                         KASSERT(pa >= low,
1232                             ("vm_reserv_reclaim_contig: pa is too low"));
1233                         if (pa + size > high) {
1234                                 /* The rest of this reservation is too high. */
1235                                 break;
1236                         } else if ((pa & (alignment - 1)) != 0 ||
1237                             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
1238                                 /*
1239                                  * The current page doesn't meet the alignment
1240                                  * and/or boundary requirements.  Continue
1241                                  * searching this reservation until the rest
1242                                  * of its free pages are either excluded or
1243                                  * exhausted.
1244                                  */
1245                                 hi = lo + 1;
1246                                 if (hi >= NBPOPMAP) {
1247                                         hi = 0;
1248                                         i++;
1249                                 }
1250                                 continue;
1251                         }
1252                         /* Find the next used page. */
1253                         hi = ffsl(rv->popmap[i] & ~((1UL << lo) - 1));
1254                         while (hi == 0 && ++i < NPOPMAP) {
1255                                 if ((NBPOPMAP * i - next_free) * PAGE_SIZE >=
1256                                     size) {
1257                                         vm_reserv_reclaim(rv);
1258                                         return (TRUE);
1259                                 }
1260                                 hi = ffsl(rv->popmap[i]);
1261                         }
1262                         /* Convert from ffsl() to ordinary bit numbering. */
1263                         if (i != NPOPMAP)
1264                                 hi--;
1265                         if ((NBPOPMAP * i + hi - next_free) * PAGE_SIZE >=
1266                             size) {
1267                                 vm_reserv_reclaim(rv);
1268                                 return (TRUE);
1269                         }
1270                 } while (i < NPOPMAP);
1271         }
1272         return (FALSE);
1273 }
1274
1275 /*
1276  * Transfers the reservation underlying the given page to a new object.
1277  *
1278  * The object must be locked.
1279  */
1280 void
1281 vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
1282     vm_pindex_t old_object_offset)
1283 {
1284         vm_reserv_t rv;
1285
1286         VM_OBJECT_ASSERT_WLOCKED(new_object);
1287         rv = vm_reserv_from_page(m);
1288         if (rv->object == old_object) {
1289                 vm_domain_free_lock(VM_DOMAIN(rv->domain));
1290                 if (rv->object == old_object) {
1291                         vm_reserv_object_lock(old_object);
1292                         rv->object = NULL;
1293                         LIST_REMOVE(rv, objq);
1294                         vm_reserv_object_unlock(old_object);
1295                         vm_reserv_object_lock(new_object);
1296                         rv->object = new_object;
1297                         rv->pindex -= old_object_offset;
1298                         LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
1299                         vm_reserv_object_unlock(new_object);
1300                 }
1301                 vm_domain_free_unlock(VM_DOMAIN(rv->domain));
1302         }
1303 }
1304
1305 /*
1306  * Returns the size (in bytes) of a reservation of the specified level.
1307  */
1308 int
1309 vm_reserv_size(int level)
1310 {
1311
1312         switch (level) {
1313         case 0:
1314                 return (VM_LEVEL_0_SIZE);
1315         case -1:
1316                 return (PAGE_SIZE);
1317         default:
1318                 return (0);
1319         }
1320 }
1321
1322 /*
1323  * Allocates the virtual and physical memory required by the reservation
1324  * management system's data structures, in particular, the reservation array.
1325  */
1326 vm_paddr_t
1327 vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
1328 {
1329         vm_paddr_t new_end;
1330         size_t size;
1331         int i;
1332
1333         /*
1334          * Calculate the size (in bytes) of the reservation array.  Round up
1335          * from "high_water" because every small page is mapped to an element
1336          * in the reservation array based on its physical address.  Thus, the
1337          * number of elements in the reservation array can be greater than the
1338          * number of superpages. 
1339          */
1340         size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv);
1341
1342         /*
1343          * Allocate and map the physical memory for the reservation array.  The
1344          * next available virtual address is returned by reference.
1345          */
1346         new_end = end - round_page(size);
1347         vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end,
1348             VM_PROT_READ | VM_PROT_WRITE);
1349         bzero(vm_reserv_array, size);
1350
1351         for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
1352                 mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL,
1353                     MTX_DEF);
1354
1355         /*
1356          * Return the next available physical address.
1357          */
1358         return (new_end);
1359 }
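/*
 * For example, with a "high_water" mark of 16GB and 2MB reservations, the
 * array has howmany(16GB, 2MB) = 8192 entries, regardless of how many of
 * those 2MB ranges can actually hold a superpage.
 */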
1360
1361 /*
1362  * Returns the superpage containing the given page.
1363  */
1364 vm_page_t
1365 vm_reserv_to_superpage(vm_page_t m)
1366 {
1367         vm_reserv_t rv;
1368
1369         VM_OBJECT_ASSERT_LOCKED(m->object);
1370         rv = vm_reserv_from_page(m);
1371         return (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES ?
1372             rv->pages : NULL);
1373 }
1374
1375 #endif  /* VM_NRESERVLEVEL > 0 */