1 /*-
2  * Copyright (c) 2002-2006 Rice University
3  * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
4  * All rights reserved.
5  *
6  * This software was developed for the FreeBSD Project by Alan L. Cox,
7  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
28  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31
32 /*
33  *      Physical memory system implementation
34  *
35  * Any external functions defined by this module are only to be used by the
36  * virtual memory system.
37  */
38
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41
42 #include "opt_ddb.h"
43 #include "opt_vm.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/lock.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/mutex.h>
51 #if MAXMEMDOM > 1
52 #include <sys/proc.h>
53 #endif
54 #include <sys/queue.h>
55 #include <sys/rwlock.h>
56 #include <sys/sbuf.h>
57 #include <sys/sysctl.h>
58 #include <sys/tree.h>
59 #include <sys/vmmeter.h>
60
61 #include <ddb/ddb.h>
62
63 #include <vm/vm.h>
64 #include <vm/vm_param.h>
65 #include <vm/vm_kern.h>
66 #include <vm/vm_object.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_phys.h>
69
70 _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
71     "Too many physsegs.");
72
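/*
 * NUMA information supplied by the platform layer: mem_affinity is an
 * array, terminated by an entry whose "end" is zero, that describes
 * physical address ranges and the memory domain to which each belongs;
 * mem_locality is a vm_ndomains by vm_ndomains table of relative
 * distances between domains, or NULL if no such information is known.
 */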
73 struct mem_affinity *mem_affinity;
74 int *mem_locality;
75
76 int vm_ndomains = 1;
77
78 struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
79 int vm_phys_nsegs;
80
81 struct vm_phys_fictitious_seg;
82 static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
83     struct vm_phys_fictitious_seg *);
84
85 RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
86     RB_INITIALIZER(_vm_phys_fictitious_tree);
87
88 struct vm_phys_fictitious_seg {
89         RB_ENTRY(vm_phys_fictitious_seg) node;
90         /* Memory region data */
91         vm_paddr_t      start;
92         vm_paddr_t      end;
93         vm_page_t       first_page;
94 };
95
96 RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
97     vm_phys_fictitious_cmp);
98
99 static struct rwlock vm_phys_fictitious_reg_lock;
100 MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
101
102 static struct vm_freelist
103     vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
104
105 static int vm_nfreelists;
106
107 /*
108  * Provides the mapping from VM_FREELIST_* to free list indices (flind).
109  */
110 static int vm_freelist_to_flind[VM_NFREELIST];
111
112 CTASSERT(VM_FREELIST_DEFAULT == 0);
113
114 #ifdef VM_FREELIST_ISADMA
115 #define VM_ISADMA_BOUNDARY      16777216
116 #endif
117 #ifdef VM_FREELIST_DMA32
118 #define VM_DMA32_BOUNDARY       ((vm_paddr_t)1 << 32)
119 #endif
120
121 /*
122  * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
123  * the ordering of the free list boundaries.
124  */
125 #if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY)
126 CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY);
127 #endif
128 #if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
129 CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
130 #endif
131
132 static int cnt_prezero;
133 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
134     &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
135
136 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
137 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
138     NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
139
140 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
141 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
142     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
143
144 static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
145 SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
146     NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
147
148 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
149     &vm_ndomains, 0, "Number of physical memory domains available.");
150
151 static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
152     int order);
153 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
154 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
155 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
156 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
157     int order);
158
159 /*
160  * Red-black tree helpers for vm fictitious range management.
161  */
162 static inline int
163 vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
164     struct vm_phys_fictitious_seg *range)
165 {
166
167         KASSERT(range->start != 0 && range->end != 0,
168             ("Invalid range passed on search for vm_fictitious page"));
169         if (p->start >= range->end)
170                 return (1);
171         if (p->start < range->start)
172                 return (-1);
173
174         return (0);
175 }
176
177 static int
178 vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
179     struct vm_phys_fictitious_seg *p2)
180 {
181
182         /* Check if this is a search for a page */
183         if (p1->end == 0)
184                 return (vm_phys_fictitious_in_range(p1, p2));
185
186         KASSERT(p2->end != 0,
187     ("Invalid range passed as second parameter to vm fictitious comparison"));
188
189         /* Searching to add a new range */
190         if (p1->end <= p2->start)
191                 return (-1);
192         if (p1->start >= p2->end)
193                 return (1);
194
195         panic("Trying to add overlapping vm fictitious ranges:\n"
196             "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
197             (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
198 }
199
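/*
 * Select the memory domain to use for the current allocation, rotating
 * through the domains in round-robin order per thread.  With a single
 * domain this always returns zero.
 */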
200 static __inline int
201 vm_rr_selectdomain(void)
202 {
203 #if MAXMEMDOM > 1
204         struct thread *td;
205
206         td = curthread;
207
208         td->td_dom_rr_idx++;
209         td->td_dom_rr_idx %= vm_ndomains;
210         return (td->td_dom_rr_idx);
211 #else
212         return (0);
213 #endif
214 }
215
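/*
 * Return TRUE if any physical memory segment whose bit is set in the
 * given segment mask overlaps the physical address range [low, high).
 */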
216 boolean_t
217 vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
218 {
219         struct vm_phys_seg *s;
220         int idx;
221
222         while ((idx = ffsl(mask)) != 0) {
223                 idx--;  /* ffsl counts from 1 */
224                 mask &= ~(1UL << idx);
225                 s = &vm_phys_segs[idx];
226                 if (low < s->end && high > s->start)
227                         return (TRUE);
228         }
229         return (FALSE);
230 }
231
232 /*
233  * Outputs the state of the physical memory allocator, specifically,
234  * the amount of physical memory in each free list.
235  */
236 static int
237 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
238 {
239         struct sbuf sbuf;
240         struct vm_freelist *fl;
241         int dom, error, flind, oind, pind;
242
243         error = sysctl_wire_old_buffer(req, 0);
244         if (error != 0)
245                 return (error);
246         sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
247         for (dom = 0; dom < vm_ndomains; dom++) {
248                 sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
249                 for (flind = 0; flind < vm_nfreelists; flind++) {
250                         sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
251                             "\n  ORDER (SIZE)  |  NUMBER"
252                             "\n              ", flind);
253                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
254                                 sbuf_printf(&sbuf, "  |  POOL %d", pind);
255                         sbuf_printf(&sbuf, "\n--            ");
256                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
257                                 sbuf_printf(&sbuf, "-- --      ");
258                         sbuf_printf(&sbuf, "--\n");
259                         for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
260                                 sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
261                                     1 << (PAGE_SHIFT - 10 + oind));
262                                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
263                                 fl = vm_phys_free_queues[dom][flind][pind];
264                                         sbuf_printf(&sbuf, "  |  %6d",
265                                             fl[oind].lcnt);
266                                 }
267                                 sbuf_printf(&sbuf, "\n");
268                         }
269                 }
270         }
271         error = sbuf_finish(&sbuf);
272         sbuf_delete(&sbuf);
273         return (error);
274 }
275
276 /*
277  * Outputs the set of physical memory segments.
278  */
279 static int
280 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
281 {
282         struct sbuf sbuf;
283         struct vm_phys_seg *seg;
284         int error, segind;
285
286         error = sysctl_wire_old_buffer(req, 0);
287         if (error != 0)
288                 return (error);
289         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
290         for (segind = 0; segind < vm_phys_nsegs; segind++) {
291                 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
292                 seg = &vm_phys_segs[segind];
293                 sbuf_printf(&sbuf, "start:     %#jx\n",
294                     (uintmax_t)seg->start);
295                 sbuf_printf(&sbuf, "end:       %#jx\n",
296                     (uintmax_t)seg->end);
297                 sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
298                 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
299         }
300         error = sbuf_finish(&sbuf);
301         sbuf_delete(&sbuf);
302         return (error);
303 }
304
305 /*
306  * Return the locality between memory domains "f" and "t", or -1 if unknown.
307  */
308 static int
309 vm_phys_mem_affinity(int f, int t)
310 {
311
312         if (mem_locality == NULL)
313                 return (-1);
314         if (f >= vm_ndomains || t >= vm_ndomains)
315                 return (-1);
316         return (mem_locality[f * vm_ndomains + t]);
317 }
318
319 /*
320  * Outputs the VM locality table.
321  */
322 static int
323 sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
324 {
325         struct sbuf sbuf;
326         int error, i, j;
327
328         error = sysctl_wire_old_buffer(req, 0);
329         if (error != 0)
330                 return (error);
331         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
332
333         sbuf_printf(&sbuf, "\n");
334
335         for (i = 0; i < vm_ndomains; i++) {
336                 sbuf_printf(&sbuf, "%d: ", i);
337                 for (j = 0; j < vm_ndomains; j++) {
338                         sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
339                 }
340                 sbuf_printf(&sbuf, "\n");
341         }
342         error = sbuf_finish(&sbuf);
343         sbuf_delete(&sbuf);
344         return (error);
345 }
346
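/*
 * Add the given block of pages to the specified free list at the given
 * order, at either the head or the tail of the list.
 */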
347 static void
348 vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
349 {
350
351         m->order = order;
352         if (tail)
353                 TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
354         else
355                 TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
356         fl[order].lcnt++;
357 }
358
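/*
 * Remove the given block of pages from the specified free list and mark
 * it as no longer free.
 */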
359 static void
360 vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
361 {
362
363         TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
364         fl[order].lcnt--;
365         m->order = VM_NFREEORDER;
366 }
367
368 /*
369  * Create a physical memory segment.
370  */
371 static void
372 _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
373 {
374         struct vm_phys_seg *seg;
375
376         KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
377             ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
378         KASSERT(domain < vm_ndomains,
379             ("vm_phys_create_seg: invalid domain provided"));
380         seg = &vm_phys_segs[vm_phys_nsegs++];
381         while (seg > vm_phys_segs && (seg - 1)->start >= end) {
382                 *seg = *(seg - 1);
383                 seg--;
384         }
385         seg->start = start;
386         seg->end = end;
387         seg->domain = domain;
388 }
389
390 static void
391 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
392 {
393         int i;
394
395         if (mem_affinity == NULL) {
396                 _vm_phys_create_seg(start, end, 0);
397                 return;
398         }
399
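        /*
         * Split the requested range along the domain boundaries described
         * by mem_affinity so that each created segment lies entirely
         * within a single memory domain.
         */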
400         for (i = 0;; i++) {
401                 if (mem_affinity[i].end == 0)
402                         panic("Reached end of affinity info");
403                 if (mem_affinity[i].end <= start)
404                         continue;
405                 if (mem_affinity[i].start > start)
406                         panic("No affinity info for start %jx",
407                             (uintmax_t)start);
408                 if (mem_affinity[i].end >= end) {
409                         _vm_phys_create_seg(start, end,
410                             mem_affinity[i].domain);
411                         break;
412                 }
413                 _vm_phys_create_seg(start, mem_affinity[i].end,
414                     mem_affinity[i].domain);
415                 start = mem_affinity[i].end;
416         }
417 }
418
419 /*
420  * Add a physical memory segment.
421  */
422 void
423 vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
424 {
425         vm_paddr_t paddr;
426
427         KASSERT((start & PAGE_MASK) == 0,
428             ("vm_phys_add_seg: start is not page aligned"));
429         KASSERT((end & PAGE_MASK) == 0,
430             ("vm_phys_add_seg: end is not page aligned"));
431
432         /*
433          * Split the physical memory segment if it spans two or more free
434          * list boundaries.
435          */
436         paddr = start;
437 #ifdef  VM_FREELIST_ISADMA
438         if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) {
439                 vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY);
440                 paddr = VM_ISADMA_BOUNDARY;
441         }
442 #endif
443 #ifdef  VM_FREELIST_LOWMEM
444         if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
445                 vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
446                 paddr = VM_LOWMEM_BOUNDARY;
447         }
448 #endif
449 #ifdef  VM_FREELIST_DMA32
450         if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
451                 vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
452                 paddr = VM_DMA32_BOUNDARY;
453         }
454 #endif
455         vm_phys_create_seg(paddr, end);
456 }
457
458 /*
459  * Initialize the physical memory allocator.
460  *
461  * Requires that vm_page_array is initialized!
462  */
463 void
464 vm_phys_init(void)
465 {
466         struct vm_freelist *fl;
467         struct vm_phys_seg *seg;
468         u_long npages;
469         int dom, flind, freelist, oind, pind, segind;
470
471         /*
472          * Compute the number of free lists, and generate the mapping from the
473          * manifest constants VM_FREELIST_* to the free list indices.
474          *
475          * Initially, the entries of vm_freelist_to_flind[] are set to either
476          * 0 or 1 to indicate which free lists should be created.
477          */
478         npages = 0;
479         for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
480                 seg = &vm_phys_segs[segind];
481 #ifdef  VM_FREELIST_ISADMA
482                 if (seg->end <= VM_ISADMA_BOUNDARY)
483                         vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1;
484                 else
485 #endif
486 #ifdef  VM_FREELIST_LOWMEM
487                 if (seg->end <= VM_LOWMEM_BOUNDARY)
488                         vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
489                 else
490 #endif
491 #ifdef  VM_FREELIST_DMA32
492                 if (
493 #ifdef  VM_DMA32_NPAGES_THRESHOLD
494                     /*
495                      * Create the DMA32 free list only if the amount of
496                      * physical memory above physical address 4G exceeds the
497                      * given threshold.
498                      */
499                     npages > VM_DMA32_NPAGES_THRESHOLD &&
500 #endif
501                     seg->end <= VM_DMA32_BOUNDARY)
502                         vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
503                 else
504 #endif
505                 {
506                         npages += atop(seg->end - seg->start);
507                         vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
508                 }
509         }
510         /* Change each entry into a running total of the free lists. */
511         for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
512                 vm_freelist_to_flind[freelist] +=
513                     vm_freelist_to_flind[freelist - 1];
514         }
515         vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
516         KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
517         /* Change each entry into a free list index. */
518         for (freelist = 0; freelist < VM_NFREELIST; freelist++)
519                 vm_freelist_to_flind[freelist]--;
520
521         /*
522          * Initialize the first_page and free_queues fields of each physical
523          * memory segment.
524          */
525 #ifdef VM_PHYSSEG_SPARSE
526         npages = 0;
527 #endif
528         for (segind = 0; segind < vm_phys_nsegs; segind++) {
529                 seg = &vm_phys_segs[segind];
530 #ifdef VM_PHYSSEG_SPARSE
531                 seg->first_page = &vm_page_array[npages];
532                 npages += atop(seg->end - seg->start);
533 #else
534                 seg->first_page = PHYS_TO_VM_PAGE(seg->start);
535 #endif
536 #ifdef  VM_FREELIST_ISADMA
537                 if (seg->end <= VM_ISADMA_BOUNDARY) {
538                         flind = vm_freelist_to_flind[VM_FREELIST_ISADMA];
539                         KASSERT(flind >= 0,
540                             ("vm_phys_init: ISADMA flind < 0"));
541                 } else
542 #endif
543 #ifdef  VM_FREELIST_LOWMEM
544                 if (seg->end <= VM_LOWMEM_BOUNDARY) {
545                         flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
546                         KASSERT(flind >= 0,
547                             ("vm_phys_init: LOWMEM flind < 0"));
548                 } else
549 #endif
550 #ifdef  VM_FREELIST_DMA32
551                 if (seg->end <= VM_DMA32_BOUNDARY) {
552                         flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
553                         KASSERT(flind >= 0,
554                             ("vm_phys_init: DMA32 flind < 0"));
555                 } else
556 #endif
557                 {
558                         flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
559                         KASSERT(flind >= 0,
560                             ("vm_phys_init: DEFAULT flind < 0"));
561                 }
562                 seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
563         }
564
565         /*
566          * Initialize the free queues.
567          */
568         for (dom = 0; dom < vm_ndomains; dom++) {
569                 for (flind = 0; flind < vm_nfreelists; flind++) {
570                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
571                                 fl = vm_phys_free_queues[dom][flind][pind];
572                                 for (oind = 0; oind < VM_NFREEORDER; oind++)
573                                         TAILQ_INIT(&fl[oind].pl);
574                         }
575                 }
576         }
577
578         rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
579 }
580
581 /*
582  * Split a contiguous, power of two-sized set of physical pages.
583  */
584 static __inline void
585 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
586 {
587         vm_page_t m_buddy;
588
589         while (oind > order) {
590                 oind--;
591                 m_buddy = &m[1 << oind];
592                 KASSERT(m_buddy->order == VM_NFREEORDER,
593                     ("vm_phys_split_pages: page %p has unexpected order %d",
594                     m_buddy, m_buddy->order));
595                 vm_freelist_add(fl, m_buddy, oind, 0);
596         }
597 }
598
599 /*
600  * Initialize a physical page and add it to the free lists.
601  */
602 void
603 vm_phys_add_page(vm_paddr_t pa)
604 {
605         vm_page_t m;
606         struct vm_domain *vmd;
607
608         vm_cnt.v_page_count++;
609         m = vm_phys_paddr_to_vm_page(pa);
610         m->phys_addr = pa;
611         m->queue = PQ_NONE;
612         m->segind = vm_phys_paddr_to_segind(pa);
613         vmd = vm_phys_domain(m);
614         vmd->vmd_page_count++;
615         vmd->vmd_segs |= 1UL << m->segind;
616         KASSERT(m->order == VM_NFREEORDER,
617             ("vm_phys_add_page: page %p has unexpected order %d",
618             m, m->order));
619         m->pool = VM_FREEPOOL_DEFAULT;
620         pmap_page_init(m);
621         mtx_lock(&vm_page_queue_free_mtx);
622         vm_phys_freecnt_adj(m, 1);
623         vm_phys_free_pages(m, 0);
624         mtx_unlock(&vm_page_queue_free_mtx);
625 }
626
627 /*
628  * Allocate a contiguous, power of two-sized set of physical pages
629  * from the free lists.
630  *
631  * The free page queues must be locked.
632  */
633 vm_page_t
634 vm_phys_alloc_pages(int pool, int order)
635 {
636         vm_page_t m;
637         int dom, domain, flind;
638
639         KASSERT(pool < VM_NFREEPOOL,
640             ("vm_phys_alloc_pages: pool %d is out of range", pool));
641         KASSERT(order < VM_NFREEORDER,
642             ("vm_phys_alloc_pages: order %d is out of range", order));
643
644         for (dom = 0; dom < vm_ndomains; dom++) {
645                 domain = vm_rr_selectdomain();
646                 for (flind = 0; flind < vm_nfreelists; flind++) {
647                         m = vm_phys_alloc_domain_pages(domain, flind, pool,
648                             order);
649                         if (m != NULL)
650                                 return (m);
651                 }
652         }
653         return (NULL);
654 }
655
656 /*
657  * Allocate a contiguous, power of two-sized set of physical pages from the
658  * specified free list.  The free list must be specified using one of the
659  * manifest constants VM_FREELIST_*.
660  *
661  * The free page queues must be locked.
662  */
663 vm_page_t
664 vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
665 {
666         vm_page_t m;
667         int dom, domain;
668
669         KASSERT(freelist < VM_NFREELIST,
670             ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
671             freelist));
672         KASSERT(pool < VM_NFREEPOOL,
673             ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
674         KASSERT(order < VM_NFREEORDER,
675             ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
676         for (dom = 0; dom < vm_ndomains; dom++) {
677                 domain = vm_rr_selectdomain();
678                 m = vm_phys_alloc_domain_pages(domain,
679                     vm_freelist_to_flind[freelist], pool, order);
680                 if (m != NULL)
681                         return (m);
682         }
683         return (NULL);
684 }
685
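/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified domain and free list.  If the requested pool is empty, reclaim
 * the largest available block from another pool in the same free list,
 * move it to the requested pool, and split it as needed.
 *
 * The free page queues must be locked.
 */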
686 static vm_page_t
687 vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
688 {
689         struct vm_freelist *fl;
690         struct vm_freelist *alt;
691         int oind, pind;
692         vm_page_t m;
693
694         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
695         fl = &vm_phys_free_queues[domain][flind][pool][0];
696         for (oind = order; oind < VM_NFREEORDER; oind++) {
697                 m = TAILQ_FIRST(&fl[oind].pl);
698                 if (m != NULL) {
699                         vm_freelist_rem(fl, m, oind);
700                         vm_phys_split_pages(m, oind, fl, order);
701                         return (m);
702                 }
703         }
704
705         /*
706          * The given pool was empty.  Find the largest
707          * contiguous, power-of-two-sized set of pages in any
708          * pool.  Transfer these pages to the given pool, and
709          * use them to satisfy the allocation.
710          */
711         for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
712                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
713                         alt = &vm_phys_free_queues[domain][flind][pind][0];
714                         m = TAILQ_FIRST(&alt[oind].pl);
715                         if (m != NULL) {
716                                 vm_freelist_rem(alt, m, oind);
717                                 vm_phys_set_pool(pool, m, oind);
718                                 vm_phys_split_pages(m, oind, fl, order);
719                                 return (m);
720                         }
721                 }
722         }
723         return (NULL);
724 }
725
726 /*
727  * Find the vm_page corresponding to the given physical address.
728  */
729 vm_page_t
730 vm_phys_paddr_to_vm_page(vm_paddr_t pa)
731 {
732         struct vm_phys_seg *seg;
733         int segind;
734
735         for (segind = 0; segind < vm_phys_nsegs; segind++) {
736                 seg = &vm_phys_segs[segind];
737                 if (pa >= seg->start && pa < seg->end)
738                         return (&seg->first_page[atop(pa - seg->start)]);
739         }
740         return (NULL);
741 }
742
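/*
 * Find the vm_page corresponding to the given physical address within a
 * registered fictitious range, or return NULL if the address does not
 * belong to any such range.
 */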
743 vm_page_t
744 vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
745 {
746         struct vm_phys_fictitious_seg tmp, *seg;
747         vm_page_t m;
748
749         m = NULL;
750         tmp.start = pa;
751         tmp.end = 0;
752
753         rw_rlock(&vm_phys_fictitious_reg_lock);
754         seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
755         rw_runlock(&vm_phys_fictitious_reg_lock);
756         if (seg == NULL)
757                 return (NULL);
758
759         m = &seg->first_page[atop(pa - seg->start)];
760         KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));
761
762         return (m);
763 }
764
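/*
 * Initialize each vm_page in the given array as a managed, unbusied
 * fictitious page backing the physical addresses that start at "start".
 */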
765 static inline void
766 vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
767     long page_count, vm_memattr_t memattr)
768 {
769         long i;
770
771         for (i = 0; i < page_count; i++) {
772                 vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
773                 range[i].oflags &= ~VPO_UNMANAGED;
774                 range[i].busy_lock = VPB_UNBUSIED;
775         }
776 }
777
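/*
 * Register a range of fictitious physical memory.  Page structures that
 * already exist in vm_page_array are reused where possible; any remaining
 * pages are allocated, and ranges not fully covered by vm_page_array are
 * recorded in the fictitious range tree.
 */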
778 int
779 vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
780     vm_memattr_t memattr)
781 {
782         struct vm_phys_fictitious_seg *seg;
783         vm_page_t fp;
784         long page_count;
785 #ifdef VM_PHYSSEG_DENSE
786         long pi, pe;
787         long dpage_count;
788 #endif
789
790         KASSERT(start < end,
791             ("Start of segment isn't less than end (start: %jx end: %jx)",
792             (uintmax_t)start, (uintmax_t)end));
793
794         page_count = (end - start) / PAGE_SIZE;
795
796 #ifdef VM_PHYSSEG_DENSE
797         pi = atop(start);
798         pe = atop(end);
799         if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
800                 fp = &vm_page_array[pi - first_page];
801                 if ((pe - first_page) > vm_page_array_size) {
802                         /*
803                          * We have a segment that starts inside
804                          * of vm_page_array, but ends outside of it.
805                          *
806                          * Use vm_page_array pages for those that are
807                          * inside of the vm_page_array range, and
808                          * allocate the remaining ones.
809                          */
810                         dpage_count = vm_page_array_size - (pi - first_page);
811                         vm_phys_fictitious_init_range(fp, start, dpage_count,
812                             memattr);
813                         page_count -= dpage_count;
814                         start += ptoa(dpage_count);
815                         goto alloc;
816                 }
817                 /*
818                  * We can allocate the full range from vm_page_array,
819                  * so there's no need to register the range in the tree.
820                  */
821                 vm_phys_fictitious_init_range(fp, start, page_count, memattr);
822                 return (0);
823         } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
824                 /*
825                  * We have a segment that ends inside of vm_page_array,
826                  * but starts outside of it.
827                  */
828                 fp = &vm_page_array[0];
829                 dpage_count = pe - first_page;
830                 vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
831                     memattr);
832                 end -= ptoa(dpage_count);
833                 page_count -= dpage_count;
834                 goto alloc;
835         } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
836                 /*
837                  * Trying to register a fictitious range that extends both
838                  * before and after vm_page_array.
839                  */
840                 return (EINVAL);
841         } else {
842 alloc:
843 #endif
844                 fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
845                     M_WAITOK | M_ZERO);
846 #ifdef VM_PHYSSEG_DENSE
847         }
848 #endif
849         vm_phys_fictitious_init_range(fp, start, page_count, memattr);
850
851         seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
852         seg->start = start;
853         seg->end = end;
854         seg->first_page = fp;
855
856         rw_wlock(&vm_phys_fictitious_reg_lock);
857         RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
858         rw_wunlock(&vm_phys_fictitious_reg_lock);
859
860         return (0);
861 }
862
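/*
 * Unregister a range of fictitious physical memory.  Page structures that
 * were allocated at registration time are freed; pages taken from
 * vm_page_array are left in place.
 */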
863 void
864 vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
865 {
866         struct vm_phys_fictitious_seg *seg, tmp;
867 #ifdef VM_PHYSSEG_DENSE
868         long pi, pe;
869 #endif
870
871         KASSERT(start < end,
872             ("Start of segment isn't less than end (start: %jx end: %jx)",
873             (uintmax_t)start, (uintmax_t)end));
874
875 #ifdef VM_PHYSSEG_DENSE
876         pi = atop(start);
877         pe = atop(end);
878         if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
879                 if ((pe - first_page) <= vm_page_array_size) {
880                         /*
881                          * This segment was allocated using vm_page_array
882                          * only, there's nothing to do since those pages
883                          * were never added to the tree.
884                          */
885                         return;
886                 }
887                 /*
888                  * We have a segment that starts inside
889                  * of vm_page_array, but ends outside of it.
890                  *
891                  * Calculate how many pages were added to the
892                  * tree and free them.
893                  */
894                 start = ptoa(first_page + vm_page_array_size);
895         } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
896                 /*
897                  * We have a segment that ends inside of vm_page_array,
898                  * but starts outside of it.
899                  */
900                 end = ptoa(first_page);
901         } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
902                 /* Since it's not possible to register such a range, panic. */
903                 panic(
904                     "Unregistering not registered fictitious range [%#jx:%#jx]",
905                     (uintmax_t)start, (uintmax_t)end);
906         }
907 #endif
908         tmp.start = start;
909         tmp.end = 0;
910
911         rw_wlock(&vm_phys_fictitious_reg_lock);
912         seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
913         if (seg->start != start || seg->end != end) {
914                 rw_wunlock(&vm_phys_fictitious_reg_lock);
915                 panic(
916                     "Unregistering not registered fictitious range [%#jx:%#jx]",
917                     (uintmax_t)start, (uintmax_t)end);
918         }
919         RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
920         rw_wunlock(&vm_phys_fictitious_reg_lock);
921         free(seg->first_page, M_FICT_PAGES);
922         free(seg, M_FICT_PAGES);
923 }
924
925 /*
926  * Find the segment containing the given physical address.
927  */
928 static int
929 vm_phys_paddr_to_segind(vm_paddr_t pa)
930 {
931         struct vm_phys_seg *seg;
932         int segind;
933
934         for (segind = 0; segind < vm_phys_nsegs; segind++) {
935                 seg = &vm_phys_segs[segind];
936                 if (pa >= seg->start && pa < seg->end)
937                         return (segind);
938         }
939         panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
940             (uintmax_t)pa);
941 }
942
943 /*
944  * Free a contiguous, power of two-sized set of physical pages.
945  *
946  * The free page queues must be locked.
947  */
948 void
949 vm_phys_free_pages(vm_page_t m, int order)
950 {
951         struct vm_freelist *fl;
952         struct vm_phys_seg *seg;
953         vm_paddr_t pa;
954         vm_page_t m_buddy;
955
956         KASSERT(m->order == VM_NFREEORDER,
957             ("vm_phys_free_pages: page %p has unexpected order %d",
958             m, m->order));
959         KASSERT(m->pool < VM_NFREEPOOL,
960             ("vm_phys_free_pages: page %p has unexpected pool %d",
961             m, m->pool));
962         KASSERT(order < VM_NFREEORDER,
963             ("vm_phys_free_pages: order %d is out of range", order));
964         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
965         seg = &vm_phys_segs[m->segind];
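        /*
         * Coalesce the freed block with its buddy whenever the buddy lies
         * within the same segment, is free, and has the same order.  The
         * buddy's address is found by flipping the bit of the block's
         * physical address that corresponds to the current order.
         */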
966         if (order < VM_NFREEORDER - 1) {
967                 pa = VM_PAGE_TO_PHYS(m);
968                 do {
969                         pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
970                         if (pa < seg->start || pa >= seg->end)
971                                 break;
972                         m_buddy = &seg->first_page[atop(pa - seg->start)];
973                         if (m_buddy->order != order)
974                                 break;
975                         fl = (*seg->free_queues)[m_buddy->pool];
976                         vm_freelist_rem(fl, m_buddy, order);
977                         if (m_buddy->pool != m->pool)
978                                 vm_phys_set_pool(m->pool, m_buddy, order);
979                         order++;
980                         pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
981                         m = &seg->first_page[atop(pa - seg->start)];
982                 } while (order < VM_NFREEORDER - 1);
983         }
984         fl = (*seg->free_queues)[m->pool];
985         vm_freelist_add(fl, m, order, 1);
986 }
987
988 /*
989  * Free a contiguous, arbitrarily sized set of physical pages.
990  *
991  * The free page queues must be locked.
992  */
993 void
994 vm_phys_free_contig(vm_page_t m, u_long npages)
995 {
996         u_int n;
997         int order;
998
999         /*
1000          * Avoid unnecessary coalescing by freeing the pages in the largest
1001          * possible power-of-two-sized subsets.
1002          */
1003         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
1004         for (;; npages -= n) {
1005                 /*
1006                  * Unsigned "min" is used here so that "order" is assigned
1007                  * "VM_NFREEORDER - 1" when "m"'s physical address is zero
1008                  * or the low-order bits of its physical address are zero
1009                  * because the size of a physical address exceeds the size of
1010                  * a long.
1011                  */
1012                 order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
1013                     VM_NFREEORDER - 1);
1014                 n = 1 << order;
1015                 if (npages < n)
1016                         break;
1017                 vm_phys_free_pages(m, order);
1018                 m += n;
1019         }
1020         /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
1021         for (; npages > 0; npages -= n) {
1022                 order = flsl(npages) - 1;
1023                 n = 1 << order;
1024                 vm_phys_free_pages(m, order);
1025                 m += n;
1026         }
1027 }
1028
1029 /*
1030  * Set the pool for a contiguous, power of two-sized set of physical pages. 
1031  */
1032 void
1033 vm_phys_set_pool(int pool, vm_page_t m, int order)
1034 {
1035         vm_page_t m_tmp;
1036
1037         for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
1038                 m_tmp->pool = pool;
1039 }
1040
1041 /*
1042  * Search for the given physical page "m" in the free lists.  If the search
1043  * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
1044  * FALSE, indicating that "m" is not in the free lists.
1045  *
1046  * The free page queues must be locked.
1047  */
1048 boolean_t
1049 vm_phys_unfree_page(vm_page_t m)
1050 {
1051         struct vm_freelist *fl;
1052         struct vm_phys_seg *seg;
1053         vm_paddr_t pa, pa_half;
1054         vm_page_t m_set, m_tmp;
1055         int order;
1056
1057         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
1058
1059         /*
1060          * First, find the contiguous, power of two-sized set of free
1061          * physical pages containing the given physical page "m" and
1062          * assign it to "m_set".
1063          */
1064         seg = &vm_phys_segs[m->segind];
1065         for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
1066             order < VM_NFREEORDER - 1; ) {
1067                 order++;
1068                 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
1069                 if (pa >= seg->start)
1070                         m_set = &seg->first_page[atop(pa - seg->start)];
1071                 else
1072                         return (FALSE);
1073         }
1074         if (m_set->order < order)
1075                 return (FALSE);
1076         if (m_set->order == VM_NFREEORDER)
1077                 return (FALSE);
1078         KASSERT(m_set->order < VM_NFREEORDER,
1079             ("vm_phys_unfree_page: page %p has unexpected order %d",
1080             m_set, m_set->order));
1081
1082         /*
1083          * Next, remove "m_set" from the free lists.  Finally, extract
1084          * "m" from "m_set" using an iterative algorithm: While "m_set"
1085          * is larger than a page, shrink "m_set" by returning the half
1086          * of "m_set" that does not contain "m" to the free lists.
1087          */
1088         fl = (*seg->free_queues)[m_set->pool];
1089         order = m_set->order;
1090         vm_freelist_rem(fl, m_set, order);
1091         while (order > 0) {
1092                 order--;
1093                 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
1094                 if (m->phys_addr < pa_half)
1095                         m_tmp = &seg->first_page[atop(pa_half - seg->start)];
1096                 else {
1097                         m_tmp = m_set;
1098                         m_set = &seg->first_page[atop(pa_half - seg->start)];
1099                 }
1100                 vm_freelist_add(fl, m_tmp, order, 0);
1101         }
1102         KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
1103         return (TRUE);
1104 }
1105
1106 /*
1107  * Try to zero one physical page.  Used by an idle priority thread.
1108  */
1109 boolean_t
1110 vm_phys_zero_pages_idle(void)
1111 {
1112         static struct vm_freelist *fl;
1113         static int flind, oind, pind;
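        /*
         * The free list, pool, and order indices are static so that each
         * call resumes the scan where the previous call left off rather
         * than rescanning the queues from the beginning.
         */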
1114         vm_page_t m, m_tmp;
1115         int domain;
1116
1117         domain = vm_rr_selectdomain();
1118         fl = vm_phys_free_queues[domain][0][0];
1119         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
1120         for (;;) {
1121                 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
1122                         for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
1123                                 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
1124                                         vm_phys_unfree_page(m_tmp);
1125                                         vm_phys_freecnt_adj(m, -1);
1126                                         mtx_unlock(&vm_page_queue_free_mtx);
1127                                         pmap_zero_page_idle(m_tmp);
1128                                         m_tmp->flags |= PG_ZERO;
1129                                         mtx_lock(&vm_page_queue_free_mtx);
1130                                         vm_phys_freecnt_adj(m, 1);
1131                                         vm_phys_free_pages(m_tmp, 0);
1132                                         vm_page_zero_count++;
1133                                         cnt_prezero++;
1134                                         return (TRUE);
1135                                 }
1136                         }
1137                 }
1138                 oind++;
1139                 if (oind == VM_NFREEORDER) {
1140                         oind = 0;
1141                         pind++;
1142                         if (pind == VM_NFREEPOOL) {
1143                                 pind = 0;
1144                                 flind++;
1145                                 if (flind == vm_nfreelists)
1146                                         flind = 0;
1147                         }
1148                         fl = vm_phys_free_queues[domain][flind][pind];
1149                 }
1150         }
1151 }
1152
1153 /*
1154  * Allocate a contiguous set of physical pages of the given size
1155  * "npages" from the free lists.  All of the physical pages must be at
1156  * or above the given physical address "low" and below the given
1157  * physical address "high".  The given value "alignment" determines the
1158  * alignment of the first physical page in the set.  If the given value
1159  * "boundary" is non-zero, then the set of physical pages cannot cross
1160  * any physical address boundary that is a multiple of that value.  Both
1161  * "alignment" and "boundary" must be a power of two.
1162  */
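/*
 * For example, a hypothetical caller requesting 16 pages below 4GB,
 * aligned to 64KB and not crossing a 1MB boundary, with the free page
 * queues locked, would use:
 *
 *      m = vm_phys_alloc_contig(16, 0, (vm_paddr_t)1 << 32,
 *          64 * 1024, 1024 * 1024);
 */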
1163 vm_page_t
1164 vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
1165     u_long alignment, vm_paddr_t boundary)
1166 {
1167         struct vm_freelist *fl;
1168         struct vm_phys_seg *seg;
1169         vm_paddr_t pa, pa_last, size;
1170         vm_page_t m, m_ret;
1171         u_long npages_end;
1172         int dom, domain, flind, oind, order, pind;
1173
1174         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
1175         size = npages << PAGE_SHIFT;
1176         KASSERT(size != 0,
1177             ("vm_phys_alloc_contig: size must not be 0"));
1178         KASSERT((alignment & (alignment - 1)) == 0,
1179             ("vm_phys_alloc_contig: alignment must be a power of 2"));
1180         KASSERT((boundary & (boundary - 1)) == 0,
1181             ("vm_phys_alloc_contig: boundary must be a power of 2"));
1182         /* Compute the queue that is the best fit for npages. */
1183         for (order = 0; (1 << order) < npages; order++);
1184         dom = 0;
1185 restartdom:
1186         domain = vm_rr_selectdomain();
1187         for (flind = 0; flind < vm_nfreelists; flind++) {
1188                 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
1189                         for (pind = 0; pind < VM_NFREEPOOL; pind++) {
1190                                 fl = &vm_phys_free_queues[domain][flind][pind][0];
1191                                 TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
1192                                         /*
1193                                          * A free list may contain physical pages
1194                                          * from one or more segments.
1195                                          */
1196                                         seg = &vm_phys_segs[m_ret->segind];
1197                                         if (seg->start > high ||
1198                                             low >= seg->end)
1199                                                 continue;
1200
1201                                         /*
1202                                          * Is the size of this allocation request
1203                                          * larger than the largest block size?
1204                                          */
1205                                         if (order >= VM_NFREEORDER) {
1206                                                 /*
1207                                                  * Determine if a sufficient number
1208                                                  * of subsequent blocks to satisfy
1209                                                  * the allocation request are free.
1210                                                  */
1211                                                 pa = VM_PAGE_TO_PHYS(m_ret);
1212                                                 pa_last = pa + size;
1213                                                 for (;;) {
1214                                                         pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
1215                                                         if (pa >= pa_last)
1216                                                                 break;
1217                                                         if (pa < seg->start ||
1218                                                             pa >= seg->end)
1219                                                                 break;
1220                                                         m = &seg->first_page[atop(pa - seg->start)];
1221                                                         if (m->order != VM_NFREEORDER - 1)
1222                                                                 break;
1223                                                 }
1224                                                 /* If not, continue to the next block. */
1225                                                 if (pa < pa_last)
1226                                                         continue;
1227                                         }
1228
1229                                         /*
1230                                          * Determine if the blocks are within the given range,
1231                                          * satisfy the given alignment, and do not cross the
1232                                          * given boundary.
1233                                          */
1234                                         pa = VM_PAGE_TO_PHYS(m_ret);
1235                                         if (pa >= low &&
1236                                             pa + size <= high &&
1237                                             (pa & (alignment - 1)) == 0 &&
1238                                             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
1239                                                 goto done;
1240                                 }
1241                         }
1242                 }
1243         }
1244         if (++dom < vm_ndomains)
1245                 goto restartdom;
1246         return (NULL);
1247 done:
1248         for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
1249                 fl = (*seg->free_queues)[m->pool];
1250                 vm_freelist_rem(fl, m, m->order);
1251         }
1252         if (m_ret->pool != VM_FREEPOOL_DEFAULT)
1253                 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
1254         fl = (*seg->free_queues)[m_ret->pool];
1255         vm_phys_split_pages(m_ret, oind, fl, order);
1256         /* Return excess pages to the free lists. */
1257         npages_end = roundup2(npages, 1 << imin(oind, order));
1258         if (npages < npages_end)
1259                 vm_phys_free_contig(&m_ret[npages], npages_end - npages);
1260         return (m_ret);
1261 }
1262
1263 #ifdef DDB
1264 /*
1265  * Show the number of physical pages in each of the free lists.
1266  */
1267 DB_SHOW_COMMAND(freepages, db_show_freepages)
1268 {
1269         struct vm_freelist *fl;
1270         int flind, oind, pind, dom;
1271
1272         for (dom = 0; dom < vm_ndomains; dom++) {
1273                 db_printf("DOMAIN: %d\n", dom);
1274                 for (flind = 0; flind < vm_nfreelists; flind++) {
1275                         db_printf("FREE LIST %d:\n"
1276                             "\n  ORDER (SIZE)  |  NUMBER"
1277                             "\n              ", flind);
1278                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
1279                                 db_printf("  |  POOL %d", pind);
1280                         db_printf("\n--            ");
1281                         for (pind = 0; pind < VM_NFREEPOOL; pind++)
1282                                 db_printf("-- --      ");
1283                         db_printf("--\n");
1284                         for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
1285                                 db_printf("  %2.2d (%6.6dK)", oind,
1286                                     1 << (PAGE_SHIFT - 10 + oind));
1287                                 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
1288                                 fl = vm_phys_free_queues[dom][flind][pind];
1289                                         db_printf("  |  %6.6d", fl[oind].lcnt);
1290                                 }
1291                                 db_printf("\n");
1292                         }
1293                         db_printf("\n");
1294                 }
1295                 db_printf("\n");
1296         }
1297 }
1298 #endif